
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

import javax.management.ObjectName;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.ClusterId;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HealthCheckChore;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceNotFoundException;
import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotDisabledException;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
import org.apache.hadoop.hbase.exceptions.UnknownProtocolException;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.executor.ExecutorType;
import org.apache.hadoop.hbase.ipc.FifoRpcScheduler;
import org.apache.hadoop.hbase.ipc.RequestContext;
import org.apache.hadoop.hbase.ipc.RpcServer;
import org.apache.hadoop.hbase.ipc.RpcServer.BlockingServiceAndInterface;
import org.apache.hadoop.hbase.ipc.RpcServerInterface;
import org.apache.hadoop.hbase.ipc.ServerRpcController;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.balancer.BalancerChore;
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
import org.apache.hadoop.hbase.master.handler.DispatchMergingRegionHandler;
import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.RequestConverter;
import org.apache.hadoop.hbase.protobuf.ResponseConverter;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameStringPair;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AddColumnRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AddColumnResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AssignRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.AssignRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.BalanceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.BalanceResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateNamespaceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateNamespaceResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.CreateTableResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteColumnRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteColumnResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteNamespaceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteNamespaceResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteSnapshotRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteSnapshotResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteTableResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DisableTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DisableTableResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DispatchMergingRegionsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DispatchMergingRegionsResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.EnableCatalogJanitorRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.EnableCatalogJanitorResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.EnableTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.EnableTableResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetClusterStatusRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetClusterStatusResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetCompletedSnapshotsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetCompletedSnapshotsResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetNamespaceDescriptorRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetNamespaceDescriptorResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetSchemaAlterStatusRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetSchemaAlterStatusResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableDescriptorsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableDescriptorsResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableNamesRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableNamesResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsCatalogJanitorEnabledRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsCatalogJanitorEnabledResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshotDoneRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshotDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListNamespaceDescriptorsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListNamespaceDescriptorsResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListTableDescriptorsByNamespaceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListTableDescriptorsByNamespaceResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListTableNamesByNamespaceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ListTableNamesByNamespaceResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ModifyColumnRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ModifyColumnResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ModifyNamespaceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ModifyNamespaceResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ModifyTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ModifyTableResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.MoveRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.MoveRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.OfflineRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.OfflineRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.RestoreSnapshotRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.RestoreSnapshotResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.RunCatalogScanRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.RunCatalogScanResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.SetBalancerRunningRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.SetBalancerRunningResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ShutdownRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ShutdownResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.SnapshotRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.SnapshotResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.StopMasterRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.StopMasterResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.UnassignRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.UnassignRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdRequest;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdResponse;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerReportResponse;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorRequest;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorResponse;
import org.apache.hadoop.hbase.replication.regionserver.Replication;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.trace.SpanReceiverHost;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CompressionTest;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.util.InfoServer;
import org.apache.hadoop.hbase.util.JvmPauseMonitor;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Sleeper;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.VersionInfo;
import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.metrics.util.MBeanUtil;
import org.apache.hadoop.net.DNS;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Watcher;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.protobuf.Descriptors;
import com.google.protobuf.Message;
import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;
import com.google.protobuf.ServiceException;

/**
 * HMaster is the "master server" for HBase. An HBase cluster has one active
 * master.  If many masters are started, all compete.  Whichever wins goes on to
 * run the cluster.  All others park themselves in their constructor until
 * master or cluster shutdown or until the active master loses its lease in
 * zookeeper.  Thereafter, all running masters jostle to take over the master role.
 *
 * <p>The Master can be asked to shutdown the cluster. See {@link #shutdown()}.  In
 * this case it will tell all regionservers to go down and then wait on them
 * all reporting in that they are down.  This master will then shut itself down.
 *
 * <p>You can also shutdown just this master.  Call {@link #stopMaster()}.
 *
 * @see Watcher
 */
@InterfaceAudience.Private
@SuppressWarnings("deprecation")
public class HMaster extends HasThread implements MasterProtos.MasterService.BlockingInterface,
RegionServerStatusProtos.RegionServerStatusService.BlockingInterface,
MasterServices, Server {
  private static final Log LOG = LogFactory.getLog(HMaster.class.getName());

  // MASTER is the name of the webapp and the attribute name used when stuffing
  // this instance into the web context.
  public static final String MASTER = "master";

  // The configuration for the Master
  private final Configuration conf;
  // server for the web ui
  private InfoServer infoServer;

  // Our zk client.
  private ZooKeeperWatcher zooKeeper;
  // Manager and zk listener for master election
  private ActiveMasterManager activeMasterManager;
  // Region server tracker
  RegionServerTracker regionServerTracker;
  // Draining region server tracker
  private DrainingServerTracker drainingServerTracker;
  // Tracker for load balancer state
  private LoadBalancerTracker loadBalancerTracker;
  // master address manager and watcher
  private MasterAddressTracker masterAddressManager;

  // RPC server for the HMaster
  private final RpcServerInterface rpcServer;
  private JvmPauseMonitor pauseMonitor;
  // Set after we've called HBaseServer#openServer and are ready to receive RPCs.
  // Set back to false after we stop rpcServer.  Used by tests.
  private volatile boolean rpcServerOpen = false;

  /** Namespace management */
  private TableNamespaceManager tableNamespaceManager;
  private NamespaceJanitor namespaceJanitorChore;

  /**
   * This server's address.
   */
  private final InetSocketAddress isa;

  // Metrics for the HMaster
  private final MetricsMaster metricsMaster;
  // file system manager for the master FS operations
  private MasterFileSystem fileSystemManager;

  // server manager to deal with region server info
  ServerManager serverManager;

  // manager of assignment nodes in zookeeper
  AssignmentManager assignmentManager;
  // manager of catalog regions
  private CatalogTracker catalogTracker;
  // Cluster status zk tracker and local setter
  private ClusterStatusTracker clusterStatusTracker;

  // buffer for "fatal error" notices from region servers
  // in the cluster. This is only used for assisting
  // operations/debugging.
  private MemoryBoundedLogMessageBuffer rsFatals;

  // This flag is for stopping this Master instance.  It's set when we are
  // stopping or aborting
  private volatile boolean stopped = false;
  // Set on abort -- usually failure of our zk session.
  private volatile boolean abort = false;
  // flag set after we become the active master (used for testing)
  private volatile boolean isActiveMaster = false;

  // flag set after we complete initialization once active;
  // it is not private since it's used in unit tests
  volatile boolean initialized = false;

  // flag set after we complete assignMeta.
  private volatile boolean serverShutdownHandlerEnabled = false;

  // Instance of the hbase executor service.
  ExecutorService executorService;

  private LoadBalancer balancer;
  private Thread balancerChore;
  private Thread clusterStatusChore;
  private ClusterStatusPublisher clusterStatusPublisherChore = null;

  private CatalogJanitor catalogJanitorChore;
  private LogCleaner logCleaner;
  private HFileCleaner hfileCleaner;

  private MasterCoprocessorHost cpHost;
  private final ServerName serverName;

  private TableDescriptors tableDescriptors;

  // Table level lock manager for schema changes
  private TableLockManager tableLockManager;

  // Time stamps for when a hmaster was started and when it became active
  private long masterStartTime;
  private long masterActiveTime;

  /** time interval for emitting metrics values */
  private final int msgInterval;
  /**
   * MX Bean for MasterInfo
   */
  private ObjectName mxBean = null;

  // should we check the compression codec type at master side, default true, HBASE-6370
  private final boolean masterCheckCompression;

  private SpanReceiverHost spanReceiverHost;

  private Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();

  // monitor for snapshots of hbase tables
  private SnapshotManager snapshotManager;

  /** The health check chore. */
  private HealthCheckChore healthCheckChore;

  /**
   * Whether we are in distributedLogReplay mode. When true, SplitLogWorker directly
   * replays WAL edits to newly assigned region servers instead of creating
   * recovered.edits files.
   */
  private final boolean distributedLogReplay;

  /** flag used in test cases in order to simulate RS failures during master initialization */
  private volatile boolean initializationBeforeMetaAssignment = false;

  /** The following is used in master recovery scenario to re-register listeners */
  private List<ZooKeeperListener> registeredZKListenersBeforeRecovery;

  /**
   * Initializes the HMaster. The steps are as follows:
   * <p>
   * <ol>
   * <li>Initialize HMaster RPC and address.
   * <li>Connect to ZooKeeper.
   * </ol>
   * <p>
   * Remaining steps of initialization occur in {@link #run()} so that they
   * run in their own thread rather than within the context of the constructor.
   * @throws InterruptedException
   */
  public HMaster(final Configuration conf)
  throws IOException, KeeperException, InterruptedException {
    this.conf = new Configuration(conf);
    // Disable the block cache on the master
    this.conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
    FSUtils.setupShortCircuitRead(conf);
    // Server to handle client requests.
    String hostname = Strings.domainNamePointerToHostName(DNS.getDefaultHost(
      conf.get("hbase.master.dns.interface", "default"),
      conf.get("hbase.master.dns.nameserver", "default")));
    int port = conf.getInt(HConstants.MASTER_PORT, HConstants.DEFAULT_MASTER_PORT);
    // Verify that the hostname is resolvable
    InetSocketAddress initialIsa = new InetSocketAddress(hostname, port);
    if (initialIsa.getAddress() == null) {
      throw new IllegalArgumentException("Failed resolve of hostname " + initialIsa);
    }
    // Verify that the bind address is resolvable, if set
    String bindAddress = conf.get("hbase.master.ipc.address");
    if (bindAddress != null) {
      initialIsa = new InetSocketAddress(bindAddress, port);
      if (initialIsa.getAddress() == null) {
        throw new IllegalArgumentException("Failed resolve of bind address " + initialIsa);
      }
    }
    String name = "master/" + initialIsa.toString();
    // Set how many times to retry talking to another server over HConnection.
    HConnectionManager.setServerSideHConnectionRetries(this.conf, name, LOG);
    int numHandlers = conf.getInt(HConstants.MASTER_HANDLER_COUNT,
      conf.getInt(HConstants.REGION_SERVER_HANDLER_COUNT, HConstants.DEFAULT_MASTER_HANLDER_COUNT));
    this.rpcServer = new RpcServer(this, name, getServices(),
      initialIsa, // BindAddress is IP we got for this server.
      conf,
      new FifoRpcScheduler(conf, numHandlers));
    // Set our address.
    this.isa = this.rpcServer.getListenerAddress();
    // We don't want to pass isa's hostname here since it could be 0.0.0.0
    this.serverName = ServerName.valueOf(hostname, this.isa.getPort(), System.currentTimeMillis());
    this.rsFatals = new MemoryBoundedLogMessageBuffer(
      conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));

    // login the zookeeper client principal (if using security)
    ZKUtil.loginClient(this.conf, "hbase.zookeeper.client.keytab.file",
      "hbase.zookeeper.client.kerberos.principal", this.isa.getHostName());

    // initialize server principal (if using secure Hadoop)
    UserProvider provider = UserProvider.instantiate(conf);
    provider.login("hbase.master.keytab.file",
      "hbase.master.kerberos.principal", this.isa.getHostName());

    LOG.info("hbase.rootdir=" + FSUtils.getRootDir(this.conf) +
        ", hbase.cluster.distributed=" + this.conf.getBoolean("hbase.cluster.distributed", false));

    // set the thread name now that we have an address
    setName(MASTER + ":" + this.serverName.toShortString());

    Replication.decorateMasterConfiguration(this.conf);

    // Hack! Maps DFSClient => Master for logs.  HDFS made this
    // config param for task trackers, but we can piggyback off of it.
    if (this.conf.get("mapred.task.id") == null) {
      this.conf.set("mapred.task.id", "hb_m_" + this.serverName.toString());
    }

    this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this, true);
    this.rpcServer.startThreads();
    this.pauseMonitor = new JvmPauseMonitor(conf);
    this.pauseMonitor.start();

    // metrics interval: using the same property as region server.
    this.msgInterval = conf.getInt("hbase.regionserver.msginterval", 3 * 1000);

    // should we check the compression codec type at master side, default true, HBASE-6370
    this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);

    this.metricsMaster = new MetricsMaster(new MetricsMasterWrapperImpl(this));

    // Health checker thread.
    int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
      HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
    if (isHealthCheckerConfigured()) {
      healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
    }

    // Do we publish the status?
    boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
        HConstants.STATUS_PUBLISHED_DEFAULT);
    Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
        conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
            ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
            ClusterStatusPublisher.Publisher.class);

    if (shouldPublish) {
      if (publisherClass == null) {
        LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
            ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS +
            " is not set - not publishing status");
      } else {
        clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
        Threads.setDaemonThreadRunning(clusterStatusPublisherChore.getThread());
      }
    }

    distributedLogReplay = this.conf.getBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY,
      HConstants.DEFAULT_DISTRIBUTED_LOG_REPLAY_CONFIG);
  }
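
  // A hedged usage sketch (assumptions: a reachable ZooKeeper quorum and
  // default HBaseConfiguration settings).  Tests typically drive the
  // constructor directly as below; production deployments instead go through
  // the master start scripts:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   HMaster master = new HMaster(conf);  // binds RPC, logs in, connects to ZK
  //   master.start();                      // run() completes initialization
  //   ...
  //   master.stopMaster();                 // ask just this master to stop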

  /**
   * @return list of blocking services and their security info classes that this server supports
   */
  private List<BlockingServiceAndInterface> getServices() {
    List<BlockingServiceAndInterface> bssi = new ArrayList<BlockingServiceAndInterface>(3);
    bssi.add(new BlockingServiceAndInterface(
        MasterProtos.MasterService.newReflectiveBlockingService(this),
        MasterProtos.MasterService.BlockingInterface.class));
    bssi.add(new BlockingServiceAndInterface(
        RegionServerStatusProtos.RegionServerStatusService.newReflectiveBlockingService(this),
        RegionServerStatusProtos.RegionServerStatusService.BlockingInterface.class));
    return bssi;
  }

  /**
   * Stall startup if we are designated a backup master; i.e. we want someone
   * else to become the master before proceeding.
   * @param c configuration
   * @param amm the active master manager to poll
   * @throws InterruptedException
   */
  private static void stallIfBackupMaster(final Configuration c,
      final ActiveMasterManager amm)
  throws InterruptedException {
    // If we're a backup master, stall until the primary writes its address
    if (!c.getBoolean(HConstants.MASTER_TYPE_BACKUP,
      HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
      return;
    }
    LOG.debug("HMaster started in backup mode.  " +
      "Stalling until master znode is written.");
    // This will only be a minute or so while the cluster starts up,
    // so don't worry about setting watches on the parent znode
    while (!amm.isActiveMaster()) {
      LOG.debug("Waiting for master address ZNode to be written " +
        "(Also watching cluster state node)");
      Thread.sleep(
        c.getInt(HConstants.ZK_SESSION_TIMEOUT, HConstants.DEFAULT_ZK_SESSION_TIMEOUT));
    }
  }
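
  // Hedged example of the configuration the check above keys off: marking a
  // process as a backup master makes it stall here until another master wins
  // the election (typically set via the --backup start option):
  //
  //   conf.setBoolean(HConstants.MASTER_TYPE_BACKUP, true);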

  MetricsMaster getMetrics() {
    return metricsMaster;
  }

  /**
   * Main processing loop for the HMaster.
   * <ol>
   * <li>Block until becoming active master
   * <li>Finish initialization via finishInitialization(MonitoredTask)
   * <li>Enter loop until we are stopped
   * <li>Stop services and perform cleanup once stopped
   * </ol>
   */
  @Override
  public void run() {
    MonitoredTask startupStatus =
      TaskMonitor.get().createStatus("Master startup");
    startupStatus.setDescription("Master startup");
    masterStartTime = System.currentTimeMillis();
    try {
      this.masterAddressManager = new MasterAddressTracker(getZooKeeperWatcher(), this);
      this.masterAddressManager.start();

      // Put up info server.
      int port = this.conf.getInt("hbase.master.info.port", 60010);
      if (port >= 0) {
        String a = this.conf.get("hbase.master.info.bindAddress", "0.0.0.0");
        this.infoServer = new InfoServer(MASTER, a, port, false, this.conf);
        this.infoServer.addServlet("status", "/master-status", MasterStatusServlet.class);
        this.infoServer.addServlet("dump", "/dump", MasterDumpServlet.class);
        this.infoServer.setAttribute(MASTER, this);
        this.infoServer.start();
      }

      this.registeredZKListenersBeforeRecovery = this.zooKeeper.getListeners();
      /*
       * Block on becoming the active master.
       *
       * We race with other masters to write our address into ZooKeeper.  If we
       * succeed, we are the primary/active master and finish initialization.
       *
       * If we do not succeed, there is another active master and we should
       * now wait until it dies to try and become the next active master.  If we
       * do not succeed on our first attempt, this is no longer a cluster startup.
       */
      becomeActiveMaster(startupStatus);

      // We are either the active master or we were asked to shutdown
      if (!this.stopped) {
        finishInitialization(startupStatus, false);
        loop();
      }
    } catch (Throwable t) {
      // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
      if (t instanceof NoClassDefFoundError &&
          t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
        // improved error message for this special case
        abort("HBase is having a problem with its Hadoop jars.  You may need to "
            + "recompile HBase against Hadoop version "
            + org.apache.hadoop.util.VersionInfo.getVersion()
            + " or change your hadoop jars to start properly", t);
      } else {
        abort("Unhandled exception. Starting shutdown.", t);
      }
    } finally {
      startupStatus.cleanup();

      stopChores();
      // Wait for all the remaining region servers to report in IFF we were
      // running a cluster shutdown AND we were NOT aborting.
      if (!this.abort && this.serverManager != null &&
          this.serverManager.isClusterShutdown()) {
        this.serverManager.letRegionServersShutdown();
      }
      stopServiceThreads();
      // Stop services started for both backup and active masters
      if (this.activeMasterManager != null) this.activeMasterManager.stop();
      if (this.catalogTracker != null) this.catalogTracker.stop();
      if (this.serverManager != null) this.serverManager.stop();
      if (this.assignmentManager != null) this.assignmentManager.stop();
      if (this.fileSystemManager != null) this.fileSystemManager.stop();
      if (this.snapshotManager != null) this.snapshotManager.stop("server shutting down.");
      this.zooKeeper.close();
    }
    LOG.info("HMaster main thread exiting");
  }

  /**
   * Try becoming active master.
   * @param startupStatus
   * @return True if we could successfully become the active master.
   * @throws InterruptedException
   */
  private boolean becomeActiveMaster(MonitoredTask startupStatus)
  throws InterruptedException {
    // TODO: This is wrong!!!! Should have new servername if we restart ourselves,
    // if we come back to life.
    this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName,
        this);
    this.zooKeeper.registerListener(activeMasterManager);
    stallIfBackupMaster(this.conf, this.activeMasterManager);

    // The ClusterStatusTracker is setup before the other
    // ZKBasedSystemTrackers because it's needed by the activeMasterManager
    // to check if the cluster should be shutdown.
    this.clusterStatusTracker = new ClusterStatusTracker(getZooKeeper(), this);
    this.clusterStatusTracker.start();
    return this.activeMasterManager.blockUntilBecomingActiveMaster(startupStatus);
  }

  /**
   * Initialize all ZK based system trackers.
   * @throws IOException
   * @throws InterruptedException
   */
  void initializeZKBasedSystemTrackers() throws IOException,
      InterruptedException, KeeperException {
    this.catalogTracker = createCatalogTracker(this.zooKeeper, this.conf, this);
    this.catalogTracker.start();

    this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
    this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
    this.loadBalancerTracker.start();
    this.assignmentManager = new AssignmentManager(this, serverManager,
      this.catalogTracker, this.balancer, this.executorService, this.metricsMaster,
      this.tableLockManager);
    zooKeeper.registerListenerFirst(assignmentManager);

    this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
        this.serverManager);
    this.regionServerTracker.start();

    this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
      this.serverManager);
    this.drainingServerTracker.start();

    // Set the cluster as up.  If new RSs, they'll be waiting on this before
    // going ahead with their startup.
    boolean wasUp = this.clusterStatusTracker.isClusterUp();
    if (!wasUp) this.clusterStatusTracker.setClusterUp();

    LOG.info("Server active/primary master=" + this.serverName +
        ", sessionid=0x" +
        Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
        ", setting cluster-up flag (Was=" + wasUp + ")");

    // create the snapshot manager
    this.snapshotManager = new SnapshotManager(this, this.metricsMaster);
  }

  /**
   * Create CatalogTracker.
   * In its own method so it can be intercepted and mocked in tests.
   * @param zk If zk is null, we'll create an instance (and shut it down
   * when {@link #stop(String)} is called) else we'll use what is passed.
   * @param conf
   * @param abortable If fatal exception we'll call abort on this.  May be null.
   * If it is, we'll use the Connection associated with the passed
   * {@link Configuration} as our {@link Abortable}.
   * ({@link Object#wait(long)} when passed a <code>0</code> waits forever.)
   * @throws IOException
   */
  CatalogTracker createCatalogTracker(final ZooKeeperWatcher zk,
      final Configuration conf, Abortable abortable)
  throws IOException {
    return new CatalogTracker(zk, conf, abortable);
  }
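
  // A hedged test-time sketch of the interception the javadoc above alludes
  // to; MockCatalogTracker is a hypothetical test double, not a class in this
  // code base:
  //
  //   HMaster master = new HMaster(conf) {
  //     @Override
  //     CatalogTracker createCatalogTracker(ZooKeeperWatcher zk,
  //         Configuration c, Abortable abortable) throws IOException {
  //       return new MockCatalogTracker(zk, c, abortable);
  //     }
  //   };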

  // Check if we should stop every 100ms
  private Sleeper stopSleeper = new Sleeper(100, this);

  private void loop() {
    long lastMsgTs = 0L;
    long now = 0L;
    while (!this.stopped) {
      now = System.currentTimeMillis();
      if ((now - lastMsgTs) >= this.msgInterval) {
        doMetrics();
        lastMsgTs = System.currentTimeMillis();
      }
      stopSleeper.sleep();
    }
  }

  /**
   * Emit the HMaster metrics, such as region-in-transition metrics.
   * Wrapped in a try block to be sure a metrics failure doesn't abort the HMaster.
   */
  private void doMetrics() {
    try {
      this.assignmentManager.updateRegionsInTransitionMetrics();
    } catch (Throwable e) {
      LOG.error("Couldn't update metrics: " + e.getMessage());
    }
  }

  /**
   * Finish initialization of HMaster after becoming the primary master.
   *
   * <ol>
   * <li>Initialize master components - file system manager, server manager,
   *     assignment manager, region server tracker, catalog tracker, etc</li>
   * <li>Start necessary service threads - rpc server, info server,
   *     executor services, etc</li>
   * <li>Set cluster as UP in ZooKeeper</li>
   * <li>Wait for RegionServers to check-in</li>
   * <li>Split logs and perform data recovery, if necessary</li>
   * <li>Ensure assignment of meta regions</li>
   * <li>Handle either fresh cluster start or master failover</li>
   * </ol>
   *
   * @param masterRecovery
   *
   * @throws IOException
   * @throws InterruptedException
   * @throws KeeperException
   */
  private void finishInitialization(MonitoredTask status, boolean masterRecovery)
  throws IOException, InterruptedException, KeeperException {

    isActiveMaster = true;

    /*
     * We are the active master now... go initialize components we need to run.
     * Note, there may be dross in zk from previous runs; it'll get addressed
     * below after we determine if cluster startup or failover.
     */

    status.setStatus("Initializing Master file system");

    this.masterActiveTime = System.currentTimeMillis();
    // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
    this.fileSystemManager = new MasterFileSystem(this, this, masterRecovery);

    this.tableDescriptors =
      new FSTableDescriptors(this.fileSystemManager.getFileSystem(),
      this.fileSystemManager.getRootDir());

    // publish cluster ID
    status.setStatus("Publishing Cluster ID in ZooKeeper");
    ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());

    if (!masterRecovery) {
      this.executorService = new ExecutorService(getServerName().toShortString());
      this.serverManager = createServerManager(this, this);
    }

    // Initialize the table lock manager, and ensure that all write locks held
    // previously are invalidated
    this.tableLockManager = TableLockManager.createTableLockManager(conf, zooKeeper, serverName);
    if (!masterRecovery) {
      this.tableLockManager.reapWriteLocks();
    }

    status.setStatus("Initializing ZK system trackers");
    initializeZKBasedSystemTrackers();

    if (!masterRecovery) {
      // initialize master side coprocessors before we start handling requests
      status.setStatus("Initializing master coprocessors");
      this.cpHost = new MasterCoprocessorHost(this, this.conf);

      spanReceiverHost = SpanReceiverHost.getInstance(getConfiguration());

      // start up all service threads.
      status.setStatus("Initializing master service threads");
      startServiceThreads();
    }

    // Wait for region servers to report in.
    this.serverManager.waitForRegionServers(status);
    // Check zk for region servers that are up but didn't register
    for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
      // The isServerOnline check is opportunistic, correctness is handled inside
      if (!this.serverManager.isServerOnline(sn)
          && serverManager.checkAndRecordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD)) {
        LOG.info("Registered server found up in zk but who has not yet reported in: " + sn);
      }
    }

    if (!masterRecovery) {
      this.assignmentManager.startTimeOutMonitor();
    }

    // Get the list of previously failed RSs which need log splitting work. We
    // recover hbase:meta region servers inside master initialization and
    // handle other failed servers in SSH in order to start up the master node ASAP.
    Set<ServerName> previouslyFailedServers = this.fileSystemManager
        .getFailedServersFromLogFolders();

    // remove stale recovering regions from previous run
    this.fileSystemManager.removeStaleRecoveringRegionsFromZK(previouslyFailedServers);

    // log splitting for the hbase:meta server
    ServerName oldMetaServerLocation = this.catalogTracker.getMetaLocation();
    if (oldMetaServerLocation != null && previouslyFailedServers.contains(oldMetaServerLocation)) {
      splitMetaLogBeforeAssignment(oldMetaServerLocation);
      // Note: we can't remove oldMetaServerLocation from the previouslyFailedServers
      // list because it may also host user regions
    }
    Set<ServerName> previouslyFailedMetaRSs = getPreviouslyFailedMetaServersFromZK();
    // We need the union of previouslyFailedMetaRSs recorded in ZK and
    // previouslyFailedServers, instead of previouslyFailedMetaRSs alone, to address
    // the following two situations:
    // 1) the chained failure situation (recovery failed multiple times in a row);
    // 2) the master gets killed right before it could delete the recovering hbase:meta
    //    from ZK while the same server still has non-meta wals to be replayed, so that
    //    removeStaleRecoveringRegionsFromZK can't delete the stale hbase:meta region.
    // Passing more servers into splitMetaLog is all right. If a server doesn't have
    // an hbase:meta wal, it is a no-op for that server.
    previouslyFailedMetaRSs.addAll(previouslyFailedServers);

    this.initializationBeforeMetaAssignment = true;

    // initialize the load balancer
    this.balancer.setClusterStatus(getClusterStatus());
    this.balancer.setMasterServices(this);
    this.balancer.initialize();

    // Make sure meta is assigned before proceeding.
    status.setStatus("Assigning Meta Region");
    assignMeta(status, previouslyFailedMetaRSs);
    // Check if the master is shutting down: the above assignMeta can return
    // even if hbase:meta isn't assigned when the master is shutting down
    if (this.stopped) return;

    status.setStatus("Submitting log splitting work for previously failed region servers");
    // The master has recovered the hbase:meta region server, and we put
    // other failed region servers in a queue to be handled later by SSH
    for (ServerName tmpServer : previouslyFailedServers) {
      this.serverManager.processDeadServer(tmpServer, true);
    }

    // Update meta with new PB serialization if required, i.e. migrate all HRI to PB serialization
    // in meta. This must happen before we assign all user regions or else the assignment will
    // fail.
    org.apache.hadoop.hbase.catalog.MetaMigrationConvertingToPB
      .updateMetaIfNecessary(this);

    // Fix up assignment manager status
    status.setStatus("Starting assignment manager");
    this.assignmentManager.joinCluster();

    // set cluster status again after user regions are assigned
    this.balancer.setClusterStatus(getClusterStatus());

    if (!masterRecovery) {
      // Start balancer and meta catalog janitor after meta and regions have
      // been assigned.
      status.setStatus("Starting balancer and catalog janitor");
      this.clusterStatusChore = getAndStartClusterStatusChore(this);
      this.balancerChore = getAndStartBalancerChore(this);
      this.catalogJanitorChore = new CatalogJanitor(this, this);
      startCatalogJanitorChore();
    }

    status.setStatus("Starting namespace manager");
    initNamespace();

    if (this.cpHost != null) {
      try {
        this.cpHost.preMasterInitialization();
      } catch (IOException e) {
        LOG.error("Coprocessor preMasterInitialization() hook failed", e);
      }
    }

    status.markComplete("Initialization successful");
    LOG.info("Master has completed initialization");
    initialized = true;
    // Clear dead servers that have the same host name and port as an online server,
    // because we do not remove a dead server with the same hostname and port as an
    // RS which is trying to check in before master initialization. See HBASE-5916.
    this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();

    if (!masterRecovery) {
      if (this.cpHost != null) {
        // don't let cp initialization errors kill the master
        try {
          this.cpHost.postStartMaster();
        } catch (IOException ioe) {
          LOG.error("Coprocessor postStartMaster() hook failed", ioe);
        }
      }
    }
  }

  /**
   * Useful for testing purposes, e.g. in master restart scenarios.
   */
  protected void startCatalogJanitorChore() {
    Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
  }

  /**
   * Useful for testing purposes, e.g. in master restart scenarios.
   */
  protected void startNamespaceJanitorChore() {
    Threads.setDaemonThreadRunning(namespaceJanitorChore.getThread());
  }

  /**
   * Create a {@link ServerManager} instance.
   * @param master
   * @param services
   * @return An instance of {@link ServerManager}
   * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
   * @throws IOException
   */
  ServerManager createServerManager(final Server master,
      final MasterServices services)
  throws IOException {
    // We put this out here in a method so we can do a Mockito.spy and stub it
    // out with a mocked-up ServerManager.
    return new ServerManager(master, services);
  }
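
  // A hedged sketch of the Mockito spy/stub the comment above refers to
  // (test code; mockedServerManager is a hypothetical mock):
  //
  //   HMaster master = Mockito.spy(new HMaster(conf));
  //   Mockito.doReturn(mockedServerManager).when(master)
  //       .createServerManager(Mockito.any(Server.class),
  //           Mockito.any(MasterServices.class));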

  /**
   * Check <code>hbase:meta</code> is assigned. If not, assign it.
   * @param status MonitoredTask
   * @param previouslyFailedMetaRSs
   * @throws InterruptedException
   * @throws IOException
   * @throws KeeperException
   */
  void assignMeta(MonitoredTask status, Set<ServerName> previouslyFailedMetaRSs)
      throws InterruptedException, IOException, KeeperException {
    // Work on meta region
    int assigned = 0;
    long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
    status.setStatus("Assigning hbase:meta region");

    RegionStates regionStates = assignmentManager.getRegionStates();
    regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO);
    boolean rit = this.assignmentManager
      .processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
    boolean metaRegionLocation = this.catalogTracker.verifyMetaRegionLocation(timeout);
    ServerName currentMetaServer = this.catalogTracker.getMetaLocation();
    if (!metaRegionLocation) {
      // Meta location is not verified. It should be in transition, or offline.
      // We will wait for it to be assigned in enableSSHandWaitForMeta below.
      assigned++;
      if (!rit) {
        // Assign meta since not already in transition
        if (currentMetaServer != null) {
          // If the meta server is not known to be dead or online,
          // just split the meta log, and don't expire it since this
          // could be a full cluster restart. Otherwise, we will think
          // this is a failover and lose previous region locations.
          // If it is really a failover case, AM will find out in rebuilding
          // user regions. Otherwise, we are good since all logs are split
          // or known to be replayed before user regions are assigned.
          if (serverManager.isServerOnline(currentMetaServer)) {
            LOG.info("Forcing expire of " + currentMetaServer);
            serverManager.expireServer(currentMetaServer);
          }
          splitMetaLogBeforeAssignment(currentMetaServer);
          previouslyFailedMetaRSs.add(currentMetaServer);
        }
        assignmentManager.assignMeta();
      }
    } else {
      // Region already assigned. We didn't assign it. Add to in-memory state.
      regionStates.updateRegionState(
        HRegionInfo.FIRST_META_REGIONINFO, State.OPEN, currentMetaServer);
      this.assignmentManager.regionOnline(
        HRegionInfo.FIRST_META_REGIONINFO, currentMetaServer);
    }

    enableMeta(TableName.META_TABLE_NAME);

    if (this.distributedLogReplay && (!previouslyFailedMetaRSs.isEmpty())) {
      // In WAL-edit replay mode, the new hbase:meta RS must be assigned first
      status.setStatus("replaying log for Meta Region");
      this.fileSystemManager.splitMetaLog(previouslyFailedMetaRSs);
    }

    // Make sure an hbase:meta location is set. We need to enable SSH here since
    // if the meta region server dies at this time, we need it to be re-assigned
    // by SSH so that system tables can be assigned.
    // No need to wait for meta when assigned == 0, i.e. meta was just verified.
    enableServerShutdownHandler(assigned != 0);

    LOG.info("hbase:meta assigned=" + assigned + ", rit=" + rit +
      ", location=" + catalogTracker.getMetaLocation());
    status.setStatus("META assigned.");
  }

  void initNamespace() throws IOException {
    // create namespace manager
    tableNamespaceManager = new TableNamespaceManager(this);
    tableNamespaceManager.start();
  }

  private void splitMetaLogBeforeAssignment(ServerName currentMetaServer) throws IOException {
    if (this.distributedLogReplay) {
      // In log replay mode, we mark hbase:meta region as recovering in ZK
      Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
      regions.add(HRegionInfo.FIRST_META_REGIONINFO);
      this.fileSystemManager.prepareLogReplay(currentMetaServer, regions);
    } else {
      // In recovered.edits mode: create recovered edits file for hbase:meta server
      this.fileSystemManager.splitMetaLog(currentMetaServer);
    }
  }
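
  // Hedged note: the branch taken above is controlled by the same
  // distributed-log-replay flag the constructor reads into
  // this.distributedLogReplay; enabling it is a one-line configuration change:
  //
  //   conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);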

  private void enableServerShutdownHandler(
      final boolean waitForMeta) throws IOException, InterruptedException {
    // If ServerShutdownHandler is disabled, we enable it and expire those dead
    // but not yet expired servers. This is required so that if meta is being
    // assigned to a server which dies after assignMeta starts the assignment,
    // SSH can re-assign it. Otherwise, we will be
    // stuck here waiting forever if waitForMeta is specified.
    if (!serverShutdownHandlerEnabled) {
      serverShutdownHandlerEnabled = true;
      this.serverManager.processQueuedDeadServers();
    }

    if (waitForMeta) {
      this.catalogTracker.waitForMeta();
      // The above check waits for general meta availability, but this does not
      // guarantee that the transition has completed
      this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
    }
  }

  private void enableMeta(TableName metaTableName) {
    if (!this.assignmentManager.getZKTable().isEnabledTable(metaTableName)) {
      this.assignmentManager.setEnabledTable(metaTableName);
    }
  }

  /**
   * Returns the set of region server names recorded under the hbase:meta
   * recovering-region ZK node.
   * @return Set of meta server names which were recorded in ZK
   * @throws KeeperException
   */
  private Set<ServerName> getPreviouslyFailedMetaServersFromZK() throws KeeperException {
    Set<ServerName> result = new HashSet<ServerName>();
    String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.recoveringRegionsZNode,
      HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
    List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
    if (regionFailedServers == null) return result;

    for (String failedServer : regionFailedServers) {
      ServerName server = ServerName.parseServerName(failedServer);
      result.add(server);
    }
    return result;
  }
1108 
1109   @Override
1110   public TableDescriptors getTableDescriptors() {
1111     return this.tableDescriptors;
1112   }
1113 
1114   /** @return InfoServer object. Maybe null.*/
1115   public InfoServer getInfoServer() {
1116     return this.infoServer;
1117   }
1118 
1119   @Override
1120   public Configuration getConfiguration() {
1121     return this.conf;
1122   }
1123 
1124   @Override
1125   public ServerManager getServerManager() {
1126     return this.serverManager;
1127   }
1128 
1129   @Override
1130   public ExecutorService getExecutorService() {
1131     return this.executorService;
1132   }
1133 
1134   @Override
1135   public MasterFileSystem getMasterFileSystem() {
1136     return this.fileSystemManager;
1137   }
1138 
1139   /**
1140    * Get the ZK wrapper object - needed by master_jsp.java
1141    * @return the zookeeper wrapper
1142    */
1143   public ZooKeeperWatcher getZooKeeperWatcher() {
1144     return this.zooKeeper;
1145   }
1146 
1147   public ActiveMasterManager getActiveMasterManager() {
1148     return this.activeMasterManager;
1149   }
1150 
1151   public MasterAddressTracker getMasterAddressManager() {
1152     return this.masterAddressManager;
1153   }
1154 
1155   /*
1156    * Start up all services. If any of these threads gets an unhandled exception
1157    * then they just die with a logged message.  This should be fine because
1158    * in general, we do not expect the master to get such unhandled exceptions
1159    * as OOMEs; it should be lightly loaded. See what HRegionServer does if
1160    * we ever need to install an uncaught exception handler.
1161    */
1162   void startServiceThreads() throws IOException {
1163     // Start the executor service pools
1164     this.executorService.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
1165       conf.getInt("hbase.master.executor.openregion.threads", 5));
1166     this.executorService.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
1167       conf.getInt("hbase.master.executor.closeregion.threads", 5));
1168     this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
1169       conf.getInt("hbase.master.executor.serverops.threads", 5));
1170     this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
1171       conf.getInt("hbase.master.executor.serverops.threads", 5));
1172     this.executorService.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS,
1173       conf.getInt("hbase.master.executor.logreplayops.threads", 10));
1174 
1175     // We depend on there being only one instance of this executor running
1176     // at a time.  To allow concurrency, we would need fencing of
1177     // enable/disable of tables.
1178     this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
1179 
1180     // Start log cleaner thread
1181     String n = Thread.currentThread().getName();
1182     int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
1183     this.logCleaner =
1184       new LogCleaner(cleanerInterval,
1185         this, conf, getMasterFileSystem().getFileSystem(),
1186         getMasterFileSystem().getOldLogDir());
1187     Threads.setDaemonThreadRunning(logCleaner.getThread(), n + ".oldLogCleaner");
1188 
1189     // Start the hfile archive cleaner thread
1190     Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
1191     this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
1192         .getFileSystem(), archiveDir);
1193     Threads.setDaemonThreadRunning(hfileCleaner.getThread(), n + ".archivedHFileCleaner");
1194 
1195     // Start the health checker
1196     if (this.healthCheckChore != null) {
1197       Threads.setDaemonThreadRunning(this.healthCheckChore.getThread(), n + ".healthChecker");
1198     }
1199 
1200     // Start allowing requests to happen.
1201     this.rpcServer.openServer();
1202     this.rpcServerOpen = true;
1203     if (LOG.isTraceEnabled()) {
1204       LOG.trace("Started service threads");
1205     }
1206   }
1207 
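       /**
        * A minimal tuning sketch (hypothetical helper, not part of the original
        * class): every pool size and interval used in startServiceThreads() is a
        * plain Configuration integer, so deployments can resize them before the
        * master starts; the same keys can equally be set in hbase-site.xml.
        */
       static void exampleTuneServiceThreads(final Configuration conf) {
         conf.setInt("hbase.master.executor.openregion.threads", 10);
         conf.setInt("hbase.master.executor.serverops.threads", 10);
         conf.setInt("hbase.master.cleaner.interval", 30 * 1000); // run cleaners every 30s
       }
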
1208   /**
1209    * Use this when trying to figure out when it's OK to send in RPCs.  Used by tests.
1210    * @return True if we have successfully run {@link RpcServer#openServer()}
1211    */
1212   boolean isRpcServerOpen() {
1213     return this.rpcServerOpen;
1214   }
1215 
1216   private void stopServiceThreads() {
1217     if (LOG.isDebugEnabled()) {
1218       LOG.debug("Stopping service threads");
1219     }
1220     if (this.rpcServer != null) this.rpcServer.stop();
1221     this.rpcServerOpen = false;
1222     // Clean up and close up shop
1223     if (this.logCleaner != null) this.logCleaner.interrupt();
1224     if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
1225 
1226     if (this.infoServer != null) {
1227       LOG.info("Stopping infoServer");
1228       try {
1229         this.infoServer.stop();
1230       } catch (Exception ex) {
1231         LOG.error("Failed to stop infoServer", ex);
1232       }
1233     }
1234     if (this.executorService != null) this.executorService.shutdown();
1235     if (this.healthCheckChore != null) {
1236       this.healthCheckChore.interrupt();
1237     }
1238     if (this.pauseMonitor != null) {
1239       this.pauseMonitor.stop();
1240     }
1241   }
1242 
1243   private static Thread getAndStartClusterStatusChore(HMaster master) {
1244     if (master == null || master.balancer == null) {
1245       return null;
1246     }
1247     Chore chore = new ClusterStatusChore(master, master.balancer);
1248     return Threads.setDaemonThreadRunning(chore.getThread());
1249   }
1250 
1251   private static Thread getAndStartBalancerChore(final HMaster master) {
1252     // Start up the load balancer chore
1253     Chore chore = new BalancerChore(master);
1254     return Threads.setDaemonThreadRunning(chore.getThread());
1255   }
1256 
1257   private void stopChores() {
1258     if (this.balancerChore != null) {
1259       this.balancerChore.interrupt();
1260     }
1261     if (this.clusterStatusChore != null) {
1262       this.clusterStatusChore.interrupt();
1263     }
1264     if (this.catalogJanitorChore != null) {
1265       this.catalogJanitorChore.interrupt();
1266     }
1267     if (this.clusterStatusPublisherChore != null) {
1268       clusterStatusPublisherChore.interrupt();
1269     }
1270     if (this.namespaceJanitorChore != null) {
1271       namespaceJanitorChore.interrupt();
1272     }
1273   }
1274 
1275   @Override
1276   public RegionServerStartupResponse regionServerStartup(
1277       RpcController controller, RegionServerStartupRequest request) throws ServiceException {
1278     // Register with server manager
1279     try {
1280       InetAddress ia = getRemoteInetAddress(request.getPort(), request.getServerStartCode());
1281       ServerName rs = this.serverManager.regionServerStartup(ia, request.getPort(),
1282         request.getServerStartCode(), request.getServerCurrentTime());
1283 
1284       // Send back some config info
1285       RegionServerStartupResponse.Builder resp = createConfigurationSubset();
1286       NameStringPair.Builder entry = NameStringPair.newBuilder()
1287         .setName(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER)
1288         .setValue(rs.getHostname());
1289       resp.addMapEntries(entry.build());
1290 
1291       return resp.build();
1292     } catch (IOException ioe) {
1293       throw new ServiceException(ioe);
1294     }
1295   }
1296 
1297   /**
1298    * @return The remote side's InetAddress
1299    * @throws UnknownHostException
1300    */
1301   InetAddress getRemoteInetAddress(final int port, final long serverStartCode)
1302   throws UnknownHostException {
1303     // Do it out here in its own little method so can fake an address when
1304     // mocking up in tests.
1305     return RpcServer.getRemoteIp();
1306   }
1307 
1308   /**
1309    * @return Subset of configuration to pass initializing regionservers: e.g.
1310    * the filesystem to use and root directory to use.
1311    */
1312   protected RegionServerStartupResponse.Builder createConfigurationSubset() {
1313     RegionServerStartupResponse.Builder resp = addConfig(
1314       RegionServerStartupResponse.newBuilder(), HConstants.HBASE_DIR);
1315     return addConfig(resp, "fs.default.name");
1316   }
1317 
1318   private RegionServerStartupResponse.Builder addConfig(
1319       final RegionServerStartupResponse.Builder resp, final String key) {
1320     NameStringPair.Builder entry = NameStringPair.newBuilder()
1321       .setName(key)
1322       .setValue(this.conf.get(key));
1323     resp.addMapEntries(entry.build());
1324     return resp;
1325   }
1326 
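       /**
        * Illustrative sketch (assumed extension, not part of the original
        * class): createConfigurationSubset() can echo any additional string key
        * to starting regionservers through the same addConfig() pattern.
        */
       private RegionServerStartupResponse.Builder exampleAddMoreConfig(
           final RegionServerStartupResponse.Builder resp) {
         // "hbase.cluster.distributed" is a standard key; passing it along here
         // is purely an illustration of the pattern used above.
         return addConfig(resp, "hbase.cluster.distributed");
       }
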
1327   @Override
1328   public GetLastFlushedSequenceIdResponse getLastFlushedSequenceId(RpcController controller,
1329       GetLastFlushedSequenceIdRequest request) throws ServiceException {
1330     byte[] regionName = request.getRegionName().toByteArray();
1331     long seqId = serverManager.getLastFlushedSequenceId(regionName);
1332     return ResponseConverter.buildGetLastFlushedSequenceIdResponse(seqId);
1333   }
1334 
1335   @Override
1336   public RegionServerReportResponse regionServerReport(
1337       RpcController controller, RegionServerReportRequest request) throws ServiceException {
1338     try {
1339       ClusterStatusProtos.ServerLoad sl = request.getLoad();
1340       this.serverManager.regionServerReport(ProtobufUtil.toServerName(request.getServer()),
             new ServerLoad(sl));
1341       if (sl != null && this.metricsMaster != null) {
1342         // Up our metrics.
1343         this.metricsMaster.incrementRequests(sl.getTotalNumberOfRequests());
1344       }
1345     } catch (IOException ioe) {
1346       throw new ServiceException(ioe);
1347     }
1348 
1349     return RegionServerReportResponse.newBuilder().build();
1350   }
1351 
1352   @Override
1353   public ReportRSFatalErrorResponse reportRSFatalError(
1354       RpcController controller, ReportRSFatalErrorRequest request) throws ServiceException {
1355     String errorText = request.getErrorMessage();
1356     ServerName sn = ProtobufUtil.toServerName(request.getServer());
1357     String msg = "Region server " + sn +
1358       " reported a fatal error:\n" + errorText;
1359     LOG.error(msg);
1360     rsFatals.add(msg);
1361 
1362     return ReportRSFatalErrorResponse.newBuilder().build();
1363   }
1364 
1365   public boolean isMasterRunning() {
1366     return !isStopped();
1367   }
1368 
1369   @Override
1370   public IsMasterRunningResponse isMasterRunning(RpcController c, IsMasterRunningRequest req)
1371   throws ServiceException {
1372     return IsMasterRunningResponse.newBuilder().setIsMasterRunning(isMasterRunning()).build();
1373   }
1374 
1375   @Override
1376   public RunCatalogScanResponse runCatalogScan(RpcController c,
1377       RunCatalogScanRequest req) throws ServiceException {
1378     try {
1379       return ResponseConverter.buildRunCatalogScanResponse(catalogJanitorChore.scan());
1380     } catch (IOException ioe) {
1381       throw new ServiceException(ioe);
1382     }
1383   }
1384 
1385   @Override
1386   public EnableCatalogJanitorResponse enableCatalogJanitor(RpcController c,
1387       EnableCatalogJanitorRequest req) throws ServiceException {
1388     return EnableCatalogJanitorResponse.newBuilder().
1389         setPrevValue(catalogJanitorChore.setEnabled(req.getEnable())).build();
1390   }
1391 
1392   @Override
1393   public IsCatalogJanitorEnabledResponse isCatalogJanitorEnabled(RpcController c,
1394       IsCatalogJanitorEnabledRequest req) throws ServiceException {
1395     boolean isEnabled = catalogJanitorChore != null && catalogJanitorChore.getEnabled();
1396     return IsCatalogJanitorEnabledResponse.newBuilder().setValue(isEnabled).build();
1397   }
1398 
1399   /**
1400    * @return Maximum time we should run balancer for
1401    */
1402   private int getBalancerCutoffTime() {
1403     int balancerCutoffTime =
1404       getConfiguration().getInt("hbase.balancer.max.balancing", -1);
1405     if (balancerCutoffTime == -1) {
1406       // No explicit cutoff set, so fall back to the balancer period
1407       int balancerPeriod =
1408         getConfiguration().getInt("hbase.balancer.period", 300000);
1409       balancerCutoffTime = balancerPeriod;
1410       // Guard against a nonsense (non-positive) configured period
1411       if (balancerCutoffTime <= 0) balancerCutoffTime = 300000;
1412     }
1413     return balancerCutoffTime;
1414   }
1415 
1416   public boolean balance() throws HBaseIOException {
1417     // if master not initialized, don't run balancer.
1418     if (!this.initialized) {
1419       LOG.debug("Master has not been initialized, don't run balancer.");
1420       return false;
1421     }
1422     // Do this call outside of synchronized block.
1423     int maximumBalanceTime = getBalancerCutoffTime();
1424     boolean balancerRan;
1425     synchronized (this.balancer) {
1426       // If balance not true, don't run balancer.
1427       if (!this.loadBalancerTracker.isBalancerOn()) return false;
1428       // Only allow one balance run at a time.
1429       if (this.assignmentManager.getRegionStates().isRegionsInTransition()) {
1430         Map<String, RegionState> regionsInTransition =
1431           this.assignmentManager.getRegionStates().getRegionsInTransition();
1432         LOG.debug("Not running balancer because " + regionsInTransition.size() +
1433           " region(s) in transition: " + org.apache.commons.lang.StringUtils.
1434             abbreviate(regionsInTransition.toString(), 256));
1435         return false;
1436       }
1437       if (this.serverManager.areDeadServersInProgress()) {
1438         LOG.debug("Not running balancer because processing dead regionserver(s): " +
1439           this.serverManager.getDeadServers());
1440         return false;
1441       }
1442 
1443       if (this.cpHost != null) {
1444         try {
1445           if (this.cpHost.preBalance()) {
1446             LOG.debug("Coprocessor bypassing balancer request");
1447             return false;
1448           }
1449         } catch (IOException ioe) {
1450           LOG.error("Error invoking master coprocessor preBalance()", ioe);
1451           return false;
1452         }
1453       }
1454 
1455       Map<TableName, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
1456         this.assignmentManager.getRegionStates().getAssignmentsByTable();
1457 
1458       List<RegionPlan> plans = new ArrayList<RegionPlan>();
1459       //Give the balancer the current cluster state.
1460       this.balancer.setClusterStatus(getClusterStatus());
1461       for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
1462         List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
1463         if (partialPlans != null) plans.addAll(partialPlans);
1464       }
1465       long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
1466       int rpCount = 0;  // number of RegionPlans balanced so far
1467       long totalRegPlanExecTime = 0;
1468       balancerRan = plans != null;
1469       if (plans != null && !plans.isEmpty()) {
1470         for (RegionPlan plan: plans) {
1471           LOG.info("balance " + plan);
1472           long balStartTime = System.currentTimeMillis();
1473           //TODO: bulk assign
1474           this.assignmentManager.balance(plan);
1475           totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
1476           rpCount++;
1477           if (rpCount < plans.size() &&
1478               // if performing next balance exceeds cutoff time, exit the loop
1479               (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
1480             //TODO: After balance, there should not be a cutoff time (keeping it as a safety net for now)
1481             LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
1482               maximumBalanceTime);
1483             break;
1484           }
1485         }
1486       }
1487       if (this.cpHost != null) {
1488         try {
1489           this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans);
1490         } catch (IOException ioe) {
1491           // balancing already succeeded so don't change the result
1492           LOG.error("Error invoking master coprocessor postBalance()", ioe);
1493         }
1494       }
1495     }
1496     return balancerRan;
1497   }
1498 
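       /**
        * Illustrative sketch of the early-exit heuristic inside balance() above
        * (assumed helper, not part of the original class): stop when executing
        * one more plan, at the average cost observed so far, would overshoot the
        * cutoff computed from getBalancerCutoffTime().
        */
       private static boolean exampleWouldExceedCutoff(final long now,
           final long totalExecTimeMs, final int plansRun, final long cutoffTime) {
         long avgPlanTimeMs = totalExecTimeMs / plansRun; // plansRun > 0 when checked
         return now + avgPlanTimeMs > cutoffTime;
       }
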
1499   @Override
1500   public BalanceResponse balance(RpcController c, BalanceRequest request) throws ServiceException {
1501     try {
1502       return BalanceResponse.newBuilder().setBalancerRan(balance()).build();
1503     } catch (HBaseIOException ex) {
1504       throw new ServiceException(ex);
1505     }
1506   }
1507 
1508   enum BalanceSwitchMode {
1509     SYNC,
1510     ASYNC
1511   }
1512 
1513   /**
1514    * Sets the balancer switch on or off according to the given BalanceSwitchMode.
1515    * @param b new balancer switch value
1516    * @param mode BalanceSwitchMode, either SYNC or ASYNC
1517    * @return previous balancer switch value
1518    */
1519   public boolean switchBalancer(final boolean b, BalanceSwitchMode mode) throws IOException {
1520     boolean oldValue = this.loadBalancerTracker.isBalancerOn();
1521     boolean newValue = b;
1522     try {
1523       if (this.cpHost != null) {
1524         newValue = this.cpHost.preBalanceSwitch(newValue);
1525       }
1526       try {
1527         if (mode == BalanceSwitchMode.SYNC) {
1528           synchronized (this.balancer) {
1529             this.loadBalancerTracker.setBalancerOn(newValue);
1530           }
1531         } else {
1532           this.loadBalancerTracker.setBalancerOn(newValue);
1533         }
1534       } catch (KeeperException ke) {
1535         throw new IOException(ke);
1536       }
1537       LOG.info(getClientIdAuditPrefix() + " set balanceSwitch=" + newValue);
1538       if (this.cpHost != null) {
1539         this.cpHost.postBalanceSwitch(oldValue, newValue);
1540       }
1541     } catch (IOException ioe) {
1542       LOG.warn("Error flipping balance switch", ioe);
1543     }
1544     return oldValue;
1545   }
1546 
1547   /**
1548    * @return Client info for use as prefix on an audit log string; who did an action
1549    */
1550   String getClientIdAuditPrefix() {
1551     return "Client=" + RequestContext.getRequestUserName() + "/" +
1552       RequestContext.get().getRemoteAddress();
1553   }
1554 
1555   public boolean synchronousBalanceSwitch(final boolean b) throws IOException {
1556     return switchBalancer(b, BalanceSwitchMode.SYNC);
1557   }
1558 
1559   public boolean balanceSwitch(final boolean b) throws IOException {
1560     return switchBalancer(b, BalanceSwitchMode.ASYNC);
1561   }
1562 
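       /**
        * Minimal client-side sketch (assumes a reachable cluster; not part of
        * the original class): the public admin API drives the SYNC variant of
        * the switch above and returns the previous value, mirroring
        * switchBalancer().
        */
       static boolean exampleDisableBalancer(final Configuration conf) throws IOException {
         org.apache.hadoop.hbase.client.HBaseAdmin admin =
             new org.apache.hadoop.hbase.client.HBaseAdmin(conf);
         try {
           return admin.setBalancerRunning(false, true); // synchronous off
         } finally {
           admin.close();
         }
       }
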
1563   @Override
1564   public SetBalancerRunningResponse setBalancerRunning(
1565       RpcController controller, SetBalancerRunningRequest req) throws ServiceException {
1566     try {
1567       boolean prevValue = req.getSynchronous() ?
1568         synchronousBalanceSwitch(req.getOn()) : balanceSwitch(req.getOn());
1569       return SetBalancerRunningResponse.newBuilder().setPrevBalanceValue(prevValue).build();
1570     } catch (IOException ioe) {
1571       throw new ServiceException(ioe);
1572     }
1573   }
1574 
1575   /**
1576    * Switch for the background CatalogJanitor thread.
1577    * Used for testing.  The thread will continue to run.  It will just be a noop
1578    * if disabled.
1579    * @param b If false, the catalog janitor won't do anything.
1580    */
1581   public void setCatalogJanitorEnabled(final boolean b) {
1582     this.catalogJanitorChore.setEnabled(b);
1583   }
1584 
1585   @Override
1586   public DispatchMergingRegionsResponse dispatchMergingRegions(
1587       RpcController controller, DispatchMergingRegionsRequest request)
1588       throws ServiceException {
1589     final byte[] encodedNameOfRegionA = request.getRegionA().getValue()
1590         .toByteArray();
1591     final byte[] encodedNameOfRegionB = request.getRegionB().getValue()
1592         .toByteArray();
1593     final boolean forcible = request.getForcible();
1594     if (request.getRegionA().getType() != RegionSpecifierType.ENCODED_REGION_NAME
1595         || request.getRegionB().getType() != RegionSpecifierType.ENCODED_REGION_NAME) {
1596       LOG.warn("mergeRegions specifier type: expected: "
1597           + RegionSpecifierType.ENCODED_REGION_NAME + " actual: region_a="
1598           + request.getRegionA().getType() + ", region_b="
1599           + request.getRegionB().getType());
1600     }
1601     RegionState regionStateA = assignmentManager.getRegionStates()
1602         .getRegionState(Bytes.toString(encodedNameOfRegionA));
1603     RegionState regionStateB = assignmentManager.getRegionStates()
1604         .getRegionState(Bytes.toString(encodedNameOfRegionB));
1605     if (regionStateA == null || regionStateB == null) {
1606       throw new ServiceException(new UnknownRegionException(
1607           Bytes.toStringBinary(regionStateA == null ? encodedNameOfRegionA
1608               : encodedNameOfRegionB)));
1609     }
1610 
1611     if (!regionStateA.isOpened() || !regionStateB.isOpened()) {
1612       throw new ServiceException(new MergeRegionException(
1613         "Unable to merge regions that are not online " + regionStateA + ", " + regionStateB));
1614     }
1615 
1616     HRegionInfo regionInfoA = regionStateA.getRegion();
1617     HRegionInfo regionInfoB = regionStateB.getRegion();
1618     if (regionInfoA.compareTo(regionInfoB) == 0) {
1619       throw new ServiceException(new MergeRegionException(
1620         "Unable to merge a region to itself " + regionInfoA + ", " + regionInfoB));
1621     }
1622 
1623     if (!forcible && !HRegionInfo.areAdjacent(regionInfoA, regionInfoB)) {
1624       throw new ServiceException(new MergeRegionException(
1625         "Unable to merge non-adjacent regions "
1626           + regionInfoA.getRegionNameAsString() + ", "
1627           + regionInfoB.getRegionNameAsString()
1628           + " where forcible = " + forcible));
1629     }
1630 
1631     try {
1632       dispatchMergingRegions(regionInfoA, regionInfoB, forcible);
1633     } catch (IOException ioe) {
1634       throw new ServiceException(ioe);
1635     }
1636 
1637     return DispatchMergingRegionsResponse.newBuilder().build();
1638   }
1639 
1640   @Override
1641   public void dispatchMergingRegions(final HRegionInfo region_a,
1642       final HRegionInfo region_b, final boolean forcible) throws IOException {
1643     checkInitialized();
1644     this.executorService.submit(new DispatchMergingRegionHandler(this,
1645         this.catalogJanitorChore, region_a, region_b, forcible));
1646   }
1647 
1648   @Override
1649   public MoveRegionResponse moveRegion(RpcController controller, MoveRegionRequest req)
1650   throws ServiceException {
1651     final byte [] encodedRegionName = req.getRegion().getValue().toByteArray();
1652     RegionSpecifierType type = req.getRegion().getType();
1653     final byte [] destServerName = (req.hasDestServerName())?
1654       Bytes.toBytes(ProtobufUtil.toServerName(req.getDestServerName()).getServerName()):null;
1655     MoveRegionResponse mrr = MoveRegionResponse.newBuilder().build();
1656 
1657     if (type != RegionSpecifierType.ENCODED_REGION_NAME) {
1658       LOG.warn("moveRegion specifier type: expected: " + RegionSpecifierType.ENCODED_REGION_NAME
1659         + " actual: " + type);
1660     }
1661 
1662     try {
1663       move(encodedRegionName, destServerName);
1664     } catch (HBaseIOException ioe) {
1665       throw new ServiceException(ioe);
1666     }
1667     return mrr;
1668   }
1669 
1670   void move(final byte[] encodedRegionName,
1671       final byte[] destServerName) throws HBaseIOException {
1672     RegionState regionState = assignmentManager.getRegionStates().
1673       getRegionState(Bytes.toString(encodedRegionName));
1674     if (regionState == null) {
1675       throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
1676     }
1677 
1678     HRegionInfo hri = regionState.getRegion();
1679     ServerName dest;
1680     if (destServerName == null || destServerName.length == 0) {
1681       LOG.info("Passed destination servername is null/empty so " +
1682         "choosing a server at random");
1683       final List<ServerName> destServers = this.serverManager.createDestinationServersList(
1684         regionState.getServerName());
1685       dest = balancer.randomAssignment(hri, destServers);
1686     } else {
1687       dest = ServerName.valueOf(Bytes.toString(destServerName));
1688       if (dest.equals(regionState.getServerName())) {
1689         LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
1690           + " because region already assigned to the same server " + dest + ".");
1691         return;
1692       }
1693     }
1694 
1695     // Now we can do the move
1696     RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);
1697 
1698     try {
1699       checkInitialized();
1700       if (this.cpHost != null) {
1701         if (this.cpHost.preMove(hri, rp.getSource(), rp.getDestination())) {
1702           return;
1703         }
1704       }
1705       LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
1706       this.assignmentManager.balance(rp);
1707       if (this.cpHost != null) {
1708         this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
1709       }
1710     } catch (IOException ioe) {
1711       if (ioe instanceof HBaseIOException) {
1712         throw (HBaseIOException)ioe;
1713       }
1714       throw new HBaseIOException(ioe);
1715     }
1716   }
1717 
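       /**
        * Usage sketch (hypothetical caller, not part of the original class):
        * passing a null destination to move() above lets the balancer pick a
        * target server at random.
        */
       void exampleMoveToRandomServer(final String encodedRegionName) throws HBaseIOException {
         move(Bytes.toBytes(encodedRegionName), null);
       }
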
1718   @Override
1719   public void createTable(HTableDescriptor hTableDescriptor,
1720     byte [][] splitKeys)
1721   throws IOException {
1722     if (!isMasterRunning()) {
1723       throw new MasterNotRunningException();
1724     }
1725 
1726     String namespace = hTableDescriptor.getTableName().getNamespaceAsString();
1727     getNamespaceDescriptor(namespace); // ensure namespace exists
1728 
1729     HRegionInfo[] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
1730     checkInitialized();
1731     checkCompression(hTableDescriptor);
1732     if (cpHost != null) {
1733       cpHost.preCreateTable(hTableDescriptor, newRegions);
1734     }
1735     LOG.info(getClientIdAuditPrefix() + " create " + hTableDescriptor);
1736     this.executorService.submit(new CreateTableHandler(this,
1737       this.fileSystemManager, hTableDescriptor, conf,
1738       newRegions, this).prepare());
1739     if (cpHost != null) {
1740       cpHost.postCreateTable(hTableDescriptor, newRegions);
1741     }
1743   }
1744 
1745   private void checkCompression(final HTableDescriptor htd)
1746   throws IOException {
1747     if (!this.masterCheckCompression) return;
1748     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1749       checkCompression(hcd);
1750     }
1751   }
1752 
1753   private void checkCompression(final HColumnDescriptor hcd)
1754   throws IOException {
1755     if (!this.masterCheckCompression) return;
1756     CompressionTest.testCompression(hcd.getCompression());
1757     CompressionTest.testCompression(hcd.getCompactionCompression());
1758   }
1759 
1760   @Override
1761   public CreateTableResponse createTable(RpcController controller, CreateTableRequest req)
1762   throws ServiceException {
1763     HTableDescriptor hTableDescriptor = HTableDescriptor.convert(req.getTableSchema());
1764     byte [][] splitKeys = ProtobufUtil.getSplitKeysArray(req);
1765     try {
1766       createTable(hTableDescriptor, splitKeys);
1767     } catch (IOException ioe) {
1768       throw new ServiceException(ioe);
1769     }
1770     return CreateTableResponse.newBuilder().build();
1771   }
1772 
1773   private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1774     byte[][] splitKeys) {
1775     HRegionInfo[] hRegionInfos = null;
1776     if (splitKeys == null || splitKeys.length == 0) {
1777       hRegionInfos = new HRegionInfo[]{
1778           new HRegionInfo(hTableDescriptor.getTableName(), null, null)};
1779     } else {
1780       int numRegions = splitKeys.length + 1;
1781       hRegionInfos = new HRegionInfo[numRegions];
1782       byte[] startKey = null;
1783       byte[] endKey = null;
1784       for (int i = 0; i < numRegions; i++) {
1785         endKey = (i == splitKeys.length) ? null : splitKeys[i];
1786         hRegionInfos[i] =
1787             new HRegionInfo(hTableDescriptor.getTableName(), startKey, endKey);
1788         startKey = endKey;
1789       }
1790     }
1791     return hRegionInfos;
1792   }
1793 
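       /**
        * Worked example (assumed helper, not part of the original class): two
        * split keys yield three regions covering the whole key space, i.e.
        * (null, "b"), ("b", "m") and ("m", null).
        */
       private HRegionInfo[] exampleThreeRegionLayout(final HTableDescriptor htd) {
         byte[][] splitKeys = new byte[][] { Bytes.toBytes("b"), Bytes.toBytes("m") };
         return getHRegionInfos(htd, splitKeys);
       }
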
1794   private static boolean isCatalogTable(final TableName tableName) {
1795     return tableName.equals(TableName.META_TABLE_NAME);
1796   }
1797 
1798   @Override
1799   public void deleteTable(final TableName tableName) throws IOException {
1800     checkInitialized();
1801     if (cpHost != null) {
1802       cpHost.preDeleteTable(tableName);
1803     }
1804     LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
1805     this.executorService.submit(new DeleteTableHandler(tableName, this, this).prepare());
1806     if (cpHost != null) {
1807       cpHost.postDeleteTable(tableName);
1808     }
1809   }
1810 
1811   @Override
1812   public DeleteTableResponse deleteTable(RpcController controller, DeleteTableRequest request)
1813   throws ServiceException {
1814     try {
1815       deleteTable(ProtobufUtil.toTableName(request.getTableName()));
1816     } catch (IOException ioe) {
1817       throw new ServiceException(ioe);
1818     }
1819     return DeleteTableResponse.newBuilder().build();
1820   }
1821 
1822   /**
1823    * Get the regions-updated status of a table being altered.
1824    *
1825    * @return Pair where Pair.getFirst() is the number of regions yet to be
1826    *         updated and Pair.getSecond() is the total number of regions of
1827    *         the table
1828    * @throws ServiceException
1829    */
1830   @Override
1831   public GetSchemaAlterStatusResponse getSchemaAlterStatus(
1832       RpcController controller, GetSchemaAlterStatusRequest req) throws ServiceException {
1833     // TODO: currently, we query using the table name on the client side. this
1834     // may overlap with other table operations or the table operation may
1835     // have completed before querying this API. We need to refactor to a
1836     // transaction system in the future to avoid these ambiguities.
1837     TableName tableName = ProtobufUtil.toTableName(req.getTableName());
1838 
1839     try {
1840       Pair<Integer,Integer> pair = this.assignmentManager.getReopenStatus(tableName);
1841       GetSchemaAlterStatusResponse.Builder ret = GetSchemaAlterStatusResponse.newBuilder();
1842       ret.setYetToUpdateRegions(pair.getFirst());
1843       ret.setTotalRegions(pair.getSecond());
1844       return ret.build();
1845     } catch (IOException ioe) {
1846       throw new ServiceException(ioe);
1847     }
1848   }
1849 
1850   @Override
1851   public void addColumn(final TableName tableName, final HColumnDescriptor column)
1852       throws IOException {
1853     checkInitialized();
1854     if (cpHost != null) {
1855       if (cpHost.preAddColumn(tableName, column)) {
1856         return;
1857       }
1858     }
1859     //TODO: we should process this (and some others) in an executor
1860     new TableAddFamilyHandler(tableName, column, this, this).prepare().process();
1861     if (cpHost != null) {
1862       cpHost.postAddColumn(tableName, column);
1863     }
1864   }
1865 
1866   @Override
1867   public AddColumnResponse addColumn(RpcController controller, AddColumnRequest req)
1868   throws ServiceException {
1869     try {
1870       addColumn(ProtobufUtil.toTableName(req.getTableName()),
1871         HColumnDescriptor.convert(req.getColumnFamilies()));
1872     } catch (IOException ioe) {
1873       throw new ServiceException(ioe);
1874     }
1875     return AddColumnResponse.newBuilder().build();
1876   }
1877 
1878   @Override
1879   public void modifyColumn(TableName tableName, HColumnDescriptor descriptor)
1880       throws IOException {
1881     checkInitialized();
1882     checkCompression(descriptor);
1883     if (cpHost != null) {
1884       if (cpHost.preModifyColumn(tableName, descriptor)) {
1885         return;
1886       }
1887     }
1888     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1889     new TableModifyFamilyHandler(tableName, descriptor, this, this)
1890       .prepare().process();
1891     if (cpHost != null) {
1892       cpHost.postModifyColumn(tableName, descriptor);
1893     }
1894   }
1895 
1896   @Override
1897   public ModifyColumnResponse modifyColumn(RpcController controller, ModifyColumnRequest req)
1898   throws ServiceException {
1899     try {
1900       modifyColumn(ProtobufUtil.toTableName(req.getTableName()),
1901         HColumnDescriptor.convert(req.getColumnFamilies()));
1902     } catch (IOException ioe) {
1903       throw new ServiceException(ioe);
1904     }
1905     return ModifyColumnResponse.newBuilder().build();
1906   }
1907 
1908   @Override
1909   public void deleteColumn(final TableName tableName, final byte[] columnName)
1910       throws IOException {
1911     checkInitialized();
1912     if (cpHost != null) {
1913       if (cpHost.preDeleteColumn(tableName, columnName)) {
1914         return;
1915       }
1916     }
1917     LOG.info(getClientIdAuditPrefix() + " delete " + Bytes.toString(columnName));
1918     new TableDeleteFamilyHandler(tableName, columnName, this, this).prepare().process();
1919     if (cpHost != null) {
1920       cpHost.postDeleteColumn(tableName, columnName);
1921     }
1922   }
1923 
1924   @Override
1925   public DeleteColumnResponse deleteColumn(RpcController controller, DeleteColumnRequest req)
1926   throws ServiceException {
1927     try {
1928       deleteColumn(ProtobufUtil.toTableName(req.getTableName()),
1929           req.getColumnName().toByteArray());
1930     } catch (IOException ioe) {
1931       throw new ServiceException(ioe);
1932     }
1933     return DeleteColumnResponse.newBuilder().build();
1934   }
1935 
1936   @Override
1937   public void enableTable(final TableName tableName) throws IOException {
1938     checkInitialized();
1939     if (cpHost != null) {
1940       cpHost.preEnableTable(tableName);
1941     }
1942     LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
1943     this.executorService.submit(new EnableTableHandler(this, tableName,
1944       catalogTracker, assignmentManager, tableLockManager, false).prepare());
1945     if (cpHost != null) {
1946       cpHost.postEnableTable(tableName);
1947    }
1948   }
1949 
1950   @Override
1951   public EnableTableResponse enableTable(RpcController controller, EnableTableRequest request)
1952   throws ServiceException {
1953     try {
1954       enableTable(ProtobufUtil.toTableName(request.getTableName()));
1955     } catch (IOException ioe) {
1956       throw new ServiceException(ioe);
1957     }
1958     return EnableTableResponse.newBuilder().build();
1959   }
1960 
1961   @Override
1962   public void disableTable(final TableName tableName) throws IOException {
1963     checkInitialized();
1964     if (cpHost != null) {
1965       cpHost.preDisableTable(tableName);
1966     }
1967     LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
1968     this.executorService.submit(new DisableTableHandler(this, tableName,
1969       catalogTracker, assignmentManager, tableLockManager, false).prepare());
1970     if (cpHost != null) {
1971       cpHost.postDisableTable(tableName);
1972     }
1973   }
1974 
1975   @Override
1976   public DisableTableResponse disableTable(RpcController controller, DisableTableRequest request)
1977   throws ServiceException {
1978     try {
1979       disableTable(ProtobufUtil.toTableName(request.getTableName()));
1980     } catch (IOException ioe) {
1981       throw new ServiceException(ioe);
1982     }
1983     return DisableTableResponse.newBuilder().build();
1984   }
1985 
1986   /**
1987    * Return the region and current deployment for the region containing
1988    * the given row. If the region cannot be found, returns null. If it
1989    * is found, but not currently deployed, the second element of the pair
1990    * may be null.
1991    */
1992   Pair<HRegionInfo, ServerName> getTableRegionForRow(
1993       final TableName tableName, final byte [] rowKey)
1994   throws IOException {
1995     final AtomicReference<Pair<HRegionInfo, ServerName>> result =
1996       new AtomicReference<Pair<HRegionInfo, ServerName>>(null);
1997 
1998     MetaScannerVisitor visitor =
1999       new MetaScannerVisitorBase() {
2000         @Override
2001         public boolean processRow(Result data) throws IOException {
2002           if (data == null || data.size() <= 0) {
2003             return true;
2004           }
2005           Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(data);
2006           if (pair == null) {
2007             return false;
2008           }
2009           if (!pair.getFirst().getTable().equals(tableName)) {
2010             return false;
2011           }
2012           result.set(pair);
2013           return true;
2014         }
2015     };
2016 
2017     MetaScanner.metaScan(conf, visitor, tableName, rowKey, 1);
2018     return result.get();
2019   }
2020 
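       /**
        * Usage sketch (hypothetical table and row, not part of the original
        * class): looks up the region and its current deployment for one row.
        */
       void exampleLocateRow() throws IOException {
         Pair<HRegionInfo, ServerName> loc =
             getTableRegionForRow(TableName.valueOf("t1"), Bytes.toBytes("row-0"));
         if (loc != null && loc.getSecond() != null) {
           LOG.info("Row lives in " + loc.getFirst().getRegionNameAsString()
               + " on " + loc.getSecond());
         }
       }
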
2021   @Override
2022   public void modifyTable(final TableName tableName, final HTableDescriptor descriptor)
2023       throws IOException {
2024     checkInitialized();
2025     checkCompression(descriptor);
2026     if (cpHost != null) {
2027       cpHost.preModifyTable(tableName, descriptor);
2028     }
2029     LOG.info(getClientIdAuditPrefix() + " modify " + tableName);
2030     new ModifyTableHandler(tableName, descriptor, this, this).prepare().process();
2031     if (cpHost != null) {
2032       cpHost.postModifyTable(tableName, descriptor);
2033     }
2034   }
2035 
2036   @Override
2037   public ModifyTableResponse modifyTable(RpcController controller, ModifyTableRequest req)
2038   throws ServiceException {
2039     try {
2040       modifyTable(ProtobufUtil.toTableName(req.getTableName()),
2041         HTableDescriptor.convert(req.getTableSchema()));
2042     } catch (IOException ioe) {
2043       throw new ServiceException(ioe);
2044     }
2045     return ModifyTableResponse.newBuilder().build();
2046   }
2047 
2048   @Override
2049   public void checkTableModifiable(final TableName tableName)
2050       throws IOException, TableNotFoundException, TableNotDisabledException {
2051     if (isCatalogTable(tableName)) {
2052       throw new IOException("Can't modify catalog tables");
2053     }
2054     if (!MetaReader.tableExists(getCatalogTracker(), tableName)) {
2055       throw new TableNotFoundException(tableName);
2056     }
2057     if (!getAssignmentManager().getZKTable().
2058         isDisabledTable(tableName)) {
2059       throw new TableNotDisabledException(tableName);
2060     }
2061   }
2062 
2063   @Override
2064   public GetClusterStatusResponse getClusterStatus(RpcController controller,
2065       GetClusterStatusRequest req)
2066   throws ServiceException {
2067     GetClusterStatusResponse.Builder response = GetClusterStatusResponse.newBuilder();
2068     response.setClusterStatus(getClusterStatus().convert());
2069     return response.build();
2070   }
2071 
2072   /**
2073    * @return cluster status
2074    */
2075   public ClusterStatus getClusterStatus() {
2076     // Build Set of backup masters from ZK nodes
2077     List<String> backupMasterStrings;
2078     try {
2079       backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
2080         this.zooKeeper.backupMasterAddressesZNode);
2081     } catch (KeeperException e) {
2082       LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
2083       backupMasterStrings = new ArrayList<String>(0);
2084     }
2085     List<ServerName> backupMasters = new ArrayList<ServerName>(
2086                                           backupMasterStrings.size());
2087     for (String s: backupMasterStrings) {
2088       try {
2089         byte [] bytes =
2090             ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(
2091                 this.zooKeeper.backupMasterAddressesZNode, s));
2092         if (bytes != null) {
2093           ServerName sn;
2094           try {
2095             sn = ServerName.parseFrom(bytes);
2096           } catch (DeserializationException e) {
2097             LOG.warn("Failed parse, skipping registering backup server", e);
2098             continue;
2099           }
2100           backupMasters.add(sn);
2101         }
2102       } catch (KeeperException e) {
2103         LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
2104                  "backup servers"), e);
2105       }
2106     }
2107     Collections.sort(backupMasters, new Comparator<ServerName>() {
2108       @Override
2109       public int compare(ServerName s1, ServerName s2) {
2110         return s1.getServerName().compareTo(s2.getServerName());
2111       }});
2112 
2113     return new ClusterStatus(VersionInfo.getVersion(),
2114       this.fileSystemManager.getClusterId().toString(),
2115       this.serverManager.getOnlineServers(),
2116       this.serverManager.getDeadServers().copyServerNames(),
2117       this.serverName,
2118       backupMasters,
2119       this.assignmentManager.getRegionStates().getRegionsInTransition(),
2120       this.getCoprocessors(), this.loadBalancerTracker.isBalancerOn());
2121   }
2122 
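       /**
        * Usage sketch (assumed helper, not part of the original class): the
        * ClusterStatus assembled above carries enough for a one-line summary.
        */
       String exampleClusterSummary() {
         ClusterStatus status = getClusterStatus();
         return status.getServersSize() + " live server(s), "
             + status.getDeadServerNames().size() + " dead, average load "
             + status.getAverageLoad();
       }
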
2123   public String getClusterId() {
2124     if (fileSystemManager == null) {
2125       return "";
2126     }
2127     ClusterId id = fileSystemManager.getClusterId();
2128     if (id == null) {
2129       return "";
2130     }
2131     return id.toString();
2132   }
2133 
2134   /**
2135    * The set of loaded coprocessors is stored in a static set. Since it's
2136    * statically allocated, it does not require that HMaster's cpHost be
2137    * initialized prior to accessing it.
2138    * @return a String representation of the set of names of the loaded
2139    * coprocessors.
2140    */
2141   public static String getLoadedCoprocessors() {
2142     return CoprocessorHost.getLoadedCoprocessors().toString();
2143   }
2144 
2145   /**
2146    * @return timestamp in millis when HMaster was started.
2147    */
2148   public long getMasterStartTime() {
2149     return masterStartTime;
2150   }
2151 
2152   /**
2153    * @return timestamp in millis when HMaster became the active master.
2154    */
2155   public long getMasterActiveTime() {
2156     return masterActiveTime;
2157   }
2158 
2159   public int getRegionServerInfoPort(final ServerName sn) {
2160     RegionServerInfo info = this.regionServerTracker.getRegionServerInfo(sn);
2161     if (info == null || info.getInfoPort() == 0) {
2162       return conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
2163         HConstants.DEFAULT_REGIONSERVER_INFOPORT);
2164     }
2165     return info.getInfoPort();
2166   }
2167
2168   /**
2169    * @return array of coprocessor SimpleNames.
2170    */
2171   public String[] getCoprocessors() {
2172     Set<String> masterCoprocessors =
2173         getCoprocessorHost().getCoprocessors();
2174     return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
2175   }
2176 
2177   @Override
2178   public void abort(final String msg, final Throwable t) {
2179     if (cpHost != null) {
2180       // HBASE-4014: dump a list of loaded coprocessors.
2181       LOG.fatal("Master server abort: loaded coprocessors are: " +
2182           getLoadedCoprocessors());
2183     }
2184 
2185     if (abortNow(msg, t)) {
2186       if (t != null) LOG.fatal(msg, t);
2187       else LOG.fatal(msg);
2188       this.abort = true;
2189       stop("Aborting");
2190     }
2191   }
2192 
2193   /**
2194    * We do the following in a different thread.  If it is not completed
2195    * in time, we will time it out and assume it is not easy to recover.
2196    *
2197    * 1. Create a new ZK session. (since our current one is expired)
2198    * 2. Try to become a primary master again
2199    * 3. Initialize all ZK based system trackers.
2200    * 4. Assign meta. (it is already assigned, but we need to update our
2201    * internal memory state to reflect it)
2202    * 5. Process any regions in transition encountered during our recovery.
2203    *
2204    * @return True if we could successfully recover from ZK session expiry.
2205    * @throws InterruptedException
2206    * @throws IOException
2207    * @throws KeeperException
2208    * @throws ExecutionException
2209    */
2210   private boolean tryRecoveringExpiredZKSession() throws InterruptedException,
2211       IOException, KeeperException, ExecutionException {
2212 
2213     this.zooKeeper.unregisterAllListeners();
2214     // add back listeners which were registered before master initialization
2215     // because they won't be added back in below Master re-initialization code
2216     if (this.registeredZKListenersBeforeRecovery != null) {
2217       for (ZooKeeperListener curListener : this.registeredZKListenersBeforeRecovery) {
2218         this.zooKeeper.registerListener(curListener);
2219       }
2220     }
2221 
2222     this.zooKeeper.reconnectAfterExpiration();
2223 
2224     Callable<Boolean> callable = new Callable<Boolean> () {
2225       @Override
2226       public Boolean call() throws InterruptedException,
2227           IOException, KeeperException {
2228         MonitoredTask status =
2229           TaskMonitor.get().createStatus("Recovering expired ZK session");
2230         try {
2231           if (!becomeActiveMaster(status)) {
2232             return Boolean.FALSE;
2233           }
2234           serverShutdownHandlerEnabled = false;
2235           initialized = false;
2236           finishInitialization(status, true);
2237           return !stopped;
2238         } finally {
2239           status.cleanup();
2240         }
2241       }
2242     };
2243 
2244     long timeout =
2245       conf.getLong("hbase.master.zksession.recover.timeout", 300000);
2246     java.util.concurrent.ExecutorService executor =
2247       Executors.newSingleThreadExecutor();
2248     Future<Boolean> result = executor.submit(callable);
2249     executor.shutdown();
2250     if (executor.awaitTermination(timeout, TimeUnit.MILLISECONDS)
2251         && result.isDone()) {
2252       Boolean recovered = result.get();
2253       if (recovered != null) {
2254         return recovered.booleanValue();
2255       }
2256     }
2257     executor.shutdownNow();
2258     return false;
2259   }
2260 
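       /**
        * Illustrative sketch of the bounding pattern used above (assumed
        * helper, not part of the original class): run a callable under a hard
        * deadline, interrupting the straggler when the deadline passes.
        */
       private static boolean exampleRunWithDeadline(final Callable<Boolean> task,
           final long timeoutMs) throws InterruptedException, ExecutionException {
         java.util.concurrent.ExecutorService executor = Executors.newSingleThreadExecutor();
         Future<Boolean> result = executor.submit(task);
         executor.shutdown();
         if (executor.awaitTermination(timeoutMs, TimeUnit.MILLISECONDS) && result.isDone()) {
           Boolean done = result.get();
           return done != null && done.booleanValue();
         }
         executor.shutdownNow(); // best-effort interrupt of the still-running task
         return false;
       }
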
2261   /**
2262    * Check to see if the current trigger for abort is due to ZooKeeper session
2263    * expiry, and if so, whether we can recover from ZK session expiry.
2264    *
2265    * @param msg Original abort message
2266    * @param t   The cause for current abort request
2267    * @return true if we should proceed with abort operation, false otherwise.
2268    */
2269   private boolean abortNow(final String msg, final Throwable t) {
2270     if (!this.isActiveMaster || this.stopped) {
2271       return true;
2272     }
2273 
2274     boolean failFast = conf.getBoolean("fail.fast.expired.active.master", false);
2275     if (t instanceof KeeperException.SessionExpiredException
2276         && !failFast) {
2277       try {
2278         LOG.info("Primary Master trying to recover from ZooKeeper session " +
2279             "expiry.");
2280         return !tryRecoveringExpiredZKSession();
2281       } catch (Throwable newT) {
2282         LOG.error("Primary master encountered unexpected exception while " +
2283             "trying to recover from ZooKeeper session" +
2284             " expiry. Proceeding with server abort.", newT);
2285       }
2286     }
2287     return true;
2288   }
2289 
2290   @Override
2291   public ZooKeeperWatcher getZooKeeper() {
2292     return zooKeeper;
2293   }
2294 
2295   @Override
2296   public MasterCoprocessorHost getCoprocessorHost() {
2297     return cpHost;
2298   }
2299 
2300   @Override
2301   public ServerName getServerName() {
2302     return this.serverName;
2303   }
2304 
2305   @Override
2306   public CatalogTracker getCatalogTracker() {
2307     return catalogTracker;
2308   }
2309 
2310   @Override
2311   public AssignmentManager getAssignmentManager() {
2312     return this.assignmentManager;
2313   }
2314 
2315   @Override
2316   public TableLockManager getTableLockManager() {
2317     return this.tableLockManager;
2318   }
2319 
2320   public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
2321     return rsFatals;
2322   }
2323 
2324   public void shutdown() {
2325     if (spanReceiverHost != null) {
2326       spanReceiverHost.closeReceivers();
2327     }
2328     if (cpHost != null) {
2329       try {
2330         cpHost.preShutdown();
2331       } catch (IOException ioe) {
2332         LOG.error("Error calling master coprocessor preShutdown()", ioe);
2333       }
2334     }
2335     if (mxBean != null) {
2336       MBeanUtil.unregisterMBean(mxBean);
2337       mxBean = null;
2338     }
2339     if (this.assignmentManager != null) this.assignmentManager.shutdown();
2340     if (this.serverManager != null) this.serverManager.shutdownCluster();
2341     try {
2342       if (this.clusterStatusTracker != null){
2343         this.clusterStatusTracker.setClusterDown();
2344       }
2345     } catch (KeeperException e) {
2346       LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
2347     }
2348   }
2349 
2350   @Override
2351   public ShutdownResponse shutdown(RpcController controller, ShutdownRequest request)
2352   throws ServiceException {
2353     LOG.info(getClientIdAuditPrefix() + " shutdown");
2354     shutdown();
2355     return ShutdownResponse.newBuilder().build();
2356   }
2357 
2358   public void stopMaster() {
2359     if (cpHost != null) {
2360       try {
2361         cpHost.preStopMaster();
2362       } catch (IOException ioe) {
2363         LOG.error("Error calling master coprocessor preStopMaster()", ioe);
2364       }
2365     }
2366     stop("Stopped by " + Thread.currentThread().getName());
2367   }
2368 
2369   @Override
2370   public StopMasterResponse stopMaster(RpcController controller, StopMasterRequest request)
2371   throws ServiceException {
2372     LOG.info(getClientIdAuditPrefix() + " stop");
2373     stopMaster();
2374     return StopMasterResponse.newBuilder().build();
2375   }
2376 
2377   @Override
2378   public void stop(final String why) {
2379     LOG.info(why);
2380     this.stopped = true;
2381     // We wake up the stopSleeper to stop immediately
2382     stopSleeper.skipSleepCycle();
2383     // If we are a backup master, we need to interrupt wait
2384     if (this.activeMasterManager != null) {
2385       synchronized (this.activeMasterManager.clusterHasActiveMaster) {
2386         this.activeMasterManager.clusterHasActiveMaster.notifyAll();
2387       }
2388     }
2389     // If no region server is online then the master may get stuck waiting on
2390     // hbase:meta to come online. See HBASE-8422.
2391     if (this.catalogTracker != null && this.serverManager.getOnlineServers().isEmpty()) {
2392       this.catalogTracker.stop();
2393     }
2394   }
2395 
2396   @Override
2397   public boolean isStopped() {
2398     return this.stopped;
2399   }
2400 
2401   @Override
2402   public boolean isAborted() {
2403     return this.abort;
2404   }
2405 
2406   void checkInitialized() throws PleaseHoldException {
2407     if (!this.initialized) {
2408       throw new PleaseHoldException("Master is initializing");
2409     }
2410   }
2411 
2412   /**
2413    * Report whether this master is currently the active master or not.
2414    * If not active master, we are parked on ZK waiting to become active.
2415    *
2416    * This method is used for testing.
2417    *
2418    * @return true if active master, false if not.
2419    */
2420   public boolean isActiveMaster() {
2421     return isActiveMaster;
2422   }
2423 
2424   /**
2425    * Report whether this master has completed with its initialization and is
2426    * ready.  If ready, the master is also the active master.  A standby master
2427    * is never ready.
2428    *
2429    * This method is used for testing.
2430    *
2431    * @return true if master is ready to go, false if not.
2432    */
2433   @Override
2434   public boolean isInitialized() {
2435     return initialized;
2436   }
2437 
2438   /**
2439    * ServerShutdownHandlerEnabled is set to false before completing
2440    * assignMeta to prevent processing of ServerShutdownHandler.
2441    * @return true if assignMeta has completed
2442    */
2443   @Override
2444   public boolean isServerShutdownHandlerEnabled() {
2445     return this.serverShutdownHandlerEnabled;
2446   }
2447 
2448   /**
2449    * Report whether this master has started initialization and is about to do meta region assignment.
2450    * @return true if master is in initialization and about to assign hbase:meta regions
2451    */
2452   public boolean isInitializationStartsMetaRegionAssignment() {
2453     return this.initializationBeforeMetaAssignment;
2454   }
2455 
2456   @Override
2457   public AssignRegionResponse assignRegion(RpcController controller, AssignRegionRequest req)
2458   throws ServiceException {
2459     try {
2460       final byte [] regionName = req.getRegion().getValue().toByteArray();
2461       RegionSpecifierType type = req.getRegion().getType();
2462       AssignRegionResponse arr = AssignRegionResponse.newBuilder().build();
2463 
2464       checkInitialized();
2465       if (type != RegionSpecifierType.REGION_NAME) {
2466         LOG.warn("assignRegion specifier type: expected: " + RegionSpecifierType.REGION_NAME
2467           + " actual: " + type);
2468       }
2469       HRegionInfo regionInfo = assignmentManager.getRegionStates().getRegionInfo(regionName);
2470       if (regionInfo == null) throw new UnknownRegionException(Bytes.toString(regionName));
2471       if (cpHost != null) {
2472         if (cpHost.preAssign(regionInfo)) {
2473           return arr;
2474         }
2475       }
2476       LOG.info(getClientIdAuditPrefix() + " assign " + regionInfo.getRegionNameAsString());
2477       assignmentManager.assign(regionInfo, true, true);
2478       if (cpHost != null) {
2479         cpHost.postAssign(regionInfo);
2480       }
2481 
2482       return arr;
2483     } catch (IOException ioe) {
2484       throw new ServiceException(ioe);
2485     }
2486   }
2487 
2488   public void assignRegion(HRegionInfo hri) {
2489     assignmentManager.assign(hri, true);
2490   }
2491 
2492   @Override
2493   public UnassignRegionResponse unassignRegion(RpcController controller, UnassignRegionRequest req)
2494   throws ServiceException {
2495     try {
2496       final byte [] regionName = req.getRegion().getValue().toByteArray();
2497       RegionSpecifierType type = req.getRegion().getType();
2498       final boolean force = req.getForce();
2499       UnassignRegionResponse urr = UnassignRegionResponse.newBuilder().build();
2500 
2501       checkInitialized();
2502       if (type != RegionSpecifierType.REGION_NAME) {
2503         LOG.warn("unassignRegion specifier type: expected: " + RegionSpecifierType.REGION_NAME
2504           + " actual: " + type);
2505       }
2506       Pair<HRegionInfo, ServerName> pair =
2507         MetaReader.getRegion(this.catalogTracker, regionName);
2508       if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
2509       HRegionInfo hri = pair.getFirst();
2510       if (cpHost != null) {
2511         if (cpHost.preUnassign(hri, force)) {
2512           return urr;
2513         }
2514       }
2515       LOG.debug(getClientIdAuditPrefix() + " unassign " + hri.getRegionNameAsString()
2516           + " in current location if it is online and reassign.force=" + force);
2517       this.assignmentManager.unassign(hri, force);
2518       if (this.assignmentManager.getRegionStates().isRegionOffline(hri)) {
2519         LOG.debug("Region " + hri.getRegionNameAsString()
2520             + " is not online on any region server, reassigning it.");
2521         assignRegion(hri);
2522       }
2523       if (cpHost != null) {
2524         cpHost.postUnassign(hri, force);
2525       }
2526 
2527       return urr;
2528     } catch (IOException ioe) {
2529       throw new ServiceException(ioe);
2530     }
2531   }
2532 
2533   /**
2534    * Get list of TableDescriptors for requested tables.
2535    * @param controller Unused (set to null).
2536    * @param req GetTableDescriptorsRequest that contains:
2537    * - tableNames: requested tables, or if empty, all are requested
2538    * @return GetTableDescriptorsResponse
2539    * @throws ServiceException
2540    */
2541   @Override
2542   public GetTableDescriptorsResponse getTableDescriptors(
2543       RpcController controller, GetTableDescriptorsRequest req) throws ServiceException {
2544     List<HTableDescriptor> descriptors = new ArrayList<HTableDescriptor>();
2545     List<TableName> tableNameList = new ArrayList<TableName>();
2546     for (HBaseProtos.TableName tableNamePB : req.getTableNamesList()) {
2547       tableNameList.add(ProtobufUtil.toTableName(tableNamePB));
2548     }
2549     boolean bypass = false;
2550     if (this.cpHost != null) {
2551       try {
2552         bypass = this.cpHost.preGetTableDescriptors(tableNameList, descriptors);
2553       } catch (IOException ioe) {
2554         throw new ServiceException(ioe);
2555       }
2556     }
2557 
2558     if (!bypass) {
2559       if (req.getTableNamesCount() == 0) {
2560         // request for all TableDescriptors
2561         Map<String, HTableDescriptor> descriptorMap = null;
2562         try {
2563           descriptorMap = this.tableDescriptors.getAll();
2564         } catch (IOException e) {
2565           LOG.warn("Failed getting all descriptors", e);
2566         }
2567         if (descriptorMap != null) {
2568           for (HTableDescriptor desc: descriptorMap.values()) {
2569             if (!desc.getTableName().isSystemTable()) {
2570               descriptors.add(desc);
2571             }
2572           }
2573         }
2574       } else {
2575         for (TableName s: tableNameList) {
2576           try {
2577             HTableDescriptor desc = this.tableDescriptors.get(s);
2578             if (desc != null) {
2579               descriptors.add(desc);
2580             }
2581           } catch (IOException e) {
2582             LOG.warn("Failed getting descriptor for " + s, e);
2583           }
2584         }
2585       }
2586 
2587       if (this.cpHost != null) {
2588         try {
2589           this.cpHost.postGetTableDescriptors(descriptors);
2590         } catch (IOException ioe) {
2591           throw new ServiceException(ioe);
2592         }
2593       }
2594     }
2595 
2596     GetTableDescriptorsResponse.Builder builder = GetTableDescriptorsResponse.newBuilder();
2597     for (HTableDescriptor htd: descriptors) {
2598       builder.addTableSchema(htd.convert());
2599     }
2600     return builder.build();
2601   }
2602 
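  /*
   * Editor's illustrative sketch (not part of the original source): the common
   * client entry point for the RPC above is HBaseAdmin#listTables(), which
   * returns the same user-space descriptors assembled here. HBaseAdmin is
   * assumed imported from org.apache.hadoop.hbase.client.
   */
  private static void exampleListTables(final Configuration conf) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      for (HTableDescriptor htd : admin.listTables()) {
        System.out.println(htd.getTableName() + ": "
            + htd.getFamilies().size() + " column families");
      }
    } finally {
      admin.close();
    }
  }
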
2603   /**
2604    * Get list of userspace table names
2605    * @param controller Unused (set to null).
2606    * @param req GetTableNamesRequest
2607    * @return GetTableNamesResponse
2608    * @throws ServiceException
2609    */
2610   @Override
2611   public GetTableNamesResponse getTableNames(
2612         RpcController controller, GetTableNamesRequest req) throws ServiceException {
2613     try {
2614       Collection<HTableDescriptor> descriptors = this.tableDescriptors.getAll().values();
2615       GetTableNamesResponse.Builder builder = GetTableNamesResponse.newBuilder();
2616       for (HTableDescriptor descriptor: descriptors) {
2617         if (descriptor.getTableName().isSystemTable()) {
2618           continue;
2619         }
2620         builder.addTableNames(ProtobufUtil.toProtoTableName(descriptor.getTableName()));
2621       }
2622       return builder.build();
2623     } catch (IOException e) {
2624       throw new ServiceException(e);
2625     }
2626   }
2627 
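  /*
   * Editor's illustrative sketch (not part of the original source): the
   * name-only counterpart of the listing above, cheaper than fetching full
   * descriptors. HBaseAdmin#listTableNames() is assumed available; HBaseAdmin
   * is assumed imported from org.apache.hadoop.hbase.client.
   */
  private static void examplePrintTableNames(final Configuration conf) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      for (TableName tn : admin.listTableNames()) {
        System.out.println(tn.getNameAsString());
      }
    } finally {
      admin.close();
    }
  }
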
2628   /**
2629    * Compute the average load across all region servers.
2630    * Currently this is a naive computation: it considers only the number of
2631    * regions being served, ignoring request-rate statistics.
2632    * @return the average load
2633    */
2634   public double getAverageLoad() {
2635     if (this.assignmentManager == null) {
2636       return 0;
2637     }
2638 
2639     RegionStates regionStates = this.assignmentManager.getRegionStates();
2640     if (regionStates == null) {
2641       return 0;
2642     }
2643     return regionStates.getAverageLoad();
2644   }
2645 
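  /*
   * Editor's illustrative sketch (not part of the original source): the naive
   * average above is (total regions) / (live servers), so 300 regions on 4
   * servers yields 75.0. Clients can read the same figure off ClusterStatus;
   * HBaseAdmin is assumed imported from org.apache.hadoop.hbase.client.
   */
  private static double exampleAverageLoad(final Configuration conf) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      return admin.getClusterStatus().getAverageLoad();
    } finally {
      admin.close();
    }
  }
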
2646   /**
2647    * Offline the specified region from the master's in-memory state. Unlike
2648    * unassign, it makes no attempt to reassign the region.
2649    *
2650    * This is a special method that should be used by experts or hbck.
2651    *
2652    */
2653   @Override
2654   public OfflineRegionResponse offlineRegion(RpcController controller, OfflineRegionRequest request)
2655   throws ServiceException {
2656     final byte [] regionName = request.getRegion().getValue().toByteArray();
2657     RegionSpecifierType type = request.getRegion().getType();
2658     if (type != RegionSpecifierType.REGION_NAME) {
2659     LOG.warn("offlineRegion specifier type: expected: " + RegionSpecifierType.REGION_NAME
2660         + " actual: " + type);
2661     }
2662 
2663     try {
2664       Pair<HRegionInfo, ServerName> pair =
2665         MetaReader.getRegion(this.catalogTracker, regionName);
2666       if (pair == null) throw new UnknownRegionException(Bytes.toStringBinary(regionName));
2667       HRegionInfo hri = pair.getFirst();
2668       if (cpHost != null) {
2669         cpHost.preRegionOffline(hri);
2670       }
2671       LOG.info(getClientIdAuditPrefix() + " offline " + hri.getRegionNameAsString());
2672       this.assignmentManager.regionOffline(hri);
2673       if (cpHost != null) {
2674         cpHost.postRegionOffline(hri);
2675       }
2676     } catch (IOException ioe) {
2677       throw new ServiceException(ioe);
2678     }
2679     return OfflineRegionResponse.newBuilder().build();
2680   }
2681 
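  /*
   * Editor's illustrative sketch (not part of the original source): hbck-style
   * use of the offline RPC above. Unlike unassign, nothing is closed on the
   * hosting region server; only the master's in-memory state changes. The
   * region name is hypothetical; HBaseAdmin is assumed imported.
   */
  private static void exampleOfflineRegion(final Configuration conf,
      final byte [] regionName) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      admin.offline(regionName);  // expert/hbck use only
    } finally {
      admin.close();
    }
  }
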
2682   @Override
2683   public boolean registerService(Service instance) {
2684     /*
2685      * No stacking of instances is allowed for a single service name
2686      */
2687     Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
2688     if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
2689       LOG.error("Coprocessor service " + serviceDesc.getFullName() +
2690           " already registered, rejecting request from " + instance
2691       );
2692       return false;
2693     }
2694 
2695     coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
2696     if (LOG.isDebugEnabled()) {
2697       LOG.debug("Registered master coprocessor service: service=" + serviceDesc.getFullName());
2698     }
2699     return true;
2700   }
2701 
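  /*
   * Editor's illustrative sketch (not part of the original source): the
   * "no stacking" rule above means a second registration under the same
   * service name is refused. Method and variable names are hypothetical.
   */
  private boolean exampleNoStacking(final Service s) {
    boolean first = registerService(s);   // true: name was free, service registered
    boolean second = registerService(s);  // false: same name again, request rejected
    return first && !second;              // holds for any service not yet registered
  }
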
2702   @Override
2703   public ClientProtos.CoprocessorServiceResponse execMasterService(final RpcController controller,
2704       final ClientProtos.CoprocessorServiceRequest request) throws ServiceException {
2705     try {
2706       ServerRpcController execController = new ServerRpcController();
2707 
2708       ClientProtos.CoprocessorServiceCall call = request.getCall();
2709       String serviceName = call.getServiceName();
2710       String methodName = call.getMethodName();
2711       if (!coprocessorServiceHandlers.containsKey(serviceName)) {
2712         throw new UnknownProtocolException(null,
2713             "No registered master coprocessor service found for name " + serviceName);
2714       }
2715 
2716       Service service = coprocessorServiceHandlers.get(serviceName);
2717       Descriptors.ServiceDescriptor serviceDesc = service.getDescriptorForType();
2718       Descriptors.MethodDescriptor methodDesc = serviceDesc.findMethodByName(methodName);
2719       if (methodDesc == null) {
2720         throw new UnknownProtocolException(service.getClass(),
2721             "Unknown method " + methodName + " called on master service " + serviceName);
2722       }
2723 
2724       //invoke the method
2725       Message execRequest = service.getRequestPrototype(methodDesc).newBuilderForType()
2726           .mergeFrom(call.getRequest()).build();
2727       final Message.Builder responseBuilder =
2728           service.getResponsePrototype(methodDesc).newBuilderForType();
2729       service.callMethod(methodDesc, execController, execRequest, new RpcCallback<Message>() {
2730         @Override
2731         public void run(Message message) {
2732           if (message != null) {
2733             responseBuilder.mergeFrom(message);
2734           }
2735         }
2736       });
2737       Message execResult = responseBuilder.build();
2738 
2739       if (execController.getFailedOn() != null) {
2740         throw execController.getFailedOn();
2741       }
2742       ClientProtos.CoprocessorServiceResponse.Builder builder =
2743           ClientProtos.CoprocessorServiceResponse.newBuilder();
2744       builder.setRegion(RequestConverter.buildRegionSpecifier(
2745           RegionSpecifierType.REGION_NAME, HConstants.EMPTY_BYTE_ARRAY));
2746       builder.setValue(
2747           builder.getValueBuilder().setName(execResult.getClass().getName())
2748               .setValue(execResult.toByteString()));
2749       return builder.build();
2750     } catch (IOException ie) {
2751       throw new ServiceException(ie);
2752     }
2753   }
2754 
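  /*
   * Editor's illustrative sketch (not part of the original source): on the
   * client side, a master endpoint is reached through a coprocessor RPC
   * channel and a protobuf-generated stub. "MyMasterService", "MyRequest" and
   * "MyResponse" below are hypothetical generated classes, and
   * HBaseAdmin#coprocessorService() is assumed as the channel factory:
   *
   *   CoprocessorRpcChannel channel = admin.coprocessorService();
   *   MyMasterService.BlockingInterface stub =
   *       MyMasterService.newBlockingStub(channel);
   *   MyResponse resp = stub.myMethod(null, MyRequest.getDefaultInstance());
   */
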
2755   /**
2756    * Utility for constructing an instance of the passed HMaster class.
2757    * @param masterClass the HMaster subclass to instantiate
2758    * @param conf the configuration handed to the (Configuration) constructor
2759    * @return HMaster instance.
2760    */
2761   public static HMaster constructMaster(Class<? extends HMaster> masterClass,
2762       final Configuration conf)  {
2763     try {
2764       Constructor<? extends HMaster> c =
2765         masterClass.getConstructor(Configuration.class);
2766       return c.newInstance(conf);
2767     } catch (InvocationTargetException ite) {
2768       Throwable target = ite.getTargetException() != null ?
2769         ite.getTargetException() : ite;
2770       if (target.getCause() != null) target = target.getCause();
2771       throw new RuntimeException("Failed construction of Master: " +
2772         masterClass.toString(), target);
2773     } catch (Exception e) {
2774       throw new RuntimeException("Failed construction of Master: " +
2775         masterClass.toString() + ((e.getCause() != null) ?
2776           ": " + e.getCause().getMessage() : ""), e);
2777     }
2778   }
2779 
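  /*
   * Editor's illustrative sketch (not part of the original source):
   * constructMaster in its trivial form. Assumes
   * org.apache.hadoop.hbase.HBaseConfiguration for the default configuration.
   */
  private static HMaster exampleConstructMaster() {
    Configuration conf = org.apache.hadoop.hbase.HBaseConfiguration.create();
    // reflection finds the (Configuration) constructor; failures surface as RuntimeException
    return constructMaster(HMaster.class, conf);
  }
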
2780   /**
2781    * @see org.apache.hadoop.hbase.master.HMasterCommandLine
2782    */
2783   public static void main(String [] args) {
2784     VersionInfo.logVersion();
2785     new HMasterCommandLine(HMaster.class).doMain(args);
2786   }
2787 
2788   public HFileCleaner getHFileCleaner() {
2789     return this.hfileCleaner;
2790   }
2791 
2792   /**
2793    * Exposed for TESTING!
2794    * @return the underlying snapshot manager
2795    */
2796   public SnapshotManager getSnapshotManagerForTesting() {
2797     return this.snapshotManager;
2798   }
2799 
2800   /**
2801    * Triggers an asynchronous attempt to take a snapshot.
2802    * {@inheritDoc}
2803    */
2804   @Override
2805   public SnapshotResponse snapshot(RpcController controller, SnapshotRequest request)
2806       throws ServiceException {
2807     try {
2808       this.snapshotManager.checkSnapshotSupport();
2809     } catch (UnsupportedOperationException e) {
2810       throw new ServiceException(e);
2811     }
2812 
2813     LOG.info(getClientIdAuditPrefix() + " snapshot request for: " +
2814         ClientSnapshotDescriptionUtils.toString(request.getSnapshot()));
2815     // get the snapshot information
2816     SnapshotDescription snapshot = SnapshotDescriptionUtils.validate(request.getSnapshot(),
2817       this.conf);
2818     try {
2819       snapshotManager.takeSnapshot(snapshot);
2820     } catch (IOException e) {
2821       throw new ServiceException(e);
2822     }
2823 
2824     // send back the max amount of time the client should wait for the snapshot to complete
2825     long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(conf, snapshot.getType(),
2826       SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
2827     return SnapshotResponse.newBuilder().setExpectedTimeout(waitTime).build();
2828   }
2829 
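  /*
   * Editor's illustrative sketch (not part of the original source): taking a
   * snapshot from a client. HBaseAdmin#snapshot sends the SnapshotRequest
   * handled above and then polls isSnapshotDone() until completion. The table
   * and snapshot names are hypothetical; HBaseAdmin is assumed imported.
   */
  private static void exampleTakeSnapshot(final Configuration conf) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      admin.snapshot("demo_snapshot", TableName.valueOf("demo_table"));
    } finally {
      admin.close();
    }
  }
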
2830   /**
2831    * List the currently available/stored snapshots. Any in-progress snapshots are ignored.
2832    */
2833   @Override
2834   public GetCompletedSnapshotsResponse getCompletedSnapshots(RpcController controller,
2835       GetCompletedSnapshotsRequest request) throws ServiceException {
2836     try {
2837       GetCompletedSnapshotsResponse.Builder builder = GetCompletedSnapshotsResponse.newBuilder();
2838       List<SnapshotDescription> snapshots = snapshotManager.getCompletedSnapshots();
2839 
2840       // convert to protobuf
2841       for (SnapshotDescription snapshot : snapshots) {
2842         builder.addSnapshots(snapshot);
2843       }
2844       return builder.build();
2845     } catch (IOException e) {
2846       throw new ServiceException(e);
2847     }
2848   }
2849 
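  /*
   * Editor's illustrative sketch (not part of the original source): the
   * client-side listing backed by the RPC above; the returned protos carry
   * the snapshot name and source table. HBaseAdmin is assumed imported.
   */
  private static void examplePrintSnapshots(final Configuration conf) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      for (SnapshotDescription sd : admin.listSnapshots()) {
        System.out.println(sd.getName() + " (table " + sd.getTable() + ")");
      }
    } finally {
      admin.close();
    }
  }
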
2850   /**
2851    * Execute Delete Snapshot operation.
2852    * @return DeleteSnapshotResponse (a protobuf wrapped void) if the snapshot existed and was
2853    *    deleted properly.
2854    * @throws ServiceException wrapping SnapshotDoesNotExistException if the specified snapshot
2855    *    does not exist.
2856    */
2857   @Override
2858   public DeleteSnapshotResponse deleteSnapshot(RpcController controller,
2859       DeleteSnapshotRequest request) throws ServiceException {
2860     try {
2861       this.snapshotManager.checkSnapshotSupport();
2862     } catch (UnsupportedOperationException e) {
2863       throw new ServiceException(e);
2864     }
2865 
2866     try {
2867       LOG.info(getClientIdAuditPrefix() + " delete " + request.getSnapshot());
2868       snapshotManager.deleteSnapshot(request.getSnapshot());
2869       return DeleteSnapshotResponse.newBuilder().build();
2870     } catch (IOException e) {
2871       throw new ServiceException(e);
2872     }
2873   }
2874 
2875   /**
2876    * Checks if the specified snapshot is done.
2877    * @return true if the snapshot is in the file system and ready to use;
2878    *   false if the snapshot is still in the process of completing
2879    * @throws ServiceException wrapping UnknownSnapshotException if the snapshot is invalid, or
2880    *  a wrapped HBaseSnapshotException with the reason the snapshot failed to progress.
2881    */
2882   @Override
2883   public IsSnapshotDoneResponse isSnapshotDone(RpcController controller,
2884       IsSnapshotDoneRequest request) throws ServiceException {
2885     LOG.debug("Checking to see if snapshot from request: " +
2886         ClientSnapshotDescriptionUtils.toString(request.getSnapshot()) + " is done");
2887     try {
2888       IsSnapshotDoneResponse.Builder builder = IsSnapshotDoneResponse.newBuilder();
2889       boolean done = snapshotManager.isSnapshotDone(request.getSnapshot());
2890       builder.setDone(done);
2891       return builder.build();
2892     } catch (IOException e) {
2893       throw new ServiceException(e);
2894     }
2895   }
2896 
2897   /**
2898    * Execute Restore/Clone snapshot operation.
2899    *
2900    * <p>If the specified table exists, a "Restore" is executed, replacing the table
2901    * schema and directory data with the content of the snapshot.
2902    * The table must be disabled, or an UnsupportedOperationException will be thrown.
2903    *
2904    * <p>If the table doesn't exist, a "Clone" is executed: a new table is created
2905    * using the schema and the content captured at the time of the snapshot.
2906    *
2907    * <p>The restore/clone operation does not require copying HFiles. Since HFiles
2908    * are immutable, the table can point to and use the same files as the original one.
2909    */
2910   @Override
2911   public RestoreSnapshotResponse restoreSnapshot(RpcController controller,
2912       RestoreSnapshotRequest request) throws ServiceException {
2913     try {
2914       this.snapshotManager.checkSnapshotSupport();
2915     } catch (UnsupportedOperationException e) {
2916       throw new ServiceException(e);
2917     }
2918 
2919     try {
2920       SnapshotDescription reqSnapshot = request.getSnapshot();
2921       snapshotManager.restoreSnapshot(reqSnapshot);
2922       return RestoreSnapshotResponse.newBuilder().build();
2923     } catch (IOException e) {
2924       throw new ServiceException(e);
2925     }
2926   }
2927 
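  /*
   * Editor's illustrative sketch (not part of the original source): the
   * restore path described above, driven from a client. A clone would skip the
   * disable/enable steps since its target table does not exist yet. Names are
   * hypothetical; HBaseAdmin is assumed imported.
   */
  private static void exampleRestore(final Configuration conf) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      admin.disableTable("demo_table");        // restore requires a disabled table
      admin.restoreSnapshot("demo_snapshot");  // schema and data revert to the snapshot
      admin.enableTable("demo_table");
    } finally {
      admin.close();
    }
  }
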
2928   /**
2929    * Returns the status of the requested snapshot restore/clone operation.
2930    * This method is not exposed to the user; it is used internally by HBaseAdmin
2931    * to check whether the restore has completed.
2932    *
2933    * No exception is thrown if no restore is in progress; the result is simply "done".
2934    *
2935    * @return done <tt>true</tt> if the restore/clone operation is completed.
2936    * @throws ServiceException if the operation failed.
2937    */
2938   @Override
2939   public IsRestoreSnapshotDoneResponse isRestoreSnapshotDone(RpcController controller,
2940       IsRestoreSnapshotDoneRequest request) throws ServiceException {
2941     try {
2942       SnapshotDescription snapshot = request.getSnapshot();
2943       IsRestoreSnapshotDoneResponse.Builder builder = IsRestoreSnapshotDoneResponse.newBuilder();
2944       boolean done = snapshotManager.isRestoreDone(snapshot);
2945       builder.setDone(done);
2946       return builder.build();
2947     } catch (IOException e) {
2948       throw new ServiceException(e);
2949     }
2950   }
2951 
2952   @Override
2953   public ModifyNamespaceResponse modifyNamespace(RpcController controller,
2954       ModifyNamespaceRequest request) throws ServiceException {
2955     try {
2956       modifyNamespace(ProtobufUtil.toNamespaceDescriptor(request.getNamespaceDescriptor()));
2957       return ModifyNamespaceResponse.getDefaultInstance();
2958     } catch (IOException e) {
2959       throw new ServiceException(e);
2960     }
2961   }
2962 
2963   @Override
2964   public CreateNamespaceResponse createNamespace(RpcController controller,
2965      CreateNamespaceRequest request) throws ServiceException {
2966     try {
2967       createNamespace(ProtobufUtil.toNamespaceDescriptor(request.getNamespaceDescriptor()));
2968       return CreateNamespaceResponse.getDefaultInstance();
2969     } catch (IOException e) {
2970       throw new ServiceException(e);
2971     }
2972   }
2973 
2974   @Override
2975   public DeleteNamespaceResponse deleteNamespace(RpcController controller,
2976       DeleteNamespaceRequest request) throws ServiceException {
2977     try {
2978       deleteNamespace(request.getNamespaceName());
2979       return DeleteNamespaceResponse.getDefaultInstance();
2980     } catch (IOException e) {
2981       throw new ServiceException(e);
2982     }
2983   }
2984 
2985   @Override
2986   public GetNamespaceDescriptorResponse getNamespaceDescriptor(
2987       RpcController controller, GetNamespaceDescriptorRequest request)
2988       throws ServiceException {
2989     try {
2990       return GetNamespaceDescriptorResponse.newBuilder()
2991           .setNamespaceDescriptor(
2992               ProtobufUtil.toProtoNamespaceDescriptor(getNamespaceDescriptor(request.getNamespaceName())))
2993           .build();
2994     } catch (IOException e) {
2995       throw new ServiceException(e);
2996     }
2997   }
2998 
2999   @Override
3000   public ListNamespaceDescriptorsResponse listNamespaceDescriptors(
3001       RpcController controller, ListNamespaceDescriptorsRequest request)
3002       throws ServiceException {
3003     try {
3004       ListNamespaceDescriptorsResponse.Builder response =
3005           ListNamespaceDescriptorsResponse.newBuilder();
3006       for (NamespaceDescriptor ns: listNamespaceDescriptors()) {
3007         response.addNamespaceDescriptor(ProtobufUtil.toProtoNamespaceDescriptor(ns));
3008       }
3009       return response.build();
3010     } catch (IOException e) {
3011       throw new ServiceException(e);
3012     }
3013   }
3014 
3015   @Override
3016   public ListTableDescriptorsByNamespaceResponse listTableDescriptorsByNamespace(
3017       RpcController controller, ListTableDescriptorsByNamespaceRequest request)
3018       throws ServiceException {
3019     try {
3020       ListTableDescriptorsByNamespaceResponse.Builder b =
3021           ListTableDescriptorsByNamespaceResponse.newBuilder();
3022       for (HTableDescriptor htd: listTableDescriptorsByNamespace(request.getNamespaceName())) {
3023         b.addTableSchema(htd.convert());
3024       }
3025       return b.build();
3026     } catch (IOException e) {
3027       throw new ServiceException(e);
3028     }
3029   }
3030 
3031   @Override
3032   public ListTableNamesByNamespaceResponse listTableNamesByNamespace(
3033       RpcController controller, ListTableNamesByNamespaceRequest request)
3034       throws ServiceException {
3035     try {
3036       ListTableNamesByNamespaceResponse.Builder b =
3037           ListTableNamesByNamespaceResponse.newBuilder();
3038       for (TableName tableName: listTableNamesByNamespace(request.getNamespaceName())) {
3039         b.addTableName(ProtobufUtil.toProtoTableName(tableName));
3040       }
3041       return b.build();
3042     } catch (IOException e) {
3043       throw new ServiceException(e);
3044     }
3045   }
3046 
3047   private boolean isHealthCheckerConfigured() {
3048     String healthScriptLocation = this.conf.get(HConstants.HEALTH_SCRIPT_LOC);
3049     return org.apache.commons.lang.StringUtils.isNotBlank(healthScriptLocation);
3050   }
3051 
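  /*
   * Editor's illustrative sketch (not part of the original source): the health
   * checker is driven purely by configuration; any non-blank script location
   * enables the chore. The script path is hypothetical.
   */
  private static void exampleEnableHealthChecker(final Configuration conf) {
    // equivalent to setting hbase.node.health.script.location in hbase-site.xml
    conf.set(HConstants.HEALTH_SCRIPT_LOC, "/etc/hbase/healthcheck.sh");
  }
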
3052   @Override
3053   public void createNamespace(NamespaceDescriptor descriptor) throws IOException {
3054     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
3055     if (cpHost != null) {
3056       if (cpHost.preCreateNamespace(descriptor)) {
3057         return;
3058       }
3059     }
3060     LOG.info(getClientIdAuditPrefix() + " creating " + descriptor);
3061     tableNamespaceManager.create(descriptor);
3062     if (cpHost != null) {
3063       cpHost.postCreateNamespace(descriptor);
3064     }
3065   }
3066 
3067   @Override
3068   public void modifyNamespace(NamespaceDescriptor descriptor) throws IOException {
3069     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
3070     if (cpHost != null) {
3071       if (cpHost.preModifyNamespace(descriptor)) {
3072         return;
3073       }
3074     }
3075     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
3076     tableNamespaceManager.update(descriptor);
3077     if (cpHost != null) {
3078       cpHost.postModifyNamespace(descriptor);
3079     }
3080   }
3081 
3082   @Override
3083   public void deleteNamespace(String name) throws IOException {
3084     if (cpHost != null) {
3085       if (cpHost.preDeleteNamespace(name)) {
3086         return;
3087       }
3088     }
3089     LOG.info(getClientIdAuditPrefix() + " delete " + name);
3090     tableNamespaceManager.remove(name);
3091     if (cpHost != null) {
3092       cpHost.postDeleteNamespace(name);
3093     }
3094   }
3095 
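  /*
   * Editor's illustrative sketch (not part of the original source): the
   * create/modify/delete cycle above as a client would drive it. The namespace
   * name and property are hypothetical; HBaseAdmin is assumed imported.
   */
  private static void exampleNamespaceLifecycle(final Configuration conf) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      admin.createNamespace(NamespaceDescriptor.create("demo_ns").build());
      admin.modifyNamespace(NamespaceDescriptor.create("demo_ns")
          .addConfiguration("demo_key", "demo_value").build());
      admin.deleteNamespace("demo_ns");  // fails if the namespace still holds tables
    } finally {
      admin.close();
    }
  }
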
3096   @Override
3097   public NamespaceDescriptor getNamespaceDescriptor(String name) throws IOException {
3098     boolean ready = tableNamespaceManager != null &&
3099         tableNamespaceManager.isTableAvailableAndInitialized();
3100     if (!ready) {
3101       throw new IOException("Table Namespace Manager not ready yet, try again later");
3102     }
3103     NamespaceDescriptor nsd = tableNamespaceManager.get(name);
3104     if (nsd == null) {
3105       throw new NamespaceNotFoundException(name);
3106     }
3107     return nsd;
3108   }
3109 
3110   @Override
3111   public List<NamespaceDescriptor> listNamespaceDescriptors() throws IOException {
3112     return Lists.newArrayList(tableNamespaceManager.list());
3113   }
3114 
3115   @Override
3116   public List<HTableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
3117     getNamespaceDescriptor(name); // check that namespace exists
3118     return Lists.newArrayList(tableDescriptors.getByNamespace(name).values());
3119   }
3120 
3121   @Override
3122   public List<TableName> listTableNamesByNamespace(String name) throws IOException {
3123     List<TableName> tableNames = Lists.newArrayList();
3124     getNamespaceDescriptor(name); // check that namespace exists
3125     for (HTableDescriptor descriptor: tableDescriptors.getByNamespace(name).values()) {
3126       tableNames.add(descriptor.getTableName());
3127     }
3128     return tableNames;
3129   }
3130 
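  /*
   * Editor's illustrative sketch (not part of the original source): the
   * client-side view of the two namespace listings above. The namespace name
   * is hypothetical; HBaseAdmin is assumed imported.
   */
  private static void exampleListByNamespace(final Configuration conf) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      for (HTableDescriptor htd : admin.listTableDescriptorsByNamespace("demo_ns")) {
        System.out.println(htd.getTableName().getNameAsString());
      }
    } finally {
      admin.close();
    }
  }
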
3131 }