1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import java.io.IOException;
22 import java.lang.reflect.Constructor;
23 import java.lang.reflect.InvocationTargetException;
24 import java.net.InetAddress;
25 import java.net.InetSocketAddress;
26 import java.net.UnknownHostException;
27 import java.util.ArrayList;
28 import java.util.Collections;
29 import java.util.Comparator;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.Set;
33 import java.util.concurrent.Callable;
34 import java.util.concurrent.ExecutionException;
35 import java.util.concurrent.Executors;
36 import java.util.concurrent.Future;
37 import java.util.concurrent.TimeUnit;
38 import java.util.concurrent.atomic.AtomicReference;
39
40 import javax.management.ObjectName;
41
42 import org.apache.commons.logging.Log;
43 import org.apache.commons.logging.LogFactory;
44 import org.apache.hadoop.classification.InterfaceAudience;
45 import org.apache.hadoop.conf.Configuration;
46 import org.apache.hadoop.fs.Path;
47 import org.apache.hadoop.hbase.Abortable;
48 import org.apache.hadoop.hbase.Chore;
49 import org.apache.hadoop.hbase.ClusterId;
50 import org.apache.hadoop.hbase.ClusterStatus;
51 import org.apache.hadoop.hbase.HColumnDescriptor;
52 import org.apache.hadoop.hbase.HConstants;
53 import org.apache.hadoop.hbase.HRegionInfo;
54 import org.apache.hadoop.hbase.HTableDescriptor;
55 import org.apache.hadoop.hbase.HealthCheckChore;
56 import org.apache.hadoop.hbase.MasterAdminProtocol;
57 import org.apache.hadoop.hbase.MasterMonitorProtocol;
58 import org.apache.hadoop.hbase.RegionServerStatusProtocol;
59 import org.apache.hadoop.hbase.Server;
60 import org.apache.hadoop.hbase.ServerLoad;
61 import org.apache.hadoop.hbase.ServerName;
62 import org.apache.hadoop.hbase.TableDescriptors;
63 import org.apache.hadoop.hbase.catalog.CatalogTracker;
64 import org.apache.hadoop.hbase.catalog.MetaReader;
65 import org.apache.hadoop.hbase.client.HConnectionManager;
66 import org.apache.hadoop.hbase.client.MetaScanner;
67 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
68 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
69 import org.apache.hadoop.hbase.client.Result;
70 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
71 import org.apache.hadoop.hbase.exceptions.DeserializationException;
72 import org.apache.hadoop.hbase.exceptions.HBaseIOException;
73 import org.apache.hadoop.hbase.exceptions.MasterNotRunningException;
74 import org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException;
75 import org.apache.hadoop.hbase.exceptions.PleaseHoldException;
76 import org.apache.hadoop.hbase.exceptions.TableNotDisabledException;
77 import org.apache.hadoop.hbase.exceptions.TableNotFoundException;
78 import org.apache.hadoop.hbase.exceptions.UnknownProtocolException;
79 import org.apache.hadoop.hbase.exceptions.UnknownRegionException;
80 import org.apache.hadoop.hbase.executor.ExecutorService;
81 import org.apache.hadoop.hbase.executor.ExecutorType;
82 import org.apache.hadoop.hbase.ipc.HBaseServer;
83 import org.apache.hadoop.hbase.ipc.HBaseServerRPC;
84 import org.apache.hadoop.hbase.ipc.RpcServer;
85 import org.apache.hadoop.hbase.ipc.ServerRpcController;
86 import org.apache.hadoop.hbase.master.balancer.BalancerChore;
87 import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
88 import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
89 import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
90 import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
91 import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
92 import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
93 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
94 import org.apache.hadoop.hbase.master.handler.DispatchMergingRegionHandler;
95 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
96 import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
97 import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
98 import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
99 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
100 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
101 import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
102 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
103 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
104 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
105 import org.apache.hadoop.hbase.protobuf.RequestConverter;
106 import org.apache.hadoop.hbase.protobuf.ResponseConverter;
107 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
108 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
109 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameStringPair;
110 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType;
111 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
112 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AddColumnRequest;
113 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AddColumnResponse;
114 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AssignRegionRequest;
115 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AssignRegionResponse;
116 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.BalanceRequest;
117 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.BalanceResponse;
118 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CatalogScanRequest;
119 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CatalogScanResponse;
120 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CreateTableRequest;
121 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CreateTableResponse;
122 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteColumnRequest;
123 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteColumnResponse;
124 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteSnapshotRequest;
125 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteSnapshotResponse;
126 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteTableRequest;
127 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteTableResponse;
128 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DisableTableRequest;
129 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DisableTableResponse;
130 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DispatchMergingRegionsRequest;
131 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DispatchMergingRegionsResponse;
132 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableCatalogJanitorRequest;
133 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableCatalogJanitorResponse;
134 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableTableRequest;
135 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableTableResponse;
136 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsCatalogJanitorEnabledRequest;
137 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsCatalogJanitorEnabledResponse;
138 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsRestoreSnapshotDoneRequest;
139 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsRestoreSnapshotDoneResponse;
140 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsSnapshotDoneRequest;
141 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsSnapshotDoneResponse;
142 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ListSnapshotRequest;
143 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ListSnapshotResponse;
144 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyColumnRequest;
145 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyColumnResponse;
146 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyTableRequest;
147 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyTableResponse;
148 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.MoveRegionRequest;
149 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.MoveRegionResponse;
150 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.OfflineRegionRequest;
151 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.OfflineRegionResponse;
152 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.RestoreSnapshotRequest;
153 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.RestoreSnapshotResponse;
154 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.SetBalancerRunningRequest;
155 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.SetBalancerRunningResponse;
156 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ShutdownRequest;
157 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ShutdownResponse;
158 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.StopMasterRequest;
159 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.StopMasterResponse;
160 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.TakeSnapshotRequest;
161 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.TakeSnapshotResponse;
162 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.UnassignRegionRequest;
163 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.UnassignRegionResponse;
164 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetClusterStatusRequest;
165 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetClusterStatusResponse;
166 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetSchemaAlterStatusRequest;
167 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetSchemaAlterStatusResponse;
168 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetTableDescriptorsRequest;
169 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetTableDescriptorsResponse;
170 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningRequest;
171 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningResponse;
172 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdRequest;
173 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdResponse;
174 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest;
175 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerReportResponse;
176 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest;
177 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse;
178 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorRequest;
179 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorResponse;
180 import org.apache.hadoop.hbase.replication.regionserver.Replication;
181 import org.apache.hadoop.hbase.security.User;
182 import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
183 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
184 import org.apache.hadoop.hbase.trace.SpanReceiverHost;
185 import org.apache.hadoop.hbase.util.Bytes;
186 import org.apache.hadoop.hbase.util.CompressionTest;
187 import org.apache.hadoop.hbase.util.FSTableDescriptors;
188 import org.apache.hadoop.hbase.util.FSUtils;
189 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
190 import org.apache.hadoop.hbase.util.HasThread;
191 import org.apache.hadoop.hbase.util.InfoServer;
192 import org.apache.hadoop.hbase.util.Pair;
193 import org.apache.hadoop.hbase.util.Sleeper;
194 import org.apache.hadoop.hbase.util.Strings;
195 import org.apache.hadoop.hbase.util.Threads;
196 import org.apache.hadoop.hbase.util.VersionInfo;
197 import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
198 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
199 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
200 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
201 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
202 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
203 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
204 import org.apache.hadoop.metrics.util.MBeanUtil;
205 import org.apache.hadoop.net.DNS;
206 import org.apache.zookeeper.KeeperException;
207 import org.apache.zookeeper.Watcher;
208
209 import com.google.common.collect.Maps;
210 import com.google.protobuf.Descriptors;
211 import com.google.protobuf.Message;
212 import com.google.protobuf.RpcCallback;
213 import com.google.protobuf.RpcController;
214 import com.google.protobuf.Service;
215 import com.google.protobuf.ServiceException;
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235 @InterfaceAudience.Private
236 @SuppressWarnings("deprecation")
237 public class HMaster extends HasThread
238 implements MasterMonitorProtocol, MasterAdminProtocol, RegionServerStatusProtocol, MasterServices,
239 Server {
// Logger shared by every method of this class.
240 private static final Log LOG = LogFactory.getLog(HMaster.class.getName());
241
242
243
// Process-type label; the constructor uses it as the prefix of the thread name
// and of the ZooKeeperWatcher identifier.
244 public static final String MASTER = "master";
245
246
// Private copy of the Configuration handed to the constructor.
247 private final Configuration conf;
248
// Returned by getInfoServer(); it is not assigned in the visible part of this file.
249 private InfoServer infoServer;
250
251
// Created in the constructor and closed at the end of run().
252 private ZooKeeperWatcher zooKeeper;
253
// Created and registered as a ZooKeeper listener in becomeActiveMaster().
254 private ActiveMasterManager activeMasterManager;
255
// Created and started in initializeZKBasedSystemTrackers().
256 private RegionServerTracker regionServerTracker;
257
// Created and started in initializeZKBasedSystemTrackers().
258 private DrainingServerTracker drainingServerTracker;
259
// Created and started in initializeZKBasedSystemTrackers().
260 private LoadBalancerTracker loadBalancerTracker;
261
262
// RPC server built by the constructor, which also starts its threads.
263 private final RpcServer rpcServer;
264
265
// NOTE(review): not read or written in the visible part of this file; presumably
// tracks whether the RPC server is accepting requests - confirm.
266 private volatile boolean rpcServerOpen = false;
267
268
269
270
// Listener address reported by rpcServer once it has been created.
271 private final InetSocketAddress isa;
272
273
// Created in the constructor; passed on to MasterFileSystem and AssignmentManager.
274 private final MetricsMaster metricsMaster;
275
// Created at the start of finishInitialization().
276 private MasterFileSystem fileSystemManager;
277
278
// Created through createServerManager() in finishInitialization() when it is not a
// master-recovery run.
279 private ServerManager serverManager;
280
281
// Package-private; created in initializeZKBasedSystemTrackers().
282 AssignmentManager assignmentManager;
283
// Created through createCatalogTracker() and started in initializeZKBasedSystemTrackers().
284 private CatalogTracker catalogTracker;
285
// Created and started in becomeActiveMaster().
286 private ClusterStatusTracker clusterStatusTracker;
287
288
289
290
// Buffer whose capacity is read from "hbase.master.buffer.for.rs.fatals"
// (default 1*1024*1024). Judging by the name it holds fatal-error reports from
// region servers - TODO confirm against the code that writes to it.
291 private MemoryBoundedLogMessageBuffer rsFatals;
292
293
294
// Polled by loop() and isMetaLocation(); both exit once it becomes true.
295 private volatile boolean stopped = false;
296
// When set, the cleanup in run() does not call letRegionServersShutdown().
297 private volatile boolean abort = false;
298
// Set to true at the very start of finishInitialization().
299 private volatile boolean isActiveMaster = false;
300
301
302
// Package-private; set to true near the end of finishInitialization(), right after
// the "Master has completed initialization" log line.
303 volatile boolean initialized = false;
304
305
// Flipped to true by enableServerShutdownHandler() just before queued dead servers
// are processed.
306 private volatile boolean serverShutdownHandlerEnabled = false;
307
308
// Package-private; created in finishInitialization() when it is not a master-recovery run.
309 ExecutorService executorService;
310
// Obtained from LoadBalancerFactory in initializeZKBasedSystemTrackers().
311 private LoadBalancer balancer;
// Both chore threads are started in finishInitialization() (non-recovery runs only).
312 private Thread balancerChore;
313 private Thread clusterStatusChore;
// Created and started by the constructor when a status publisher class is configured.
314 private ClusterStatusPublisher clusterStatusPublisherChore = null;
315
// Created in finishInitialization() and started by startCatalogJanitorChore().
316 private CatalogJanitor catalogJanitorChore;
// Created and started in startServiceThreads().
317 private LogCleaner logCleaner;
// NOTE(review): not assigned in the visible part of this file.
318 private HFileCleaner hfileCleaner;
319
// Created in finishInitialization(); its postStartMaster() hook runs at the end of it.
320 private MasterCoprocessorHost cpHost;
// Built in the constructor from the DNS-derived hostname, the RPC listener port and
// the current time.
321 private final ServerName serverName;
322
// FSTableDescriptors instance created in finishInitialization().
323 private TableDescriptors tableDescriptors;
324
325
// Created in finishInitialization() via TableLockManager.createTableLockManager().
326 private TableLockManager tableLockManager;
327
328
// System.currentTimeMillis() captured when run() starts.
329 private long masterStartTime;
// System.currentTimeMillis() captured in finishInitialization().
330 private long masterActiveTime;
331
332
// Read from "hbase.regionserver.msginterval" (default 3 * 1000); loop() uses it as the
// minimum number of milliseconds between doMetrics() calls.
333 private final int msgInterval;
334
335
336
// NOTE(review): not used in the visible part of this file; presumably the registered
// JMX bean name - confirm.
337 private ObjectName mxBean = null;
338
339
// Read from "hbase.master.check.compression" (default true); its use is not visible here.
340 private final boolean masterCheckCompression;
341
// Created in finishInitialization(), which also calls loadSpanReceivers() on it.
342 private SpanReceiverHost spanReceiverHost;
343
// NOTE(review): populated and read outside the visible part of this file.
344 private Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
345
346
// Created at the end of initializeZKBasedSystemTrackers(); stopped in run().
347 private SnapshotManager snapshotManager;
348
349
// Created by the constructor only when isHealthCheckerConfigured() returns true.
350 private HealthCheckChore healthCheckChore;
351
352
353
354
355
356
357
358
359
360
361
362
363
364
/**
 * Builds the master: copies the configuration, resolves the address to listen on,
 * creates and starts the RPC server, logs in for ZooKeeper and for the master itself,
 * opens the ZooKeeper watcher, and sets up metrics plus the optional health-check and
 * cluster-status-publisher chores.
 *
 * NOTE(review): several reads below use the {@code conf} parameter rather than
 * {@code this.conf}, so they do not see the adjustments made to the private copy -
 * confirm this is intended.
 *
 * @param conf configuration to copy; the caller's instance is not modified here
 * @throws IllegalArgumentException if the hostname or the bind address does not resolve
 */
365 public HMaster(final Configuration conf)
366 throws IOException, KeeperException, InterruptedException {
// Work on a private copy so the adjustments below stay local to this master.
367 this.conf = new Configuration(conf);
368
// Set the HFile block cache size to zero in the master's copy of the configuration.
369 this.conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
370
371 HConnectionManager.setServerSideHConnectionRetries(this.conf, LOG);
372
// Host name comes from the configured DNS interface and nameserver.
373 String hostname = Strings.domainNamePointerToHostName(DNS.getDefaultHost(
374 conf.get("hbase.master.dns.interface", "default"),
375 conf.get("hbase.master.dns.nameserver", "default")));
376 int port = conf.getInt(HConstants.MASTER_PORT, HConstants.DEFAULT_MASTER_PORT);
377
// Fail fast when the host name cannot be resolved.
378 InetSocketAddress initialIsa = new InetSocketAddress(hostname, port);
379 if (initialIsa.getAddress() == null) {
380 throw new IllegalArgumentException("Failed resolve of hostname " + initialIsa);
381 }
382
// An explicit bind address, when configured, replaces the DNS-derived one for listening.
383 String bindAddress = conf.get("hbase.master.ipc.address");
384 if (bindAddress != null) {
385 initialIsa = new InetSocketAddress(bindAddress, port);
386 if (initialIsa.getAddress() == null) {
387 throw new IllegalArgumentException("Failed resolve of bind address " + initialIsa);
388 }
389 }
// Handler count falls back to the region server setting, then to 25.
390 int numHandlers = conf.getInt("hbase.master.handler.count",
391 conf.getInt("hbase.regionserver.handler.count", 25));
392 this.rpcServer = HBaseServerRPC.getServer(MasterMonitorProtocol.class, this,
393 new Class<?>[]{MasterMonitorProtocol.class,
394 MasterAdminProtocol.class, RegionServerStatusProtocol.class},
395 initialIsa.getHostName(),
396 initialIsa.getPort(),
397 numHandlers,
398 0,
399 conf.getBoolean("hbase.rpc.verbose", false), conf,
400 0);
401
// Record the address the server reports it is listening on. The server name still
// uses the DNS-derived hostname, not the bind address.
402 this.isa = this.rpcServer.getListenerAddress();
403 this.serverName = new ServerName(hostname,
404 this.isa.getPort(), System.currentTimeMillis());
405 this.rsFatals = new MemoryBoundedLogMessageBuffer(
406 conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));
407
408
// Log in for ZooKeeper using the client keytab/principal settings.
409 ZKUtil.loginClient(this.conf, "hbase.zookeeper.client.keytab.file",
410 "hbase.zookeeper.client.kerberos.principal", this.isa.getHostName());
411
412
// Log in as the master using the master keytab/principal settings.
413 User.login(conf, "hbase.master.keytab.file",
414 "hbase.master.kerberos.principal", this.isa.getHostName());
415
416 LOG.info("hbase.rootdir=" + FSUtils.getRootDir(this.conf) +
417 ", hbase.cluster.distributed=" + this.conf.getBoolean("hbase.cluster.distributed", false));
418
419
// Name this thread after the process type and server name.
420 setName(MASTER + "-" + this.serverName.toString());
421
422 Replication.decorateMasterConfiguration(this.conf);
423
424
425
// Provide a "mapred.task.id" value when none is configured.
426 if (this.conf.get("mapred.task.id") == null) {
427 this.conf.set("mapred.task.id", "hb_m_" + this.serverName.toString());
428 }
429
// The ZooKeeper watcher is created before the RPC threads are started.
430 this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this, true);
431 this.rpcServer.startThreads();
432
433
434 this.msgInterval = conf.getInt("hbase.regionserver.msginterval", 3 * 1000);
435
436
437 this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);
438
439 this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this));
440
441
// The health-check chore is only created when a health checker is configured.
442 int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
443 HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
444 if (isHealthCheckerConfigured()) {
445 healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
446 }
447
448
// Look up the cluster status publisher implementation; a null result leaves the
// publisher chore unset.
449 Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
450 conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
451 ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
452 ClusterStatusPublisher.Publisher.class);
453
454 if (publisherClass != null) {
455 clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
456 Threads.setDaemonThreadRunning(clusterStatusPublisherChore.getThread());
457 }
458 }
459
460
461
462
463
464
465
466
467 private static void stallIfBackupMaster(final Configuration c,
468 final ActiveMasterManager amm)
469 throws InterruptedException {
470
471 if (!c.getBoolean(HConstants.MASTER_TYPE_BACKUP,
472 HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
473 return;
474 }
475 LOG.debug("HMaster started in backup mode. " +
476 "Stalling until master znode is written.");
477
478
479 while (!amm.isActiveMaster()) {
480 LOG.debug("Waiting for master address ZNode to be written " +
481 "(Also watching cluster state node)");
482 Thread.sleep(
483 c.getInt(HConstants.ZK_SESSION_TIMEOUT, HConstants.DEFAULT_ZK_SESSION_TIMEOUT));
484 }
485
486 }
487
488 MetricsMaster getMetrics() {
489 return metricsMaster;
490 }
491
492
493
494
495
496
497
498
499
500
501 @Override
502 public void run() {
503 MonitoredTask startupStatus =
504 TaskMonitor.get().createStatus("Master startup");
505 startupStatus.setDescription("Master startup");
506 masterStartTime = System.currentTimeMillis();
507 try {
508
509
510
511
512
513
514
515
516
517
518 becomeActiveMaster(startupStatus);
519
520
521 if (!this.stopped) {
522 finishInitialization(startupStatus, false);
523 loop();
524 }
525 } catch (Throwable t) {
526
527 if (t instanceof NoClassDefFoundError &&
528 t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
529
530 abort("HBase is having a problem with its Hadoop jars. You may need to "
531 + "recompile HBase against Hadoop version "
532 + org.apache.hadoop.util.VersionInfo.getVersion()
533 + " or change your hadoop jars to start properly", t);
534 } else {
535 abort("Unhandled exception. Starting shutdown.", t);
536 }
537 } finally {
538 startupStatus.cleanup();
539
540 stopChores();
541
542
543 if (!this.abort && this.serverManager != null &&
544 this.serverManager.isClusterShutdown()) {
545 this.serverManager.letRegionServersShutdown();
546 }
547 stopServiceThreads();
548
549 if (this.activeMasterManager != null) this.activeMasterManager.stop();
550 if (this.catalogTracker != null) this.catalogTracker.stop();
551 if (this.serverManager != null) this.serverManager.stop();
552 if (this.assignmentManager != null) this.assignmentManager.stop();
553 if (this.fileSystemManager != null) this.fileSystemManager.stop();
554 if (this.snapshotManager != null) this.snapshotManager.stop("server shutting down.");
555 this.zooKeeper.close();
556 }
557 LOG.info("HMaster main thread exiting");
558 }
559
560
561
562
563
564
565
566 private boolean becomeActiveMaster(MonitoredTask startupStatus)
567 throws InterruptedException {
568
569
570 this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName,
571 this);
572 this.zooKeeper.registerListener(activeMasterManager);
573 stallIfBackupMaster(this.conf, this.activeMasterManager);
574
575
576
577
578 this.clusterStatusTracker = new ClusterStatusTracker(getZooKeeper(), this);
579 this.clusterStatusTracker.start();
580 return this.activeMasterManager.blockUntilBecomingActiveMaster(startupStatus,
581 this.clusterStatusTracker);
582 }
583
584
585
586
587
588
589 private void initializeZKBasedSystemTrackers() throws IOException,
590 InterruptedException, KeeperException {
591 this.catalogTracker = createCatalogTracker(this.zooKeeper, this.conf, this);
592 this.catalogTracker.start();
593
594 this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
595 this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
596 this.loadBalancerTracker.start();
597 this.assignmentManager = new AssignmentManager(this, serverManager,
598 this.catalogTracker, this.balancer, this.executorService, this.metricsMaster,
599 this.tableLockManager);
600 zooKeeper.registerListenerFirst(assignmentManager);
601
602 this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
603 this.serverManager);
604 this.regionServerTracker.start();
605
606 this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
607 this.serverManager);
608 this.drainingServerTracker.start();
609
610
611
612 boolean wasUp = this.clusterStatusTracker.isClusterUp();
613 if (!wasUp) this.clusterStatusTracker.setClusterUp();
614
615 LOG.info("Server active/primary master; " + this.serverName +
616 ", sessionid=0x" +
617 Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
618 ", cluster-up flag was=" + wasUp);
619
620
621 this.snapshotManager = new SnapshotManager(this);
622 }
623
624
625
626
627
628
629
630
631
632
633
634
635
636 CatalogTracker createCatalogTracker(final ZooKeeperWatcher zk,
637 final Configuration conf, Abortable abortable)
638 throws IOException {
639 return new CatalogTracker(zk, conf, abortable);
640 }
641
642
643 private Sleeper stopSleeper = new Sleeper(100, this);
644
645 private void loop() {
646 long lastMsgTs = 0l;
647 long now = 0l;
648 while (!this.stopped) {
649 now = System.currentTimeMillis();
650 if ((now - lastMsgTs) >= this.msgInterval) {
651 doMetrics();
652 lastMsgTs = System.currentTimeMillis();
653 }
654 stopSleeper.sleep();
655 }
656 }
657
658
659
660
661
662 private void doMetrics() {
663 try {
664 this.assignmentManager.updateRegionsInTransitionMetrics();
665 } catch (Throwable e) {
666 LOG.error("Couldn't update metrics: " + e.getMessage());
667 }
668 }
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
/**
 * Completes startup once this master is the active one. In order it: creates the master
 * file system and table descriptors, publishes the cluster id to ZooKeeper, creates the
 * table lock manager and the ZooKeeper-based trackers, waits for region servers, splits
 * logs, assigns META, lets the assignment manager join the cluster, and marks the master
 * initialized.
 *
 * @param status task whose status text is updated as each phase starts
 * @param masterRecovery when true, the steps guarded by {@code !masterRecovery} are
 *   skipped: creating the executor service and server manager, reaping table write
 *   locks, creating the coprocessor host and span receivers, starting service threads,
 *   the timeout monitor and the chores, and the postStartMaster hook
 */
691 private void finishInitialization(MonitoredTask status, boolean masterRecovery)
692 throws IOException, InterruptedException, KeeperException {
693
694 isActiveMaster = true;
695
696
697
698
699
700
701
702 status.setStatus("Initializing Master file system");
703 this.masterActiveTime = System.currentTimeMillis();
704
705 this.fileSystemManager = new MasterFileSystem(this, this, metricsMaster, masterRecovery);
706
// Table descriptors are read from the master file system's root directory.
707 this.tableDescriptors =
708 new FSTableDescriptors(this.fileSystemManager.getFileSystem(),
709 this.fileSystemManager.getRootDir());
710
711
712 status.setStatus("Publishing Cluster ID in ZooKeeper");
713 ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
714
// The executor service and server manager are kept as-is on a master-recovery run.
715 if (!masterRecovery) {
716 this.executorService = new ExecutorService(getServerName().toString());
717 this.serverManager = createServerManager(this, this);
718 }
719
720
721
// Created before the ZK trackers, because the assignment manager built there receives it.
722 this.tableLockManager = TableLockManager.createTableLockManager(conf, zooKeeper, serverName);
723 if (!masterRecovery) {
724 this.tableLockManager.reapAllTableWriteLocks();
725 }
726
727 status.setStatus("Initializing ZK system trackers");
728 initializeZKBasedSystemTrackers();
729
730 if (!masterRecovery) {
731
732 status.setStatus("Initializing master coprocessors");
733 this.cpHost = new MasterCoprocessorHost(this, this.conf);
734
735 spanReceiverHost = new SpanReceiverHost(getConfiguration());
736 spanReceiverHost.loadSpanReceivers();
737
738
739 status.setStatus("Initializing master service threads");
740 startServiceThreads();
741 }
742
743
744 this.serverManager.waitForRegionServers(status);
745
// Register servers that ZooKeeper lists as online but that the server manager does
// not yet know, with an empty load.
746 for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
747 if (!this.serverManager.isServerOnline(sn)) {
748
749 LOG.info("Registering server found up in zk but who has not yet " +
750 "reported in: " + sn);
751 this.serverManager.recordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD);
752 }
753 }
754
755 if (!masterRecovery) {
756 this.assignmentManager.startTimeOutMonitor();
757 }
758
759
760 status.setStatus("Splitting logs after master startup");
761 splitLogAfterStartup(this.fileSystemManager);
762
763
// assignMeta returns false only when the master was stopped while waiting; give up then.
764 if (!assignMeta(status)) return;
765 enableServerShutdownHandler();
766
767
768
769
770
// Fully qualified call into the catalog migration helper; runs after META is assigned.
771 org.apache.hadoop.hbase.catalog.MetaMigrationConvertingToPB
772 .updateRootAndMetaIfNecessary(this);
773
774 this.balancer.setMasterServices(this);
775
776 status.setStatus("Starting assignment manager");
777 this.assignmentManager.joinCluster();
778
779 this.balancer.setClusterStatus(getClusterStatus());
780
781 if (!masterRecovery) {
782
783
784 status.setStatus("Starting balancer and catalog janitor");
785 this.clusterStatusChore = getAndStartClusterStatusChore(this);
786 this.balancerChore = getAndStartBalancerChore(this);
787 this.catalogJanitorChore = new CatalogJanitor(this, this);
788 startCatalogJanitorChore();
789 }
790
791 status.markComplete("Initialization successful");
792 LOG.info("Master has completed initialization");
793 initialized = true;
794
795
796
797 this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();
798
799 if (!masterRecovery) {
800 if (this.cpHost != null) {
801
// A failing coprocessor hook is logged but does not fail initialization.
802 try {
803 this.cpHost.postStartMaster();
804 } catch (IOException ioe) {
805 LOG.error("Coprocessor postStartMaster() hook failed", ioe);
806 }
807 }
808 }
809 }
810
811
812
813
814
815 protected void startCatalogJanitorChore() {
816 Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
817 }
818
819
820
821
822
823 protected void splitLogAfterStartup(final MasterFileSystem mfs) {
824 mfs.splitLogAfterStartup();
825 }
826
827
828
829
830
831
832
833
834
835 ServerManager createServerManager(final Server master,
836 final MasterServices services)
837 throws IOException {
838
839
840 return new ServerManager(master, services);
841 }
842
843
844
845
846
847 private void enableServerShutdownHandler() {
848 if (!serverShutdownHandlerEnabled) {
849 serverShutdownHandlerEnabled = true;
850 this.serverManager.processQueuedDeadServers();
851 }
852 }
853
854
855
856
857
858
859
860
861
862 boolean assignMeta(MonitoredTask status)
863 throws InterruptedException, IOException, KeeperException {
864 int assigned = 0;
865 long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
866
867
868 status.setStatus("Assigning META region");
869 assignmentManager.getRegionStates().createRegionState(
870 HRegionInfo.FIRST_META_REGIONINFO);
871 boolean rit = this.assignmentManager.
872 processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
873 ServerName currentMetaServer = null;
874 boolean metaRegionLocation = catalogTracker.verifyMetaRegionLocation(timeout);
875 if (!rit && !metaRegionLocation) {
876 currentMetaServer = this.catalogTracker.getMetaLocation();
877 splitLogAndExpireIfOnline(currentMetaServer);
878 this.assignmentManager.assignMeta();
879 enableSSHandWaitForMeta();
880
881 if (!isMetaLocation()) return false;
882
883 this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
884 assigned++;
885 } else if (rit && !metaRegionLocation) {
886
887 if (!isMetaLocation()) return false;
888
889 this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
890 assigned++;
891 } else if (metaRegionLocation) {
892
893 this.assignmentManager.regionOnline(HRegionInfo.FIRST_META_REGIONINFO,
894 this.catalogTracker.getMetaLocation());
895 }
896 enableCatalogTables(Bytes.toString(HConstants.META_TABLE_NAME));
897 LOG.info(".META. assigned=" + assigned + ", rit=" + rit +
898 ", location=" + catalogTracker.getMetaLocation());
899 status.setStatus("META assigned.");
900 return true;
901 }
902
903 private void enableSSHandWaitForMeta() throws IOException, InterruptedException {
904 enableServerShutdownHandler();
905 this.catalogTracker.waitForMeta();
906
907
908 this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
909 }
910
911
912
913
914
915 private boolean isMetaLocation() throws InterruptedException {
916
917
918 while (!this.stopped) {
919 try {
920 if (this.catalogTracker.waitForMeta(100) != null) break;
921 } catch (NotAllMetaRegionsOnlineException e) {
922
923 }
924 }
925
926 return !this.stopped;
927 }
928
929 private void enableCatalogTables(String catalogTableName) {
930 if (!this.assignmentManager.getZKTable().isEnabledTable(catalogTableName)) {
931 this.assignmentManager.setEnabledTable(catalogTableName);
932 }
933 }
934
935
936
937
938
939
940
941 private void splitLogAndExpireIfOnline(final ServerName sn)
942 throws IOException {
943 if (sn == null || !serverManager.isServerOnline(sn)) {
944 return;
945 }
946 LOG.info("Forcing splitLog and expire of " + sn);
947 fileSystemManager.splitMetaLog(sn);
948 fileSystemManager.splitLog(sn);
949 serverManager.expireServer(sn);
950 }
951
952 @Override
953 public TableDescriptors getTableDescriptors() {
954 return this.tableDescriptors;
955 }
956
957
958 public InfoServer getInfoServer() {
959 return this.infoServer;
960 }
961
962 @Override
963 public Configuration getConfiguration() {
964 return this.conf;
965 }
966
967 @Override
968 public ServerManager getServerManager() {
969 return this.serverManager;
970 }
971
972 @Override
973 public ExecutorService getExecutorService() {
974 return this.executorService;
975 }
976
977 @Override
978 public MasterFileSystem getMasterFileSystem() {
979 return this.fileSystemManager;
980 }
981
982
983
984
985
986 public ZooKeeperWatcher getZooKeeperWatcher() {
987 return this.zooKeeper;
988 }
989
990
991
992
993
994
995
996
997 void startServiceThreads() throws IOException{
998
999
1000 this.executorService.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
1001 conf.getInt("hbase.master.executor.openregion.threads", 5));
1002 this.executorService.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
1003 conf.getInt("hbase.master.executor.closeregion.threads", 5));
1004 this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
1005 conf.getInt("hbase.master.executor.serverops.threads", 3));
1006 this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
1007 conf.getInt("hbase.master.executor.serverops.threads", 5));
1008
1009
1010
1011
1012 this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
1013
1014
1015 String n = Thread.currentThread().getName();
1016 int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
1017 this.logCleaner =
1018 new LogCleaner(cleanerInterval,
1019 this, conf, getMasterFileSystem().getFileSystem(),
1020 getMasterFileSystem().getOldLogDir());
1021 Threads.setDaemonThreadRunning(logCleaner.getThread(), n + ".oldLogCleaner");
1022
1023
1024 Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
1025 this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
1026 .getFileSystem(), archiveDir);
1027 Threads.setDaemonThreadRunning(hfileCleaner.getThread(), n + ".archivedHFileCleaner");
1028
1029
1030 int port = this.conf.getInt(HConstants.MASTER_INFO_PORT, 60010);
1031 if (port >= 0) {
1032 String a = this.conf.get("hbase.master.info.bindAddress", "0.0.0.0");
1033 this.infoServer = new InfoServer(MASTER, a, port, false, this.conf);
1034 this.infoServer.addServlet("status", "/master-status", MasterStatusServlet.class);
1035 this.infoServer.addServlet("dump", "/dump", MasterDumpServlet.class);
1036 this.infoServer.setAttribute(MASTER, this);
1037 this.infoServer.start();
1038 }
1039
1040
1041 if (this.healthCheckChore != null) {
1042 Threads.setDaemonThreadRunning(this.healthCheckChore.getThread(), n + ".healthChecker");
1043 }
1044
1045
1046 this.rpcServer.openServer();
1047 this.rpcServerOpen = true;
1048 if (LOG.isDebugEnabled()) {
1049 LOG.debug("Started service threads");
1050 }
1051 }
1052
1053
1054
1055
1056
1057 boolean isRpcServerOpen() {
1058 return this.rpcServerOpen;
1059 }
1060
1061 private void stopServiceThreads() {
1062 if (LOG.isDebugEnabled()) {
1063 LOG.debug("Stopping service threads");
1064 }
1065 if (this.rpcServer != null) this.rpcServer.stop();
1066 this.rpcServerOpen = false;
1067
1068 if (this.logCleaner!= null) this.logCleaner.interrupt();
1069 if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
1070
1071 if (this.infoServer != null) {
1072 LOG.info("Stopping infoServer");
1073 try {
1074 this.infoServer.stop();
1075 } catch (Exception ex) {
1076 ex.printStackTrace();
1077 }
1078 }
1079 if (this.executorService != null) this.executorService.shutdown();
1080 if (this.healthCheckChore != null) {
1081 this.healthCheckChore.interrupt();
1082 }
1083 }
1084
1085 private static Thread getAndStartClusterStatusChore(HMaster master) {
1086 if (master == null || master.balancer == null) {
1087 return null;
1088 }
1089 Chore chore = new ClusterStatusChore(master, master.balancer);
1090 return Threads.setDaemonThreadRunning(chore.getThread());
1091 }
1092
1093 private static Thread getAndStartBalancerChore(final HMaster master) {
1094
1095 Chore chore = new BalancerChore(master);
1096 return Threads.setDaemonThreadRunning(chore.getThread());
1097 }
1098
1099 private void stopChores() {
1100 if (this.balancerChore != null) {
1101 this.balancerChore.interrupt();
1102 }
1103 if (this.clusterStatusChore != null) {
1104 this.clusterStatusChore.interrupt();
1105 }
1106 if (this.catalogJanitorChore != null) {
1107 this.catalogJanitorChore.interrupt();
1108 }
1109 if (this.clusterStatusPublisherChore != null){
1110 clusterStatusPublisherChore.interrupt();
1111 }
1112 }
1113
1114 @Override
1115 public RegionServerStartupResponse regionServerStartup(
1116 RpcController controller, RegionServerStartupRequest request) throws ServiceException {
1117
1118 try {
1119 InetAddress ia = getRemoteInetAddress(request.getPort(), request.getServerStartCode());
1120 ServerName rs = this.serverManager.regionServerStartup(ia, request.getPort(),
1121 request.getServerStartCode(), request.getServerCurrentTime());
1122
1123
1124 RegionServerStartupResponse.Builder resp = createConfigurationSubset();
1125 NameStringPair.Builder entry = NameStringPair.newBuilder()
1126 .setName(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER)
1127 .setValue(rs.getHostname());
1128 resp.addMapEntries(entry.build());
1129
1130 return resp.build();
1131 } catch (IOException ioe) {
1132 throw new ServiceException(ioe);
1133 }
1134 }
1135
1136
1137
1138
1139
1140 InetAddress getRemoteInetAddress(final int port, final long serverStartCode)
1141 throws UnknownHostException {
1142
1143
1144 return HBaseServer.getRemoteIp();
1145 }
1146
1147
1148
1149
1150
1151 protected RegionServerStartupResponse.Builder createConfigurationSubset() {
1152 RegionServerStartupResponse.Builder resp = addConfig(
1153 RegionServerStartupResponse.newBuilder(), HConstants.HBASE_DIR);
1154 return addConfig(resp, "fs.default.name");
1155 }
1156
1157 private RegionServerStartupResponse.Builder addConfig(
1158 final RegionServerStartupResponse.Builder resp, final String key) {
1159 NameStringPair.Builder entry = NameStringPair.newBuilder()
1160 .setName(key)
1161 .setValue(this.conf.get(key));
1162 resp.addMapEntries(entry.build());
1163 return resp;
1164 }
1165
1166 @Override
1167 public GetLastFlushedSequenceIdResponse getLastFlushedSequenceId(RpcController controller,
1168 GetLastFlushedSequenceIdRequest request) throws ServiceException {
1169 byte[] regionName = request.getRegionName().toByteArray();
1170 long seqId = serverManager.getLastFlushedSequenceId(regionName);
1171 return ResponseConverter.buildGetLastFlushedSequenceIdResponse(seqId);
1172 }
1173
1174 @Override
1175 public RegionServerReportResponse regionServerReport(
1176 RpcController controller, RegionServerReportRequest request) throws ServiceException {
1177 try {
1178 HBaseProtos.ServerLoad sl = request.getLoad();
1179 this.serverManager.regionServerReport(ProtobufUtil.toServerName(request.getServer()), new ServerLoad(sl));
1180 if (sl != null && this.metricsMaster != null) {
1181
1182 this.metricsMaster.incrementRequests(sl.getTotalNumberOfRequests());
1183 }
1184 } catch (IOException ioe) {
1185 throw new ServiceException(ioe);
1186 }
1187
1188 return RegionServerReportResponse.newBuilder().build();
1189 }
1190
1191 @Override
1192 public ReportRSFatalErrorResponse reportRSFatalError(
1193 RpcController controller, ReportRSFatalErrorRequest request) throws ServiceException {
1194 String errorText = request.getErrorMessage();
1195 ServerName sn = ProtobufUtil.toServerName(request.getServer());
1196 String msg = "Region server " + Bytes.toString(sn.getVersionedBytes()) +
1197 " reported a fatal error:\n" + errorText;
1198 LOG.error(msg);
1199 rsFatals.add(msg);
1200
1201 return ReportRSFatalErrorResponse.newBuilder().build();
1202 }
1203
1204 public boolean isMasterRunning() {
1205 return !isStopped();
1206 }
1207
1208 public IsMasterRunningResponse isMasterRunning(RpcController c, IsMasterRunningRequest req)
1209 throws ServiceException {
1210 return IsMasterRunningResponse.newBuilder().setIsMasterRunning(isMasterRunning()).build();
1211 }
1212
1213 @Override
1214 public CatalogScanResponse runCatalogScan(RpcController c,
1215 CatalogScanRequest req) throws ServiceException {
1216 try {
1217 return ResponseConverter.buildCatalogScanResponse(catalogJanitorChore.scan());
1218 } catch (IOException ioe) {
1219 throw new ServiceException(ioe);
1220 }
1221 }
1222
1223 @Override
1224 public EnableCatalogJanitorResponse enableCatalogJanitor(RpcController c,
1225 EnableCatalogJanitorRequest req) throws ServiceException {
1226 return EnableCatalogJanitorResponse.newBuilder().
1227 setPrevValue(catalogJanitorChore.setEnabled(req.getEnable())).build();
1228 }
1229
1230 @Override
1231 public IsCatalogJanitorEnabledResponse isCatalogJanitorEnabled(RpcController c,
1232 IsCatalogJanitorEnabledRequest req) throws ServiceException {
1233 boolean isEnabled = catalogJanitorChore != null ? catalogJanitorChore.getEnabled() : false;
1234 return IsCatalogJanitorEnabledResponse.newBuilder().setValue(isEnabled).build();
1235 }
1236
1237
1238
1239
1240 private int getBalancerCutoffTime() {
1241 int balancerCutoffTime =
1242 getConfiguration().getInt("hbase.balancer.max.balancing", -1);
1243 if (balancerCutoffTime == -1) {
1244
1245 int balancerPeriod =
1246 getConfiguration().getInt("hbase.balancer.period", 300000);
1247 balancerCutoffTime = balancerPeriod / 2;
1248
1249 if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod;
1250 }
1251 return balancerCutoffTime;
1252 }
1253
1254 public boolean balance() throws IOException {
1255
1256 if (!this.initialized) {
1257 LOG.debug("Master has not been initialized, don't run balancer.");
1258 return false;
1259 }
1260
1261 if (!this.loadBalancerTracker.isBalancerOn()) return false;
1262
1263 int maximumBalanceTime = getBalancerCutoffTime();
1264 long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
1265 boolean balancerRan;
1266 synchronized (this.balancer) {
1267
1268 if (this.assignmentManager.getRegionStates().isRegionsInTransition()) {
1269 Map<String, RegionState> regionsInTransition =
1270 this.assignmentManager.getRegionStates().getRegionsInTransition();
1271 LOG.debug("Not running balancer because " + regionsInTransition.size() +
1272 " region(s) in transition: " + org.apache.commons.lang.StringUtils.
1273 abbreviate(regionsInTransition.toString(), 256));
1274 return false;
1275 }
1276 if (this.serverManager.areDeadServersInProgress()) {
1277 LOG.debug("Not running balancer because processing dead regionserver(s): " +
1278 this.serverManager.getDeadServers());
1279 return false;
1280 }
1281
1282 if (this.cpHost != null) {
1283 if (this.cpHost.preBalance()) {
1284 LOG.debug("Coprocessor bypassing balancer request");
1285 return false;
1286 }
1287 }
1288
1289 Map<String, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
1290 this.assignmentManager.getRegionStates().getAssignmentsByTable();
1291
1292 List<RegionPlan> plans = new ArrayList<RegionPlan>();
1293
1294 this.balancer.setClusterStatus(getClusterStatus());
1295 for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
1296 List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
1297 if (partialPlans != null) plans.addAll(partialPlans);
1298 }
1299 int rpCount = 0;
1300 long totalRegPlanExecTime = 0;
1301 balancerRan = plans != null;
1302 if (plans != null && !plans.isEmpty()) {
1303 for (RegionPlan plan: plans) {
1304 LOG.info("balance " + plan);
1305 long balStartTime = System.currentTimeMillis();
1306 this.assignmentManager.balance(plan);
1307 totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
1308 rpCount++;
1309 if (rpCount < plans.size() &&
1310
1311 (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
1312 LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
1313 maximumBalanceTime);
1314 break;
1315 }
1316 }
1317 }
1318 if (this.cpHost != null) {
1319 this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans);
1320 }
1321 }
1322 return balancerRan;
1323 }
1324
1325 @Override
1326 public BalanceResponse balance(RpcController c, BalanceRequest request) throws ServiceException {
1327 try {
1328 return BalanceResponse.newBuilder().setBalancerRan(balance()).build();
1329 } catch (IOException e) {
1330 throw new ServiceException(e);
1331 }
1332 }
1333
/** How {@code switchBalancer} applies a new balancer on/off value. */
enum BalanceSwitchMode {
  /** The new value is set while holding the balancer's monitor. */
  SYNC,
  /** The new value is set without taking the balancer's monitor. */
  ASYNC
}
1338
1339
1340
1341
1342
1343
1344 public boolean switchBalancer(final boolean b, BalanceSwitchMode mode) throws IOException {
1345 boolean oldValue = this.loadBalancerTracker.isBalancerOn();
1346 boolean newValue = b;
1347 try {
1348 if (this.cpHost != null) {
1349 newValue = this.cpHost.preBalanceSwitch(newValue);
1350 }
1351 try {
1352 if (mode == BalanceSwitchMode.SYNC) {
1353 synchronized (this.balancer) {
1354 this.loadBalancerTracker.setBalancerOn(newValue);
1355 }
1356 } else {
1357 this.loadBalancerTracker.setBalancerOn(newValue);
1358 }
1359 } catch (KeeperException ke) {
1360 throw new IOException(ke);
1361 }
1362 LOG.info("BalanceSwitch=" + newValue);
1363 if (this.cpHost != null) {
1364 this.cpHost.postBalanceSwitch(oldValue, newValue);
1365 }
1366 } catch (IOException ioe) {
1367 LOG.warn("Error flipping balance switch", ioe);
1368 }
1369 return oldValue;
1370 }
1371
1372 public boolean synchronousBalanceSwitch(final boolean b) throws IOException {
1373 return switchBalancer(b, BalanceSwitchMode.SYNC);
1374 }
1375
1376 public boolean balanceSwitch(final boolean b) throws IOException {
1377 return switchBalancer(b, BalanceSwitchMode.ASYNC);
1378 }
1379
1380 @Override
1381 public SetBalancerRunningResponse setBalancerRunning(
1382 RpcController controller, SetBalancerRunningRequest req) throws ServiceException {
1383 try {
1384 boolean prevValue = (req.getSynchronous())?
1385 synchronousBalanceSwitch(req.getOn()):balanceSwitch(req.getOn());
1386 return SetBalancerRunningResponse.newBuilder().setPrevBalanceValue(prevValue).build();
1387 } catch (IOException ioe) {
1388 throw new ServiceException(ioe);
1389 }
1390 }
1391
1392
1393
1394
1395
1396
1397
1398 public void setCatalogJanitorEnabled(final boolean b) {
1399 this.catalogJanitorChore.setEnabled(b);
1400 }
1401
1402 @Override
1403 public DispatchMergingRegionsResponse dispatchMergingRegions(
1404 RpcController controller, DispatchMergingRegionsRequest request)
1405 throws ServiceException {
1406 final byte[] encodedNameOfRegionA = request.getRegionA().getValue()
1407 .toByteArray();
1408 final byte[] encodedNameOfRegionB = request.getRegionB().getValue()
1409 .toByteArray();
1410 final boolean forcible = request.getForcible();
1411 if (request.getRegionA().getType() != RegionSpecifierType.ENCODED_REGION_NAME
1412 || request.getRegionB().getType() != RegionSpecifierType.ENCODED_REGION_NAME) {
1413 LOG.warn("mergeRegions specifier type: expected: "
1414 + RegionSpecifierType.ENCODED_REGION_NAME + " actual: region_a="
1415 + request.getRegionA().getType() + ", region_b="
1416 + request.getRegionB().getType());
1417 }
1418 RegionState regionStateA = assignmentManager.getRegionStates()
1419 .getRegionState(Bytes.toString(encodedNameOfRegionA));
1420 RegionState regionStateB = assignmentManager.getRegionStates()
1421 .getRegionState(Bytes.toString(encodedNameOfRegionB));
1422 if (regionStateA == null || regionStateB == null) {
1423 throw new ServiceException(new UnknownRegionException(
1424 Bytes.toStringBinary(regionStateA == null ? encodedNameOfRegionA
1425 : encodedNameOfRegionB)));
1426 }
1427
1428 if (!forcible && !HRegionInfo.areAdjacent(regionStateA.getRegion(),
1429 regionStateB.getRegion())) {
1430 throw new ServiceException("Unable to merge not adjacent regions "
1431 + regionStateA.getRegion().getRegionNameAsString() + ", "
1432 + regionStateB.getRegion().getRegionNameAsString()
1433 + " where forcible = " + forcible);
1434 }
1435
1436 try {
1437 dispatchMergingRegions(regionStateA.getRegion(), regionStateB.getRegion(), forcible);
1438 } catch (IOException ioe) {
1439 throw new ServiceException(ioe);
1440 }
1441
1442 return DispatchMergingRegionsResponse.newBuilder().build();
1443 }
1444
1445 @Override
1446 public void dispatchMergingRegions(final HRegionInfo region_a,
1447 final HRegionInfo region_b, final boolean forcible) throws IOException {
1448 checkInitialized();
1449 this.executorService.submit(new DispatchMergingRegionHandler(this,
1450 this.catalogJanitorChore, region_a, region_b, forcible));
1451 }
1452
1453 @Override
1454 public MoveRegionResponse moveRegion(RpcController controller, MoveRegionRequest req)
1455 throws ServiceException {
1456 final byte [] encodedRegionName = req.getRegion().getValue().toByteArray();
1457 RegionSpecifierType type = req.getRegion().getType();
1458 final byte [] destServerName = (req.hasDestServerName())?
1459 Bytes.toBytes(ProtobufUtil.toServerName(req.getDestServerName()).getServerName()):null;
1460 MoveRegionResponse mrr = MoveRegionResponse.newBuilder().build();
1461
1462 if (type != RegionSpecifierType.ENCODED_REGION_NAME) {
1463 LOG.warn("moveRegion specifier type: expected: " + RegionSpecifierType.ENCODED_REGION_NAME
1464 + " actual: " + type);
1465 }
1466
1467 try {
1468 move(encodedRegionName, destServerName);
1469 } catch (HBaseIOException ioe) {
1470 throw new ServiceException(ioe);
1471 }
1472 return mrr;
1473 }
1474
1475 void move(final byte[] encodedRegionName,
1476 final byte[] destServerName) throws HBaseIOException {
1477 RegionState regionState = assignmentManager.getRegionStates().
1478 getRegionState(Bytes.toString(encodedRegionName));
1479 if (regionState == null) {
1480 throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
1481 }
1482
1483 HRegionInfo hri = regionState.getRegion();
1484 ServerName dest;
1485 if (destServerName == null || destServerName.length == 0) {
1486 LOG.info("Passed destination servername is null/empty so " +
1487 "choosing a server at random");
1488 final List<ServerName> destServers = this.serverManager.createDestinationServersList(
1489 regionState.getServerName());
1490 dest = balancer.randomAssignment(hri, destServers);
1491 } else {
1492 dest = new ServerName(Bytes.toString(destServerName));
1493 if (dest.equals(regionState.getServerName())) {
1494 LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
1495 + " because region already assigned to the same server " + dest + ".");
1496 return;
1497 }
1498 }
1499
1500
1501 RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);
1502
1503 try {
1504 checkInitialized();
1505 if (this.cpHost != null) {
1506 if (this.cpHost.preMove(hri, rp.getSource(), rp.getDestination())) {
1507 return;
1508 }
1509 }
1510 LOG.info("Added move plan " + rp + ", running balancer");
1511 this.assignmentManager.balance(rp);
1512 if (this.cpHost != null) {
1513 this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
1514 }
1515 } catch (IOException ioe) {
1516 if (ioe instanceof HBaseIOException) {
1517 throw (HBaseIOException)ioe;
1518 }
1519 throw new HBaseIOException(ioe);
1520 }
1521 }
1522
1523 @Override
1524 public void createTable(HTableDescriptor hTableDescriptor,
1525 byte [][] splitKeys)
1526 throws IOException {
1527 if (!isMasterRunning()) {
1528 throw new MasterNotRunningException();
1529 }
1530
1531 HRegionInfo [] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
1532 checkInitialized();
1533 checkCompression(hTableDescriptor);
1534 if (cpHost != null) {
1535 cpHost.preCreateTable(hTableDescriptor, newRegions);
1536 }
1537
1538 this.executorService.submit(new CreateTableHandler(this,
1539 this.fileSystemManager, hTableDescriptor, conf,
1540 newRegions, this).prepare());
1541 if (cpHost != null) {
1542 cpHost.postCreateTable(hTableDescriptor, newRegions);
1543 }
1544
1545 }
1546
1547 private void checkCompression(final HTableDescriptor htd)
1548 throws IOException {
1549 if (!this.masterCheckCompression) return;
1550 for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1551 checkCompression(hcd);
1552 }
1553 }
1554
1555 private void checkCompression(final HColumnDescriptor hcd)
1556 throws IOException {
1557 if (!this.masterCheckCompression) return;
1558 CompressionTest.testCompression(hcd.getCompression());
1559 CompressionTest.testCompression(hcd.getCompactionCompression());
1560 }
1561
1562 @Override
1563 public CreateTableResponse createTable(RpcController controller, CreateTableRequest req)
1564 throws ServiceException {
1565 HTableDescriptor hTableDescriptor = HTableDescriptor.convert(req.getTableSchema());
1566 byte [][] splitKeys = ProtobufUtil.getSplitKeysArray(req);
1567 try {
1568 createTable(hTableDescriptor,splitKeys);
1569 } catch (IOException ioe) {
1570 throw new ServiceException(ioe);
1571 }
1572 return CreateTableResponse.newBuilder().build();
1573 }
1574
1575 private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1576 byte[][] splitKeys) {
1577 HRegionInfo[] hRegionInfos = null;
1578 if (splitKeys == null || splitKeys.length == 0) {
1579 hRegionInfos = new HRegionInfo[]{
1580 new HRegionInfo(hTableDescriptor.getName(), null, null)};
1581 } else {
1582 int numRegions = splitKeys.length + 1;
1583 hRegionInfos = new HRegionInfo[numRegions];
1584 byte[] startKey = null;
1585 byte[] endKey = null;
1586 for (int i = 0; i < numRegions; i++) {
1587 endKey = (i == splitKeys.length) ? null : splitKeys[i];
1588 hRegionInfos[i] =
1589 new HRegionInfo(hTableDescriptor.getName(), startKey, endKey);
1590 startKey = endKey;
1591 }
1592 }
1593 return hRegionInfos;
1594 }
1595
1596 private static boolean isCatalogTable(final byte [] tableName) {
1597 return Bytes.equals(tableName, HConstants.META_TABLE_NAME);
1598 }
1599
1600 @Override
1601 public void deleteTable(final byte[] tableName) throws IOException {
1602 checkInitialized();
1603 if (cpHost != null) {
1604 cpHost.preDeleteTable(tableName);
1605 }
1606 this.executorService.submit(new DeleteTableHandler(tableName, this, this).prepare());
1607 if (cpHost != null) {
1608 cpHost.postDeleteTable(tableName);
1609 }
1610 }
1611
1612 @Override
1613 public DeleteTableResponse deleteTable(RpcController controller, DeleteTableRequest request)
1614 throws ServiceException {
1615 try {
1616 deleteTable(request.getTableName().toByteArray());
1617 } catch (IOException ioe) {
1618 throw new ServiceException(ioe);
1619 }
1620 return DeleteTableResponse.newBuilder().build();
1621 }
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631 @Override
1632 public GetSchemaAlterStatusResponse getSchemaAlterStatus(
1633 RpcController controller, GetSchemaAlterStatusRequest req) throws ServiceException {
1634
1635
1636
1637
1638 byte [] tableName = req.getTableName().toByteArray();
1639
1640 try {
1641 Pair<Integer,Integer> pair = this.assignmentManager.getReopenStatus(tableName);
1642 GetSchemaAlterStatusResponse.Builder ret = GetSchemaAlterStatusResponse.newBuilder();
1643 ret.setYetToUpdateRegions(pair.getFirst());
1644 ret.setTotalRegions(pair.getSecond());
1645 return ret.build();
1646 } catch (IOException ioe) {
1647 throw new ServiceException(ioe);
1648 }
1649 }
1650
1651 @Override
1652 public void addColumn(final byte[] tableName, final HColumnDescriptor column)
1653 throws IOException {
1654 checkInitialized();
1655 if (cpHost != null) {
1656 if (cpHost.preAddColumn(tableName, column)) {
1657 return;
1658 }
1659 }
1660
1661 new TableAddFamilyHandler(tableName, column, this, this)
1662 .prepare().process();
1663 if (cpHost != null) {
1664 cpHost.postAddColumn(tableName, column);
1665 }
1666 }
1667
1668 @Override
1669 public AddColumnResponse addColumn(RpcController controller, AddColumnRequest req)
1670 throws ServiceException {
1671 try {
1672 addColumn(req.getTableName().toByteArray(),
1673 HColumnDescriptor.convert(req.getColumnFamilies()));
1674 } catch (IOException ioe) {
1675 throw new ServiceException(ioe);
1676 }
1677 return AddColumnResponse.newBuilder().build();
1678 }
1679
1680 @Override
1681 public void modifyColumn(byte[] tableName, HColumnDescriptor descriptor)
1682 throws IOException {
1683 checkInitialized();
1684 checkCompression(descriptor);
1685 if (cpHost != null) {
1686 if (cpHost.preModifyColumn(tableName, descriptor)) {
1687 return;
1688 }
1689 }
1690 new TableModifyFamilyHandler(tableName, descriptor, this, this)
1691 .prepare().process();
1692 if (cpHost != null) {
1693 cpHost.postModifyColumn(tableName, descriptor);
1694 }
1695 }
1696
1697 @Override
1698 public ModifyColumnResponse modifyColumn(RpcController controller, ModifyColumnRequest req)
1699 throws ServiceException {
1700 try {
1701 modifyColumn(req.getTableName().toByteArray(),
1702 HColumnDescriptor.convert(req.getColumnFamilies()));
1703 } catch (IOException ioe) {
1704 throw new ServiceException(ioe);
1705 }
1706 return ModifyColumnResponse.newBuilder().build();
1707 }
1708
1709 @Override
1710 public void deleteColumn(final byte[] tableName, final byte[] columnName)
1711 throws IOException {
1712 checkInitialized();
1713 if (cpHost != null) {
1714 if (cpHost.preDeleteColumn(tableName, columnName)) {
1715 return;
1716 }
1717 }
1718 new TableDeleteFamilyHandler(tableName, columnName, this, this).prepare().process();
1719 if (cpHost != null) {
1720 cpHost.postDeleteColumn(tableName, columnName);
1721 }
1722 }
1723
1724 @Override
1725 public DeleteColumnResponse deleteColumn(RpcController controller, DeleteColumnRequest req)
1726 throws ServiceException {
1727 try {
1728 deleteColumn(req.getTableName().toByteArray(), req.getColumnName().toByteArray());
1729 } catch (IOException ioe) {
1730 throw new ServiceException(ioe);
1731 }
1732 return DeleteColumnResponse.newBuilder().build();
1733 }
1734
1735 @Override
1736 public void enableTable(final byte[] tableName) throws IOException {
1737 checkInitialized();
1738 if (cpHost != null) {
1739 cpHost.preEnableTable(tableName);
1740 }
1741 this.executorService.submit(new EnableTableHandler(this, tableName,
1742 catalogTracker, assignmentManager, tableLockManager, false).prepare());
1743 if (cpHost != null) {
1744 cpHost.postEnableTable(tableName);
1745 }
1746 }
1747
1748 @Override
1749 public EnableTableResponse enableTable(RpcController controller, EnableTableRequest request)
1750 throws ServiceException {
1751 try {
1752 enableTable(request.getTableName().toByteArray());
1753 } catch (IOException ioe) {
1754 throw new ServiceException(ioe);
1755 }
1756 return EnableTableResponse.newBuilder().build();
1757 }
1758
1759 @Override
1760 public void disableTable(final byte[] tableName) throws IOException {
1761 checkInitialized();
1762 if (cpHost != null) {
1763 cpHost.preDisableTable(tableName);
1764 }
1765 this.executorService.submit(new DisableTableHandler(this, tableName,
1766 catalogTracker, assignmentManager, tableLockManager, false).prepare());
1767 if (cpHost != null) {
1768 cpHost.postDisableTable(tableName);
1769 }
1770 }
1771
1772 @Override
1773 public DisableTableResponse disableTable(RpcController controller, DisableTableRequest request)
1774 throws ServiceException {
1775 try {
1776 disableTable(request.getTableName().toByteArray());
1777 } catch (IOException ioe) {
1778 throw new ServiceException(ioe);
1779 }
1780 return DisableTableResponse.newBuilder().build();
1781 }
1782
1783
1784
1785
1786
1787
1788
1789 Pair<HRegionInfo, ServerName> getTableRegionForRow(
1790 final byte [] tableName, final byte [] rowKey)
1791 throws IOException {
1792 final AtomicReference<Pair<HRegionInfo, ServerName>> result =
1793 new AtomicReference<Pair<HRegionInfo, ServerName>>(null);
1794
1795 MetaScannerVisitor visitor =
1796 new MetaScannerVisitorBase() {
1797 @Override
1798 public boolean processRow(Result data) throws IOException {
1799 if (data == null || data.size() <= 0) {
1800 return true;
1801 }
1802 Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(data);
1803 if (pair == null) {
1804 return false;
1805 }
1806 if (!Bytes.equals(pair.getFirst().getTableName(), tableName)) {
1807 return false;
1808 }
1809 result.set(pair);
1810 return true;
1811 }
1812 };
1813
1814 MetaScanner.metaScan(conf, visitor, tableName, rowKey, 1);
1815 return result.get();
1816 }
1817
1818 @Override
1819 public void modifyTable(final byte[] tableName, final HTableDescriptor descriptor)
1820 throws IOException {
1821 checkInitialized();
1822 checkCompression(descriptor);
1823 if (cpHost != null) {
1824 cpHost.preModifyTable(tableName, descriptor);
1825 }
1826 new ModifyTableHandler(tableName, descriptor, this, this).prepare().process();
1827 if (cpHost != null) {
1828 cpHost.postModifyTable(tableName, descriptor);
1829 }
1830 }
1831
1832 @Override
1833 public ModifyTableResponse modifyTable(RpcController controller, ModifyTableRequest req)
1834 throws ServiceException {
1835 try {
1836 modifyTable(req.getTableName().toByteArray(),
1837 HTableDescriptor.convert(req.getTableSchema()));
1838 } catch (IOException ioe) {
1839 throw new ServiceException(ioe);
1840 }
1841 return ModifyTableResponse.newBuilder().build();
1842 }
1843
1844 @Override
1845 public void checkTableModifiable(final byte [] tableName)
1846 throws IOException, TableNotFoundException, TableNotDisabledException {
1847 String tableNameStr = Bytes.toString(tableName);
1848 if (isCatalogTable(tableName)) {
1849 throw new IOException("Can't modify catalog tables");
1850 }
1851 if (!MetaReader.tableExists(getCatalogTracker(), tableNameStr)) {
1852 throw new TableNotFoundException(tableNameStr);
1853 }
1854 if (!getAssignmentManager().getZKTable().
1855 isDisabledTable(Bytes.toString(tableName))) {
1856 throw new TableNotDisabledException(tableName);
1857 }
1858 }
1859
1860 @Override
1861 public GetClusterStatusResponse getClusterStatus(RpcController controller,
1862 GetClusterStatusRequest req)
1863 throws ServiceException {
1864 GetClusterStatusResponse.Builder response = GetClusterStatusResponse.newBuilder();
1865 response.setClusterStatus(getClusterStatus().convert());
1866 return response.build();
1867 }
1868
1869
1870
1871
1872 public ClusterStatus getClusterStatus() {
1873
1874 List<String> backupMasterStrings;
1875 try {
1876 backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
1877 this.zooKeeper.backupMasterAddressesZNode);
1878 } catch (KeeperException e) {
1879 LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
1880 backupMasterStrings = new ArrayList<String>(0);
1881 }
1882 List<ServerName> backupMasters = new ArrayList<ServerName>(
1883 backupMasterStrings.size());
1884 for (String s: backupMasterStrings) {
1885 try {
1886 byte [] bytes =
1887 ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(
1888 this.zooKeeper.backupMasterAddressesZNode, s));
1889 if (bytes != null) {
1890 ServerName sn;
1891 try {
1892 sn = ServerName.parseFrom(bytes);
1893 } catch (DeserializationException e) {
1894 LOG.warn("Failed parse, skipping registering backup server", e);
1895 continue;
1896 }
1897 backupMasters.add(sn);
1898 }
1899 } catch (KeeperException e) {
1900 LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
1901 "backup servers"), e);
1902 }
1903 }
1904 Collections.sort(backupMasters, new Comparator<ServerName>() {
1905 public int compare(ServerName s1, ServerName s2) {
1906 return s1.getServerName().compareTo(s2.getServerName());
1907 }});
1908
1909 return new ClusterStatus(VersionInfo.getVersion(),
1910 this.fileSystemManager.getClusterId().toString(),
1911 this.serverManager.getOnlineServers(),
1912 this.serverManager.getDeadServers().copyServerNames(),
1913 this.serverName,
1914 backupMasters,
1915 this.assignmentManager.getRegionStates().getRegionsInTransition(),
1916 this.getCoprocessors(), this.loadBalancerTracker.isBalancerOn());
1917 }
1918
1919 public String getClusterId() {
1920 if (fileSystemManager == null) {
1921 return "";
1922 }
1923 ClusterId id = fileSystemManager.getClusterId();
1924 if (id == null) {
1925 return "";
1926 }
1927 return id.toString();
1928 }
1929
1930
1931
1932
1933
1934
1935
1936
1937 public static String getLoadedCoprocessors() {
1938 return CoprocessorHost.getLoadedCoprocessors().toString();
1939 }
1940
1941
1942
1943
1944 public long getMasterStartTime() {
1945 return masterStartTime;
1946 }
1947
1948
1949
1950
1951 public long getMasterActiveTime() {
1952 return masterActiveTime;
1953 }
1954
1955
1956
1957
1958 public String[] getCoprocessors() {
1959 Set<String> masterCoprocessors =
1960 getCoprocessorHost().getCoprocessors();
1961 return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
1962 }
1963
1964 @Override
1965 public void abort(final String msg, final Throwable t) {
1966 if (cpHost != null) {
1967
1968 LOG.fatal("Master server abort: loaded coprocessors are: " +
1969 getLoadedCoprocessors());
1970 }
1971
1972 if (abortNow(msg, t)) {
1973 if (t != null) LOG.fatal(msg, t);
1974 else LOG.fatal(msg);
1975 this.abort = true;
1976 stop("Aborting");
1977 }
1978 }
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997 private boolean tryRecoveringExpiredZKSession() throws InterruptedException,
1998 IOException, KeeperException, ExecutionException {
1999
2000 this.zooKeeper.reconnectAfterExpiration();
2001
2002 Callable<Boolean> callable = new Callable<Boolean> () {
2003 public Boolean call() throws InterruptedException,
2004 IOException, KeeperException {
2005 MonitoredTask status =
2006 TaskMonitor.get().createStatus("Recovering expired ZK session");
2007 try {
2008 if (!becomeActiveMaster(status)) {
2009 return Boolean.FALSE;
2010 }
2011 serverShutdownHandlerEnabled = false;
2012 initialized = false;
2013 finishInitialization(status, true);
2014 return Boolean.TRUE;
2015 } finally {
2016 status.cleanup();
2017 }
2018 }
2019 };
2020
2021 long timeout =
2022 conf.getLong("hbase.master.zksession.recover.timeout", 300000);
2023 java.util.concurrent.ExecutorService executor =
2024 Executors.newSingleThreadExecutor();
2025 Future<Boolean> result = executor.submit(callable);
2026 executor.shutdown();
2027 if (executor.awaitTermination(timeout, TimeUnit.MILLISECONDS)
2028 && result.isDone()) {
2029 Boolean recovered = result.get();
2030 if (recovered != null) {
2031 return recovered.booleanValue();
2032 }
2033 }
2034 executor.shutdownNow();
2035 return false;
2036 }
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046 private boolean abortNow(final String msg, final Throwable t) {
2047 if (!this.isActiveMaster) {
2048 return true;
2049 }
2050 if (t != null && t instanceof KeeperException.SessionExpiredException) {
2051 try {
2052 LOG.info("Primary Master trying to recover from ZooKeeper session " +
2053 "expiry.");
2054 return !tryRecoveringExpiredZKSession();
2055 } catch (Throwable newT) {
2056 LOG.error("Primary master encountered unexpected exception while " +
2057 "trying to recover from ZooKeeper session" +
2058 " expiry. Proceeding with server abort.", newT);
2059 }
2060 }
2061 return true;
2062 }
2063
2064 @Override
2065 public ZooKeeperWatcher getZooKeeper() {
2066 return zooKeeper;
2067 }
2068
2069 @Override
2070 public MasterCoprocessorHost getCoprocessorHost() {
2071 return cpHost;
2072 }
2073
2074 @Override
2075 public ServerName getServerName() {
2076 return this.serverName;
2077 }
2078
2079 @Override
2080 public CatalogTracker getCatalogTracker() {
2081 return catalogTracker;
2082 }
2083
2084 @Override
2085 public AssignmentManager getAssignmentManager() {
2086 return this.assignmentManager;
2087 }
2088
2089 @Override
2090 public TableLockManager getTableLockManager() {
2091 return this.tableLockManager;
2092 }
2093
2094 public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
2095 return rsFatals;
2096 }
2097
2098 public void shutdown() throws IOException {
2099 if (spanReceiverHost != null) {
2100 spanReceiverHost.closeReceivers();
2101 }
2102 if (cpHost != null) {
2103 cpHost.preShutdown();
2104 }
2105 if (mxBean != null) {
2106 MBeanUtil.unregisterMBean(mxBean);
2107 mxBean = null;
2108 }
2109 if (this.assignmentManager != null) this.assignmentManager.shutdown();
2110 if (this.serverManager != null) this.serverManager.shutdownCluster();
2111 try {
2112 if (this.clusterStatusTracker != null){
2113 this.clusterStatusTracker.setClusterDown();
2114 }
2115 } catch (KeeperException e) {
2116 LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
2117 }
2118 }
2119
2120 @Override
2121 public ShutdownResponse shutdown(RpcController controller, ShutdownRequest request)
2122 throws ServiceException {
2123 try {
2124 shutdown();
2125 } catch (IOException e) {
2126 throw new ServiceException(e);
2127 }
2128 return ShutdownResponse.newBuilder().build();
2129 }
2130
2131 public void stopMaster() throws IOException {
2132 if (cpHost != null) {
2133 cpHost.preStopMaster();
2134 }
2135 stop("Stopped by " + Thread.currentThread().getName());
2136 }
2137
2138 @Override
2139 public StopMasterResponse stopMaster(RpcController controller, StopMasterRequest request)
2140 throws ServiceException {
2141 try {
2142 stopMaster();
2143 } catch (IOException e) {
2144 throw new ServiceException(e);
2145 }
2146 return StopMasterResponse.newBuilder().build();
2147 }
2148
2149 @Override
2150 public void stop(final String why) {
2151 LOG.info(why);
2152 this.stopped = true;
2153
2154 stopSleeper.skipSleepCycle();
2155
2156 if (this.activeMasterManager != null) {
2157 synchronized (this.activeMasterManager.clusterHasActiveMaster) {
2158 this.activeMasterManager.clusterHasActiveMaster.notifyAll();
2159 }
2160 }
2161 }
2162
2163 @Override
2164 public boolean isStopped() {
2165 return this.stopped;
2166 }
2167
2168 public boolean isAborted() {
2169 return this.abort;
2170 }
2171
2172 void checkInitialized() throws PleaseHoldException {
2173 if (!this.initialized) {
2174 throw new PleaseHoldException("Master is initializing");
2175 }
2176 }
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186 public boolean isActiveMaster() {
2187 return isActiveMaster;
2188 }
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199 public boolean isInitialized() {
2200 return initialized;
2201 }
2202
2203
2204
2205
2206
2207
2208 public boolean isServerShutdownHandlerEnabled() {
2209 return this.serverShutdownHandlerEnabled;
2210 }
2211
2212 @Override
2213 public AssignRegionResponse assignRegion(RpcController controller, AssignRegionRequest req)
2214 throws ServiceException {
2215 try {
2216 final byte [] regionName = req.getRegion().getValue().toByteArray();
2217 RegionSpecifierType type = req.getRegion().getType();
2218 AssignRegionResponse arr = AssignRegionResponse.newBuilder().build();
2219
2220 checkInitialized();
2221 if (type != RegionSpecifierType.REGION_NAME) {
2222 LOG.warn("assignRegion specifier type: expected: " + RegionSpecifierType.REGION_NAME
2223 + " actual: " + type);
2224 }
2225 HRegionInfo regionInfo = assignmentManager.getRegionStates().getRegionInfo(regionName);
2226 if (regionInfo == null) throw new UnknownRegionException(Bytes.toString(regionName));
2227 if (cpHost != null) {
2228 if (cpHost.preAssign(regionInfo)) {
2229 return arr;
2230 }
2231 }
2232 assignmentManager.assign(regionInfo, true, true);
2233 if (cpHost != null) {
2234 cpHost.postAssign(regionInfo);
2235 }
2236
2237 return arr;
2238 } catch (IOException ioe) {
2239 throw new ServiceException(ioe);
2240 }
2241 }
2242
2243 public void assignRegion(HRegionInfo hri) {
2244 assignmentManager.assign(hri, true);
2245 }
2246
2247 @Override
2248 public UnassignRegionResponse unassignRegion(RpcController controller, UnassignRegionRequest req)
2249 throws ServiceException {
2250 try {
2251 final byte [] regionName = req.getRegion().getValue().toByteArray();
2252 RegionSpecifierType type = req.getRegion().getType();
2253 final boolean force = req.getForce();
2254 UnassignRegionResponse urr = UnassignRegionResponse.newBuilder().build();
2255
2256 checkInitialized();
2257 if (type != RegionSpecifierType.REGION_NAME) {
2258 LOG.warn("unassignRegion specifier type: expected: " + RegionSpecifierType.REGION_NAME
2259 + " actual: " + type);
2260 }
2261 Pair<HRegionInfo, ServerName> pair =
2262 MetaReader.getRegion(this.catalogTracker, regionName);
2263 if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
2264 HRegionInfo hri = pair.getFirst();
2265 if (cpHost != null) {
2266 if (cpHost.preUnassign(hri, force)) {
2267 return urr;
2268 }
2269 }
2270 if (force) {
2271 this.assignmentManager.regionOffline(hri);
2272 assignRegion(hri);
2273 } else {
2274 this.assignmentManager.unassign(hri, force);
2275 }
2276 if (cpHost != null) {
2277 cpHost.postUnassign(hri, force);
2278 }
2279
2280 return urr;
2281 } catch (IOException ioe) {
2282 throw new ServiceException(ioe);
2283 }
2284 }
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294 public GetTableDescriptorsResponse getTableDescriptors(
2295 RpcController controller, GetTableDescriptorsRequest req) throws ServiceException {
2296 GetTableDescriptorsResponse.Builder builder = GetTableDescriptorsResponse.newBuilder();
2297 if (req.getTableNamesCount() == 0) {
2298
2299 Map<String, HTableDescriptor> descriptors = null;
2300 try {
2301 descriptors = this.tableDescriptors.getAll();
2302 } catch (IOException e) {
2303 LOG.warn("Failed getting all descriptors", e);
2304 }
2305 if (descriptors != null) {
2306 for (HTableDescriptor htd : descriptors.values()) {
2307 builder.addTableSchema(htd.convert());
2308 }
2309 }
2310 }
2311 else {
2312 for (String s: req.getTableNamesList()) {
2313 HTableDescriptor htd = null;
2314 try {
2315 htd = this.tableDescriptors.get(s);
2316 } catch (IOException e) {
2317 LOG.warn("Failed getting descriptor for " + s, e);
2318 }
2319 if (htd == null) continue;
2320 builder.addTableSchema(htd.convert());
2321 }
2322 }
2323 return builder.build();
2324 }
2325
2326
2327
2328
2329
2330
2331
2332 public double getAverageLoad() {
2333 if (this.assignmentManager == null) {
2334 return 0;
2335 }
2336
2337 RegionStates regionStates = this.assignmentManager.getRegionStates();
2338 if (regionStates == null) {
2339 return 0;
2340 }
2341 return regionStates.getAverageLoad();
2342 }
2343
2344
2345
2346
2347
2348
2349
2350
2351 @Override
2352 public OfflineRegionResponse offlineRegion(RpcController controller, OfflineRegionRequest request)
2353 throws ServiceException {
2354 final byte [] regionName = request.getRegion().getValue().toByteArray();
2355 RegionSpecifierType type = request.getRegion().getType();
2356 if (type != RegionSpecifierType.REGION_NAME) {
2357 LOG.warn("moveRegion specifier type: expected: " + RegionSpecifierType.REGION_NAME
2358 + " actual: " + type);
2359 }
2360
2361 try {
2362 Pair<HRegionInfo, ServerName> pair =
2363 MetaReader.getRegion(this.catalogTracker, regionName);
2364 if (pair == null) throw new UnknownRegionException(Bytes.toStringBinary(regionName));
2365 HRegionInfo hri = pair.getFirst();
2366 if (cpHost != null) {
2367 cpHost.preRegionOffline(hri);
2368 }
2369 this.assignmentManager.regionOffline(hri);
2370 if (cpHost != null) {
2371 cpHost.postRegionOffline(hri);
2372 }
2373 } catch (IOException ioe) {
2374 throw new ServiceException(ioe);
2375 }
2376 return OfflineRegionResponse.newBuilder().build();
2377 }
2378
2379 @Override
2380 public boolean registerService(Service instance) {
2381
2382
2383
2384 Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
2385 if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
2386 LOG.error("Coprocessor service "+serviceDesc.getFullName()+
2387 " already registered, rejecting request from "+instance
2388 );
2389 return false;
2390 }
2391
2392 coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
2393 if (LOG.isDebugEnabled()) {
2394 LOG.debug("Registered master coprocessor service: service="+serviceDesc.getFullName());
2395 }
2396 return true;
2397 }
2398
2399 @Override
2400 public ClientProtos.CoprocessorServiceResponse execMasterService(final RpcController controller,
2401 final ClientProtos.CoprocessorServiceRequest request) throws ServiceException {
2402 try {
2403 ServerRpcController execController = new ServerRpcController();
2404
2405 ClientProtos.CoprocessorServiceCall call = request.getCall();
2406 String serviceName = call.getServiceName();
2407 String methodName = call.getMethodName();
2408 if (!coprocessorServiceHandlers.containsKey(serviceName)) {
2409 throw new UnknownProtocolException(null,
2410 "No registered master coprocessor service found for name "+serviceName);
2411 }
2412
2413 Service service = coprocessorServiceHandlers.get(serviceName);
2414 Descriptors.ServiceDescriptor serviceDesc = service.getDescriptorForType();
2415 Descriptors.MethodDescriptor methodDesc = serviceDesc.findMethodByName(methodName);
2416 if (methodDesc == null) {
2417 throw new UnknownProtocolException(service.getClass(),
2418 "Unknown method "+methodName+" called on master service "+serviceName);
2419 }
2420
2421
2422 Message execRequest = service.getRequestPrototype(methodDesc).newBuilderForType()
2423 .mergeFrom(call.getRequest()).build();
2424 final Message.Builder responseBuilder =
2425 service.getResponsePrototype(methodDesc).newBuilderForType();
2426 service.callMethod(methodDesc, execController, execRequest, new RpcCallback<Message>() {
2427 @Override
2428 public void run(Message message) {
2429 if (message != null) {
2430 responseBuilder.mergeFrom(message);
2431 }
2432 }
2433 });
2434 Message execResult = responseBuilder.build();
2435
2436 if (execController.getFailedOn() != null) {
2437 throw execController.getFailedOn();
2438 }
2439 ClientProtos.CoprocessorServiceResponse.Builder builder =
2440 ClientProtos.CoprocessorServiceResponse.newBuilder();
2441 builder.setRegion(RequestConverter.buildRegionSpecifier(
2442 RegionSpecifierType.REGION_NAME, HConstants.EMPTY_BYTE_ARRAY));
2443 builder.setValue(
2444 builder.getValueBuilder().setName(execResult.getClass().getName())
2445 .setValue(execResult.toByteString()));
2446 return builder.build();
2447 } catch (IOException ie) {
2448 throw new ServiceException(ie);
2449 }
2450 }
2451
2452
2453
2454
2455
2456
2457
2458 public static HMaster constructMaster(Class<? extends HMaster> masterClass,
2459 final Configuration conf) {
2460 try {
2461 Constructor<? extends HMaster> c =
2462 masterClass.getConstructor(Configuration.class);
2463 return c.newInstance(conf);
2464 } catch (InvocationTargetException ite) {
2465 Throwable target = ite.getTargetException() != null?
2466 ite.getTargetException(): ite;
2467 if (target.getCause() != null) target = target.getCause();
2468 throw new RuntimeException("Failed construction of Master: " +
2469 masterClass.toString(), target);
2470 } catch (Exception e) {
2471 throw new RuntimeException("Failed construction of Master: " +
2472 masterClass.toString() + ((e.getCause() != null)?
2473 e.getCause().getMessage(): ""), e);
2474 }
2475 }
2476
2477
2478
2479
2480 public static void main(String [] args) {
2481 VersionInfo.logVersion();
2482 new HMasterCommandLine(HMaster.class).doMain(args);
2483 }
2484
2485 public HFileCleaner getHFileCleaner() {
2486 return this.hfileCleaner;
2487 }
2488
2489
2490
2491
2492
2493 public SnapshotManager getSnapshotManagerForTesting() {
2494 return this.snapshotManager;
2495 }
2496
2497
2498
2499
2500
2501 @Override
2502 public TakeSnapshotResponse snapshot(RpcController controller, TakeSnapshotRequest request)
2503 throws ServiceException {
2504 try {
2505 this.snapshotManager.checkSnapshotSupport();
2506 } catch (UnsupportedOperationException e) {
2507 throw new ServiceException(e);
2508 }
2509
2510 LOG.debug("Submitting snapshot request for:" +
2511 ClientSnapshotDescriptionUtils.toString(request.getSnapshot()));
2512
2513 SnapshotDescription snapshot = SnapshotDescriptionUtils.validate(request.getSnapshot(),
2514 this.conf);
2515 try {
2516 snapshotManager.takeSnapshot(snapshot);
2517 } catch (IOException e) {
2518 throw new ServiceException(e);
2519 }
2520
2521
2522 long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(conf, snapshot.getType(),
2523 SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
2524 return TakeSnapshotResponse.newBuilder().setExpectedTimeout(waitTime).build();
2525 }
2526
2527
2528
2529
2530 @Override
2531 public ListSnapshotResponse getCompletedSnapshots(RpcController controller,
2532 ListSnapshotRequest request) throws ServiceException {
2533 try {
2534 ListSnapshotResponse.Builder builder = ListSnapshotResponse.newBuilder();
2535 List<SnapshotDescription> snapshots = snapshotManager.getCompletedSnapshots();
2536
2537
2538 for (SnapshotDescription snapshot : snapshots) {
2539 builder.addSnapshots(snapshot);
2540 }
2541 return builder.build();
2542 } catch (IOException e) {
2543 throw new ServiceException(e);
2544 }
2545 }
2546
2547
2548
2549
2550
2551
2552
2553
2554 @Override
2555 public DeleteSnapshotResponse deleteSnapshot(RpcController controller,
2556 DeleteSnapshotRequest request) throws ServiceException {
2557 try {
2558 this.snapshotManager.checkSnapshotSupport();
2559 } catch (UnsupportedOperationException e) {
2560 throw new ServiceException(e);
2561 }
2562
2563 try {
2564 snapshotManager.deleteSnapshot(request.getSnapshot());
2565 return DeleteSnapshotResponse.newBuilder().build();
2566 } catch (IOException e) {
2567 throw new ServiceException(e);
2568 }
2569 }
2570
2571
2572
2573
2574
2575
2576
2577
2578 @Override
2579 public IsSnapshotDoneResponse isSnapshotDone(RpcController controller,
2580 IsSnapshotDoneRequest request) throws ServiceException {
2581 LOG.debug("Checking to see if snapshot from request:" +
2582 ClientSnapshotDescriptionUtils.toString(request.getSnapshot()) + " is done");
2583 try {
2584 IsSnapshotDoneResponse.Builder builder = IsSnapshotDoneResponse.newBuilder();
2585 boolean done = snapshotManager.isSnapshotDone(request.getSnapshot());
2586 builder.setDone(done);
2587 return builder.build();
2588 } catch (IOException e) {
2589 throw new ServiceException(e);
2590 }
2591 }
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606 @Override
2607 public RestoreSnapshotResponse restoreSnapshot(RpcController controller,
2608 RestoreSnapshotRequest request) throws ServiceException {
2609 try {
2610 this.snapshotManager.checkSnapshotSupport();
2611 } catch (UnsupportedOperationException e) {
2612 throw new ServiceException(e);
2613 }
2614
2615 try {
2616 SnapshotDescription reqSnapshot = request.getSnapshot();
2617 snapshotManager.restoreSnapshot(reqSnapshot);
2618 return RestoreSnapshotResponse.newBuilder().build();
2619 } catch (IOException e) {
2620 throw new ServiceException(e);
2621 }
2622 }
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634 @Override
2635 public IsRestoreSnapshotDoneResponse isRestoreSnapshotDone(RpcController controller,
2636 IsRestoreSnapshotDoneRequest request) throws ServiceException {
2637 try {
2638 SnapshotDescription snapshot = request.getSnapshot();
2639 IsRestoreSnapshotDoneResponse.Builder builder = IsRestoreSnapshotDoneResponse.newBuilder();
2640 boolean isRestoring = snapshotManager.isRestoringTable(snapshot);
2641 builder.setDone(!isRestoring);
2642 return builder.build();
2643 } catch (IOException e) {
2644 throw new ServiceException(e);
2645 }
2646 }
2647
2648 private boolean isHealthCheckerConfigured() {
2649 String healthScriptLocation = this.conf.get(HConstants.HEALTH_SCRIPT_LOC);
2650 return org.apache.commons.lang.StringUtils.isNotBlank(healthScriptLocation);
2651 }
2652 }