1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import java.io.IOException;
23 import java.lang.reflect.Constructor;
24 import java.lang.reflect.InvocationTargetException;
25 import java.lang.reflect.Method;
26 import java.net.InetAddress;
27 import java.net.InetSocketAddress;
28 import java.util.ArrayList;
29 import java.util.Collection;
30 import java.util.Collections;
31 import java.util.Comparator;
32 import java.util.HashMap;
33 import java.util.Iterator;
34 import java.util.List;
35 import java.util.Map;
36 import java.util.Set;
37 import java.util.concurrent.Callable;
38 import java.util.concurrent.ExecutionException;
39 import java.util.concurrent.Executors;
40 import java.util.concurrent.Future;
41 import java.util.concurrent.TimeUnit;
42 import java.util.concurrent.atomic.AtomicReference;
43
44 import javax.management.ObjectName;
45
46 import org.apache.commons.logging.Log;
47 import org.apache.commons.logging.LogFactory;
48 import org.apache.hadoop.conf.Configuration;
49 import org.apache.hadoop.fs.Path;
50 import org.apache.hadoop.hbase.Chore;
51 import org.apache.hadoop.hbase.ClusterStatus;
52 import org.apache.hadoop.hbase.HColumnDescriptor;
53 import org.apache.hadoop.hbase.HConstants;
54 import org.apache.hadoop.hbase.HRegionInfo;
55 import org.apache.hadoop.hbase.HServerLoad;
56 import org.apache.hadoop.hbase.HTableDescriptor;
57 import org.apache.hadoop.hbase.HealthCheckChore;
58 import org.apache.hadoop.hbase.MasterNotRunningException;
59 import org.apache.hadoop.hbase.PleaseHoldException;
60 import org.apache.hadoop.hbase.Server;
61 import org.apache.hadoop.hbase.ServerName;
62 import org.apache.hadoop.hbase.TableDescriptors;
63 import org.apache.hadoop.hbase.TableNotDisabledException;
64 import org.apache.hadoop.hbase.TableNotFoundException;
65 import org.apache.hadoop.hbase.UnknownRegionException;
66 import org.apache.hadoop.hbase.catalog.CatalogTracker;
67 import org.apache.hadoop.hbase.catalog.MetaReader;
68 import org.apache.hadoop.hbase.client.HConnectionManager;
69 import org.apache.hadoop.hbase.client.MetaScanner;
70 import org.apache.hadoop.hbase.client.UserProvider;
71 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
72 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
73 import org.apache.hadoop.hbase.client.Result;
74 import org.apache.hadoop.hbase.client.coprocessor.Exec;
75 import org.apache.hadoop.hbase.client.coprocessor.ExecResult;
76 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
77 import org.apache.hadoop.hbase.executor.ExecutorService;
78 import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
79 import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
80 import org.apache.hadoop.hbase.ipc.HBaseRPC;
81 import org.apache.hadoop.hbase.ipc.HBaseServer;
82 import org.apache.hadoop.hbase.ipc.HMasterInterface;
83 import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
84 import org.apache.hadoop.hbase.ipc.ProtocolSignature;
85 import org.apache.hadoop.hbase.ipc.RpcServer;
86 import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
87 import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
88 import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
89 import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
90 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
91 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
92 import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
93 import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
94 import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
95 import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
96 import org.apache.hadoop.hbase.master.handler.TableEventHandler;
97 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
98 import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
99 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
100 import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
101 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
102 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
103 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
104 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
105 import org.apache.hadoop.hbase.regionserver.wal.HLog;
106 import org.apache.hadoop.hbase.replication.regionserver.Replication;
107 import org.apache.hadoop.hbase.security.User;
108 import org.apache.hadoop.hbase.snapshot.HSnapshotDescription;
109 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
110 import org.apache.hadoop.hbase.util.Bytes;
111 import org.apache.hadoop.hbase.util.FSTableDescriptors;
112 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
113 import org.apache.hadoop.hbase.util.HasThread;
114 import org.apache.hadoop.hbase.util.InfoServer;
115 import org.apache.hadoop.hbase.util.Pair;
116 import org.apache.hadoop.hbase.util.Sleeper;
117 import org.apache.hadoop.hbase.util.Strings;
118 import org.apache.hadoop.hbase.util.Threads;
119 import org.apache.hadoop.hbase.util.VersionInfo;
120 import org.apache.hadoop.hbase.zookeeper.ClusterId;
121 import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
122 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
123 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
124 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
125 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
126 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
127 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
128 import org.apache.hadoop.io.MapWritable;
129 import org.apache.hadoop.io.Text;
130 import org.apache.hadoop.metrics.util.MBeanUtil;
131 import org.apache.hadoop.net.DNS;
132 import org.apache.zookeeper.KeeperException;
133 import org.apache.zookeeper.Watcher;
134
135 import com.google.common.collect.ClassToInstanceMap;
136 import com.google.common.collect.Maps;
137 import com.google.common.collect.MutableClassToInstanceMap;
138 import com.google.protobuf.ServiceException;
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157 public class HMaster extends HasThread
158 implements HMasterInterface, HMasterRegionInterface, MasterServices,
159 Server {
/** Class-wide logger. */
private static final Log LOG = LogFactory.getLog(HMaster.class.getName());

/**
 * Name token for this process. Used below as the thread-name prefix, the
 * info-server name/attribute key and the ZooKeeper watcher identifier prefix.
 */
public static final String MASTER = "master";

// Private copy of the configuration handed to the constructor.
private final Configuration conf;
// Web UI server; only created in run() when the info port is >= 0.
private InfoServer infoServer;

// ZooKeeper connection, opened in the constructor and closed at the end of run().
private ZooKeeperWatcher zooKeeper;
// Created in becomeActiveMaster(); used to block until this master is active.
private ActiveMasterManager activeMasterManager;
// ZooKeeper-backed trackers started in initializeZKBasedSystemTrackers().
private RegionServerTracker regionServerTracker;
private DrainingServerTracker drainingServerTracker;

// RPC server serving HMasterInterface and HMasterRegionInterface.
private final RpcServer rpcServer;

// Address the RPC server actually listens on (taken from rpcServer after creation).
private final InetSocketAddress isa;

// Master metrics; receives request counts in regionServerReport().
private final MasterMetrics metrics;
// File system manager, created in finishInitialization().
private MasterFileSystem fileSystemManager;

// Created in finishInitialization() unless running in master-recovery mode.
private ServerManager serverManager;

// Package-private (no modifier); created in initializeZKBasedSystemTrackers().
AssignmentManager assignmentManager;
private CatalogTracker catalogTracker;
// Created and started in becomeActiveMaster().
private ClusterStatusTracker clusterStatusTracker;

// Bounded buffer of fatal-error messages reported by region servers
// (see reportRSFatalError); size comes from "hbase.master.buffer.for.rs.fatals".
private MemoryBoundedLogMessageBuffer rsFatals;

// Polled by loop(); the main thread leaves loop() once this is true.
private volatile boolean stopped = false;
// Read in run()'s finally block to decide whether region servers are told to shut down.
private volatile boolean abort = false;
// Set to true at the top of finishInitialization().
private volatile boolean isActiveMaster = false;

// Set to true once finishInitialization() has completed its main sequence.
volatile boolean initialized = false;

// Flipped once by enableServerShutdownHandler().
private volatile boolean serverShutdownHandlerEnabled = false;
// From HLog.SEPARATE_HLOG_FOR_META; selects the split path in splitLogAndExpireIfOnline().
private volatile boolean shouldSplitMetaSeparately;

// Package-private executor service; pools are started in startServiceThreads().
ExecutorService executorService;

private LoadBalancer balancer;
// Thread running the periodic balancer chore (see getAndStartBalancerChore).
private Thread balancerChore;
// NOTE(review): presumably the on/off switch for balancing; its use is outside this view.
private volatile boolean balanceSwitch = true;

// Background chores; interrupted in stopChores() / stopServiceThreads().
private CatalogJanitor catalogJanitorChore;
private LogCleaner logCleaner;
private HFileCleaner hfileCleaner;

// Coprocessor host; created in finishInitialization() unless in master-recovery mode.
private MasterCoprocessorHost cpHost;
// Identity of this master: hostname, RPC port and construction timestamp.
private final ServerName serverName;

private TableDescriptors tableDescriptors;

// Timestamps (ms): when run() started, and when finishInitialization() began.
private long masterStartTime;
private long masterActiveTime;

// Created at the end of initializeZKBasedSystemTrackers().
private SnapshotManager snapshotManager;

// NOTE(review): presumably set by registerMBean(), which is outside this view.
private ObjectName mxBean = null;

// Coprocessor protocol registrations: by protocol class, and by name.
private ClassToInstanceMap<CoprocessorProtocol>
    protocolHandlers = MutableClassToInstanceMap.create();
private Map<String, Class<? extends CoprocessorProtocol>>
    protocolHandlerNames = Maps.newHashMap();

// Only created when isHealthCheckerConfigured() is true; otherwise stays null.
private HealthCheckChore healthCheckChore;

// From "hbase.master.wait.for.log.splitting": when true, finishInitialization()
// splits the logs of all failed servers up front.
private boolean waitingOnLogSplitting = false;

// Set to true in finishInitialization() just before the ROOT region is assigned.
private volatile boolean initializationBeforeMetaAssignment = false;

// Snapshot of the ZooKeeper listeners registered when run() starts.
private List<ZooKeeperListener> registeredZKListenersBeforeRecovery;
272
273
274
275
276
277
278
279
280
281
282
283
284
285 public HMaster(final Configuration conf)
286 throws IOException, KeeperException, InterruptedException {
287 this.conf = new Configuration(conf);
288
289 this.conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
290
291 HConnectionManager.setServerSideHConnectionRetries(this.conf, LOG);
292
293 String hostname = Strings.domainNamePointerToHostName(DNS.getDefaultHost(
294 conf.get("hbase.master.dns.interface", "default"),
295 conf.get("hbase.master.dns.nameserver", "default")));
296 int port = conf.getInt(HConstants.MASTER_PORT, HConstants.DEFAULT_MASTER_PORT);
297
298 InetSocketAddress initialIsa = new InetSocketAddress(hostname, port);
299 if (initialIsa.getAddress() == null) {
300 throw new IllegalArgumentException("Failed resolve of hostname " + initialIsa);
301 }
302
303 String bindAddress = conf.get("hbase.master.ipc.address");
304 if (bindAddress != null) {
305 initialIsa = new InetSocketAddress(bindAddress, port);
306 if (initialIsa.getAddress() == null) {
307 throw new IllegalArgumentException("Failed resolve of bind address " + initialIsa);
308 }
309 }
310 int numHandlers = conf.getInt("hbase.master.handler.count",
311 conf.getInt("hbase.regionserver.handler.count", 25));
312 this.rpcServer = HBaseRPC.getServer(this,
313 new Class<?>[]{HMasterInterface.class, HMasterRegionInterface.class},
314 initialIsa.getHostName(),
315 initialIsa.getPort(),
316 numHandlers,
317 0,
318 conf.getBoolean("hbase.rpc.verbose", false), conf,
319 0);
320
321 this.isa = this.rpcServer.getListenerAddress();
322
323 this.serverName = new ServerName(hostname,
324 this.isa.getPort(), System.currentTimeMillis());
325 this.rsFatals = new MemoryBoundedLogMessageBuffer(
326 conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));
327
328
329 ZKUtil.loginClient(this.conf, "hbase.zookeeper.client.keytab.file",
330 "hbase.zookeeper.client.kerberos.principal", this.isa.getHostName());
331
332
333 UserProvider provider = UserProvider.instantiate(conf);
334 provider.login("hbase.master.keytab.file",
335 "hbase.master.kerberos.principal", this.isa.getHostName());
336
337
338 setName(MASTER + "-" + this.serverName.toString());
339
340 Replication.decorateMasterConfiguration(this.conf);
341
342
343
344 if (this.conf.get("mapred.task.id") == null) {
345 this.conf.set("mapred.task.id", "hb_m_" + this.serverName.toString());
346 }
347
348 this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this, true);
349 this.rpcServer.startThreads();
350 this.metrics = new MasterMetrics(getServerName().toString());
351
352
353 int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
354 HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
355 if (isHealthCheckerConfigured()) {
356 healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
357 }
358
359 this.shouldSplitMetaSeparately = conf.getBoolean(HLog.SEPARATE_HLOG_FOR_META, false);
360 waitingOnLogSplitting = this.conf.getBoolean("hbase.master.wait.for.log.splitting", false);
361 }
362
363
364
365
366
367
368
369
370 private static void stallIfBackupMaster(final Configuration c,
371 final ActiveMasterManager amm)
372 throws InterruptedException {
373
374 if (!c.getBoolean(HConstants.MASTER_TYPE_BACKUP,
375 HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
376 return;
377 }
378 LOG.debug("HMaster started in backup mode. " +
379 "Stalling until master znode is written.");
380
381
382 while (!amm.isActiveMaster()) {
383 LOG.debug("Waiting for master address ZNode to be written " +
384 "(Also watching cluster state node)");
385 Thread.sleep(c.getInt("zookeeper.session.timeout", 180 * 1000));
386 }
387
388 }
389
390
391
392
393
394
395
396
397
398
399 @Override
400 public void run() {
401 MonitoredTask startupStatus =
402 TaskMonitor.get().createStatus("Master startup");
403 startupStatus.setDescription("Master startup");
404 masterStartTime = System.currentTimeMillis();
405 try {
406 this.registeredZKListenersBeforeRecovery = this.zooKeeper.getListeners();
407
408
409 int port = this.conf.getInt("hbase.master.info.port", 60010);
410 if (port >= 0) {
411 String a = this.conf.get("hbase.master.info.bindAddress", "0.0.0.0");
412 this.infoServer = new InfoServer(MASTER, a, port, false, this.conf);
413 this.infoServer.addServlet("status", "/master-status", MasterStatusServlet.class);
414 this.infoServer.addServlet("dump", "/dump", MasterDumpServlet.class);
415 this.infoServer.setAttribute(MASTER, this);
416 this.infoServer.start();
417 }
418
419
420
421
422
423
424
425
426
427
428
429 becomeActiveMaster(startupStatus);
430
431
432 if (!this.stopped) {
433 finishInitialization(startupStatus, false);
434 loop();
435 }
436 } catch (Throwable t) {
437
438 if (t instanceof NoClassDefFoundError &&
439 t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
440
441 abort("HBase is having a problem with its Hadoop jars. You may need to "
442 + "recompile HBase against Hadoop version "
443 + org.apache.hadoop.util.VersionInfo.getVersion()
444 + " or change your hadoop jars to start properly", t);
445 } else {
446 abort("Unhandled exception. Starting shutdown.", t);
447 }
448 } finally {
449 startupStatus.cleanup();
450
451 stopChores();
452
453
454 if (!this.abort && this.serverManager != null &&
455 this.serverManager.isClusterShutdown()) {
456 this.serverManager.letRegionServersShutdown();
457 }
458 stopServiceThreads();
459
460 if (this.activeMasterManager != null) this.activeMasterManager.stop();
461 if (this.catalogTracker != null) this.catalogTracker.stop();
462 if (this.serverManager != null) this.serverManager.stop();
463 if (this.assignmentManager != null) this.assignmentManager.stop();
464 if (this.fileSystemManager != null) this.fileSystemManager.stop();
465 if (this.snapshotManager != null) this.snapshotManager.stop("server shutting down.");
466 this.zooKeeper.close();
467 }
468 LOG.info("HMaster main thread exiting");
469 }
470
471
472
473
474
475
476
477 private boolean becomeActiveMaster(MonitoredTask startupStatus)
478 throws InterruptedException {
479
480
481 this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName,
482 this);
483 this.zooKeeper.registerListener(activeMasterManager);
484 stallIfBackupMaster(this.conf, this.activeMasterManager);
485
486
487
488
489 this.clusterStatusTracker = new ClusterStatusTracker(getZooKeeper(), this);
490 this.clusterStatusTracker.start();
491 return this.activeMasterManager.blockUntilBecomingActiveMaster(startupStatus);
492 }
493
494
495
496
497
498
499 private void initializeZKBasedSystemTrackers() throws IOException,
500 InterruptedException, KeeperException {
501 this.catalogTracker = new CatalogTracker(this.zooKeeper, this.conf, this);
502 this.catalogTracker.start();
503
504 this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
505 this.assignmentManager = new AssignmentManager(this, serverManager,
506 this.catalogTracker, this.balancer, this.executorService);
507 zooKeeper.registerListenerFirst(assignmentManager);
508
509 this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
510 this.serverManager);
511 this.regionServerTracker.start();
512
513 this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
514 this.serverManager);
515 this.drainingServerTracker.start();
516
517
518
519 boolean wasUp = this.clusterStatusTracker.isClusterUp();
520 if (!wasUp) this.clusterStatusTracker.setClusterUp();
521
522 LOG.info("Server active/primary master; " + this.serverName +
523 ", sessionid=0x" +
524 Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
525 ", cluster-up flag was=" + wasUp);
526
527
528 this.snapshotManager = new SnapshotManager(this, this.metrics);
529 }
530
531
532 private Sleeper stopSleeper = new Sleeper(1000, this);
533 private void loop() {
534 while (!this.stopped) {
535 stopSleeper.sleep();
536 }
537 }
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
/**
 * Completes master start-up once this process is the active master.
 *
 * <p>In order: creates the file system manager and table descriptors, publishes
 * the cluster id to ZooKeeper, creates the executor service and server manager,
 * starts the ZooKeeper trackers, coprocessor host and service threads, waits
 * for region servers, splits logs of failed servers that hosted catalog
 * regions, assigns ROOT and META, expires the remaining failed servers, joins
 * the cluster through the assignment manager, fixes up missing daughters and
 * finally starts the balancer and catalog janitor chores.
 *
 * <p>Steps guarded by {@code !masterRecovery} are skipped in recovery mode, so
 * the objects they create are expected to already exist in that case.
 *
 * @param status task updated with a description of each phase
 * @param masterRecovery true when re-running initialization for recovery
 * @throws IOException declared for file system, catalog and assignment calls
 * @throws InterruptedException declared for the blocking waits
 * @throws KeeperException declared for ZooKeeper operations
 */
private void finishInitialization(MonitoredTask status, boolean masterRecovery)
    throws IOException, InterruptedException, KeeperException {

  isActiveMaster = true;

  status.setStatus("Initializing Master file system");
  this.masterActiveTime = System.currentTimeMillis();

  this.fileSystemManager = new MasterFileSystem(this, this, metrics, masterRecovery);

  this.tableDescriptors =
    new FSTableDescriptors(this.fileSystemManager.getFileSystem(),
    this.fileSystemManager.getRootDir());

  // The cluster id comes from the file system manager and is published to ZooKeeper.
  status.setStatus("Publishing Cluster ID in ZooKeeper");
  ClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
  if (!masterRecovery) {
    this.executorService = new ExecutorService(getServerName().toString());
    this.serverManager = new ServerManager(this, this);
  }

  status.setStatus("Initializing ZK system trackers");
  initializeZKBasedSystemTrackers();

  if (!masterRecovery) {
    // Coprocessors are set up before the service threads (and RPC) are opened.
    status.setStatus("Initializing master coprocessors");
    this.cpHost = new MasterCoprocessorHost(this, this.conf);

    status.setStatus("Initializing master service threads");
    startServiceThreads();
  }

  // Block until the server manager considers enough region servers checked in.
  this.serverManager.waitForRegionServers(status);

  // Register servers that ZooKeeper lists as online but that have not reported yet.
  for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
    if (!this.serverManager.isServerOnline(sn)) {
      LOG.info("Registering server found up in zk but who has not yet " +
        "reported in: " + sn);
      this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD);
    }
  }
  if (!masterRecovery) {
    this.assignmentManager.startTimeOutMonitor();
  }

  // Servers that left log folders behind. When configured to wait on log
  // splitting, split everything now and leave nothing for the later steps.
  Set<ServerName> failedServers = this.fileSystemManager.getFailedServersFromLogFolders();
  if (waitingOnLogSplitting) {
    List<ServerName> servers = new ArrayList<ServerName>(failedServers);
    this.fileSystemManager.splitAllLogs(servers);
    failedServers.clear();
  }

  // If the last known ROOT host is among the failed servers, split its logs
  // before ROOT is assigned.
  ServerName preRootServer = this.catalogTracker.getRootLocation();
  if (preRootServer != null && failedServers.contains(preRootServer)) {
    this.fileSystemManager.splitAllLogs(preRootServer);
    failedServers.remove(preRootServer);
  }

  this.initializationBeforeMetaAssignment = true;

  // A false return means ROOT was not assigned or the master was stopped.
  if (!assignRoot(status)) return;

  this.serverManager.enableSSHForRoot();

  // Same treatment for the last known META host.
  ServerName preMetaServer = this.catalogTracker.getMetaLocationOrReadLocationFromRoot();
  if (preMetaServer != null && failedServers.contains(preMetaServer)) {
    this.fileSystemManager.splitAllLogs(preMetaServer);
    failedServers.remove(preMetaServer);
  }

  // In recovery mode no previous META server is passed on.
  if (!assignMeta(status, ((masterRecovery) ? null : preMetaServer), preRootServer)) return;

  enableServerShutdownHandler();

  // Remaining failed servers are expired through the server manager.
  status.setStatus("Submit log splitting work of non-meta region servers");
  for (ServerName curServer : failedServers) {
    this.serverManager.expireServer(curServer);
  }

  // NOTE(review): appears to be a catalog-format migration step — confirm against MetaMigrationRemovingHTD.
  org.apache.hadoop.hbase.catalog.MetaMigrationRemovingHTD.
    updateMetaWithNewHRI(this);

  status.setStatus("Starting assignment manager");
  this.assignmentManager.joinCluster();

  this.balancer.setClusterStatus(getClusterStatus());
  this.balancer.setMasterServices(this);

  status.setStatus("Fixing up missing daughters");
  fixupDaughters(status);

  if (!masterRecovery) {
    status.setStatus("Starting balancer and catalog janitor");
    this.balancerChore = getAndStartBalancerChore(this);
    this.catalogJanitorChore = new CatalogJanitor(this, this);
    startCatalogJanitorChore();
    registerMBean();
  }

  status.markComplete("Initialization successful");
  LOG.info("Master has completed initialization");
  initialized = true;

  this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();

  if (!masterRecovery) {
    if (this.cpHost != null) {
      // A failing post-start hook is logged but does not fail initialization.
      try {
        this.cpHost.postStartMaster();
      } catch (IOException ioe) {
        LOG.error("Coprocessor postStartMaster() hook failed", ioe);
      }
    }
  }
}
706
707
708
709
710
711
712
713 private void enableServerShutdownHandler() throws IOException {
714 if (!serverShutdownHandlerEnabled) {
715 serverShutdownHandlerEnabled = true;
716 this.serverManager.expireDeadNotExpiredServers();
717 }
718 }
719
720
721
722
723
724 protected void startCatalogJanitorChore() {
725 Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
726 }
727
728
729
730
731
732
733
734
735 private boolean assignRoot(MonitoredTask status)
736 throws InterruptedException, IOException, KeeperException {
737 int assigned = 0;
738 long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
739
740
741 status.setStatus("Assigning ROOT region");
742 boolean rit = this.assignmentManager.
743 processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
744 ServerName currentRootServer = null;
745 boolean rootRegionLocation = catalogTracker.verifyRootRegionLocation(timeout);
746 if (!rit && !rootRegionLocation) {
747 currentRootServer = this.catalogTracker.getRootLocation();
748 splitLogAndExpireIfOnline(currentRootServer);
749 this.assignmentManager.assignRoot();
750 waitForRootAssignment();
751 if (!this.assignmentManager.isRegionAssigned(HRegionInfo.ROOT_REGIONINFO) || this.stopped) {
752 return false;
753 }
754 assigned++;
755 } else if (rit && !rootRegionLocation) {
756 waitForRootAssignment();
757 if (!this.assignmentManager.isRegionAssigned(HRegionInfo.ROOT_REGIONINFO) || this.stopped) {
758 return false;
759 }
760 assigned++;
761 } else {
762
763 this.assignmentManager.regionOnline(HRegionInfo.ROOT_REGIONINFO,
764 this.catalogTracker.getRootLocation());
765 }
766
767
768 enableCatalogTables(Bytes.toString(HConstants.ROOT_TABLE_NAME));
769 LOG.info("-ROOT- assigned=" + assigned + ", rit=" + rit +
770 ", location=" + catalogTracker.getRootLocation());
771
772 status.setStatus("ROOT assigned.");
773 return true;
774 }
775
776
777
778
779
780
781
782
783
784
785 private boolean assignMeta(MonitoredTask status, ServerName previousMetaServer,
786 ServerName previousRootServer)
787 throws InterruptedException,
788 IOException, KeeperException {
789 int assigned = 0;
790 long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
791
792 status.setStatus("Assigning META region");
793 boolean rit =
794 this.assignmentManager
795 .processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
796 boolean metaRegionLocation = this.catalogTracker.verifyMetaRegionLocation(timeout);
797 if (!rit && !metaRegionLocation) {
798 ServerName currentMetaServer =
799 (previousMetaServer != null) ? previousMetaServer : this.catalogTracker
800 .getMetaLocationOrReadLocationFromRoot();
801 if (currentMetaServer != null && !currentMetaServer.equals(previousRootServer)) {
802 fileSystemManager.splitAllLogs(currentMetaServer);
803 if (this.serverManager.isServerOnline(currentMetaServer)) {
804 this.serverManager.expireServer(currentMetaServer);
805 }
806 }
807 assignmentManager.assignMeta();
808 enableSSHandWaitForMeta();
809 if (!this.assignmentManager.isRegionAssigned(HRegionInfo.FIRST_META_REGIONINFO)
810 || this.stopped) {
811 return false;
812 }
813 assigned++;
814 } else if (rit && !metaRegionLocation) {
815 enableSSHandWaitForMeta();
816 if (!this.assignmentManager.isRegionAssigned(HRegionInfo.FIRST_META_REGIONINFO)
817 || this.stopped) {
818 return false;
819 }
820 assigned++;
821 } else {
822
823 this.assignmentManager.regionOnline(HRegionInfo.FIRST_META_REGIONINFO,
824 this.catalogTracker.getMetaLocation());
825 }
826 enableCatalogTables(Bytes.toString(HConstants.META_TABLE_NAME));
827 LOG.info(".META. assigned=" + assigned + ", rit=" + rit + ", location="
828 + catalogTracker.getMetaLocation());
829 status.setStatus("META assigned.");
830 return true;
831 }
832
833 private void enableSSHandWaitForMeta() throws IOException,
834 InterruptedException {
835 enableServerShutdownHandler();
836 this.catalogTracker.waitForMeta();
837
838
839 this.assignmentManager
840 .waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
841 }
842
843 private void waitForRootAssignment() throws InterruptedException, IOException {
844
845
846 this.serverManager.enableSSHForRoot();
847 this.catalogTracker.waitForRoot();
848
849 this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO);
850 }
851
852 private void enableCatalogTables(String catalogTableName) {
853 if (!this.assignmentManager.getZKTable().isEnabledTable(catalogTableName)) {
854 this.assignmentManager.setEnabledTable(catalogTableName);
855 }
856 }
857
858 void fixupDaughters(final MonitoredTask status) throws IOException, KeeperException {
859 final Map<HRegionInfo, Result> offlineSplitParents =
860 new HashMap<HRegionInfo, Result>();
861
862 MetaReader.Visitor visitor = new MetaReader.Visitor() {
863 @Override
864 public boolean visit(Result r) throws IOException {
865 if (r == null || r.isEmpty()) return true;
866 HRegionInfo info =
867 MetaReader.parseHRegionInfoFromCatalogResult(
868 r, HConstants.REGIONINFO_QUALIFIER);
869 if (info == null) return true;
870 if (info.isOffline() && info.isSplit()) {
871 offlineSplitParents.put(info, r);
872 }
873
874 return true;
875 }
876 };
877
878 MetaReader.fullScan(this.catalogTracker, visitor);
879
880 int fixups = 0;
881 for (Map.Entry<HRegionInfo, Result> e : offlineSplitParents.entrySet()) {
882 String node = ZKAssign.getNodeName(zooKeeper, e.getKey().getEncodedName());
883 byte[] data = ZKUtil.getData(zooKeeper, node);
884 if (data == null) {
885 fixups += ServerShutdownHandler.fixupDaughters(
886 e.getValue(), assignmentManager, catalogTracker);
887 }
888 }
889 if (fixups != 0) {
890 LOG.info("Scanned the catalog and fixed up " + fixups +
891 " missing daughter region(s)");
892 }
893 }
894
895
896
897
898
899
900 private void splitLogAndExpireIfOnline(final ServerName sn)
901 throws IOException {
902 if (sn == null || !serverManager.isServerOnline(sn)) {
903 return;
904 }
905 LOG.info("Forcing splitLog and expire of " + sn);
906 if (this.shouldSplitMetaSeparately) {
907 fileSystemManager.splitMetaLog(sn);
908 fileSystemManager.splitLog(sn);
909 } else {
910 fileSystemManager.splitAllLogs(sn);
911 }
912 serverManager.expireServer(sn);
913 }
914
915 @Override
916 public ProtocolSignature getProtocolSignature(
917 String protocol, long version, int clientMethodsHashCode)
918 throws IOException {
919 if (HMasterInterface.class.getName().equals(protocol)) {
920 return new ProtocolSignature(HMasterInterface.VERSION, null);
921 } else if (HMasterRegionInterface.class.getName().equals(protocol)) {
922 return new ProtocolSignature(HMasterRegionInterface.VERSION, null);
923 }
924 throw new IOException("Unknown protocol: " + protocol);
925 }
926
927 public long getProtocolVersion(String protocol, long clientVersion) {
928 if (HMasterInterface.class.getName().equals(protocol)) {
929 return HMasterInterface.VERSION;
930 } else if (HMasterRegionInterface.class.getName().equals(protocol)) {
931 return HMasterRegionInterface.VERSION;
932 }
933
934 LOG.warn("Version requested for unimplemented protocol: "+protocol);
935 return -1;
936 }
937
938 @Override
939 public TableDescriptors getTableDescriptors() {
940 return this.tableDescriptors;
941 }
942
943
944 public InfoServer getInfoServer() {
945 return this.infoServer;
946 }
947
948 @Override
949 public Configuration getConfiguration() {
950 return this.conf;
951 }
952
953 @Override
954 public ServerManager getServerManager() {
955 return this.serverManager;
956 }
957
958 @Override
959 public ExecutorService getExecutorService() {
960 return this.executorService;
961 }
962
963 @Override
964 public MasterFileSystem getMasterFileSystem() {
965 return this.fileSystemManager;
966 }
967
968
969
970
971
972 public ZooKeeperWatcher getZooKeeperWatcher() {
973 return this.zooKeeper;
974 }
975
976 public ActiveMasterManager getActiveMasterManager() {
977 return this.activeMasterManager;
978 }
979
980
981
982
983
984
985
986
987 private void startServiceThreads() throws IOException{
988
989
990 this.executorService.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
991 conf.getInt("hbase.master.executor.openregion.threads", 5));
992 this.executorService.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
993 conf.getInt("hbase.master.executor.closeregion.threads", 5));
994 this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
995 conf.getInt("hbase.master.executor.serverops.threads", 3));
996 this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
997 conf.getInt("hbase.master.executor.serverops.threads", 5));
998
999
1000
1001
1002 this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
1003
1004
1005 String n = Thread.currentThread().getName();
1006 int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
1007 this.logCleaner =
1008 new LogCleaner(cleanerInterval,
1009 this, conf, getMasterFileSystem().getFileSystem(),
1010 getMasterFileSystem().getOldLogDir());
1011 Threads.setDaemonThreadRunning(logCleaner.getThread(), n + ".oldLogCleaner");
1012
1013
1014 Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
1015 this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
1016 .getFileSystem(), archiveDir);
1017 Threads.setDaemonThreadRunning(hfileCleaner.getThread(), n + ".archivedHFileCleaner");
1018
1019
1020 if (this.healthCheckChore != null) {
1021 Threads.setDaemonThreadRunning(this.healthCheckChore.getThread(), n + ".healthChecker");
1022 }
1023
1024
1025 this.rpcServer.openServer();
1026 if (LOG.isDebugEnabled()) {
1027 LOG.debug("Started service threads");
1028 }
1029
1030 }
1031
1032 private void stopServiceThreads() {
1033 if (LOG.isDebugEnabled()) {
1034 LOG.debug("Stopping service threads");
1035 }
1036 if (this.rpcServer != null) this.rpcServer.stop();
1037
1038 if (this.logCleaner!= null) this.logCleaner.interrupt();
1039 if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
1040
1041 if (this.infoServer != null) {
1042 LOG.info("Stopping infoServer");
1043 try {
1044 this.infoServer.stop();
1045 } catch (Exception ex) {
1046 ex.printStackTrace();
1047 }
1048 }
1049 if (this.executorService != null) this.executorService.shutdown();
1050 if (this.healthCheckChore != null) {
1051 this.healthCheckChore.interrupt();
1052 }
1053 }
1054
1055 private static Thread getAndStartBalancerChore(final HMaster master) {
1056 String name = master.getServerName() + "-BalancerChore";
1057 int balancerPeriod =
1058 master.getConfiguration().getInt("hbase.balancer.period", 300000);
1059
1060 Chore chore = new Chore(name, balancerPeriod, master) {
1061 @Override
1062 protected void chore() {
1063 master.balance();
1064 }
1065 };
1066 return Threads.setDaemonThreadRunning(chore.getThread());
1067 }
1068
1069 private void stopChores() {
1070 if (this.balancerChore != null) {
1071 this.balancerChore.interrupt();
1072 }
1073 if (this.catalogJanitorChore != null) {
1074 this.catalogJanitorChore.interrupt();
1075 }
1076 }
1077
1078 @Override
1079 public MapWritable regionServerStartup(final int port,
1080 final long serverStartCode, final long serverCurrentTime)
1081 throws IOException {
1082
1083 InetAddress ia = HBaseServer.getRemoteIp();
1084 ServerName rs = this.serverManager.regionServerStartup(ia, port,
1085 serverStartCode, serverCurrentTime);
1086
1087 MapWritable mw = createConfigurationSubset();
1088 mw.put(new Text(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER),
1089 new Text(rs.getHostname()));
1090 return mw;
1091 }
1092
1093
1094
1095
1096
1097 protected MapWritable createConfigurationSubset() {
1098 MapWritable mw = addConfig(new MapWritable(), HConstants.HBASE_DIR);
1099 return addConfig(mw, "fs.default.name");
1100 }
1101
1102 private MapWritable addConfig(final MapWritable mw, final String key) {
1103 mw.put(new Text(key), new Text(this.conf.get(key)));
1104 return mw;
1105 }
1106
1107 @Override
1108 public void regionServerReport(final byte [] sn, final HServerLoad hsl)
1109 throws IOException {
1110 this.serverManager.regionServerReport(ServerName.parseVersionedServerName(sn), hsl);
1111 if (hsl != null && this.metrics != null) {
1112
1113 this.metrics.incrementRequests(hsl.getTotalNumberOfRequests());
1114 }
1115 }
1116
1117 @Override
1118 public void reportRSFatalError(byte [] sn, String errorText) {
1119 String msg = "Region server " + Bytes.toString(sn) +
1120 " reported a fatal error:\n" + errorText;
1121 LOG.error(msg);
1122 rsFatals.add(msg);
1123 }
1124
1125 public boolean isMasterRunning() {
1126 return !isStopped();
1127 }
1128
1129
1130
1131
1132 private int getBalancerCutoffTime() {
1133 int balancerCutoffTime =
1134 getConfiguration().getInt("hbase.balancer.max.balancing", -1);
1135 if (balancerCutoffTime == -1) {
1136
1137 int balancerPeriod =
1138 getConfiguration().getInt("hbase.balancer.period", 300000);
1139 balancerCutoffTime = balancerPeriod / 2;
1140
1141 if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod;
1142 }
1143 return balancerCutoffTime;
1144 }
1145
1146 @Override
1147 public boolean balance() {
1148
1149 if (!this.initialized) {
1150 LOG.debug("Master has not been initialized, don't run balancer.");
1151 return false;
1152 }
1153
1154 if (!this.balanceSwitch) return false;
1155
1156 int maximumBalanceTime = getBalancerCutoffTime();
1157 long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
1158 boolean balancerRan;
1159 synchronized (this.balancer) {
1160
1161 if (this.assignmentManager.isRegionsInTransition()) {
1162 LOG.debug("Not running balancer because " +
1163 this.assignmentManager.getRegionsInTransition().size() +
1164 " region(s) in transition: " +
1165 org.apache.commons.lang.StringUtils.
1166 abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 256));
1167 return false;
1168 }
1169 if (this.serverManager.areDeadServersInProgress()) {
1170 LOG.debug("Not running balancer because processing dead regionserver(s): " +
1171 this.serverManager.getDeadServers());
1172 return false;
1173 }
1174
1175 if (this.cpHost != null) {
1176 try {
1177 if (this.cpHost.preBalance()) {
1178 LOG.debug("Coprocessor bypassing balancer request");
1179 return false;
1180 }
1181 } catch (IOException ioe) {
1182 LOG.error("Error invoking master coprocessor preBalance()", ioe);
1183 return false;
1184 }
1185 }
1186
1187 Map<String, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
1188 this.assignmentManager.getAssignmentsByTable();
1189
1190 List<RegionPlan> plans = new ArrayList<RegionPlan>();
1191 for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
1192 List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
1193 if (partialPlans != null) plans.addAll(partialPlans);
1194 }
1195 int rpCount = 0;
1196 long totalRegPlanExecTime = 0;
1197 balancerRan = plans != null;
1198 if (plans != null && !plans.isEmpty()) {
1199 for (RegionPlan plan: plans) {
1200 LOG.info("balance " + plan);
1201 long balStartTime = System.currentTimeMillis();
1202 this.assignmentManager.balance(plan);
1203 totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
1204 rpCount++;
1205 if (rpCount < plans.size() &&
1206
1207 (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
1208 LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
1209 maximumBalanceTime);
1210 break;
1211 }
1212 }
1213 }
1214 if (this.cpHost != null) {
1215 try {
1216 this.cpHost.postBalance();
1217 } catch (IOException ioe) {
1218
1219 LOG.error("Error invoking master coprocessor postBalance()", ioe);
1220 }
1221 }
1222 }
1223 return balancerRan;
1224 }
1225
/**
 * How {@code switchBalancer} applies a new balance switch value.
 */
enum BalanceSwitchMode {
  /** The switch is flipped while holding the balancer lock. */
  SYNC,

  /** The switch is flipped without taking the balancer lock. */
  ASYNC;
}
1230
1231
1232
1233
1234
1235
1236 public boolean switchBalancer(final boolean b, BalanceSwitchMode mode) {
1237 boolean oldValue = this.balanceSwitch;
1238 boolean newValue = b;
1239 try {
1240 if (this.cpHost != null) {
1241 newValue = this.cpHost.preBalanceSwitch(newValue);
1242 }
1243 if (mode == BalanceSwitchMode.SYNC) {
1244 synchronized (this.balancer) {
1245 this.balanceSwitch = newValue;
1246 }
1247 } else {
1248 this.balanceSwitch = newValue;
1249 }
1250 LOG.info("BalanceSwitch=" + newValue);
1251 if (this.cpHost != null) {
1252 this.cpHost.postBalanceSwitch(oldValue, newValue);
1253 }
1254 } catch (IOException ioe) {
1255 LOG.warn("Error flipping balance switch", ioe);
1256 }
1257 return oldValue;
1258 }
1259
1260 @Override
1261 public boolean synchronousBalanceSwitch(final boolean b) {
1262 return switchBalancer(b, BalanceSwitchMode.SYNC);
1263 }
1264
1265 @Override
1266 public boolean balanceSwitch(final boolean b) {
1267 return switchBalancer(b, BalanceSwitchMode.ASYNC);
1268 }
1269
1270
1271
1272
1273
1274
1275
1276 public void setCatalogJanitorEnabled(final boolean b) {
1277 ((CatalogJanitor)this.catalogJanitorChore).setEnabled(b);
1278 }
1279
1280 @Override
1281 public void move(final byte[] encodedRegionName, final byte[] destServerName)
1282 throws UnknownRegionException {
1283 Pair<HRegionInfo, ServerName> p =
1284 this.assignmentManager.getAssignment(encodedRegionName);
1285 if (p == null)
1286 throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
1287 ServerName dest = null;
1288 if (destServerName == null || destServerName.length == 0) {
1289 LOG.info("Passed destination servername is null or empty so choosing a server at random");
1290 List<ServerName> destServers = this.serverManager.getOnlineServersList();
1291 destServers.remove(p.getSecond());
1292
1293 dest = balancer.randomAssignment(destServers);
1294 } else {
1295 dest = new ServerName(Bytes.toString(destServerName));
1296 }
1297
1298
1299 RegionPlan rp = new RegionPlan(p.getFirst(), p.getSecond(), dest);
1300
1301 try {
1302 checkInitialized();
1303 if (this.cpHost != null) {
1304 if (this.cpHost.preMove(p.getFirst(), p.getSecond(), dest)) {
1305 return;
1306 }
1307 }
1308 LOG.info("Added move plan " + rp + ", running balancer");
1309 this.assignmentManager.balance(rp);
1310 if (this.cpHost != null) {
1311 this.cpHost.postMove(p.getFirst(), p.getSecond(), dest);
1312 }
1313 } catch (IOException ioe) {
1314 UnknownRegionException ure = new UnknownRegionException(
1315 Bytes.toStringBinary(encodedRegionName));
1316 ure.initCause(ioe);
1317 throw ure;
1318 }
1319 }
1320
1321 public void createTable(HTableDescriptor hTableDescriptor,
1322 byte [][] splitKeys)
1323 throws IOException {
1324 if (!isMasterRunning()) {
1325 throw new MasterNotRunningException();
1326 }
1327
1328 HRegionInfo [] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
1329 checkInitialized();
1330 if (cpHost != null) {
1331 cpHost.preCreateTable(hTableDescriptor, newRegions);
1332 }
1333
1334 this.executorService.submit(new CreateTableHandler(this,
1335 this.fileSystemManager, this.serverManager, hTableDescriptor, conf,
1336 newRegions, catalogTracker, assignmentManager));
1337
1338 if (cpHost != null) {
1339 cpHost.postCreateTable(hTableDescriptor, newRegions);
1340 }
1341 }
1342
1343 private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1344 byte[][] splitKeys) {
1345 HRegionInfo[] hRegionInfos = null;
1346 if (splitKeys == null || splitKeys.length == 0) {
1347 hRegionInfos = new HRegionInfo[]{
1348 new HRegionInfo(hTableDescriptor.getName(), null, null)};
1349 } else {
1350 int numRegions = splitKeys.length + 1;
1351 hRegionInfos = new HRegionInfo[numRegions];
1352 byte[] startKey = null;
1353 byte[] endKey = null;
1354 for (int i = 0; i < numRegions; i++) {
1355 endKey = (i == splitKeys.length) ? null : splitKeys[i];
1356 hRegionInfos[i] =
1357 new HRegionInfo(hTableDescriptor.getName(), startKey, endKey);
1358 startKey = endKey;
1359 }
1360 }
1361 return hRegionInfos;
1362 }
1363
1364 private static boolean isCatalogTable(final byte [] tableName) {
1365 return Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME) ||
1366 Bytes.equals(tableName, HConstants.META_TABLE_NAME);
1367 }
1368
1369 @Override
1370 public void deleteTable(final byte [] tableName) throws IOException {
1371 checkInitialized();
1372 if (cpHost != null) {
1373 cpHost.preDeleteTable(tableName);
1374 }
1375 this.executorService.submit(new DeleteTableHandler(tableName, this, this));
1376 if (cpHost != null) {
1377 cpHost.postDeleteTable(tableName);
1378 }
1379 }
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389 public Pair<Integer, Integer> getAlterStatus(byte[] tableName)
1390 throws IOException {
1391 return this.assignmentManager.getReopenStatus(tableName);
1392 }
1393
1394 public void addColumn(byte [] tableName, HColumnDescriptor column)
1395 throws IOException {
1396 checkInitialized();
1397 if (cpHost != null) {
1398 if (cpHost.preAddColumn(tableName, column)) {
1399 return;
1400 }
1401 }
1402 new TableAddFamilyHandler(tableName, column, this, this).process();
1403 if (cpHost != null) {
1404 cpHost.postAddColumn(tableName, column);
1405 }
1406 }
1407
1408 public void modifyColumn(byte [] tableName, HColumnDescriptor descriptor)
1409 throws IOException {
1410 checkInitialized();
1411 if (cpHost != null) {
1412 if (cpHost.preModifyColumn(tableName, descriptor)) {
1413 return;
1414 }
1415 }
1416 new TableModifyFamilyHandler(tableName, descriptor, this, this).process();
1417 if (cpHost != null) {
1418 cpHost.postModifyColumn(tableName, descriptor);
1419 }
1420 }
1421
1422 public void deleteColumn(final byte [] tableName, final byte [] c)
1423 throws IOException {
1424 checkInitialized();
1425 if (cpHost != null) {
1426 if (cpHost.preDeleteColumn(tableName, c)) {
1427 return;
1428 }
1429 }
1430 new TableDeleteFamilyHandler(tableName, c, this, this).process();
1431 if (cpHost != null) {
1432 cpHost.postDeleteColumn(tableName, c);
1433 }
1434 }
1435
1436 public void enableTable(final byte [] tableName) throws IOException {
1437 checkInitialized();
1438 if (cpHost != null) {
1439 cpHost.preEnableTable(tableName);
1440 }
1441 this.executorService.submit(new EnableTableHandler(this, tableName,
1442 catalogTracker, assignmentManager, false));
1443
1444 if (cpHost != null) {
1445 cpHost.postEnableTable(tableName);
1446 }
1447 }
1448
1449 public void disableTable(final byte [] tableName) throws IOException {
1450 checkInitialized();
1451 if (cpHost != null) {
1452 cpHost.preDisableTable(tableName);
1453 }
1454 this.executorService.submit(new DisableTableHandler(this, tableName,
1455 catalogTracker, assignmentManager, false));
1456
1457 if (cpHost != null) {
1458 cpHost.postDisableTable(tableName);
1459 }
1460 }
1461
1462
1463
1464
1465
1466
1467
1468 Pair<HRegionInfo, ServerName> getTableRegionForRow(
1469 final byte [] tableName, final byte [] rowKey)
1470 throws IOException {
1471 final AtomicReference<Pair<HRegionInfo, ServerName>> result =
1472 new AtomicReference<Pair<HRegionInfo, ServerName>>(null);
1473
1474 MetaScannerVisitor visitor =
1475 new MetaScannerVisitorBase() {
1476 @Override
1477 public boolean processRow(Result data) throws IOException {
1478 if (data == null || data.size() <= 0) {
1479 return true;
1480 }
1481 Pair<HRegionInfo, ServerName> pair = MetaReader.parseCatalogResult(data);
1482 if (pair == null) {
1483 return false;
1484 }
1485 if (!Bytes.equals(pair.getFirst().getTableName(), tableName)) {
1486 return false;
1487 }
1488 result.set(pair);
1489 return true;
1490 }
1491 };
1492
1493 MetaScanner.metaScan(conf, visitor, tableName, rowKey, 1);
1494 return result.get();
1495 }
1496
1497 @Override
1498 public void modifyTable(final byte[] tableName, HTableDescriptor htd)
1499 throws IOException {
1500 checkInitialized();
1501 if (cpHost != null) {
1502 cpHost.preModifyTable(tableName, htd);
1503 }
1504 TableEventHandler tblHandler = new ModifyTableHandler(tableName, htd, this, this);
1505 this.executorService.submit(tblHandler);
1506
1507 tblHandler.waitForEventBeingHandled();
1508 if (cpHost != null) {
1509 cpHost.postModifyTable(tableName, htd);
1510 }
1511 }
1512
1513 @Override
1514 public void checkTableModifiable(final byte [] tableName)
1515 throws IOException {
1516 String tableNameStr = Bytes.toString(tableName);
1517 if (isCatalogTable(tableName)) {
1518 throw new IOException("Can't modify catalog tables");
1519 }
1520 if (!MetaReader.tableExists(getCatalogTracker(), tableNameStr)) {
1521 throw new TableNotFoundException(tableNameStr);
1522 }
1523 if (!getAssignmentManager().getZKTable().
1524 isDisabledTable(Bytes.toString(tableName))) {
1525 throw new TableNotDisabledException(tableName);
1526 }
1527 }
1528
1529 public void clearFromTransition(HRegionInfo hri) {
1530 if (this.assignmentManager.isRegionInTransition(hri) != null) {
1531 this.assignmentManager.regionOffline(hri);
1532 }
1533 }
1534
1535
1536
1537
1538 public ClusterStatus getClusterStatus() {
1539
1540 List<String> backupMasterStrings;
1541 try {
1542 backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
1543 this.zooKeeper.backupMasterAddressesZNode);
1544 } catch (KeeperException e) {
1545 LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
1546 backupMasterStrings = new ArrayList<String>(0);
1547 }
1548 List<ServerName> backupMasters = new ArrayList<ServerName>(
1549 backupMasterStrings.size());
1550 for (String s: backupMasterStrings) {
1551 try {
1552 byte[] bytes = ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(this.zooKeeper.backupMasterAddressesZNode, s));
1553 if (bytes != null) {
1554 backupMasters.add(ServerName.parseVersionedServerName(bytes));
1555 }
1556 } catch (KeeperException e) {
1557 LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
1558 "backup servers"), e);
1559 }
1560 }
1561 Collections.sort(backupMasters, new Comparator<ServerName>() {
1562 public int compare(ServerName s1, ServerName s2) {
1563 return s1.getServerName().compareTo(s2.getServerName());
1564 }});
1565
1566 return new ClusterStatus(VersionInfo.getVersion(),
1567 this.fileSystemManager.getClusterId(),
1568 this.serverManager.getOnlineServers(),
1569 this.serverManager.getDeadServers(),
1570 this.serverName,
1571 backupMasters,
1572 this.assignmentManager.getRegionsInTransition(),
1573 this.getCoprocessors());
1574 }
1575
1576 public String getClusterId() {
1577 return (fileSystemManager == null) ? null : fileSystemManager.getClusterId();
1578 }
1579
1580
1581
1582
1583
1584
1585
1586
1587 public static String getLoadedCoprocessors() {
1588 return CoprocessorHost.getLoadedCoprocessors().toString();
1589 }
1590
1591
1592
1593
1594 public long getMasterStartTime() {
1595 return masterStartTime;
1596 }
1597
1598
1599
1600
1601 public long getMasterActiveTime() {
1602 return masterActiveTime;
1603 }
1604
1605 public int getRegionServerInfoPort(final ServerName sn) {
1606 RegionServerInfo info = this.regionServerTracker.getRegionServerInfo(sn);
1607 if (info == null || info.getInfoPort() == 0) {
1608 return conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
1609 HConstants.DEFAULT_REGIONSERVER_INFOPORT);
1610 }
1611 return info.getInfoPort();
1612 }
1613
1614
1615
1616
1617 public String[] getCoprocessors() {
1618 MasterCoprocessorHost cp = getCoprocessorHost();
1619 String[] cpList = new String[0];
1620 if (cp == null) return cpList;
1621
1622 Set<String> masterCoprocessors = cp.getCoprocessors();
1623 return masterCoprocessors.toArray(cpList);
1624 }
1625
1626 @Override
1627 public void abort(final String msg, final Throwable t) {
1628 if (cpHost != null) {
1629
1630 LOG.fatal("Master server abort: loaded coprocessors are: " +
1631 getLoadedCoprocessors());
1632 }
1633
1634 if (abortNow(msg, t)) {
1635 if (t != null) LOG.fatal(msg, t);
1636 else LOG.fatal(msg);
1637 this.abort = true;
1638 stop("Aborting");
1639 }
1640 }
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659 private boolean tryRecoveringExpiredZKSession() throws InterruptedException,
1660 IOException, KeeperException, ExecutionException {
1661
1662 this.zooKeeper.unregisterAllListeners();
1663
1664
1665 if (this.registeredZKListenersBeforeRecovery != null) {
1666 for (ZooKeeperListener curListener : this.registeredZKListenersBeforeRecovery) {
1667 this.zooKeeper.registerListener(curListener);
1668 }
1669 }
1670
1671 this.zooKeeper.reconnectAfterExpiration();
1672
1673 Callable<Boolean> callable = new Callable<Boolean> () {
1674 public Boolean call() throws InterruptedException,
1675 IOException, KeeperException {
1676 MonitoredTask status =
1677 TaskMonitor.get().createStatus("Recovering expired ZK session");
1678 try {
1679 if (!becomeActiveMaster(status)) {
1680 return Boolean.FALSE;
1681 }
1682 serverManager.disableSSHForRoot();
1683 serverShutdownHandlerEnabled = false;
1684 initialized = false;
1685 finishInitialization(status, true);
1686 return Boolean.TRUE;
1687 } finally {
1688 status.cleanup();
1689 }
1690 }
1691 };
1692
1693 long timeout =
1694 conf.getLong("hbase.master.zksession.recover.timeout", 300000);
1695 java.util.concurrent.ExecutorService executor =
1696 Executors.newSingleThreadExecutor();
1697 Future<Boolean> result = executor.submit(callable);
1698 executor.shutdown();
1699 if (executor.awaitTermination(timeout, TimeUnit.MILLISECONDS)
1700 && result.isDone()) {
1701 Boolean recovered = result.get();
1702 if (recovered != null) {
1703 return recovered.booleanValue();
1704 }
1705 }
1706 executor.shutdownNow();
1707 return false;
1708 }
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718 private boolean abortNow(final String msg, final Throwable t) {
1719 if (!this.isActiveMaster || this.stopped) {
1720 return true;
1721 }
1722
1723 boolean failFast = conf.getBoolean("fail.fast.expired.active.master", false);
1724 if (t != null && t instanceof KeeperException.SessionExpiredException
1725 && !failFast) {
1726 try {
1727 LOG.info("Primary Master trying to recover from ZooKeeper session " +
1728 "expiry.");
1729 return !tryRecoveringExpiredZKSession();
1730 } catch (Throwable newT) {
1731 LOG.error("Primary master encountered unexpected exception while " +
1732 "trying to recover from ZooKeeper session" +
1733 " expiry. Proceeding with server abort.", newT);
1734 }
1735 }
1736 return true;
1737 }
1738
1739 @Override
1740 public ZooKeeperWatcher getZooKeeper() {
1741 return zooKeeper;
1742 }
1743
1744 @Override
1745 public MasterCoprocessorHost getCoprocessorHost() {
1746 return cpHost;
1747 }
1748
1749 @Override
1750 public ServerName getServerName() {
1751 return this.serverName;
1752 }
1753
1754 @Override
1755 public CatalogTracker getCatalogTracker() {
1756 return catalogTracker;
1757 }
1758
1759 @Override
1760 public AssignmentManager getAssignmentManager() {
1761 return this.assignmentManager;
1762 }
1763
1764 public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
1765 return rsFatals;
1766 }
1767
1768 @SuppressWarnings("deprecation")
1769 @Override
1770 public void shutdown() {
1771 if (cpHost != null) {
1772 try {
1773 cpHost.preShutdown();
1774 } catch (IOException ioe) {
1775 LOG.error("Error call master coprocessor preShutdown()", ioe);
1776 }
1777 }
1778 if (mxBean != null) {
1779 MBeanUtil.unregisterMBean(mxBean);
1780 mxBean = null;
1781 }
1782 if (this.assignmentManager != null) this.assignmentManager.shutdown();
1783 if (this.serverManager != null) this.serverManager.shutdownCluster();
1784
1785 try {
1786 if (this.clusterStatusTracker != null){
1787 this.clusterStatusTracker.setClusterDown();
1788 }
1789 } catch (KeeperException e) {
1790 if (e instanceof KeeperException.SessionExpiredException) {
1791 LOG.warn("ZK session expired. Retry a new connection...");
1792 try {
1793 this.zooKeeper.reconnectAfterExpiration();
1794 this.clusterStatusTracker.setClusterDown();
1795 } catch (Exception ex) {
1796 LOG.error("Retry setClusterDown failed", ex);
1797 }
1798 } else {
1799 LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
1800 }
1801 }
1802 }
1803
1804 @Override
1805 public void stopMaster() {
1806 if (cpHost != null) {
1807 try {
1808 cpHost.preStopMaster();
1809 } catch (IOException ioe) {
1810 LOG.error("Error call master coprocessor preStopMaster()", ioe);
1811 }
1812 }
1813 stop("Stopped by " + Thread.currentThread().getName());
1814 }
1815
1816 @Override
1817 public void stop(final String why) {
1818 LOG.info(why);
1819 this.stopped = true;
1820
1821 stopSleeper.skipSleepCycle();
1822
1823 if (this.activeMasterManager != null) {
1824 synchronized (this.activeMasterManager.clusterHasActiveMaster) {
1825 this.activeMasterManager.clusterHasActiveMaster.notifyAll();
1826 }
1827 }
1828
1829
1830 if (this.catalogTracker != null && this.serverManager.getOnlineServers().isEmpty()) {
1831 this.catalogTracker.stop();
1832 }
1833 }
1834
1835 @Override
1836 public boolean isStopped() {
1837 return this.stopped;
1838 }
1839
1840 public boolean isAborted() {
1841 return this.abort;
1842 }
1843
1844 void checkInitialized() throws PleaseHoldException {
1845 if (!this.initialized) {
1846 throw new PleaseHoldException("Master is initializing");
1847 }
1848 }
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858 public boolean isActiveMaster() {
1859 return isActiveMaster;
1860 }
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871 public boolean isInitialized() {
1872 return initialized;
1873 }
1874
1875
1876
1877
1878
1879
1880 public boolean isServerShutdownHandlerEnabled() {
1881 return this.serverShutdownHandlerEnabled;
1882 }
1883
1884 public boolean shouldSplitMetaSeparately() {
1885 return this.shouldSplitMetaSeparately;
1886 }
1887
1888
1889
1890
1891
1892 public boolean isInitializationStartsMetaRegoinAssignment() {
1893 return this.initializationBeforeMetaAssignment;
1894 }
1895
1896 @Override
1897 @Deprecated
1898 public void assign(final byte[] regionName, final boolean force)
1899 throws IOException {
1900 assign(regionName);
1901 }
1902
1903 @Override
1904 public void assign(final byte [] regionName)throws IOException {
1905 checkInitialized();
1906 Pair<HRegionInfo, ServerName> pair =
1907 MetaReader.getRegion(this.catalogTracker, regionName);
1908 if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
1909 if (cpHost != null) {
1910 if (cpHost.preAssign(pair.getFirst())) {
1911 return;
1912 }
1913 }
1914 assignRegion(pair.getFirst());
1915 if (cpHost != null) {
1916 cpHost.postAssign(pair.getFirst());
1917 }
1918 }
1919
1920
1921
1922 public void assignRegion(HRegionInfo hri) {
1923 assignmentManager.assign(hri, true);
1924 }
1925
1926 @Override
1927 public void unassign(final byte [] regionName, final boolean force)
1928 throws IOException {
1929 checkInitialized();
1930 Pair<HRegionInfo, ServerName> pair =
1931 MetaReader.getRegion(this.catalogTracker, regionName);
1932 if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
1933 HRegionInfo hri = pair.getFirst();
1934 if (cpHost != null) {
1935 if (cpHost.preUnassign(hri, force)) {
1936 return;
1937 }
1938 }
1939 if (force) {
1940 this.assignmentManager.regionOffline(hri);
1941 assignRegion(hri);
1942 } else {
1943 this.assignmentManager.unassign(hri, force);
1944 }
1945 if (cpHost != null) {
1946 cpHost.postUnassign(hri, force);
1947 }
1948 }
1949
1950
1951
1952
1953
1954
1955 public HTableDescriptor[] getHTableDescriptors(List<String> tableNames)
1956 throws IOException {
1957 List<HTableDescriptor> descriptors =
1958 new ArrayList<HTableDescriptor>(tableNames.size());
1959
1960 boolean bypass = false;
1961 if (this.cpHost != null) {
1962 bypass = this.cpHost.preGetTableDescriptors(tableNames, descriptors);
1963 }
1964
1965 if (!bypass) {
1966 for (String s: tableNames) {
1967 HTableDescriptor htd = null;
1968 try {
1969 htd = this.tableDescriptors.get(s);
1970 } catch (IOException e) {
1971 LOG.warn("Failed getting descriptor for " + s, e);
1972 }
1973 if (htd == null) continue;
1974 descriptors.add(htd);
1975 }
1976 }
1977
1978 if (this.cpHost != null) {
1979 this.cpHost.postGetTableDescriptors(descriptors);
1980 }
1981
1982 return descriptors.toArray(new HTableDescriptor [] {});
1983 }
1984
1985 @Override
1986 public <T extends CoprocessorProtocol> boolean registerProtocol(
1987 Class<T> protocol, T handler) {
1988
1989
1990
1991
1992 if (protocolHandlers.containsKey(protocol)) {
1993 LOG.error("Protocol "+protocol.getName()+
1994 " already registered, rejecting request from "+
1995 handler
1996 );
1997 return false;
1998 }
1999
2000 protocolHandlers.putInstance(protocol, handler);
2001 protocolHandlerNames.put(protocol.getName(), protocol);
2002 if (LOG.isDebugEnabled()) {
2003 LOG.debug("Registered master protocol handler: protocol="+protocol.getName());
2004 }
2005 return true;
2006 }
2007
2008 @Override
2009 public ExecResult execCoprocessor(Exec call) throws IOException {
2010 Class<? extends CoprocessorProtocol> protocol = call.getProtocol();
2011 if (protocol == null) {
2012 String protocolName = call.getProtocolName();
2013 if (LOG.isTraceEnabled()) {
2014 LOG.trace("Received dynamic protocol exec call with protocolName " + protocolName);
2015 }
2016
2017 protocol = protocolHandlerNames.get(protocolName);
2018 if (protocol == null) {
2019 throw new HBaseRPC.UnknownProtocolException(protocol,
2020 "No matching handler for master protocol "+protocolName);
2021 }
2022 }
2023 if (!protocolHandlers.containsKey(protocol)) {
2024 throw new HBaseRPC.UnknownProtocolException(protocol,
2025 "No matching handler for protocol ");
2026 }
2027
2028 CoprocessorProtocol handler = protocolHandlers.getInstance(protocol);
2029 Object value;
2030
2031 try {
2032 Method method = protocol.getMethod(
2033 call.getMethodName(), call.getParameterClasses());
2034 method.setAccessible(true);
2035
2036 value = method.invoke(handler, call.getParameters());
2037 } catch (InvocationTargetException e) {
2038 Throwable target = e.getTargetException();
2039 if (target instanceof IOException) {
2040 throw (IOException)target;
2041 }
2042 IOException ioe = new IOException(target.toString());
2043 ioe.setStackTrace(target.getStackTrace());
2044 throw ioe;
2045 } catch (Throwable e) {
2046 if (!(e instanceof IOException)) {
2047 LOG.error("Unexpected throwable object ", e);
2048 }
2049 IOException ioe = new IOException(e.toString());
2050 ioe.setStackTrace(e.getStackTrace());
2051 throw ioe;
2052 }
2053
2054 return new ExecResult(value);
2055 }
2056
2057
2058
2059
2060
2061
2062 public HTableDescriptor [] getHTableDescriptors() throws IOException {
2063 List<HTableDescriptor> descriptors = new ArrayList<HTableDescriptor>();
2064 boolean bypass = false;
2065 if (this.cpHost != null) {
2066 bypass = this.cpHost.preGetTableDescriptors(null, descriptors);
2067 }
2068 if (!bypass) {
2069 descriptors.addAll(this.tableDescriptors.getAll().values());
2070 }
2071 if (this.cpHost != null) {
2072 this.cpHost.postGetTableDescriptors(descriptors);
2073 }
2074 return descriptors.toArray(new HTableDescriptor [] {});
2075 }
2076
2077
2078
2079
2080
2081
2082
2083 public double getAverageLoad() {
2084 return this.assignmentManager.getAverageLoad();
2085 }
2086
2087
2088
2089
2090 @Override
2091 public void offline(final byte[] regionName) throws IOException {
2092 Pair<HRegionInfo, ServerName> pair =
2093 MetaReader.getRegion(this.catalogTracker, regionName);
2094 if (pair == null) throw new UnknownRegionException(Bytes.toStringBinary(regionName));
2095 HRegionInfo hri = pair.getFirst();
2096 this.assignmentManager.regionOffline(hri);
2097 }
2098
2099
2100
2101
2102
2103
2104
2105 public static HMaster constructMaster(Class<? extends HMaster> masterClass,
2106 final Configuration conf) {
2107 try {
2108 Constructor<? extends HMaster> c =
2109 masterClass.getConstructor(Configuration.class);
2110 return c.newInstance(conf);
2111 } catch (InvocationTargetException ite) {
2112 Throwable target = ite.getTargetException() != null?
2113 ite.getTargetException(): ite;
2114 if (target.getCause() != null) target = target.getCause();
2115 throw new RuntimeException("Failed construction of Master: " +
2116 masterClass.toString(), target);
2117 } catch (Exception e) {
2118 throw new RuntimeException("Failed construction of Master: " +
2119 masterClass.toString() + ((e.getCause() != null)?
2120 e.getCause().getMessage(): ""), e);
2121 }
2122 }
2123
2124
2125
2126
2127 public static void main(String [] args) throws Exception {
2128 VersionInfo.logVersion();
2129 new HMasterCommandLine(HMaster.class).doMain(args);
2130 }
2131
2132
2133
2134
2135 @SuppressWarnings("deprecation")
2136 void registerMBean() {
2137 MXBeanImpl mxBeanInfo = MXBeanImpl.init(this);
2138 MBeanUtil.registerMBean("Master", "Master", mxBeanInfo);
2139 LOG.info("Registered HMaster MXBean");
2140 }
2141
2142
2143
2144
2145
2146 public HFileCleaner getHFileCleaner() {
2147 return this.hfileCleaner;
2148 }
2149
2150 private boolean isHealthCheckerConfigured() {
2151 String healthScriptLocation = this.conf.get(HConstants.HEALTH_SCRIPT_LOC);
2152 return org.apache.commons.lang.StringUtils.isNotBlank(healthScriptLocation);
2153 }
2154
2155
2156
2157
2158
2159 public SnapshotManager getSnapshotManagerForTesting() {
2160 return this.snapshotManager;
2161 }
2162
2163
2164
2165
2166
2167
2168 @Override
2169 public long snapshot(final HSnapshotDescription request) throws IOException {
2170 LOG.debug("Submitting snapshot request for:" +
2171 SnapshotDescriptionUtils.toString(request.getProto()));
2172 try {
2173 this.snapshotManager.checkSnapshotSupport();
2174 } catch (UnsupportedOperationException e) {
2175 throw new IOException(e);
2176 }
2177
2178
2179 SnapshotDescription snapshot = SnapshotDescriptionUtils.validate(request.getProto(),
2180 this.conf);
2181
2182 snapshotManager.takeSnapshot(snapshot);
2183
2184
2185 long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(conf, snapshot.getType(),
2186 SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
2187 return waitTime;
2188 }
2189
2190
2191
2192
2193 @Override
2194 public List<HSnapshotDescription> getCompletedSnapshots() throws IOException {
2195 List<HSnapshotDescription> availableSnapshots = new ArrayList<HSnapshotDescription>();
2196 List<SnapshotDescription> snapshots = snapshotManager.getCompletedSnapshots();
2197
2198
2199 for (SnapshotDescription snapshot: snapshots) {
2200 availableSnapshots.add(new HSnapshotDescription(snapshot));
2201 }
2202
2203 return availableSnapshots;
2204 }
2205
2206
2207
2208
2209
2210
2211 @Override
2212 public void deleteSnapshot(final HSnapshotDescription request) throws IOException {
2213 try {
2214 this.snapshotManager.checkSnapshotSupport();
2215 } catch (UnsupportedOperationException e) {
2216 throw new IOException(e);
2217 }
2218
2219 snapshotManager.deleteSnapshot(request.getProto());
2220 }
2221
2222
2223
2224
2225
2226
2227
2228
2229 @Override
2230 public boolean isSnapshotDone(final HSnapshotDescription request) throws IOException {
2231 LOG.debug("Checking to see if snapshot from request:" +
2232 SnapshotDescriptionUtils.toString(request.getProto()) + " is done");
2233 return snapshotManager.isSnapshotDone(request.getProto());
2234 }
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249 @Override
2250 public void restoreSnapshot(final HSnapshotDescription request) throws IOException {
2251 try {
2252 this.snapshotManager.checkSnapshotSupport();
2253 } catch (UnsupportedOperationException e) {
2254 throw new IOException(e);
2255 }
2256
2257 snapshotManager.restoreSnapshot(request.getProto());
2258 }
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270 @Override
2271 public boolean isRestoreSnapshotDone(final HSnapshotDescription request) throws IOException {
2272 return snapshotManager.isRestoreDone(request.getProto());
2273 }
2274
2275
2276
2277
2278
2279
2280 @Override
2281 public String[] getTableNames() throws IOException {
2282
2283
2284 Collection<HTableDescriptor> descriptors = tableDescriptors.getAll().values();
2285 Iterator<HTableDescriptor> iter = descriptors.iterator();
2286 String names[] = new String[descriptors.size()];
2287 int i = 0;
2288 while (iter.hasNext()) {
2289 names[i++] = iter.next().getNameAsString();
2290 }
2291 return names;
2292 }
2293 }