1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import java.io.IOException;
23 import java.lang.reflect.Constructor;
24 import java.lang.reflect.InvocationTargetException;
25 import java.lang.reflect.Method;
26 import java.net.InetAddress;
27 import java.net.InetSocketAddress;
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.Comparator;
31 import java.util.HashMap;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.Set;
35 import java.util.concurrent.Callable;
36 import java.util.concurrent.ExecutionException;
37 import java.util.concurrent.Executors;
38 import java.util.concurrent.Future;
39 import java.util.concurrent.TimeUnit;
40 import java.util.concurrent.atomic.AtomicReference;
41
42 import javax.management.ObjectName;
43
44 import org.apache.commons.logging.Log;
45 import org.apache.commons.logging.LogFactory;
46 import org.apache.hadoop.conf.Configuration;
47 import org.apache.hadoop.fs.Path;
48 import org.apache.hadoop.hbase.Chore;
49 import org.apache.hadoop.hbase.ClusterStatus;
50 import org.apache.hadoop.hbase.HColumnDescriptor;
51 import org.apache.hadoop.hbase.HConstants;
52 import org.apache.hadoop.hbase.HRegionInfo;
53 import org.apache.hadoop.hbase.HServerLoad;
54 import org.apache.hadoop.hbase.HTableDescriptor;
55 import org.apache.hadoop.hbase.HealthCheckChore;
56 import org.apache.hadoop.hbase.MasterNotRunningException;
57 import org.apache.hadoop.hbase.PleaseHoldException;
58 import org.apache.hadoop.hbase.Server;
59 import org.apache.hadoop.hbase.ServerName;
60 import org.apache.hadoop.hbase.TableDescriptors;
61 import org.apache.hadoop.hbase.TableNotDisabledException;
62 import org.apache.hadoop.hbase.TableNotFoundException;
63 import org.apache.hadoop.hbase.UnknownRegionException;
64 import org.apache.hadoop.hbase.catalog.CatalogTracker;
65 import org.apache.hadoop.hbase.catalog.MetaReader;
66 import org.apache.hadoop.hbase.client.HConnectionManager;
67 import org.apache.hadoop.hbase.client.MetaScanner;
68 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
69 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
70 import org.apache.hadoop.hbase.client.Result;
71 import org.apache.hadoop.hbase.client.coprocessor.Exec;
72 import org.apache.hadoop.hbase.client.coprocessor.ExecResult;
73 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
74 import org.apache.hadoop.hbase.executor.ExecutorService;
75 import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
76 import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
77 import org.apache.hadoop.hbase.ipc.HBaseRPC;
78 import org.apache.hadoop.hbase.ipc.HBaseServer;
79 import org.apache.hadoop.hbase.ipc.HMasterInterface;
80 import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
81 import org.apache.hadoop.hbase.ipc.ProtocolSignature;
82 import org.apache.hadoop.hbase.ipc.RpcServer;
83 import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
84 import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
85 import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
86 import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
87 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
88 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
89 import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
90 import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
91 import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
92 import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
93 import org.apache.hadoop.hbase.master.handler.TableEventHandler;
94 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
95 import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
96 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
97 import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
98 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
99 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
100 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
101 import org.apache.hadoop.hbase.regionserver.wal.HLog;
102 import org.apache.hadoop.hbase.replication.regionserver.Replication;
103 import org.apache.hadoop.hbase.security.User;
104 import org.apache.hadoop.hbase.snapshot.HSnapshotDescription;
105 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
106 import org.apache.hadoop.hbase.util.Bytes;
107 import org.apache.hadoop.hbase.util.FSTableDescriptors;
108 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
109 import org.apache.hadoop.hbase.util.HasThread;
110 import org.apache.hadoop.hbase.util.InfoServer;
111 import org.apache.hadoop.hbase.util.Pair;
112 import org.apache.hadoop.hbase.util.Sleeper;
113 import org.apache.hadoop.hbase.util.Strings;
114 import org.apache.hadoop.hbase.util.Threads;
115 import org.apache.hadoop.hbase.util.VersionInfo;
116 import org.apache.hadoop.hbase.zookeeper.ClusterId;
117 import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
118 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
119 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
120 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
121 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
122 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
123 import org.apache.hadoop.io.MapWritable;
124 import org.apache.hadoop.io.Text;
125 import org.apache.hadoop.metrics.util.MBeanUtil;
126 import org.apache.hadoop.net.DNS;
127 import org.apache.zookeeper.KeeperException;
128 import org.apache.zookeeper.Watcher;
129
130 import com.google.common.collect.ClassToInstanceMap;
131 import com.google.common.collect.Maps;
132 import com.google.common.collect.MutableClassToInstanceMap;
133 import com.google.protobuf.ServiceException;
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152 public class HMaster extends HasThread
153 implements HMasterInterface, HMasterRegionInterface, MasterServices,
154 Server {
/** Logger for this class, looked up by the class name. */
private static final Log LOG = LogFactory.getLog(HMaster.class.getName());

/**
 * Name fragment reused for the master thread name, the info server name/attribute
 * and the ZooKeeper watcher identifier.
 */
public static final String MASTER = "master";

/** Private copy of the configuration passed to the constructor. */
private final Configuration conf;

/** Web info server; only created in run() when the configured info port is >= 0. */
private InfoServer infoServer;

/** ZooKeeper watcher created in the constructor and closed at the end of run(). */
private ZooKeeperWatcher zooKeeper;

/** Created in becomeActiveMaster() and registered as a ZooKeeper listener. */
private ActiveMasterManager activeMasterManager;

/** Started in initializeZKBasedSystemTrackers(). */
private RegionServerTracker regionServerTracker;

/** Started in initializeZKBasedSystemTrackers(). */
private DrainingServerTracker drainingServerTracker;

/** RPC server obtained from HBaseRPC in the constructor. */
private final RpcServer rpcServer;

/** Listener address reported by the RPC server after it was created. */
private final InetSocketAddress isa;

/** Master metrics; fed with region server request counts in regionServerReport(). */
private final MasterMetrics metrics;

/** Created in finishInitialization(). */
private MasterFileSystem fileSystemManager;

/** Created in finishInitialization() unless this is a master recovery. */
private ServerManager serverManager;

/** Created in initializeZKBasedSystemTrackers(); package-private. */
AssignmentManager assignmentManager;

/** Created and started in initializeZKBasedSystemTrackers(). */
private CatalogTracker catalogTracker;

/** Created and started in becomeActiveMaster(). */
private ClusterStatusTracker clusterStatusTracker;

/** Bounded buffer that collects the messages passed to reportRSFatalError(). */
private MemoryBoundedLogMessageBuffer rsFatals;

/** Polled by loop() and checked during startup; true ends the main loop. */
private volatile boolean stopped = false;

/** Consulted in run()'s cleanup to decide whether region servers may be let to shut down. */
private volatile boolean abort = false;

/** Set to true at the start of finishInitialization(). */
private volatile boolean isActiveMaster = false;

/** Set to true at the end of finishInitialization(); balance() refuses to run before that. */
volatile boolean initialized = false;

/** Flipped to true by enableServerShutdownHandler() the first time it runs. */
private volatile boolean serverShutdownHandlerEnabled = false;

/** Read from HLog.SEPARATE_HLOG_FOR_META in the constructor (default false). */
private volatile boolean shouldSplitMetaSeparately;

/** Created in finishInitialization() unless this is a master recovery; package-private. */
ExecutorService executorService;

/** Load balancer obtained from LoadBalancerFactory in initializeZKBasedSystemTrackers(). */
private LoadBalancer balancer;
/** Thread returned by getAndStartBalancerChore(); interrupted by stopChores(). */
private Thread balancerChore;

/** When false, balance() returns immediately without doing any work. */
private volatile boolean balanceSwitch = true;

/** Created in finishInitialization(); interrupted by stopChores(). */
private CatalogJanitor catalogJanitorChore;
/** Created and started in startServiceThreads(); interrupted by stopServiceThreads(). */
private LogCleaner logCleaner;
/** Created and started in startServiceThreads(); interrupted by stopServiceThreads(). */
private HFileCleaner hfileCleaner;

/** Created in finishInitialization() unless this is a master recovery. */
private MasterCoprocessorHost cpHost;
/** Built in the constructor from the RPC listener host, port and the current time. */
private final ServerName serverName;

/** FSTableDescriptors instance created in finishInitialization(). */
private TableDescriptors tableDescriptors;

/** System.currentTimeMillis() captured at the start of run(). */
private long masterStartTime;
/** System.currentTimeMillis() captured in finishInitialization(). */
private long masterActiveTime;

/** Created at the end of initializeZKBasedSystemTrackers(); stopped in run()'s cleanup. */
private SnapshotManager snapshotManager;

/** NOTE(review): presumably set by registerMBean(), which is not visible in this section. */
private ObjectName mxBean = null;

/** Coprocessor protocol instances keyed by their protocol class. */
private ClassToInstanceMap<CoprocessorProtocol>
    protocolHandlers = MutableClassToInstanceMap.create();

/** Coprocessor protocol classes keyed by a String; presumably the class name (TODO confirm). */
private Map<String, Class<? extends CoprocessorProtocol>>
    protocolHandlerNames = Maps.newHashMap();

/** Only created when isHealthCheckerConfigured() is true; started in startServiceThreads(). */
private HealthCheckChore healthCheckChore;

/** Read from "hbase.master.wait.for.log.splitting" in the constructor (default false). */
private boolean waitingOnLogSplitting = false;

/** Set to true in finishInitialization() just before assignRoot() is called. */
private volatile boolean initializationBeforeMetaAssignment = false;

/** Listener list captured from the ZooKeeper watcher at the start of run(). */
private List<ZooKeeperListener> registeredZKListenersBeforeRecovery;
267
268
269
270
271
272
273
274
275
276
277
278
279
280 public HMaster(final Configuration conf)
281 throws IOException, KeeperException, InterruptedException {
282 this.conf = new Configuration(conf);
283
284 this.conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
285
286 HConnectionManager.setServerSideHConnectionRetries(this.conf, LOG);
287
288 String hostname = conf.get("hbase.master.ipc.address",
289 Strings.domainNamePointerToHostName(DNS.getDefaultHost(
290 conf.get("hbase.master.dns.interface", "default"),
291 conf.get("hbase.master.dns.nameserver", "default"))));
292 int port = conf.getInt(HConstants.MASTER_PORT, HConstants.DEFAULT_MASTER_PORT);
293
294 InetSocketAddress initialIsa = new InetSocketAddress(hostname, port);
295 if (initialIsa.getAddress() == null) {
296 throw new IllegalArgumentException("Failed resolve of hostname " + initialIsa);
297 }
298 int numHandlers = conf.getInt("hbase.master.handler.count",
299 conf.getInt("hbase.regionserver.handler.count", 25));
300 this.rpcServer = HBaseRPC.getServer(this,
301 new Class<?>[]{HMasterInterface.class, HMasterRegionInterface.class},
302 initialIsa.getHostName(),
303 initialIsa.getPort(),
304 numHandlers,
305 0,
306 conf.getBoolean("hbase.rpc.verbose", false), conf,
307 0);
308
309 this.isa = this.rpcServer.getListenerAddress();
310 this.serverName = new ServerName(this.isa.getHostName(),
311 this.isa.getPort(), System.currentTimeMillis());
312 this.rsFatals = new MemoryBoundedLogMessageBuffer(
313 conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));
314
315
316 ZKUtil.loginClient(this.conf, "hbase.zookeeper.client.keytab.file",
317 "hbase.zookeeper.client.kerberos.principal", this.isa.getHostName());
318
319
320 User.login(conf, "hbase.master.keytab.file",
321 "hbase.master.kerberos.principal", this.isa.getHostName());
322
323
324 setName(MASTER + "-" + this.serverName.toString());
325
326 Replication.decorateMasterConfiguration(this.conf);
327
328
329
330 if (this.conf.get("mapred.task.id") == null) {
331 this.conf.set("mapred.task.id", "hb_m_" + this.serverName.toString());
332 }
333
334 this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this, true);
335 this.rpcServer.startThreads();
336 this.metrics = new MasterMetrics(getServerName().toString());
337
338
339 int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
340 HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
341 if (isHealthCheckerConfigured()) {
342 healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
343 }
344
345 this.shouldSplitMetaSeparately = conf.getBoolean(HLog.SEPARATE_HLOG_FOR_META, false);
346 waitingOnLogSplitting = this.conf.getBoolean("hbase.master.wait.for.log.splitting", false);
347 }
348
349
350
351
352
353
354
355
356 private static void stallIfBackupMaster(final Configuration c,
357 final ActiveMasterManager amm)
358 throws InterruptedException {
359
360 if (!c.getBoolean(HConstants.MASTER_TYPE_BACKUP,
361 HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
362 return;
363 }
364 LOG.debug("HMaster started in backup mode. " +
365 "Stalling until master znode is written.");
366
367
368 while (!amm.isActiveMaster()) {
369 LOG.debug("Waiting for master address ZNode to be written " +
370 "(Also watching cluster state node)");
371 Thread.sleep(c.getInt("zookeeper.session.timeout", 180 * 1000));
372 }
373
374 }
375
376
377
378
379
380
381
382
383
384
385 @Override
386 public void run() {
387 MonitoredTask startupStatus =
388 TaskMonitor.get().createStatus("Master startup");
389 startupStatus.setDescription("Master startup");
390 masterStartTime = System.currentTimeMillis();
391 try {
392 this.registeredZKListenersBeforeRecovery = this.zooKeeper.getListeners();
393
394
395 int port = this.conf.getInt("hbase.master.info.port", 60010);
396 if (port >= 0) {
397 String a = this.conf.get("hbase.master.info.bindAddress", "0.0.0.0");
398 this.infoServer = new InfoServer(MASTER, a, port, false, this.conf);
399 this.infoServer.addServlet("status", "/master-status", MasterStatusServlet.class);
400 this.infoServer.addServlet("dump", "/dump", MasterDumpServlet.class);
401 this.infoServer.setAttribute(MASTER, this);
402 this.infoServer.start();
403 }
404
405
406
407
408
409
410
411
412
413
414
415 becomeActiveMaster(startupStatus);
416
417
418 if (!this.stopped) {
419 finishInitialization(startupStatus, false);
420 loop();
421 }
422 } catch (Throwable t) {
423
424 if (t instanceof NoClassDefFoundError &&
425 t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
426
427 abort("HBase is having a problem with its Hadoop jars. You may need to "
428 + "recompile HBase against Hadoop version "
429 + org.apache.hadoop.util.VersionInfo.getVersion()
430 + " or change your hadoop jars to start properly", t);
431 } else {
432 abort("Unhandled exception. Starting shutdown.", t);
433 }
434 } finally {
435 startupStatus.cleanup();
436
437 stopChores();
438
439
440 if (!this.abort && this.serverManager != null &&
441 this.serverManager.isClusterShutdown()) {
442 this.serverManager.letRegionServersShutdown();
443 }
444 stopServiceThreads();
445
446 if (this.activeMasterManager != null) this.activeMasterManager.stop();
447 if (this.catalogTracker != null) this.catalogTracker.stop();
448 if (this.serverManager != null) this.serverManager.stop();
449 if (this.assignmentManager != null) this.assignmentManager.stop();
450 if (this.fileSystemManager != null) this.fileSystemManager.stop();
451 if (this.snapshotManager != null) this.snapshotManager.stop("server shutting down.");
452 this.zooKeeper.close();
453 }
454 LOG.info("HMaster main thread exiting");
455 }
456
457
458
459
460
461
462
463 private boolean becomeActiveMaster(MonitoredTask startupStatus)
464 throws InterruptedException {
465
466
467 this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName,
468 this);
469 this.zooKeeper.registerListener(activeMasterManager);
470 stallIfBackupMaster(this.conf, this.activeMasterManager);
471
472
473
474
475 this.clusterStatusTracker = new ClusterStatusTracker(getZooKeeper(), this);
476 this.clusterStatusTracker.start();
477 return this.activeMasterManager.blockUntilBecomingActiveMaster(startupStatus,
478 this.clusterStatusTracker);
479 }
480
481
482
483
484
485
486 private void initializeZKBasedSystemTrackers() throws IOException,
487 InterruptedException, KeeperException {
488 this.catalogTracker = new CatalogTracker(this.zooKeeper, this.conf, this);
489 this.catalogTracker.start();
490
491 this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
492 this.assignmentManager = new AssignmentManager(this, serverManager,
493 this.catalogTracker, this.balancer, this.executorService);
494 zooKeeper.registerListenerFirst(assignmentManager);
495
496 this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
497 this.serverManager);
498 this.regionServerTracker.start();
499
500 this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
501 this.serverManager);
502 this.drainingServerTracker.start();
503
504
505
506 boolean wasUp = this.clusterStatusTracker.isClusterUp();
507 if (!wasUp) this.clusterStatusTracker.setClusterUp();
508
509 LOG.info("Server active/primary master; " + this.serverName +
510 ", sessionid=0x" +
511 Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
512 ", cluster-up flag was=" + wasUp);
513
514
515 this.snapshotManager = new SnapshotManager(this, this.metrics);
516 }
517
518
519 private Sleeper stopSleeper = new Sleeper(1000, this);
520 private void loop() {
521 while (!this.stopped) {
522 stopSleeper.sleep();
523 }
524 }
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546 private void finishInitialization(MonitoredTask status, boolean masterRecovery)
547 throws IOException, InterruptedException, KeeperException {
548
549 isActiveMaster = true;
550
551
552
553
554
555
556
557 status.setStatus("Initializing Master file system");
558 this.masterActiveTime = System.currentTimeMillis();
559
560 this.fileSystemManager = new MasterFileSystem(this, this, metrics, masterRecovery);
561
562 this.tableDescriptors =
563 new FSTableDescriptors(this.fileSystemManager.getFileSystem(),
564 this.fileSystemManager.getRootDir());
565
566
567 status.setStatus("Publishing Cluster ID in ZooKeeper");
568 ClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
569 if (!masterRecovery) {
570 this.executorService = new ExecutorService(getServerName().toString());
571 this.serverManager = new ServerManager(this, this);
572 }
573
574
575 status.setStatus("Initializing ZK system trackers");
576 initializeZKBasedSystemTrackers();
577
578 if (!masterRecovery) {
579
580 status.setStatus("Initializing master coprocessors");
581 this.cpHost = new MasterCoprocessorHost(this, this.conf);
582
583
584 status.setStatus("Initializing master service threads");
585 startServiceThreads();
586 }
587
588
589 this.serverManager.waitForRegionServers(status);
590
591 for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
592 if (!this.serverManager.isServerOnline(sn)) {
593
594 LOG.info("Registering server found up in zk but who has not yet " +
595 "reported in: " + sn);
596 this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD);
597 }
598 }
599 if (!masterRecovery) {
600 this.assignmentManager.startTimeOutMonitor();
601 }
602
603
604 Set<ServerName> failedServers = this.fileSystemManager.getFailedServersFromLogFolders();
605 if (waitingOnLogSplitting) {
606 List<ServerName> servers = new ArrayList<ServerName>(failedServers);
607 this.fileSystemManager.splitAllLogs(servers);
608 failedServers.clear();
609 }
610
611 ServerName preRootServer = this.catalogTracker.getRootLocation();
612 if (preRootServer != null && failedServers.contains(preRootServer)) {
613
614 this.fileSystemManager.splitAllLogs(preRootServer);
615 failedServers.remove(preRootServer);
616 }
617
618 this.initializationBeforeMetaAssignment = true;
619
620 if (!assignRoot(status)) return;
621
622
623
624 this.serverManager.enableSSHForRoot();
625
626
627 ServerName preMetaServer = this.catalogTracker.getMetaLocationOrReadLocationFromRoot();
628 if (preMetaServer != null && failedServers.contains(preMetaServer)) {
629
630 this.fileSystemManager.splitAllLogs(preMetaServer);
631 failedServers.remove(preMetaServer);
632 }
633
634
635 if (!assignMeta(status, ((masterRecovery) ? null : preMetaServer), preRootServer)) return;
636
637 enableServerShutdownHandler();
638
639
640 status.setStatus("Submit log splitting work of non-meta region servers");
641 for (ServerName curServer : failedServers) {
642 this.serverManager.expireServer(curServer);
643 }
644
645
646
647
648
649 org.apache.hadoop.hbase.catalog.MetaMigrationRemovingHTD.
650 updateMetaWithNewHRI(this);
651
652
653 status.setStatus("Starting assignment manager");
654 this.assignmentManager.joinCluster();
655
656 this.balancer.setClusterStatus(getClusterStatus());
657 this.balancer.setMasterServices(this);
658
659
660 status.setStatus("Fixing up missing daughters");
661 fixupDaughters(status);
662
663 if (!masterRecovery) {
664
665
666 status.setStatus("Starting balancer and catalog janitor");
667 this.balancerChore = getAndStartBalancerChore(this);
668 this.catalogJanitorChore = new CatalogJanitor(this, this);
669 startCatalogJanitorChore();
670 registerMBean();
671 }
672
673 status.markComplete("Initialization successful");
674 LOG.info("Master has completed initialization");
675 initialized = true;
676
677
678
679
680 this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();
681
682 if (!masterRecovery) {
683 if (this.cpHost != null) {
684
685 try {
686 this.cpHost.postStartMaster();
687 } catch (IOException ioe) {
688 LOG.error("Coprocessor postStartMaster() hook failed", ioe);
689 }
690 }
691 }
692 }
693
694
695
696
697
698
699
700 private void enableServerShutdownHandler() throws IOException {
701 if (!serverShutdownHandlerEnabled) {
702 serverShutdownHandlerEnabled = true;
703 this.serverManager.expireDeadNotExpiredServers();
704 }
705 }
706
707
708
709
710
711 protected void startCatalogJanitorChore() {
712 Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
713 }
714
715
716
717
718
719
720
721
722 private boolean assignRoot(MonitoredTask status)
723 throws InterruptedException, IOException, KeeperException {
724 int assigned = 0;
725 long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
726
727
728 status.setStatus("Assigning ROOT region");
729 boolean rit = this.assignmentManager.
730 processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
731 ServerName currentRootServer = null;
732 boolean rootRegionLocation = catalogTracker.verifyRootRegionLocation(timeout);
733 if (!rit && !rootRegionLocation) {
734 currentRootServer = this.catalogTracker.getRootLocation();
735 splitLogAndExpireIfOnline(currentRootServer);
736 this.assignmentManager.assignRoot();
737 waitForRootAssignment();
738 if (!this.assignmentManager.isRegionAssigned(HRegionInfo.ROOT_REGIONINFO) || this.stopped) {
739 return false;
740 }
741 assigned++;
742 } else if (rit && !rootRegionLocation) {
743 waitForRootAssignment();
744 if (!this.assignmentManager.isRegionAssigned(HRegionInfo.ROOT_REGIONINFO) || this.stopped) {
745 return false;
746 }
747 assigned++;
748 } else {
749
750 this.assignmentManager.regionOnline(HRegionInfo.ROOT_REGIONINFO,
751 this.catalogTracker.getRootLocation());
752 }
753
754
755 enableCatalogTables(Bytes.toString(HConstants.ROOT_TABLE_NAME));
756 LOG.info("-ROOT- assigned=" + assigned + ", rit=" + rit +
757 ", location=" + catalogTracker.getRootLocation());
758
759 status.setStatus("ROOT assigned.");
760 return true;
761 }
762
763
764
765
766
767
768
769
770
771
772 private boolean assignMeta(MonitoredTask status, ServerName previousMetaServer,
773 ServerName previousRootServer)
774 throws InterruptedException,
775 IOException, KeeperException {
776 int assigned = 0;
777 long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
778
779 status.setStatus("Assigning META region");
780 boolean rit =
781 this.assignmentManager
782 .processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
783 boolean metaRegionLocation = this.catalogTracker.verifyMetaRegionLocation(timeout);
784 if (!rit && !metaRegionLocation) {
785 ServerName currentMetaServer =
786 (previousMetaServer != null) ? previousMetaServer : this.catalogTracker
787 .getMetaLocationOrReadLocationFromRoot();
788 if (currentMetaServer != null && !currentMetaServer.equals(previousRootServer)) {
789 fileSystemManager.splitAllLogs(currentMetaServer);
790 if (this.serverManager.isServerOnline(currentMetaServer)) {
791 this.serverManager.expireServer(currentMetaServer);
792 }
793 }
794 assignmentManager.assignMeta();
795 enableSSHandWaitForMeta();
796 if (!this.assignmentManager.isRegionAssigned(HRegionInfo.FIRST_META_REGIONINFO)
797 || this.stopped) {
798 return false;
799 }
800 assigned++;
801 } else if (rit && !metaRegionLocation) {
802 enableSSHandWaitForMeta();
803 if (!this.assignmentManager.isRegionAssigned(HRegionInfo.FIRST_META_REGIONINFO)
804 || this.stopped) {
805 return false;
806 }
807 assigned++;
808 } else {
809
810 this.assignmentManager.regionOnline(HRegionInfo.FIRST_META_REGIONINFO,
811 this.catalogTracker.getMetaLocation());
812 }
813 enableCatalogTables(Bytes.toString(HConstants.META_TABLE_NAME));
814 LOG.info(".META. assigned=" + assigned + ", rit=" + rit + ", location="
815 + catalogTracker.getMetaLocation());
816 status.setStatus("META assigned.");
817 return true;
818 }
819
820 private void enableSSHandWaitForMeta() throws IOException,
821 InterruptedException {
822 enableServerShutdownHandler();
823 this.catalogTracker.waitForMeta();
824
825
826 this.assignmentManager
827 .waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
828 }
829
830 private void waitForRootAssignment() throws InterruptedException, IOException {
831
832
833 this.serverManager.enableSSHForRoot();
834 this.catalogTracker.waitForRoot();
835
836 this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO);
837 }
838
839 private void enableCatalogTables(String catalogTableName) {
840 if (!this.assignmentManager.getZKTable().isEnabledTable(catalogTableName)) {
841 this.assignmentManager.setEnabledTable(catalogTableName);
842 }
843 }
844
845 void fixupDaughters(final MonitoredTask status) throws IOException {
846 final Map<HRegionInfo, Result> offlineSplitParents =
847 new HashMap<HRegionInfo, Result>();
848
849 MetaReader.Visitor visitor = new MetaReader.Visitor() {
850 @Override
851 public boolean visit(Result r) throws IOException {
852 if (r == null || r.isEmpty()) return true;
853 HRegionInfo info =
854 MetaReader.parseHRegionInfoFromCatalogResult(
855 r, HConstants.REGIONINFO_QUALIFIER);
856 if (info == null) return true;
857 if (info.isOffline() && info.isSplit()) {
858 offlineSplitParents.put(info, r);
859 }
860
861 return true;
862 }
863 };
864
865 MetaReader.fullScan(this.catalogTracker, visitor);
866
867 int fixups = 0;
868 for (Map.Entry<HRegionInfo, Result> e : offlineSplitParents.entrySet()) {
869 fixups += ServerShutdownHandler.fixupDaughters(
870 e.getValue(), assignmentManager, catalogTracker);
871 }
872 if (fixups != 0) {
873 LOG.info("Scanned the catalog and fixed up " + fixups +
874 " missing daughter region(s)");
875 }
876 }
877
878
879
880
881
882
883 private void splitLogAndExpireIfOnline(final ServerName sn)
884 throws IOException {
885 if (sn == null || !serverManager.isServerOnline(sn)) {
886 return;
887 }
888 LOG.info("Forcing splitLog and expire of " + sn);
889 if (this.shouldSplitMetaSeparately) {
890 fileSystemManager.splitMetaLog(sn);
891 fileSystemManager.splitLog(sn);
892 } else {
893 fileSystemManager.splitAllLogs(sn);
894 }
895 serverManager.expireServer(sn);
896 }
897
898 @Override
899 public ProtocolSignature getProtocolSignature(
900 String protocol, long version, int clientMethodsHashCode)
901 throws IOException {
902 if (HMasterInterface.class.getName().equals(protocol)) {
903 return new ProtocolSignature(HMasterInterface.VERSION, null);
904 } else if (HMasterRegionInterface.class.getName().equals(protocol)) {
905 return new ProtocolSignature(HMasterRegionInterface.VERSION, null);
906 }
907 throw new IOException("Unknown protocol: " + protocol);
908 }
909
910 public long getProtocolVersion(String protocol, long clientVersion) {
911 if (HMasterInterface.class.getName().equals(protocol)) {
912 return HMasterInterface.VERSION;
913 } else if (HMasterRegionInterface.class.getName().equals(protocol)) {
914 return HMasterRegionInterface.VERSION;
915 }
916
917 LOG.warn("Version requested for unimplemented protocol: "+protocol);
918 return -1;
919 }
920
921 @Override
922 public TableDescriptors getTableDescriptors() {
923 return this.tableDescriptors;
924 }
925
926
927 public InfoServer getInfoServer() {
928 return this.infoServer;
929 }
930
931 @Override
932 public Configuration getConfiguration() {
933 return this.conf;
934 }
935
936 @Override
937 public ServerManager getServerManager() {
938 return this.serverManager;
939 }
940
941 @Override
942 public ExecutorService getExecutorService() {
943 return this.executorService;
944 }
945
946 @Override
947 public MasterFileSystem getMasterFileSystem() {
948 return this.fileSystemManager;
949 }
950
951
952
953
954
955 public ZooKeeperWatcher getZooKeeperWatcher() {
956 return this.zooKeeper;
957 }
958
959 public ActiveMasterManager getActiveMasterManager() {
960 return this.activeMasterManager;
961 }
962
963
964
965
966
967
968
969
970 private void startServiceThreads() throws IOException{
971
972
973 this.executorService.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
974 conf.getInt("hbase.master.executor.openregion.threads", 5));
975 this.executorService.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
976 conf.getInt("hbase.master.executor.closeregion.threads", 5));
977 this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
978 conf.getInt("hbase.master.executor.serverops.threads", 3));
979 this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
980 conf.getInt("hbase.master.executor.serverops.threads", 5));
981
982
983
984
985 this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
986
987
988 String n = Thread.currentThread().getName();
989 int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
990 this.logCleaner =
991 new LogCleaner(cleanerInterval,
992 this, conf, getMasterFileSystem().getFileSystem(),
993 getMasterFileSystem().getOldLogDir());
994 Threads.setDaemonThreadRunning(logCleaner.getThread(), n + ".oldLogCleaner");
995
996
997 Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
998 this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
999 .getFileSystem(), archiveDir);
1000 Threads.setDaemonThreadRunning(hfileCleaner.getThread(), n + ".archivedHFileCleaner");
1001
1002
1003 if (this.healthCheckChore != null) {
1004 Threads.setDaemonThreadRunning(this.healthCheckChore.getThread(), n + ".healthChecker");
1005 }
1006
1007
1008 this.rpcServer.openServer();
1009 if (LOG.isDebugEnabled()) {
1010 LOG.debug("Started service threads");
1011 }
1012
1013 }
1014
1015 private void stopServiceThreads() {
1016 if (LOG.isDebugEnabled()) {
1017 LOG.debug("Stopping service threads");
1018 }
1019 if (this.rpcServer != null) this.rpcServer.stop();
1020
1021 if (this.logCleaner!= null) this.logCleaner.interrupt();
1022 if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
1023
1024 if (this.infoServer != null) {
1025 LOG.info("Stopping infoServer");
1026 try {
1027 this.infoServer.stop();
1028 } catch (Exception ex) {
1029 ex.printStackTrace();
1030 }
1031 }
1032 if (this.executorService != null) this.executorService.shutdown();
1033 if (this.healthCheckChore != null) {
1034 this.healthCheckChore.interrupt();
1035 }
1036 }
1037
1038 private static Thread getAndStartBalancerChore(final HMaster master) {
1039 String name = master.getServerName() + "-BalancerChore";
1040 int balancerPeriod =
1041 master.getConfiguration().getInt("hbase.balancer.period", 300000);
1042
1043 Chore chore = new Chore(name, balancerPeriod, master) {
1044 @Override
1045 protected void chore() {
1046 master.balance();
1047 }
1048 };
1049 return Threads.setDaemonThreadRunning(chore.getThread());
1050 }
1051
1052 private void stopChores() {
1053 if (this.balancerChore != null) {
1054 this.balancerChore.interrupt();
1055 }
1056 if (this.catalogJanitorChore != null) {
1057 this.catalogJanitorChore.interrupt();
1058 }
1059 }
1060
1061 @Override
1062 public MapWritable regionServerStartup(final int port,
1063 final long serverStartCode, final long serverCurrentTime)
1064 throws IOException {
1065
1066 InetAddress ia = HBaseServer.getRemoteIp();
1067 ServerName rs = this.serverManager.regionServerStartup(ia, port,
1068 serverStartCode, serverCurrentTime);
1069
1070 MapWritable mw = createConfigurationSubset();
1071 mw.put(new Text(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER),
1072 new Text(rs.getHostname()));
1073 return mw;
1074 }
1075
1076
1077
1078
1079
1080 protected MapWritable createConfigurationSubset() {
1081 MapWritable mw = addConfig(new MapWritable(), HConstants.HBASE_DIR);
1082 return addConfig(mw, "fs.default.name");
1083 }
1084
1085 private MapWritable addConfig(final MapWritable mw, final String key) {
1086 mw.put(new Text(key), new Text(this.conf.get(key)));
1087 return mw;
1088 }
1089
1090 @Override
1091 public void regionServerReport(final byte [] sn, final HServerLoad hsl)
1092 throws IOException {
1093 this.serverManager.regionServerReport(ServerName.parseVersionedServerName(sn), hsl);
1094 if (hsl != null && this.metrics != null) {
1095
1096 this.metrics.incrementRequests(hsl.getTotalNumberOfRequests());
1097 }
1098 }
1099
1100 @Override
1101 public void reportRSFatalError(byte [] sn, String errorText) {
1102 String msg = "Region server " + Bytes.toString(sn) +
1103 " reported a fatal error:\n" + errorText;
1104 LOG.error(msg);
1105 rsFatals.add(msg);
1106 }
1107
1108 public boolean isMasterRunning() {
1109 return !isStopped();
1110 }
1111
1112
1113
1114
1115 private int getBalancerCutoffTime() {
1116 int balancerCutoffTime =
1117 getConfiguration().getInt("hbase.balancer.max.balancing", -1);
1118 if (balancerCutoffTime == -1) {
1119
1120 int balancerPeriod =
1121 getConfiguration().getInt("hbase.balancer.period", 300000);
1122 balancerCutoffTime = balancerPeriod / 2;
1123
1124 if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod;
1125 }
1126 return balancerCutoffTime;
1127 }
1128
1129 @Override
1130 public boolean balance() {
1131
1132 if (!this.initialized) {
1133 LOG.debug("Master has not been initialized, don't run balancer.");
1134 return false;
1135 }
1136
1137 if (!this.balanceSwitch) return false;
1138
1139 int maximumBalanceTime = getBalancerCutoffTime();
1140 long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
1141 boolean balancerRan;
1142 synchronized (this.balancer) {
1143
1144 if (this.assignmentManager.isRegionsInTransition()) {
1145 LOG.debug("Not running balancer because " +
1146 this.assignmentManager.getRegionsInTransition().size() +
1147 " region(s) in transition: " +
1148 org.apache.commons.lang.StringUtils.
1149 abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 256));
1150 return false;
1151 }
1152 if (this.serverManager.areDeadServersInProgress()) {
1153 LOG.debug("Not running balancer because processing dead regionserver(s): " +
1154 this.serverManager.getDeadServers());
1155 return false;
1156 }
1157
1158 if (this.cpHost != null) {
1159 try {
1160 if (this.cpHost.preBalance()) {
1161 LOG.debug("Coprocessor bypassing balancer request");
1162 return false;
1163 }
1164 } catch (IOException ioe) {
1165 LOG.error("Error invoking master coprocessor preBalance()", ioe);
1166 return false;
1167 }
1168 }
1169
1170 Map<String, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
1171 this.assignmentManager.getAssignmentsByTable();
1172
1173 List<RegionPlan> plans = new ArrayList<RegionPlan>();
1174 for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
1175 List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
1176 if (partialPlans != null) plans.addAll(partialPlans);
1177 }
1178 int rpCount = 0;
1179 long totalRegPlanExecTime = 0;
1180 balancerRan = plans != null;
1181 if (plans != null && !plans.isEmpty()) {
1182 for (RegionPlan plan: plans) {
1183 LOG.info("balance " + plan);
1184 long balStartTime = System.currentTimeMillis();
1185 this.assignmentManager.balance(plan);
1186 totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
1187 rpCount++;
1188 if (rpCount < plans.size() &&
1189
1190 (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
1191 LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
1192 maximumBalanceTime);
1193 break;
1194 }
1195 }
1196 }
1197 if (this.cpHost != null) {
1198 try {
1199 this.cpHost.postBalance();
1200 } catch (IOException ioe) {
1201
1202 LOG.error("Error invoking master coprocessor postBalance()", ioe);
1203 }
1204 }
1205 }
1206 return balancerRan;
1207 }
1208
/**
 * Selects how {@code switchBalancer} assigns the balance switch:
 * {@code SYNC} assigns it while holding the balancer lock, {@code ASYNC}
 * assigns it directly.
 */
enum BalanceSwitchMode { SYNC, ASYNC }
1213
1214
1215
1216
1217
1218
1219 public boolean switchBalancer(final boolean b, BalanceSwitchMode mode) {
1220 boolean oldValue = this.balanceSwitch;
1221 boolean newValue = b;
1222 try {
1223 if (this.cpHost != null) {
1224 newValue = this.cpHost.preBalanceSwitch(newValue);
1225 }
1226 if (mode == BalanceSwitchMode.SYNC) {
1227 synchronized (this.balancer) {
1228 this.balanceSwitch = newValue;
1229 }
1230 } else {
1231 this.balanceSwitch = newValue;
1232 }
1233 LOG.info("BalanceSwitch=" + newValue);
1234 if (this.cpHost != null) {
1235 this.cpHost.postBalanceSwitch(oldValue, newValue);
1236 }
1237 } catch (IOException ioe) {
1238 LOG.warn("Error flipping balance switch", ioe);
1239 }
1240 return oldValue;
1241 }
1242
1243 @Override
1244 public boolean synchronousBalanceSwitch(final boolean b) {
1245 return switchBalancer(b, BalanceSwitchMode.SYNC);
1246 }
1247
1248 @Override
1249 public boolean balanceSwitch(final boolean b) {
1250 return switchBalancer(b, BalanceSwitchMode.ASYNC);
1251 }
1252
1253
1254
1255
1256
1257
1258
1259 public void setCatalogJanitorEnabled(final boolean b) {
1260 ((CatalogJanitor)this.catalogJanitorChore).setEnabled(b);
1261 }
1262
1263 @Override
1264 public void move(final byte[] encodedRegionName, final byte[] destServerName)
1265 throws UnknownRegionException {
1266 Pair<HRegionInfo, ServerName> p =
1267 this.assignmentManager.getAssignment(encodedRegionName);
1268 if (p == null)
1269 throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
1270 ServerName dest = null;
1271 if (destServerName == null || destServerName.length == 0) {
1272 LOG.info("Passed destination servername is null or empty so choosing a server at random");
1273 List<ServerName> destServers = this.serverManager.getOnlineServersList();
1274 destServers.remove(p.getSecond());
1275
1276 dest = balancer.randomAssignment(destServers);
1277 } else {
1278 dest = new ServerName(Bytes.toString(destServerName));
1279 }
1280
1281
1282 RegionPlan rp = new RegionPlan(p.getFirst(), p.getSecond(), dest);
1283
1284 try {
1285 checkInitialized();
1286 if (this.cpHost != null) {
1287 if (this.cpHost.preMove(p.getFirst(), p.getSecond(), dest)) {
1288 return;
1289 }
1290 }
1291 LOG.info("Added move plan " + rp + ", running balancer");
1292 this.assignmentManager.balance(rp);
1293 if (this.cpHost != null) {
1294 this.cpHost.postMove(p.getFirst(), p.getSecond(), dest);
1295 }
1296 } catch (IOException ioe) {
1297 UnknownRegionException ure = new UnknownRegionException(
1298 Bytes.toStringBinary(encodedRegionName));
1299 ure.initCause(ioe);
1300 throw ure;
1301 }
1302 }
1303
1304 public void createTable(HTableDescriptor hTableDescriptor,
1305 byte [][] splitKeys)
1306 throws IOException {
1307 if (!isMasterRunning()) {
1308 throw new MasterNotRunningException();
1309 }
1310
1311 HRegionInfo [] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
1312 checkInitialized();
1313 if (cpHost != null) {
1314 cpHost.preCreateTable(hTableDescriptor, newRegions);
1315 }
1316
1317 this.executorService.submit(new CreateTableHandler(this,
1318 this.fileSystemManager, this.serverManager, hTableDescriptor, conf,
1319 newRegions, catalogTracker, assignmentManager));
1320
1321 if (cpHost != null) {
1322 cpHost.postCreateTable(hTableDescriptor, newRegions);
1323 }
1324 }
1325
1326 private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1327 byte[][] splitKeys) {
1328 HRegionInfo[] hRegionInfos = null;
1329 if (splitKeys == null || splitKeys.length == 0) {
1330 hRegionInfos = new HRegionInfo[]{
1331 new HRegionInfo(hTableDescriptor.getName(), null, null)};
1332 } else {
1333 int numRegions = splitKeys.length + 1;
1334 hRegionInfos = new HRegionInfo[numRegions];
1335 byte[] startKey = null;
1336 byte[] endKey = null;
1337 for (int i = 0; i < numRegions; i++) {
1338 endKey = (i == splitKeys.length) ? null : splitKeys[i];
1339 hRegionInfos[i] =
1340 new HRegionInfo(hTableDescriptor.getName(), startKey, endKey);
1341 startKey = endKey;
1342 }
1343 }
1344 return hRegionInfos;
1345 }
1346
1347 private static boolean isCatalogTable(final byte [] tableName) {
1348 return Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME) ||
1349 Bytes.equals(tableName, HConstants.META_TABLE_NAME);
1350 }
1351
1352 @Override
1353 public void deleteTable(final byte [] tableName) throws IOException {
1354 checkInitialized();
1355 if (cpHost != null) {
1356 cpHost.preDeleteTable(tableName);
1357 }
1358 this.executorService.submit(new DeleteTableHandler(tableName, this, this));
1359 if (cpHost != null) {
1360 cpHost.postDeleteTable(tableName);
1361 }
1362 }
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372 public Pair<Integer, Integer> getAlterStatus(byte[] tableName)
1373 throws IOException {
1374 return this.assignmentManager.getReopenStatus(tableName);
1375 }
1376
1377 public void addColumn(byte [] tableName, HColumnDescriptor column)
1378 throws IOException {
1379 checkInitialized();
1380 if (cpHost != null) {
1381 if (cpHost.preAddColumn(tableName, column)) {
1382 return;
1383 }
1384 }
1385 new TableAddFamilyHandler(tableName, column, this, this).process();
1386 if (cpHost != null) {
1387 cpHost.postAddColumn(tableName, column);
1388 }
1389 }
1390
1391 public void modifyColumn(byte [] tableName, HColumnDescriptor descriptor)
1392 throws IOException {
1393 checkInitialized();
1394 if (cpHost != null) {
1395 if (cpHost.preModifyColumn(tableName, descriptor)) {
1396 return;
1397 }
1398 }
1399 new TableModifyFamilyHandler(tableName, descriptor, this, this).process();
1400 if (cpHost != null) {
1401 cpHost.postModifyColumn(tableName, descriptor);
1402 }
1403 }
1404
1405 public void deleteColumn(final byte [] tableName, final byte [] c)
1406 throws IOException {
1407 checkInitialized();
1408 if (cpHost != null) {
1409 if (cpHost.preDeleteColumn(tableName, c)) {
1410 return;
1411 }
1412 }
1413 new TableDeleteFamilyHandler(tableName, c, this, this).process();
1414 if (cpHost != null) {
1415 cpHost.postDeleteColumn(tableName, c);
1416 }
1417 }
1418
1419 public void enableTable(final byte [] tableName) throws IOException {
1420 checkInitialized();
1421 if (cpHost != null) {
1422 cpHost.preEnableTable(tableName);
1423 }
1424 this.executorService.submit(new EnableTableHandler(this, tableName,
1425 catalogTracker, assignmentManager, false));
1426
1427 if (cpHost != null) {
1428 cpHost.postEnableTable(tableName);
1429 }
1430 }
1431
1432 public void disableTable(final byte [] tableName) throws IOException {
1433 checkInitialized();
1434 if (cpHost != null) {
1435 cpHost.preDisableTable(tableName);
1436 }
1437 this.executorService.submit(new DisableTableHandler(this, tableName,
1438 catalogTracker, assignmentManager, false));
1439
1440 if (cpHost != null) {
1441 cpHost.postDisableTable(tableName);
1442 }
1443 }
1444
1445
1446
1447
1448
1449
1450
1451 Pair<HRegionInfo, ServerName> getTableRegionForRow(
1452 final byte [] tableName, final byte [] rowKey)
1453 throws IOException {
1454 final AtomicReference<Pair<HRegionInfo, ServerName>> result =
1455 new AtomicReference<Pair<HRegionInfo, ServerName>>(null);
1456
1457 MetaScannerVisitor visitor =
1458 new MetaScannerVisitorBase() {
1459 @Override
1460 public boolean processRow(Result data) throws IOException {
1461 if (data == null || data.size() <= 0) {
1462 return true;
1463 }
1464 Pair<HRegionInfo, ServerName> pair = MetaReader.parseCatalogResult(data);
1465 if (pair == null) {
1466 return false;
1467 }
1468 if (!Bytes.equals(pair.getFirst().getTableName(), tableName)) {
1469 return false;
1470 }
1471 result.set(pair);
1472 return true;
1473 }
1474 };
1475
1476 MetaScanner.metaScan(conf, visitor, tableName, rowKey, 1);
1477 return result.get();
1478 }
1479
1480 @Override
1481 public void modifyTable(final byte[] tableName, HTableDescriptor htd)
1482 throws IOException {
1483 checkInitialized();
1484 if (cpHost != null) {
1485 cpHost.preModifyTable(tableName, htd);
1486 }
1487 TableEventHandler tblHandler = new ModifyTableHandler(tableName, htd, this, this);
1488 this.executorService.submit(tblHandler);
1489
1490 tblHandler.waitForEventBeingHandled();
1491 if (cpHost != null) {
1492 cpHost.postModifyTable(tableName, htd);
1493 }
1494 }
1495
1496 @Override
1497 public void checkTableModifiable(final byte [] tableName)
1498 throws IOException {
1499 String tableNameStr = Bytes.toString(tableName);
1500 if (isCatalogTable(tableName)) {
1501 throw new IOException("Can't modify catalog tables");
1502 }
1503 if (!MetaReader.tableExists(getCatalogTracker(), tableNameStr)) {
1504 throw new TableNotFoundException(tableNameStr);
1505 }
1506 if (!getAssignmentManager().getZKTable().
1507 isDisabledTable(Bytes.toString(tableName))) {
1508 throw new TableNotDisabledException(tableName);
1509 }
1510 }
1511
1512 public void clearFromTransition(HRegionInfo hri) {
1513 if (this.assignmentManager.isRegionInTransition(hri) != null) {
1514 this.assignmentManager.regionOffline(hri);
1515 }
1516 }
1517
1518
1519
1520
1521 public ClusterStatus getClusterStatus() {
1522
1523 List<String> backupMasterStrings;
1524 try {
1525 backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
1526 this.zooKeeper.backupMasterAddressesZNode);
1527 } catch (KeeperException e) {
1528 LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
1529 backupMasterStrings = new ArrayList<String>(0);
1530 }
1531 List<ServerName> backupMasters = new ArrayList<ServerName>(
1532 backupMasterStrings.size());
1533 for (String s: backupMasterStrings) {
1534 try {
1535 byte[] bytes = ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(this.zooKeeper.backupMasterAddressesZNode, s));
1536 if (bytes != null) {
1537 backupMasters.add(ServerName.parseVersionedServerName(bytes));
1538 }
1539 } catch (KeeperException e) {
1540 LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
1541 "backup servers"), e);
1542 }
1543 }
1544 Collections.sort(backupMasters, new Comparator<ServerName>() {
1545 public int compare(ServerName s1, ServerName s2) {
1546 return s1.getServerName().compareTo(s2.getServerName());
1547 }});
1548
1549 return new ClusterStatus(VersionInfo.getVersion(),
1550 this.fileSystemManager.getClusterId(),
1551 this.serverManager.getOnlineServers(),
1552 this.serverManager.getDeadServers(),
1553 this.serverName,
1554 backupMasters,
1555 this.assignmentManager.getRegionsInTransition(),
1556 this.getCoprocessors());
1557 }
1558
1559 public String getClusterId() {
1560 return (fileSystemManager == null) ? null : fileSystemManager.getClusterId();
1561 }
1562
1563
1564
1565
1566
1567
1568
1569
1570 public static String getLoadedCoprocessors() {
1571 return CoprocessorHost.getLoadedCoprocessors().toString();
1572 }
1573
1574
1575
1576
1577 public long getMasterStartTime() {
1578 return masterStartTime;
1579 }
1580
1581
1582
1583
1584 public long getMasterActiveTime() {
1585 return masterActiveTime;
1586 }
1587
1588
1589
1590
1591 public String[] getCoprocessors() {
1592 MasterCoprocessorHost cp = getCoprocessorHost();
1593 String[] cpList = new String[0];
1594 if (cp == null) return cpList;
1595
1596 Set<String> masterCoprocessors = cp.getCoprocessors();
1597 return masterCoprocessors.toArray(cpList);
1598 }
1599
1600 @Override
1601 public void abort(final String msg, final Throwable t) {
1602 if (cpHost != null) {
1603
1604 LOG.fatal("Master server abort: loaded coprocessors are: " +
1605 getLoadedCoprocessors());
1606 }
1607
1608 if (abortNow(msg, t)) {
1609 if (t != null) LOG.fatal(msg, t);
1610 else LOG.fatal(msg);
1611 this.abort = true;
1612 stop("Aborting");
1613 }
1614 }
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633 private boolean tryRecoveringExpiredZKSession() throws InterruptedException,
1634 IOException, KeeperException, ExecutionException {
1635
1636 this.zooKeeper.unregisterAllListeners();
1637
1638
1639 if (this.registeredZKListenersBeforeRecovery != null) {
1640 for (ZooKeeperListener curListener : this.registeredZKListenersBeforeRecovery) {
1641 this.zooKeeper.registerListener(curListener);
1642 }
1643 }
1644
1645 this.zooKeeper.reconnectAfterExpiration();
1646
1647 Callable<Boolean> callable = new Callable<Boolean> () {
1648 public Boolean call() throws InterruptedException,
1649 IOException, KeeperException {
1650 MonitoredTask status =
1651 TaskMonitor.get().createStatus("Recovering expired ZK session");
1652 try {
1653 if (!becomeActiveMaster(status)) {
1654 return Boolean.FALSE;
1655 }
1656 serverManager.disableSSHForRoot();
1657 serverShutdownHandlerEnabled = false;
1658 initialized = false;
1659 finishInitialization(status, true);
1660 return Boolean.TRUE;
1661 } finally {
1662 status.cleanup();
1663 }
1664 }
1665 };
1666
1667 long timeout =
1668 conf.getLong("hbase.master.zksession.recover.timeout", 300000);
1669 java.util.concurrent.ExecutorService executor =
1670 Executors.newSingleThreadExecutor();
1671 Future<Boolean> result = executor.submit(callable);
1672 executor.shutdown();
1673 if (executor.awaitTermination(timeout, TimeUnit.MILLISECONDS)
1674 && result.isDone()) {
1675 Boolean recovered = result.get();
1676 if (recovered != null) {
1677 return recovered.booleanValue();
1678 }
1679 }
1680 executor.shutdownNow();
1681 return false;
1682 }
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692 private boolean abortNow(final String msg, final Throwable t) {
1693 if (!this.isActiveMaster || this.stopped) {
1694 return true;
1695 }
1696 if (t != null && t instanceof KeeperException.SessionExpiredException) {
1697 try {
1698 LOG.info("Primary Master trying to recover from ZooKeeper session " +
1699 "expiry.");
1700 return !tryRecoveringExpiredZKSession();
1701 } catch (Throwable newT) {
1702 LOG.error("Primary master encountered unexpected exception while " +
1703 "trying to recover from ZooKeeper session" +
1704 " expiry. Proceeding with server abort.", newT);
1705 }
1706 }
1707 return true;
1708 }
1709
1710 @Override
1711 public ZooKeeperWatcher getZooKeeper() {
1712 return zooKeeper;
1713 }
1714
1715 @Override
1716 public MasterCoprocessorHost getCoprocessorHost() {
1717 return cpHost;
1718 }
1719
1720 @Override
1721 public ServerName getServerName() {
1722 return this.serverName;
1723 }
1724
1725 @Override
1726 public CatalogTracker getCatalogTracker() {
1727 return catalogTracker;
1728 }
1729
1730 @Override
1731 public AssignmentManager getAssignmentManager() {
1732 return this.assignmentManager;
1733 }
1734
1735 public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
1736 return rsFatals;
1737 }
1738
1739 @SuppressWarnings("deprecation")
1740 @Override
1741 public void shutdown() {
1742 if (cpHost != null) {
1743 try {
1744 cpHost.preShutdown();
1745 } catch (IOException ioe) {
1746 LOG.error("Error call master coprocessor preShutdown()", ioe);
1747 }
1748 }
1749 if (mxBean != null) {
1750 MBeanUtil.unregisterMBean(mxBean);
1751 mxBean = null;
1752 }
1753 if (this.assignmentManager != null) this.assignmentManager.shutdown();
1754 if (this.serverManager != null) this.serverManager.shutdownCluster();
1755
1756 try {
1757 if (this.clusterStatusTracker != null){
1758 this.clusterStatusTracker.setClusterDown();
1759 }
1760 } catch (KeeperException e) {
1761 if (e instanceof KeeperException.SessionExpiredException) {
1762 LOG.warn("ZK session expired. Retry a new connection...");
1763 try {
1764 this.zooKeeper.reconnectAfterExpiration();
1765 this.clusterStatusTracker.setClusterDown();
1766 } catch (Exception ex) {
1767 LOG.error("Retry setClusterDown failed", ex);
1768 }
1769 } else {
1770 LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
1771 }
1772 }
1773 }
1774
1775 @Override
1776 public void stopMaster() {
1777 if (cpHost != null) {
1778 try {
1779 cpHost.preStopMaster();
1780 } catch (IOException ioe) {
1781 LOG.error("Error call master coprocessor preStopMaster()", ioe);
1782 }
1783 }
1784 stop("Stopped by " + Thread.currentThread().getName());
1785 }
1786
1787 @Override
1788 public void stop(final String why) {
1789 LOG.info(why);
1790 this.stopped = true;
1791
1792 stopSleeper.skipSleepCycle();
1793
1794 if (this.activeMasterManager != null) {
1795 synchronized (this.activeMasterManager.clusterHasActiveMaster) {
1796 this.activeMasterManager.clusterHasActiveMaster.notifyAll();
1797 }
1798 }
1799
1800
1801 if (this.catalogTracker != null && this.serverManager.getOnlineServers().isEmpty()) {
1802 this.catalogTracker.stop();
1803 }
1804 }
1805
1806 @Override
1807 public boolean isStopped() {
1808 return this.stopped;
1809 }
1810
1811 public boolean isAborted() {
1812 return this.abort;
1813 }
1814
1815 void checkInitialized() throws PleaseHoldException {
1816 if (!this.initialized) {
1817 throw new PleaseHoldException("Master is initializing");
1818 }
1819 }
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829 public boolean isActiveMaster() {
1830 return isActiveMaster;
1831 }
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842 public boolean isInitialized() {
1843 return initialized;
1844 }
1845
1846
1847
1848
1849
1850
1851 public boolean isServerShutdownHandlerEnabled() {
1852 return this.serverShutdownHandlerEnabled;
1853 }
1854
1855 public boolean shouldSplitMetaSeparately() {
1856 return this.shouldSplitMetaSeparately;
1857 }
1858
1859
1860
1861
1862
1863 public boolean isInitializationStartsMetaRegoinAssignment() {
1864 return this.initializationBeforeMetaAssignment;
1865 }
1866
1867 @Override
1868 @Deprecated
1869 public void assign(final byte[] regionName, final boolean force)
1870 throws IOException {
1871 assign(regionName);
1872 }
1873
1874 @Override
1875 public void assign(final byte [] regionName)throws IOException {
1876 checkInitialized();
1877 Pair<HRegionInfo, ServerName> pair =
1878 MetaReader.getRegion(this.catalogTracker, regionName);
1879 if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
1880 if (cpHost != null) {
1881 if (cpHost.preAssign(pair.getFirst())) {
1882 return;
1883 }
1884 }
1885 assignRegion(pair.getFirst());
1886 if (cpHost != null) {
1887 cpHost.postAssign(pair.getFirst());
1888 }
1889 }
1890
1891
1892
1893 public void assignRegion(HRegionInfo hri) {
1894 assignmentManager.assign(hri, true);
1895 }
1896
1897 @Override
1898 public void unassign(final byte [] regionName, final boolean force)
1899 throws IOException {
1900 checkInitialized();
1901 Pair<HRegionInfo, ServerName> pair =
1902 MetaReader.getRegion(this.catalogTracker, regionName);
1903 if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
1904 HRegionInfo hri = pair.getFirst();
1905 if (cpHost != null) {
1906 if (cpHost.preUnassign(hri, force)) {
1907 return;
1908 }
1909 }
1910 if (force) {
1911 this.assignmentManager.regionOffline(hri);
1912 assignRegion(hri);
1913 } else {
1914 this.assignmentManager.unassign(hri, force);
1915 }
1916 if (cpHost != null) {
1917 cpHost.postUnassign(hri, force);
1918 }
1919 }
1920
1921
1922
1923
1924
1925
1926 public HTableDescriptor[] getHTableDescriptors(List<String> tableNames)
1927 throws IOException {
1928 List<HTableDescriptor> descriptors =
1929 new ArrayList<HTableDescriptor>(tableNames.size());
1930
1931 boolean bypass = false;
1932 if (this.cpHost != null) {
1933 bypass = this.cpHost.preGetTableDescriptors(tableNames, descriptors);
1934 }
1935
1936 if (!bypass) {
1937 for (String s: tableNames) {
1938 HTableDescriptor htd = null;
1939 try {
1940 htd = this.tableDescriptors.get(s);
1941 } catch (IOException e) {
1942 LOG.warn("Failed getting descriptor for " + s, e);
1943 }
1944 if (htd == null) continue;
1945 descriptors.add(htd);
1946 }
1947 }
1948
1949 if (this.cpHost != null) {
1950 this.cpHost.postGetTableDescriptors(descriptors);
1951 }
1952
1953 return descriptors.toArray(new HTableDescriptor [] {});
1954 }
1955
1956 @Override
1957 public <T extends CoprocessorProtocol> boolean registerProtocol(
1958 Class<T> protocol, T handler) {
1959
1960
1961
1962
1963 if (protocolHandlers.containsKey(protocol)) {
1964 LOG.error("Protocol "+protocol.getName()+
1965 " already registered, rejecting request from "+
1966 handler
1967 );
1968 return false;
1969 }
1970
1971 protocolHandlers.putInstance(protocol, handler);
1972 protocolHandlerNames.put(protocol.getName(), protocol);
1973 if (LOG.isDebugEnabled()) {
1974 LOG.debug("Registered master protocol handler: protocol="+protocol.getName());
1975 }
1976 return true;
1977 }
1978
1979 @Override
1980 public ExecResult execCoprocessor(Exec call) throws IOException {
1981 Class<? extends CoprocessorProtocol> protocol = call.getProtocol();
1982 if (protocol == null) {
1983 String protocolName = call.getProtocolName();
1984 if (LOG.isDebugEnabled()) {
1985 LOG.debug("Received dynamic protocol exec call with protocolName " + protocolName);
1986 }
1987
1988 protocol = protocolHandlerNames.get(protocolName);
1989 if (protocol == null) {
1990 throw new HBaseRPC.UnknownProtocolException(protocol,
1991 "No matching handler for master protocol "+protocolName);
1992 }
1993 }
1994 if (!protocolHandlers.containsKey(protocol)) {
1995 throw new HBaseRPC.UnknownProtocolException(protocol,
1996 "No matching handler for protocol ");
1997 }
1998
1999 CoprocessorProtocol handler = protocolHandlers.getInstance(protocol);
2000 Object value;
2001
2002 try {
2003 Method method = protocol.getMethod(
2004 call.getMethodName(), call.getParameterClasses());
2005 method.setAccessible(true);
2006
2007 value = method.invoke(handler, call.getParameters());
2008 } catch (InvocationTargetException e) {
2009 Throwable target = e.getTargetException();
2010 if (target instanceof IOException) {
2011 throw (IOException)target;
2012 }
2013 IOException ioe = new IOException(target.toString());
2014 ioe.setStackTrace(target.getStackTrace());
2015 throw ioe;
2016 } catch (Throwable e) {
2017 if (!(e instanceof IOException)) {
2018 LOG.error("Unexpected throwable object ", e);
2019 }
2020 IOException ioe = new IOException(e.toString());
2021 ioe.setStackTrace(e.getStackTrace());
2022 throw ioe;
2023 }
2024
2025 return new ExecResult(value);
2026 }
2027
2028
2029
2030
2031
2032
2033 public HTableDescriptor [] getHTableDescriptors() throws IOException {
2034 List<HTableDescriptor> descriptors = new ArrayList<HTableDescriptor>();
2035 boolean bypass = false;
2036 if (this.cpHost != null) {
2037 bypass = this.cpHost.preGetTableDescriptors(null, descriptors);
2038 }
2039 if (!bypass) {
2040 descriptors.addAll(this.tableDescriptors.getAll().values());
2041 }
2042 if (this.cpHost != null) {
2043 this.cpHost.postGetTableDescriptors(descriptors);
2044 }
2045 return descriptors.toArray(new HTableDescriptor [] {});
2046 }
2047
2048
2049
2050
2051
2052
2053
2054 public double getAverageLoad() {
2055 return this.assignmentManager.getAverageLoad();
2056 }
2057
2058
2059
2060
2061 @Override
2062 public void offline(final byte[] regionName) throws IOException {
2063 Pair<HRegionInfo, ServerName> pair =
2064 MetaReader.getRegion(this.catalogTracker, regionName);
2065 if (pair == null) throw new UnknownRegionException(Bytes.toStringBinary(regionName));
2066 HRegionInfo hri = pair.getFirst();
2067 this.assignmentManager.regionOffline(hri);
2068 }
2069
2070
2071
2072
2073
2074
2075
2076 public static HMaster constructMaster(Class<? extends HMaster> masterClass,
2077 final Configuration conf) {
2078 try {
2079 Constructor<? extends HMaster> c =
2080 masterClass.getConstructor(Configuration.class);
2081 return c.newInstance(conf);
2082 } catch (InvocationTargetException ite) {
2083 Throwable target = ite.getTargetException() != null?
2084 ite.getTargetException(): ite;
2085 if (target.getCause() != null) target = target.getCause();
2086 throw new RuntimeException("Failed construction of Master: " +
2087 masterClass.toString(), target);
2088 } catch (Exception e) {
2089 throw new RuntimeException("Failed construction of Master: " +
2090 masterClass.toString() + ((e.getCause() != null)?
2091 e.getCause().getMessage(): ""), e);
2092 }
2093 }
2094
2095
2096
2097
2098 public static void main(String [] args) throws Exception {
2099 VersionInfo.logVersion();
2100 new HMasterCommandLine(HMaster.class).doMain(args);
2101 }
2102
2103
2104
2105
2106 @SuppressWarnings("deprecation")
2107 void registerMBean() {
2108 MXBeanImpl mxBeanInfo = MXBeanImpl.init(this);
2109 MBeanUtil.registerMBean("Master", "Master", mxBeanInfo);
2110 LOG.info("Registered HMaster MXBean");
2111 }
2112
2113
2114
2115
2116
2117 public HFileCleaner getHFileCleaner() {
2118 return this.hfileCleaner;
2119 }
2120
2121 private boolean isHealthCheckerConfigured() {
2122 String healthScriptLocation = this.conf.get(HConstants.HEALTH_SCRIPT_LOC);
2123 return org.apache.commons.lang.StringUtils.isNotBlank(healthScriptLocation);
2124 }
2125
2126
2127
2128
2129
2130 public SnapshotManager getSnapshotManagerForTesting() {
2131 return this.snapshotManager;
2132 }
2133
2134
2135
2136
2137
2138
2139 @Override
2140 public long snapshot(final HSnapshotDescription request) throws IOException {
2141 LOG.debug("Submitting snapshot request for:" +
2142 SnapshotDescriptionUtils.toString(request.getProto()));
2143 try {
2144 this.snapshotManager.checkSnapshotSupport();
2145 } catch (UnsupportedOperationException e) {
2146 throw new IOException(e);
2147 }
2148
2149
2150 SnapshotDescription snapshot = SnapshotDescriptionUtils.validate(request.getProto(),
2151 this.conf);
2152
2153 snapshotManager.takeSnapshot(snapshot);
2154
2155
2156 long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(conf, snapshot.getType(),
2157 SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
2158 return waitTime;
2159 }
2160
2161
2162
2163
2164 @Override
2165 public List<HSnapshotDescription> getCompletedSnapshots() throws IOException {
2166 List<HSnapshotDescription> availableSnapshots = new ArrayList<HSnapshotDescription>();
2167 List<SnapshotDescription> snapshots = snapshotManager.getCompletedSnapshots();
2168
2169
2170 for (SnapshotDescription snapshot: snapshots) {
2171 availableSnapshots.add(new HSnapshotDescription(snapshot));
2172 }
2173
2174 return availableSnapshots;
2175 }
2176
2177
2178
2179
2180
2181
2182 @Override
2183 public void deleteSnapshot(final HSnapshotDescription request) throws IOException {
2184 try {
2185 this.snapshotManager.checkSnapshotSupport();
2186 } catch (UnsupportedOperationException e) {
2187 throw new IOException(e);
2188 }
2189
2190 snapshotManager.deleteSnapshot(request.getProto());
2191 }
2192
2193
2194
2195
2196
2197
2198
2199
2200 @Override
2201 public boolean isSnapshotDone(final HSnapshotDescription request) throws IOException {
2202 LOG.debug("Checking to see if snapshot from request:" +
2203 SnapshotDescriptionUtils.toString(request.getProto()) + " is done");
2204 return snapshotManager.isSnapshotDone(request.getProto());
2205 }
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220 @Override
2221 public void restoreSnapshot(final HSnapshotDescription request) throws IOException {
2222 try {
2223 this.snapshotManager.checkSnapshotSupport();
2224 } catch (UnsupportedOperationException e) {
2225 throw new IOException(e);
2226 }
2227
2228 snapshotManager.restoreSnapshot(request.getProto());
2229 }
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241 @Override
2242 public boolean isRestoreSnapshotDone(final HSnapshotDescription request) throws IOException {
2243 return snapshotManager.isRestoreDone(request.getProto());
2244 }
2245 }
2246