20 package org.apache.hadoop.hbase.master;
21
22 import java.io.IOException;
23 import java.lang.reflect.Constructor;
24 import java.lang.reflect.InvocationTargetException;
25 import java.lang.reflect.Method;
26 import java.net.InetAddress;
27 import java.net.InetSocketAddress;
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.Comparator;
31 import java.util.HashMap;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.Set;
35 import java.util.concurrent.Callable;
36 import java.util.concurrent.ExecutionException;
37 import java.util.concurrent.Executors;
38 import java.util.concurrent.Future;
39 import java.util.concurrent.TimeUnit;
40 import java.util.concurrent.atomic.AtomicReference;
41
42 import javax.management.ObjectName;
43
44 import org.apache.commons.logging.Log;
45 import org.apache.commons.logging.LogFactory;
46 import org.apache.hadoop.conf.Configuration;
47 import org.apache.hadoop.fs.Path;
48 import org.apache.hadoop.hbase.Chore;
49 import org.apache.hadoop.hbase.ClusterStatus;
50 import org.apache.hadoop.hbase.HColumnDescriptor;
51 import org.apache.hadoop.hbase.HConstants;
52 import org.apache.hadoop.hbase.HRegionInfo;
53 import org.apache.hadoop.hbase.HServerLoad;
54 import org.apache.hadoop.hbase.HTableDescriptor;
55 import org.apache.hadoop.hbase.HealthCheckChore;
56 import org.apache.hadoop.hbase.MasterNotRunningException;
57 import org.apache.hadoop.hbase.PleaseHoldException;
58 import org.apache.hadoop.hbase.Server;
59 import org.apache.hadoop.hbase.ServerName;
60 import org.apache.hadoop.hbase.TableDescriptors;
61 import org.apache.hadoop.hbase.TableNotDisabledException;
62 import org.apache.hadoop.hbase.TableNotFoundException;
63 import org.apache.hadoop.hbase.UnknownRegionException;
64 import org.apache.hadoop.hbase.catalog.CatalogTracker;
65 import org.apache.hadoop.hbase.catalog.MetaReader;
66 import org.apache.hadoop.hbase.client.HConnectionManager;
67 import org.apache.hadoop.hbase.client.MetaScanner;
68 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
69 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
70 import org.apache.hadoop.hbase.client.Result;
71 import org.apache.hadoop.hbase.client.coprocessor.Exec;
72 import org.apache.hadoop.hbase.client.coprocessor.ExecResult;
73 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
74 import org.apache.hadoop.hbase.executor.ExecutorService;
75 import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
76 import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
77 import org.apache.hadoop.hbase.ipc.HBaseRPC;
78 import org.apache.hadoop.hbase.ipc.HBaseServer;
79 import org.apache.hadoop.hbase.ipc.HMasterInterface;
80 import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
81 import org.apache.hadoop.hbase.ipc.ProtocolSignature;
82 import org.apache.hadoop.hbase.ipc.RpcServer;
83 import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
84 import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
85 import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
86 import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
87 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
88 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
89 import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
90 import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
91 import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
92 import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
93 import org.apache.hadoop.hbase.master.handler.TableEventHandler;
94 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
95 import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
96 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
97 import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
98 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
99 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
100 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
101 import org.apache.hadoop.hbase.regionserver.wal.HLog;
102 import org.apache.hadoop.hbase.replication.regionserver.Replication;
103 import org.apache.hadoop.hbase.snapshot.HSnapshotDescription;
104 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
105 import org.apache.hadoop.hbase.security.User;
106 import org.apache.hadoop.hbase.util.Bytes;
107 import org.apache.hadoop.hbase.util.FSTableDescriptors;
108 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
109 import org.apache.hadoop.hbase.util.HasThread;
110 import org.apache.hadoop.hbase.util.InfoServer;
111 import org.apache.hadoop.hbase.util.Pair;
112 import org.apache.hadoop.hbase.util.Sleeper;
113 import org.apache.hadoop.hbase.util.Strings;
114 import org.apache.hadoop.hbase.util.Threads;
115 import org.apache.hadoop.hbase.util.VersionInfo;
116 import org.apache.hadoop.hbase.zookeeper.ClusterId;
117 import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
118 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
119 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
120 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
121 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
122 import org.apache.hadoop.io.MapWritable;
123 import org.apache.hadoop.io.Text;
124 import org.apache.hadoop.metrics.util.MBeanUtil;
125 import org.apache.hadoop.net.DNS;
126 import org.apache.zookeeper.KeeperException;
127 import org.apache.zookeeper.Watcher;
128
129 import com.google.common.collect.ClassToInstanceMap;
130 import com.google.common.collect.Maps;
131 import com.google.common.collect.MutableClassToInstanceMap;
132
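/**
 * HMaster is the "master server" for HBase. An HBase cluster has one active
 * master; if several masters are started they compete and whichever wins runs
 * the cluster. The master coordinates region assignment through the
 * {@link AssignmentManager}, runs the balancer and catalog janitor chores,
 * serves table DDL requests, and tracks region servers via ZooKeeper.
 */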
150 public class HMaster extends HasThread
151 implements HMasterInterface, HMasterRegionInterface, MasterServices,
152 Server {
153 private static final Log LOG = LogFactory.getLog(HMaster.class.getName());
154
155
156
157 public static final String MASTER = "master";
158
159
160 private final Configuration conf;
161
162 private InfoServer infoServer;
163
164
165 private ZooKeeperWatcher zooKeeper;
166
167 private ActiveMasterManager activeMasterManager;
168
169 private RegionServerTracker regionServerTracker;
170
171 private DrainingServerTracker drainingServerTracker;
172
173
174 private final RpcServer rpcServer;
175
176
177
178
179 private final InetSocketAddress isa;
180
181
182 private final MasterMetrics metrics;
183
184 private MasterFileSystem fileSystemManager;
185
186
187 private ServerManager serverManager;
188
189
190 AssignmentManager assignmentManager;
191
192 private CatalogTracker catalogTracker;
193
194 private ClusterStatusTracker clusterStatusTracker;
195
196
197
198
199 private MemoryBoundedLogMessageBuffer rsFatals;
200
201
202
203 private volatile boolean stopped = false;
204
205 private volatile boolean abort = false;
206
207 private volatile boolean isActiveMaster = false;
208
209
210
211 volatile boolean initialized = false;
212
213
214 private volatile boolean serverShutdownHandlerEnabled = false;
215
216 private volatile boolean shouldSplitMetaSeparately;
217
218
219 ExecutorService executorService;
220
221 private LoadBalancer balancer;
222 private Thread balancerChore;
223
224 private volatile boolean balanceSwitch = true;
225
226 private CatalogJanitor catalogJanitorChore;
227 private LogCleaner logCleaner;
228 private HFileCleaner hfileCleaner;
229
230 private MasterCoprocessorHost cpHost;
231 private final ServerName serverName;
232
233 private TableDescriptors tableDescriptors;
234
235
236 private long masterStartTime;
237 private long masterActiveTime;
238
239
240 private SnapshotManager snapshotManager;
241
242
243
244
245 private ObjectName mxBean = null;
246
247
248 private ClassToInstanceMap<CoprocessorProtocol>
249 protocolHandlers = MutableClassToInstanceMap.create();
250
251 private Map<String, Class<? extends CoprocessorProtocol>>
252 protocolHandlerNames = Maps.newHashMap();
253
254
255 private HealthCheckChore healthCheckChore;
256
257
258 private boolean waitingOnLogSplitting = false;
259
260
261 private volatile boolean initializationBeforeMetaAssignment = false;
262
263
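  /**
   * Initializes the HMaster: creates the RPC server and server name, performs
   * Kerberos login when configured, and connects to ZooKeeper. The remainder
   * of startup runs in {@link #run()} on the master's own thread.
   */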
275 public HMaster(final Configuration conf)
276 throws IOException, KeeperException, InterruptedException {
277 this.conf = new Configuration(conf);
278
279 this.conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
280
281 HConnectionManager.setServerSideHConnectionRetries(this.conf, LOG);
282
283 String hostname = Strings.domainNamePointerToHostName(DNS.getDefaultHost(
284 conf.get("hbase.master.dns.interface", "default"),
285 conf.get("hbase.master.dns.nameserver", "default")));
286 int port = conf.getInt(HConstants.MASTER_PORT, HConstants.DEFAULT_MASTER_PORT);
287
288 InetSocketAddress initialIsa = new InetSocketAddress(hostname, port);
289 if (initialIsa.getAddress() == null) {
290 throw new IllegalArgumentException("Failed resolve of hostname " + initialIsa);
291 }
292
293 String bindAddress = conf.get("hbase.master.ipc.address");
294 if (bindAddress != null) {
295 initialIsa = new InetSocketAddress(bindAddress, port);
296 if (initialIsa.getAddress() == null) {
297 throw new IllegalArgumentException("Failed resolve of bind address " + initialIsa);
298 }
299 }
300 int numHandlers = conf.getInt("hbase.master.handler.count",
301 conf.getInt("hbase.regionserver.handler.count", 25));
302 this.rpcServer = HBaseRPC.getServer(this,
303 new Class<?>[]{HMasterInterface.class, HMasterRegionInterface.class},
304 initialIsa.getHostName(),
305 initialIsa.getPort(),
306 numHandlers,
307 0,
308 conf.getBoolean("hbase.rpc.verbose", false), conf,
309 0);
310
311 this.isa = this.rpcServer.getListenerAddress();
312 this.serverName = new ServerName(hostname,
313 this.isa.getPort(), System.currentTimeMillis());
314 this.rsFatals = new MemoryBoundedLogMessageBuffer(
315 conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));
316
317
318 ZKUtil.loginClient(this.conf, "hbase.zookeeper.client.keytab.file",
319 "hbase.zookeeper.client.kerberos.principal", this.isa.getHostName());
320
321
322 User.login(conf, "hbase.master.keytab.file",
323 "hbase.master.kerberos.principal", this.isa.getHostName());
324
325
326 setName(MASTER + "-" + this.serverName.toString());
327
328 Replication.decorateMasterConfiguration(this.conf);
329
330
331
332 if (this.conf.get("mapred.task.id") == null) {
333 this.conf.set("mapred.task.id", "hb_m_" + this.serverName.toString());
334 }
335
336 this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this, true);
337 this.rpcServer.startThreads();
338 this.metrics = new MasterMetrics(getServerName().toString());
339
340
341 int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
342 HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
343 if (isHealthCheckerConfigured()) {
344 healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
345 }
346
347 this.shouldSplitMetaSeparately = conf.getBoolean(HLog.SEPARATE_HLOG_FOR_META, false);
348 waitingOnLogSplitting = this.conf.getBoolean("hbase.master.wait.for.log.splitting", false);
349 }
350
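  /**
   * If this master was started in backup mode, stall here until an active
   * master writes its znode, polling once per ZooKeeper session timeout.
   */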
358 private static void stallIfBackupMaster(final Configuration c,
359 final ActiveMasterManager amm)
360 throws InterruptedException {
361
362 if (!c.getBoolean(HConstants.MASTER_TYPE_BACKUP,
363 HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
364 return;
365 }
366 LOG.debug("HMaster started in backup mode. " +
367 "Stalling until master znode is written.");
368
369
370 while (!amm.isActiveMaster()) {
371 LOG.debug("Waiting for master address ZNode to be written " +
372 "(Also watching cluster state node)");
373 Thread.sleep(c.getInt("zookeeper.session.timeout", 180 * 1000));
374 }
375
376 }
377
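  /**
   * Main processing loop: become the active master, finish initialization,
   * then idle until stopped, shutting down chores, service threads and
   * trackers on the way out.
   */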
387 @Override
388 public void run() {
389 MonitoredTask startupStatus =
390 TaskMonitor.get().createStatus("Master startup");
391 startupStatus.setDescription("Master startup");
392 masterStartTime = System.currentTimeMillis();
393 try {
394
395
396
397
398
399
400
401
402
403
404 becomeActiveMaster(startupStatus);
405
406
407 if (!this.stopped) {
408 finishInitialization(startupStatus, false);
409 loop();
410 }
411 } catch (Throwable t) {
412
413       if (t instanceof NoClassDefFoundError && t.getMessage() != null &&
414           t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
415
416 abort("HBase is having a problem with its Hadoop jars. You may need to "
417 + "recompile HBase against Hadoop version "
418 + org.apache.hadoop.util.VersionInfo.getVersion()
419 + " or change your hadoop jars to start properly", t);
420 } else {
421 abort("Unhandled exception. Starting shutdown.", t);
422 }
423 } finally {
424 startupStatus.cleanup();
425
426 stopChores();
427
428
429 if (!this.abort && this.serverManager != null &&
430 this.serverManager.isClusterShutdown()) {
431 this.serverManager.letRegionServersShutdown();
432 }
433 stopServiceThreads();
434
435 if (this.activeMasterManager != null) this.activeMasterManager.stop();
436 if (this.catalogTracker != null) this.catalogTracker.stop();
437 if (this.serverManager != null) this.serverManager.stop();
438 if (this.assignmentManager != null) this.assignmentManager.stop();
439 if (this.fileSystemManager != null) this.fileSystemManager.stop();
440 if (this.snapshotManager != null) this.snapshotManager.stop("server shutting down.");
441 this.zooKeeper.close();
442 }
443 LOG.info("HMaster main thread exiting");
444 }
445
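  /**
   * Register this master as a candidate in ZooKeeper and block until it
   * becomes the active master (or the master is stopped).
   * @return true if this master became the active master
   */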
452 private boolean becomeActiveMaster(MonitoredTask startupStatus)
453 throws InterruptedException {
454
455
456 this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName,
457 this);
458 this.zooKeeper.registerListener(activeMasterManager);
459 stallIfBackupMaster(this.conf, this.activeMasterManager);
460
461
462
463
464 this.clusterStatusTracker = new ClusterStatusTracker(getZooKeeper(), this);
465 this.clusterStatusTracker.start();
466 return this.activeMasterManager.blockUntilBecomingActiveMaster(startupStatus,
467 this.clusterStatusTracker);
468 }
469
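  /**
   * Initialize the ZooKeeper-based system trackers: catalog tracker, load
   * balancer, assignment manager, region server and draining server trackers,
   * the cluster-up flag, and the snapshot manager.
   */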
475 private void initializeZKBasedSystemTrackers() throws IOException,
476 InterruptedException, KeeperException {
477 this.catalogTracker = new CatalogTracker(this.zooKeeper, this.conf, this);
478 this.catalogTracker.start();
479
480 this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
481 this.assignmentManager = new AssignmentManager(this, serverManager,
482 this.catalogTracker, this.balancer, this.executorService);
483 zooKeeper.registerListenerFirst(assignmentManager);
484
485 this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
486 this.serverManager);
487 this.regionServerTracker.start();
488
489 this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
490 this.serverManager);
491 this.drainingServerTracker.start();
492
493
494
495 boolean wasUp = this.clusterStatusTracker.isClusterUp();
496 if (!wasUp) this.clusterStatusTracker.setClusterUp();
497
498 LOG.info("Server active/primary master; " + this.serverName +
499 ", sessionid=0x" +
500 Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
501 ", cluster-up flag was=" + wasUp);
502
503
504 this.snapshotManager = new SnapshotManager(this, this.metrics);
505 }
506
507
508 private Sleeper stopSleeper = new Sleeper(1000, this);
509 private void loop() {
510 while (!this.stopped) {
511 stopSleeper.sleep();
512 }
513 }
514
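  /**
   * Finish master initialization after becoming the active master: set up the
   * master file system and table descriptors, publish the cluster id, start
   * executors and service threads, wait for region servers, split logs of
   * failed servers, assign -ROOT- and .META., join the cluster in the
   * assignment manager, and start the balancer and catalog janitor chores.
   * @param masterRecovery true when re-running initialization after the
   *   active master recovered from an expired ZooKeeper session
   */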
535 private void finishInitialization(MonitoredTask status, boolean masterRecovery)
536 throws IOException, InterruptedException, KeeperException {
537
538 isActiveMaster = true;
539
540
541
542
543
544
545
546 status.setStatus("Initializing Master file system");
547 this.masterActiveTime = System.currentTimeMillis();
548
549 this.fileSystemManager = new MasterFileSystem(this, this, metrics, masterRecovery);
550
551 this.tableDescriptors =
552 new FSTableDescriptors(this.fileSystemManager.getFileSystem(),
553 this.fileSystemManager.getRootDir());
554
555
556 status.setStatus("Publishing Cluster ID in ZooKeeper");
557 ClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
558 if (!masterRecovery) {
559 this.executorService = new ExecutorService(getServerName().toString());
560 this.serverManager = new ServerManager(this, this);
561 }
562
563
564 status.setStatus("Initializing ZK system trackers");
565 initializeZKBasedSystemTrackers();
566
567 if (!masterRecovery) {
568
569 status.setStatus("Initializing master coprocessors");
570 this.cpHost = new MasterCoprocessorHost(this, this.conf);
571
572
573 status.setStatus("Initializing master service threads");
574 startServiceThreads();
575 }
576
577
578 this.serverManager.waitForRegionServers(status);
579
580 for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
581 if (!this.serverManager.isServerOnline(sn)) {
582
583 LOG.info("Registering server found up in zk but who has not yet " +
584 "reported in: " + sn);
585 this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD);
586 }
587 }
588 if (!masterRecovery) {
589 this.assignmentManager.startTimeOutMonitor();
590 }
591
592
593 Set<ServerName> failedServers = this.fileSystemManager.getFailedServersFromLogFolders();
594 if (waitingOnLogSplitting) {
595 List<ServerName> servers = new ArrayList<ServerName>(failedServers);
596 this.fileSystemManager.splitAllLogs(servers);
597 failedServers.clear();
598 }
599
600 ServerName preRootServer = this.catalogTracker.getRootLocation();
601 if (preRootServer != null && failedServers.contains(preRootServer)) {
602
603 this.fileSystemManager.splitAllLogs(preRootServer);
604 failedServers.remove(preRootServer);
605 }
606
607 this.initializationBeforeMetaAssignment = true;
608
609 assignRoot(status);
610
611
612
613 this.serverManager.enableSSHForRoot();
614
615
616 ServerName preMetaServer = this.catalogTracker.getMetaLocationOrReadLocationFromRoot();
617 if (preMetaServer != null && failedServers.contains(preMetaServer)) {
618
619 this.fileSystemManager.splitAllLogs(preMetaServer);
620 failedServers.remove(preMetaServer);
621 }
622
623
624 assignMeta(status, ((masterRecovery) ? null : preMetaServer), preRootServer);
625
626 enableServerShutdownHandler();
627
628
629 status.setStatus("Submit log splitting work of non-meta region servers");
630 for (ServerName curServer : failedServers) {
631 this.serverManager.expireServer(curServer);
632 }
633
634
635
636
637
638 org.apache.hadoop.hbase.catalog.MetaMigrationRemovingHTD.
639 updateMetaWithNewHRI(this);
640
641
642 status.setStatus("Starting assignment manager");
643 this.assignmentManager.joinCluster();
644
645 this.balancer.setClusterStatus(getClusterStatus());
646 this.balancer.setMasterServices(this);
647
648
649 status.setStatus("Fixing up missing daughters");
650 fixupDaughters(status);
651
652 if (!masterRecovery) {
653
654
655 status.setStatus("Starting balancer and catalog janitor");
656 this.balancerChore = getAndStartBalancerChore(this);
657 this.catalogJanitorChore = new CatalogJanitor(this, this);
658 startCatalogJanitorChore();
659 registerMBean();
660 }
661
662 status.markComplete("Initialization successful");
663 LOG.info("Master has completed initialization");
664 initialized = true;
665
666
667
668
669 this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();
670
671 if (!masterRecovery) {
672 if (this.cpHost != null) {
673
674 try {
675 this.cpHost.postStartMaster();
676 } catch (IOException ioe) {
677 LOG.error("Coprocessor postStartMaster() hook failed", ioe);
678 }
679 }
680 }
681 }
682
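  /**
   * Enable the server shutdown handler and expire any servers that died
   * while it was disabled.
   */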
689 private void enableServerShutdownHandler() throws IOException {
690 if (!serverShutdownHandlerEnabled) {
691 serverShutdownHandlerEnabled = true;
692 this.serverManager.expireDeadNotExpiredServers();
693 }
694 }
695
696
697
698
699
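  /**
   * Start the catalog janitor chore in its own daemon thread.
   */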
700 protected void startCatalogJanitorChore() {
701 Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
702 }
703
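  /**
   * Ensure -ROOT- is assigned: wait out any region-in-transition state,
   * verify the current location, and if it is stale split the old server's
   * logs, expire it, and reassign the region.
   */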
711 private void assignRoot(MonitoredTask status)
712 throws InterruptedException, IOException, KeeperException {
713 int assigned = 0;
714 long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
715
716
717 status.setStatus("Assigning ROOT region");
718 boolean rit = this.assignmentManager.
719 processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
720 ServerName currentRootServer = null;
721 boolean rootRegionLocation = catalogTracker.verifyRootRegionLocation(timeout);
722 if (!rit && !rootRegionLocation) {
723 currentRootServer = this.catalogTracker.getRootLocation();
724 splitLogAndExpireIfOnline(currentRootServer);
725 this.assignmentManager.assignRoot();
726 waitForRootAssignment();
727 assigned++;
728 } else if (rit && !rootRegionLocation) {
729 waitForRootAssignment();
730 assigned++;
731 } else {
732
733 this.assignmentManager.regionOnline(HRegionInfo.ROOT_REGIONINFO,
734 this.catalogTracker.getRootLocation());
735 }
736
737
738 enableCatalogTables(Bytes.toString(HConstants.ROOT_TABLE_NAME));
739 LOG.info("-ROOT- assigned=" + assigned + ", rit=" + rit +
740 ", location=" + catalogTracker.getRootLocation());
741
742 status.setStatus("ROOT assigned.");
743 }
744
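  /**
   * Ensure .META. is assigned, mirroring {@link #assignRoot}: split the logs
   * of the previous .META. server when necessary, assign the region, and wait
   * for it to come online.
   */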
754 private void assignMeta(MonitoredTask status, ServerName previousMetaServer,
755 ServerName previousRootServer)
756 throws InterruptedException,
757 IOException, KeeperException {
758 int assigned = 0;
759 long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
760
761 status.setStatus("Assigning META region");
762 boolean rit =
763 this.assignmentManager
764 .processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
765 boolean metaRegionLocation = this.catalogTracker.verifyMetaRegionLocation(timeout);
766 if (!rit && !metaRegionLocation) {
767 ServerName currentMetaServer =
768 (previousMetaServer != null) ? previousMetaServer : this.catalogTracker
769 .getMetaLocationOrReadLocationFromRoot();
770 if (currentMetaServer != null && !currentMetaServer.equals(previousRootServer)) {
771 fileSystemManager.splitAllLogs(currentMetaServer);
772 if (this.serverManager.isServerOnline(currentMetaServer)) {
773 this.serverManager.expireServer(currentMetaServer);
774 }
775 }
776 assignmentManager.assignMeta();
777 enableSSHandWaitForMeta();
778 assigned++;
779 } else if (rit && !metaRegionLocation) {
780 enableSSHandWaitForMeta();
781 assigned++;
782 } else {
783
784 this.assignmentManager.regionOnline(HRegionInfo.FIRST_META_REGIONINFO,
785 this.catalogTracker.getMetaLocation());
786 }
787 enableCatalogTables(Bytes.toString(HConstants.META_TABLE_NAME));
788 LOG.info(".META. assigned=" + assigned + ", rit=" + rit + ", location="
789 + catalogTracker.getMetaLocation());
790 status.setStatus("META assigned.");
791 }
792
793 private void enableSSHandWaitForMeta() throws IOException,
794 InterruptedException {
795 enableServerShutdownHandler();
796 this.catalogTracker.waitForMeta();
797
798
799 this.assignmentManager
800 .waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
801 }
802
803 private void waitForRootAssignment() throws InterruptedException, IOException {
804
805
806 this.serverManager.enableSSHForRoot();
807 this.catalogTracker.waitForRoot();
808
809 this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO);
810 }
811
812 private void enableCatalogTables(String catalogTableName) {
813 if (!this.assignmentManager.getZKTable().isEnabledTable(catalogTableName)) {
814 this.assignmentManager.setEnabledTable(catalogTableName);
815 }
816 }
817
818 void fixupDaughters(final MonitoredTask status) throws IOException {
819 final Map<HRegionInfo, Result> offlineSplitParents =
820 new HashMap<HRegionInfo, Result>();
821
822 MetaReader.Visitor visitor = new MetaReader.Visitor() {
823 @Override
824 public boolean visit(Result r) throws IOException {
825 if (r == null || r.isEmpty()) return true;
826 HRegionInfo info =
827 MetaReader.parseHRegionInfoFromCatalogResult(
828 r, HConstants.REGIONINFO_QUALIFIER);
829 if (info == null) return true;
830 if (info.isOffline() && info.isSplit()) {
831 offlineSplitParents.put(info, r);
832 }
833
834 return true;
835 }
836 };
837
838 MetaReader.fullScan(this.catalogTracker, visitor);
839
840 int fixups = 0;
841 for (Map.Entry<HRegionInfo, Result> e : offlineSplitParents.entrySet()) {
842 fixups += ServerShutdownHandler.fixupDaughters(
843 e.getValue(), assignmentManager, catalogTracker);
844 }
845 if (fixups != 0) {
846 LOG.info("Scanned the catalog and fixed up " + fixups +
847 " missing daughter region(s)");
848 }
849 }
850
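  /**
   * Split the given server's logs and expire it, but only if it is still
   * listed as online; a no-op for a null or already-offline server.
   */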
856 private void splitLogAndExpireIfOnline(final ServerName sn)
857 throws IOException {
858 if (sn == null || !serverManager.isServerOnline(sn)) {
859 return;
860 }
861 LOG.info("Forcing splitLog and expire of " + sn);
862 if (this.shouldSplitMetaSeparately) {
863 fileSystemManager.splitMetaLog(sn);
864 fileSystemManager.splitLog(sn);
865 } else {
866 fileSystemManager.splitAllLogs(sn);
867 }
868 serverManager.expireServer(sn);
869 }
870
871 @Override
872 public ProtocolSignature getProtocolSignature(
873 String protocol, long version, int clientMethodsHashCode)
874 throws IOException {
875 if (HMasterInterface.class.getName().equals(protocol)) {
876 return new ProtocolSignature(HMasterInterface.VERSION, null);
877 } else if (HMasterRegionInterface.class.getName().equals(protocol)) {
878 return new ProtocolSignature(HMasterRegionInterface.VERSION, null);
879 }
880 throw new IOException("Unknown protocol: " + protocol);
881 }
882
883 public long getProtocolVersion(String protocol, long clientVersion) {
884 if (HMasterInterface.class.getName().equals(protocol)) {
885 return HMasterInterface.VERSION;
886 } else if (HMasterRegionInterface.class.getName().equals(protocol)) {
887 return HMasterRegionInterface.VERSION;
888 }
889
890 LOG.warn("Version requested for unimplemented protocol: "+protocol);
891 return -1;
892 }
893
894 @Override
895 public TableDescriptors getTableDescriptors() {
896 return this.tableDescriptors;
897 }
898
899
900 public InfoServer getInfoServer() {
901 return this.infoServer;
902 }
903
904 @Override
905 public Configuration getConfiguration() {
906 return this.conf;
907 }
908
909 @Override
910 public ServerManager getServerManager() {
911 return this.serverManager;
912 }
913
914 @Override
915 public ExecutorService getExecutorService() {
916 return this.executorService;
917 }
918
919 @Override
920 public MasterFileSystem getMasterFileSystem() {
921 return this.fileSystemManager;
922 }
923
924
925
926
927
928 public ZooKeeperWatcher getZooKeeperWatcher() {
929 return this.zooKeeper;
930 }
931
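  /**
   * Start the master service threads: executor pools for region and table
   * operations, the log and HFile cleaners, the info server (when a port is
   * configured), the health check chore, and finally the RPC server.
   */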
939   private void startServiceThreads() throws IOException {
940
941
942 this.executorService.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
943 conf.getInt("hbase.master.executor.openregion.threads", 5));
944 this.executorService.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
945 conf.getInt("hbase.master.executor.closeregion.threads", 5));
946 this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
947 conf.getInt("hbase.master.executor.serverops.threads", 3));
948 this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
949 conf.getInt("hbase.master.executor.serverops.threads", 5));
950
951
952
953
954 this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
955
956
957 String n = Thread.currentThread().getName();
958 int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
959 this.logCleaner =
960 new LogCleaner(cleanerInterval,
961 this, conf, getMasterFileSystem().getFileSystem(),
962 getMasterFileSystem().getOldLogDir());
963 Threads.setDaemonThreadRunning(logCleaner.getThread(), n + ".oldLogCleaner");
964
965
966 Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
967 this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
968 .getFileSystem(), archiveDir);
969 Threads.setDaemonThreadRunning(hfileCleaner.getThread(), n + ".archivedHFileCleaner");
970
971
972 int port = this.conf.getInt("hbase.master.info.port", 60010);
973 if (port >= 0) {
974 String a = this.conf.get("hbase.master.info.bindAddress", "0.0.0.0");
975 this.infoServer = new InfoServer(MASTER, a, port, false, this.conf);
976 this.infoServer.addServlet("status", "/master-status", MasterStatusServlet.class);
977 this.infoServer.addServlet("dump", "/dump", MasterDumpServlet.class);
978 this.infoServer.setAttribute(MASTER, this);
979 this.infoServer.start();
980 }
981
982
983 if (this.healthCheckChore != null) {
984 Threads.setDaemonThreadRunning(this.healthCheckChore.getThread(), n + ".healthChecker");
985 }
986
987
988 this.rpcServer.openServer();
989 if (LOG.isDebugEnabled()) {
990 LOG.debug("Started service threads");
991 }
992
993 }
994
995 private void stopServiceThreads() {
996 if (LOG.isDebugEnabled()) {
997 LOG.debug("Stopping service threads");
998 }
999 if (this.rpcServer != null) this.rpcServer.stop();
1000
1001     if (this.logCleaner != null) this.logCleaner.interrupt();
1002 if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
1003
1004 if (this.infoServer != null) {
1005 LOG.info("Stopping infoServer");
1006 try {
1007 this.infoServer.stop();
1008 } catch (Exception ex) {
1009         LOG.error("Failed to stop infoServer", ex);
1010 }
1011 }
1012 if (this.executorService != null) this.executorService.shutdown();
1013 if (this.healthCheckChore != null) {
1014 this.healthCheckChore.interrupt();
1015 }
1016 }
1017
1018 private static Thread getAndStartBalancerChore(final HMaster master) {
1019 String name = master.getServerName() + "-BalancerChore";
1020 int balancerPeriod =
1021 master.getConfiguration().getInt("hbase.balancer.period", 300000);
1022
1023 Chore chore = new Chore(name, balancerPeriod, master) {
1024 @Override
1025 protected void chore() {
1026 master.balance();
1027 }
1028 };
1029 return Threads.setDaemonThreadRunning(chore.getThread());
1030 }
1031
1032 private void stopChores() {
1033 if (this.balancerChore != null) {
1034 this.balancerChore.interrupt();
1035 }
1036 if (this.catalogJanitorChore != null) {
1037 this.catalogJanitorChore.interrupt();
1038 }
1039 }
1040
1041 @Override
1042 public MapWritable regionServerStartup(final int port,
1043 final long serverStartCode, final long serverCurrentTime)
1044 throws IOException {
1045
1046 InetAddress ia = HBaseServer.getRemoteIp();
1047 ServerName rs = this.serverManager.regionServerStartup(ia, port,
1048 serverStartCode, serverCurrentTime);
1049
1050 MapWritable mw = createConfigurationSubset();
1051 mw.put(new Text(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER),
1052 new Text(rs.getHostname()));
1053 return mw;
1054 }
1055
1056
1057
1058
1059
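  /**
   * @return the subset of configuration handed to a starting region server:
   *   the HBase root directory and fs.default.name.
   */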
1060 protected MapWritable createConfigurationSubset() {
1061 MapWritable mw = addConfig(new MapWritable(), HConstants.HBASE_DIR);
1062 return addConfig(mw, "fs.default.name");
1063 }
1064
1065 private MapWritable addConfig(final MapWritable mw, final String key) {
1066 mw.put(new Text(key), new Text(this.conf.get(key)));
1067 return mw;
1068 }
1069
1070 @Override
1071 public void regionServerReport(final byte [] sn, final HServerLoad hsl)
1072 throws IOException {
1073 this.serverManager.regionServerReport(ServerName.parseVersionedServerName(sn), hsl);
1074 if (hsl != null && this.metrics != null) {
1075
1076 this.metrics.incrementRequests(hsl.getTotalNumberOfRequests());
1077 }
1078 }
1079
1080 @Override
1081 public void reportRSFatalError(byte [] sn, String errorText) {
1082 String msg = "Region server " + Bytes.toString(sn) +
1083 " reported a fatal error:\n" + errorText;
1084 LOG.error(msg);
1085 rsFatals.add(msg);
1086 }
1087
1088 public boolean isMasterRunning() {
1089 return !isStopped();
1090 }
1091
1092
1093
1094
1095 private int getBalancerCutoffTime() {
1096 int balancerCutoffTime =
1097 getConfiguration().getInt("hbase.balancer.max.balancing", -1);
1098 if (balancerCutoffTime == -1) {
1099
1100 int balancerPeriod =
1101 getConfiguration().getInt("hbase.balancer.period", 300000);
1102 balancerCutoffTime = balancerPeriod / 2;
1103
1104 if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod;
1105 }
1106 return balancerCutoffTime;
1107 }
1108
1109 @Override
1110 public boolean balance() {
1111
1112 if (!this.initialized) {
1113 LOG.debug("Master has not been initialized, don't run balancer.");
1114 return false;
1115 }
1116
1117 if (!this.balanceSwitch) return false;
1118
1119 int maximumBalanceTime = getBalancerCutoffTime();
1120 long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
1121 boolean balancerRan;
1122 synchronized (this.balancer) {
1123
1124 if (this.assignmentManager.isRegionsInTransition()) {
1125 LOG.debug("Not running balancer because " +
1126 this.assignmentManager.getRegionsInTransition().size() +
1127 " region(s) in transition: " +
1128 org.apache.commons.lang.StringUtils.
1129 abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 256));
1130 return false;
1131 }
1132 if (this.serverManager.areDeadServersInProgress()) {
1133 LOG.debug("Not running balancer because processing dead regionserver(s): " +
1134 this.serverManager.getDeadServers());
1135 return false;
1136 }
1137
1138 if (this.cpHost != null) {
1139 try {
1140 if (this.cpHost.preBalance()) {
1141 LOG.debug("Coprocessor bypassing balancer request");
1142 return false;
1143 }
1144 } catch (IOException ioe) {
1145 LOG.error("Error invoking master coprocessor preBalance()", ioe);
1146 return false;
1147 }
1148 }
1149
1150 Map<String, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
1151 this.assignmentManager.getAssignmentsByTable();
1152
1153 List<RegionPlan> plans = new ArrayList<RegionPlan>();
1154 for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
1155 List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
1156 if (partialPlans != null) plans.addAll(partialPlans);
1157 }
1158 int rpCount = 0;
1159 long totalRegPlanExecTime = 0;
1160 balancerRan = plans != null;
1161 if (plans != null && !plans.isEmpty()) {
1162 for (RegionPlan plan: plans) {
1163 LOG.info("balance " + plan);
1164 long balStartTime = System.currentTimeMillis();
1165 this.assignmentManager.balance(plan);
1166 totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
1167 rpCount++;
1168 if (rpCount < plans.size() &&
1169
1170 (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
1171 LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
1172 maximumBalanceTime);
1173 break;
1174 }
1175 }
1176 }
1177 if (this.cpHost != null) {
1178 try {
1179 this.cpHost.postBalance();
1180 } catch (IOException ioe) {
1181
1182 LOG.error("Error invoking master coprocessor postBalance()", ioe);
1183 }
1184 }
1185 }
1186 return balancerRan;
1187 }
1188
1189 enum BalanceSwitchMode {
1190 SYNC,
1191 ASYNC
1192 }
1193
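  /**
   * Flip the balancer on or off, either synchronously with respect to a
   * running balance or asynchronously, and notify coprocessors of the switch.
   * @return the previous balancer switch value
   */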
1199 public boolean switchBalancer(final boolean b, BalanceSwitchMode mode) {
1200 boolean oldValue = this.balanceSwitch;
1201 boolean newValue = b;
1202 try {
1203 if (this.cpHost != null) {
1204 newValue = this.cpHost.preBalanceSwitch(newValue);
1205 }
1206 if (mode == BalanceSwitchMode.SYNC) {
1207 synchronized (this.balancer) {
1208 this.balanceSwitch = newValue;
1209 }
1210 } else {
1211 this.balanceSwitch = newValue;
1212 }
1213 LOG.info("BalanceSwitch=" + newValue);
1214 if (this.cpHost != null) {
1215 this.cpHost.postBalanceSwitch(oldValue, newValue);
1216 }
1217 } catch (IOException ioe) {
1218 LOG.warn("Error flipping balance switch", ioe);
1219 }
1220 return oldValue;
1221 }
1222
1223 @Override
1224 public boolean synchronousBalanceSwitch(final boolean b) {
1225 return switchBalancer(b, BalanceSwitchMode.SYNC);
1226 }
1227
1228 @Override
1229 public boolean balanceSwitch(final boolean b) {
1230 return switchBalancer(b, BalanceSwitchMode.ASYNC);
1231 }
1232
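  /**
   * Enable or disable the catalog janitor chore.
   */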
1239 public void setCatalogJanitorEnabled(final boolean b) {
1240 ((CatalogJanitor)this.catalogJanitorChore).setEnabled(b);
1241 }
1242
1243 @Override
1244 public void move(final byte[] encodedRegionName, final byte[] destServerName)
1245 throws UnknownRegionException {
1246 Pair<HRegionInfo, ServerName> p =
1247 this.assignmentManager.getAssignment(encodedRegionName);
1248 if (p == null)
1249 throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
1250 ServerName dest = null;
1251 if (destServerName == null || destServerName.length == 0) {
1252 LOG.info("Passed destination servername is null or empty so choosing a server at random");
1253 List<ServerName> destServers = this.serverManager.getOnlineServersList();
1254 destServers.remove(p.getSecond());
1255
1256 dest = balancer.randomAssignment(destServers);
1257 } else {
1258 dest = new ServerName(Bytes.toString(destServerName));
1259 }
1260
1261
1262 RegionPlan rp = new RegionPlan(p.getFirst(), p.getSecond(), dest);
1263
1264 try {
1265 checkInitialized();
1266 if (this.cpHost != null) {
1267 if (this.cpHost.preMove(p.getFirst(), p.getSecond(), dest)) {
1268 return;
1269 }
1270 }
1271 LOG.info("Added move plan " + rp + ", running balancer");
1272 this.assignmentManager.balance(rp);
1273 if (this.cpHost != null) {
1274 this.cpHost.postMove(p.getFirst(), p.getSecond(), dest);
1275 }
1276 } catch (IOException ioe) {
1277 UnknownRegionException ure = new UnknownRegionException(
1278 Bytes.toStringBinary(encodedRegionName));
1279 ure.initCause(ioe);
1280 throw ure;
1281 }
1282 }
1283
1284 public void createTable(HTableDescriptor hTableDescriptor,
1285 byte [][] splitKeys)
1286 throws IOException {
1287 if (!isMasterRunning()) {
1288 throw new MasterNotRunningException();
1289 }
1290
1291 HRegionInfo [] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
1292 checkInitialized();
1293 if (cpHost != null) {
1294 cpHost.preCreateTable(hTableDescriptor, newRegions);
1295 }
1296
1297 this.executorService.submit(new CreateTableHandler(this,
1298 this.fileSystemManager, this.serverManager, hTableDescriptor, conf,
1299 newRegions, catalogTracker, assignmentManager));
1300
1301 if (cpHost != null) {
1302 cpHost.postCreateTable(hTableDescriptor, newRegions);
1303 }
1304 }
1305
1306 private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1307 byte[][] splitKeys) {
1308 HRegionInfo[] hRegionInfos = null;
1309 if (splitKeys == null || splitKeys.length == 0) {
1310 hRegionInfos = new HRegionInfo[]{
1311 new HRegionInfo(hTableDescriptor.getName(), null, null)};
1312 } else {
1313 int numRegions = splitKeys.length + 1;
1314 hRegionInfos = new HRegionInfo[numRegions];
1315 byte[] startKey = null;
1316 byte[] endKey = null;
1317 for (int i = 0; i < numRegions; i++) {
1318 endKey = (i == splitKeys.length) ? null : splitKeys[i];
1319 hRegionInfos[i] =
1320 new HRegionInfo(hTableDescriptor.getName(), startKey, endKey);
1321 startKey = endKey;
1322 }
1323 }
1324 return hRegionInfos;
1325 }
1326
1327 private static boolean isCatalogTable(final byte [] tableName) {
1328 return Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME) ||
1329 Bytes.equals(tableName, HConstants.META_TABLE_NAME);
1330 }
1331
1332 @Override
1333 public void deleteTable(final byte [] tableName) throws IOException {
1334 checkInitialized();
1335 if (cpHost != null) {
1336 cpHost.preDeleteTable(tableName);
1337 }
1338 this.executorService.submit(new DeleteTableHandler(tableName, this, this));
1339 if (cpHost != null) {
1340 cpHost.postDeleteTable(tableName);
1341 }
1342 }
1343
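  /**
   * @return a pair describing how far the regions of the given table have
   *   progressed through a reopen for a schema change, as tracked by the
   *   assignment manager.
   */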
1352 public Pair<Integer, Integer> getAlterStatus(byte[] tableName)
1353 throws IOException {
1354 return this.assignmentManager.getReopenStatus(tableName);
1355 }
1356
1357 public void addColumn(byte [] tableName, HColumnDescriptor column)
1358 throws IOException {
1359 checkInitialized();
1360 if (cpHost != null) {
1361 if (cpHost.preAddColumn(tableName, column)) {
1362 return;
1363 }
1364 }
1365 new TableAddFamilyHandler(tableName, column, this, this).process();
1366 if (cpHost != null) {
1367 cpHost.postAddColumn(tableName, column);
1368 }
1369 }
1370
1371 public void modifyColumn(byte [] tableName, HColumnDescriptor descriptor)
1372 throws IOException {
1373 checkInitialized();
1374 if (cpHost != null) {
1375 if (cpHost.preModifyColumn(tableName, descriptor)) {
1376 return;
1377 }
1378 }
1379 new TableModifyFamilyHandler(tableName, descriptor, this, this).process();
1380 if (cpHost != null) {
1381 cpHost.postModifyColumn(tableName, descriptor);
1382 }
1383 }
1384
1385 public void deleteColumn(final byte [] tableName, final byte [] c)
1386 throws IOException {
1387 checkInitialized();
1388 if (cpHost != null) {
1389 if (cpHost.preDeleteColumn(tableName, c)) {
1390 return;
1391 }
1392 }
1393 new TableDeleteFamilyHandler(tableName, c, this, this).process();
1394 if (cpHost != null) {
1395 cpHost.postDeleteColumn(tableName, c);
1396 }
1397 }
1398
1399 public void enableTable(final byte [] tableName) throws IOException {
1400 checkInitialized();
1401 if (cpHost != null) {
1402 cpHost.preEnableTable(tableName);
1403 }
1404 this.executorService.submit(new EnableTableHandler(this, tableName,
1405 catalogTracker, assignmentManager, false));
1406
1407 if (cpHost != null) {
1408 cpHost.postEnableTable(tableName);
1409 }
1410 }
1411
1412 public void disableTable(final byte [] tableName) throws IOException {
1413 checkInitialized();
1414 if (cpHost != null) {
1415 cpHost.preDisableTable(tableName);
1416 }
1417 this.executorService.submit(new DisableTableHandler(this, tableName,
1418 catalogTracker, assignmentManager, false));
1419
1420 if (cpHost != null) {
1421 cpHost.postDisableTable(tableName);
1422 }
1423 }
1424
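  /**
   * Scan .META. for the region of the given table that contains the given
   * row, along with the server it is deployed on.
   * @return the (region, server) pair, or null if no matching region is found
   */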
1431 Pair<HRegionInfo, ServerName> getTableRegionForRow(
1432 final byte [] tableName, final byte [] rowKey)
1433 throws IOException {
1434 final AtomicReference<Pair<HRegionInfo, ServerName>> result =
1435 new AtomicReference<Pair<HRegionInfo, ServerName>>(null);
1436
1437 MetaScannerVisitor visitor =
1438 new MetaScannerVisitorBase() {
1439 @Override
1440 public boolean processRow(Result data) throws IOException {
1441 if (data == null || data.size() <= 0) {
1442 return true;
1443 }
1444 Pair<HRegionInfo, ServerName> pair = MetaReader.parseCatalogResult(data);
1445 if (pair == null) {
1446 return false;
1447 }
1448 if (!Bytes.equals(pair.getFirst().getTableName(), tableName)) {
1449 return false;
1450 }
1451 result.set(pair);
1452 return true;
1453 }
1454 };
1455
1456 MetaScanner.metaScan(conf, visitor, tableName, rowKey, 1);
1457 return result.get();
1458 }
1459
1460 @Override
1461 public void modifyTable(final byte[] tableName, HTableDescriptor htd)
1462 throws IOException {
1463 checkInitialized();
1464 if (cpHost != null) {
1465 cpHost.preModifyTable(tableName, htd);
1466 }
1467 TableEventHandler tblHandler = new ModifyTableHandler(tableName, htd, this, this);
1468 this.executorService.submit(tblHandler);
1469
1470 tblHandler.waitForEventBeingHandled();
1471 if (cpHost != null) {
1472 cpHost.postModifyTable(tableName, htd);
1473 }
1474 }
1475
1476 @Override
1477 public void checkTableModifiable(final byte [] tableName)
1478 throws IOException {
1479 String tableNameStr = Bytes.toString(tableName);
1480 if (isCatalogTable(tableName)) {
1481 throw new IOException("Can't modify catalog tables");
1482 }
1483 if (!MetaReader.tableExists(getCatalogTracker(), tableNameStr)) {
1484 throw new TableNotFoundException(tableNameStr);
1485 }
1486 if (!getAssignmentManager().getZKTable().
1487 isDisabledTable(Bytes.toString(tableName))) {
1488 throw new TableNotDisabledException(tableName);
1489 }
1490 }
1491
1492 public void clearFromTransition(HRegionInfo hri) {
1493 if (this.assignmentManager.isRegionInTransition(hri) != null) {
1494 this.assignmentManager.regionOffline(hri);
1495 }
1496 }
1497
1498
1499
1500
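  /**
   * @return the current cluster status: online and dead servers, backup
   *   masters, regions in transition, and loaded coprocessors.
   */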
1501 public ClusterStatus getClusterStatus() {
1502
1503 List<String> backupMasterStrings;
1504 try {
1505 backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
1506 this.zooKeeper.backupMasterAddressesZNode);
1507 } catch (KeeperException e) {
1508 LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
1509 backupMasterStrings = new ArrayList<String>(0);
1510 }
1511 List<ServerName> backupMasters = new ArrayList<ServerName>(
1512 backupMasterStrings.size());
1513 for (String s: backupMasterStrings) {
1514 try {
1515 byte[] bytes = ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(this.zooKeeper.backupMasterAddressesZNode, s));
1516 if (bytes != null) {
1517 backupMasters.add(ServerName.parseVersionedServerName(bytes));
1518 }
1519 } catch (KeeperException e) {
1520 LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
1521 "backup servers"), e);
1522 }
1523 }
1524 Collections.sort(backupMasters, new Comparator<ServerName>() {
1525 public int compare(ServerName s1, ServerName s2) {
1526 return s1.getServerName().compareTo(s2.getServerName());
1527 }});
1528
1529 return new ClusterStatus(VersionInfo.getVersion(),
1530 this.fileSystemManager.getClusterId(),
1531 this.serverManager.getOnlineServers(),
1532 this.serverManager.getDeadServers(),
1533 this.serverName,
1534 backupMasters,
1535 this.assignmentManager.getRegionsInTransition(),
1536 this.getCoprocessors());
1537 }
1538
1539 public String getClusterId() {
1540 return fileSystemManager.getClusterId();
1541 }
1542
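  /**
   * @return a string listing the coprocessors loaded in this JVM.
   */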
1550 public static String getLoadedCoprocessors() {
1551 return CoprocessorHost.getLoadedCoprocessors().toString();
1552 }
1553
1554
1555
1556
1557 public long getMasterStartTime() {
1558 return masterStartTime;
1559 }
1560
1561
1562
1563
1564 public long getMasterActiveTime() {
1565 return masterActiveTime;
1566 }
1567
1568
1569
1570
1571 public String[] getCoprocessors() {
1572 Set<String> masterCoprocessors =
1573 getCoprocessorHost().getCoprocessors();
1574 return masterCoprocessors.toArray(new String[0]);
1575 }
1576
1577 @Override
1578 public void abort(final String msg, final Throwable t) {
1579 if (cpHost != null) {
1580
1581 LOG.fatal("Master server abort: loaded coprocessors are: " +
1582 getLoadedCoprocessors());
1583 }
1584
1585 if (abortNow(msg, t)) {
1586 if (t != null) LOG.fatal(msg, t);
1587 else LOG.fatal(msg);
1588 this.abort = true;
1589 stop("Aborting");
1590 }
1591 }
1592
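  /**
   * Attempt to recover from an expired ZooKeeper session: reconnect, re-run
   * the active-master election, and redo master initialization in a bounded
   * background task.
   * @return true if recovery completed within the configured timeout
   */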
1610 private boolean tryRecoveringExpiredZKSession() throws InterruptedException,
1611 IOException, KeeperException, ExecutionException {
1612
1613 this.zooKeeper.reconnectAfterExpiration();
1614
1615 Callable<Boolean> callable = new Callable<Boolean> () {
1616 public Boolean call() throws InterruptedException,
1617 IOException, KeeperException {
1618 MonitoredTask status =
1619 TaskMonitor.get().createStatus("Recovering expired ZK session");
1620 try {
1621 if (!becomeActiveMaster(status)) {
1622 return Boolean.FALSE;
1623 }
1624 serverManager.disableSSHForRoot();
1625 serverShutdownHandlerEnabled = false;
1626 initialized = false;
1627 finishInitialization(status, true);
1628 return Boolean.TRUE;
1629 } finally {
1630 status.cleanup();
1631 }
1632 }
1633 };
1634
1635 long timeout =
1636 conf.getLong("hbase.master.zksession.recover.timeout", 300000);
1637 java.util.concurrent.ExecutorService executor =
1638 Executors.newSingleThreadExecutor();
1639 Future<Boolean> result = executor.submit(callable);
1640 executor.shutdown();
1641 if (executor.awaitTermination(timeout, TimeUnit.MILLISECONDS)
1642 && result.isDone()) {
1643 Boolean recovered = result.get();
1644 if (recovered != null) {
1645 return recovered.booleanValue();
1646 }
1647 }
1648 executor.shutdownNow();
1649 return false;
1650 }
1651
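  /**
   * Decide whether an abort should really take this master down; the active
   * master first tries to recover from a ZooKeeper session expiry.
   * @return true if the abort should proceed
   */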
1660 private boolean abortNow(final String msg, final Throwable t) {
1661 if (!this.isActiveMaster) {
1662 return true;
1663 }
1664 if (t != null && t instanceof KeeperException.SessionExpiredException) {
1665 try {
1666 LOG.info("Primary Master trying to recover from ZooKeeper session " +
1667 "expiry.");
1668 return !tryRecoveringExpiredZKSession();
1669 } catch (Throwable newT) {
1670 LOG.error("Primary master encountered unexpected exception while " +
1671 "trying to recover from ZooKeeper session" +
1672 " expiry. Proceeding with server abort.", newT);
1673 }
1674 }
1675 return true;
1676 }
1677
1678 @Override
1679 public ZooKeeperWatcher getZooKeeper() {
1680 return zooKeeper;
1681 }
1682
1683 @Override
1684 public MasterCoprocessorHost getCoprocessorHost() {
1685 return cpHost;
1686 }
1687
1688 @Override
1689 public ServerName getServerName() {
1690 return this.serverName;
1691 }
1692
1693 @Override
1694 public CatalogTracker getCatalogTracker() {
1695 return catalogTracker;
1696 }
1697
1698 @Override
1699 public AssignmentManager getAssignmentManager() {
1700 return this.assignmentManager;
1701 }
1702
1703 public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
1704 return rsFatals;
1705 }
1706
1707 @SuppressWarnings("deprecation")
1708 @Override
1709 public void shutdown() {
1710 if (cpHost != null) {
1711 try {
1712 cpHost.preShutdown();
1713 } catch (IOException ioe) {
1714         LOG.error("Error calling master coprocessor preShutdown()", ioe);
1715 }
1716 }
1717 if (mxBean != null) {
1718 MBeanUtil.unregisterMBean(mxBean);
1719 mxBean = null;
1720 }
1721 if (this.assignmentManager != null) this.assignmentManager.shutdown();
1722 if (this.serverManager != null) this.serverManager.shutdownCluster();
1723
1724 try {
1725 if (this.clusterStatusTracker != null){
1726 this.clusterStatusTracker.setClusterDown();
1727 }
1728 } catch (KeeperException e) {
1729 if (e instanceof KeeperException.SessionExpiredException) {
1730 LOG.warn("ZK session expired. Retry a new connection...");
1731 try {
1732 this.zooKeeper.reconnectAfterExpiration();
1733 this.clusterStatusTracker.setClusterDown();
1734 } catch (Exception ex) {
1735 LOG.error("Retry setClusterDown failed", ex);
1736 }
1737 } else {
1738 LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
1739 }
1740 }
1741 }
1742
1743 @Override
1744 public void stopMaster() {
1745 if (cpHost != null) {
1746 try {
1747 cpHost.preStopMaster();
1748 } catch (IOException ioe) {
1749         LOG.error("Error calling master coprocessor preStopMaster()", ioe);
1750 }
1751 }
1752 stop("Stopped by " + Thread.currentThread().getName());
1753 }
1754
1755 @Override
1756 public void stop(final String why) {
1757 LOG.info(why);
1758 this.stopped = true;
1759
1760 stopSleeper.skipSleepCycle();
1761
1762 if (this.activeMasterManager != null) {
1763 synchronized (this.activeMasterManager.clusterHasActiveMaster) {
1764 this.activeMasterManager.clusterHasActiveMaster.notifyAll();
1765 }
1766 }
1767 }
1768
1769 @Override
1770 public boolean isStopped() {
1771 return this.stopped;
1772 }
1773
1774 public boolean isAborted() {
1775 return this.abort;
1776 }
1777
1778 void checkInitialized() throws PleaseHoldException {
1779 if (!this.initialized) {
1780 throw new PleaseHoldException("Master is initializing");
1781 }
1782 }
1783
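  /**
   * @return true if this master is currently the active (primary) master.
   */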
1792 public boolean isActiveMaster() {
1793 return isActiveMaster;
1794 }
1795
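  /**
   * @return true once the master has finished initialization and can serve
   *   requests such as table DDL and region moves.
   */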
1805 public boolean isInitialized() {
1806 return initialized;
1807 }
1808
1809
1810
1811
1812
1813
1814 public boolean isServerShutdownHandlerEnabled() {
1815 return this.serverShutdownHandlerEnabled;
1816 }
1817
1818 public boolean shouldSplitMetaSeparately() {
1819 return this.shouldSplitMetaSeparately;
1820 }
1821
1822
1823
1824
1825
1826 public boolean isInitializationStartsMetaRegoinAssignment() {
1827 return this.initializationBeforeMetaAssignment;
1828 }
1829
1830 @Override
1831 @Deprecated
1832 public void assign(final byte[] regionName, final boolean force)
1833 throws IOException {
1834 assign(regionName);
1835 }
1836
1837 @Override
1838   public void assign(final byte [] regionName) throws IOException {
1839 checkInitialized();
1840 Pair<HRegionInfo, ServerName> pair =
1841 MetaReader.getRegion(this.catalogTracker, regionName);
1842 if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
1843 if (cpHost != null) {
1844 if (cpHost.preAssign(pair.getFirst())) {
1845 return;
1846 }
1847 }
1848 assignRegion(pair.getFirst());
1849 if (cpHost != null) {
1850 cpHost.postAssign(pair.getFirst());
1851 }
1852 }
1853
1854
1855
1856 public void assignRegion(HRegionInfo hri) {
1857 assignmentManager.assign(hri, true);
1858 }
1859
1860 @Override
1861 public void unassign(final byte [] regionName, final boolean force)
1862 throws IOException {
1863 checkInitialized();
1864 Pair<HRegionInfo, ServerName> pair =
1865 MetaReader.getRegion(this.catalogTracker, regionName);
1866 if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
1867 HRegionInfo hri = pair.getFirst();
1868 if (cpHost != null) {
1869 if (cpHost.preUnassign(hri, force)) {
1870 return;
1871 }
1872 }
1873 if (force) {
1874 this.assignmentManager.regionOffline(hri);
1875 assignRegion(hri);
1876 } else {
1877 this.assignmentManager.unassign(hri, force);
1878 }
1879 if (cpHost != null) {
1880 cpHost.postUnassign(hri, force);
1881 }
1882 }
1883
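  /**
   * @return descriptors for the named tables, skipping any table whose
   *   descriptor cannot be read.
   */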
1889 public HTableDescriptor[] getHTableDescriptors(List<String> tableNames) {
1890 List<HTableDescriptor> list =
1891 new ArrayList<HTableDescriptor>(tableNames.size());
1892 for (String s: tableNames) {
1893 HTableDescriptor htd = null;
1894 try {
1895 htd = this.tableDescriptors.get(s);
1896 } catch (IOException e) {
1897 LOG.warn("Failed getting descriptor for " + s, e);
1898 }
1899 if (htd == null) continue;
1900 list.add(htd);
1901 }
1902 return list.toArray(new HTableDescriptor [] {});
1903 }
1904
1905 @Override
1906 public <T extends CoprocessorProtocol> boolean registerProtocol(
1907 Class<T> protocol, T handler) {
1908
1909
1910
1911
1912 if (protocolHandlers.containsKey(protocol)) {
1913 LOG.error("Protocol "+protocol.getName()+
1914 " already registered, rejecting request from "+
1915 handler
1916 );
1917 return false;
1918 }
1919
1920 protocolHandlers.putInstance(protocol, handler);
1921 protocolHandlerNames.put(protocol.getName(), protocol);
1922 if (LOG.isDebugEnabled()) {
1923 LOG.debug("Registered master protocol handler: protocol="+protocol.getName());
1924 }
1925 return true;
1926 }
1927
1928 @Override
1929 public ExecResult execCoprocessor(Exec call) throws IOException {
1930 Class<? extends CoprocessorProtocol> protocol = call.getProtocol();
1931 if (protocol == null) {
1932 String protocolName = call.getProtocolName();
1933 if (LOG.isDebugEnabled()) {
1934 LOG.debug("Received dynamic protocol exec call with protocolName " + protocolName);
1935 }
1936
1937 protocol = protocolHandlerNames.get(protocolName);
1938 if (protocol == null) {
1939 throw new HBaseRPC.UnknownProtocolException(protocol,
1940 "No matching handler for master protocol "+protocolName);
1941 }
1942 }
1943 if (!protocolHandlers.containsKey(protocol)) {
1944       throw new HBaseRPC.UnknownProtocolException(protocol,
1945           "No matching handler for protocol " + protocol.getName());
1946 }
1947
1948 CoprocessorProtocol handler = protocolHandlers.getInstance(protocol);
1949 Object value;
1950
1951 try {
1952 Method method = protocol.getMethod(
1953 call.getMethodName(), call.getParameterClasses());
1954 method.setAccessible(true);
1955
1956 value = method.invoke(handler, call.getParameters());
1957 } catch (InvocationTargetException e) {
1958 Throwable target = e.getTargetException();
1959 if (target instanceof IOException) {
1960 throw (IOException)target;
1961 }
1962 IOException ioe = new IOException(target.toString());
1963 ioe.setStackTrace(target.getStackTrace());
1964 throw ioe;
1965 } catch (Throwable e) {
1966 if (!(e instanceof IOException)) {
1967 LOG.error("Unexpected throwable object ", e);
1968 }
1969 IOException ioe = new IOException(e.toString());
1970 ioe.setStackTrace(e.getStackTrace());
1971 throw ioe;
1972 }
1973
1974 return new ExecResult(value);
1975 }
1976
1977
1978
1979
1980
1981 public HTableDescriptor [] getHTableDescriptors() {
1982 Map<String, HTableDescriptor> descriptors = null;
1983 try {
1984 descriptors = this.tableDescriptors.getAll();
1985 } catch (IOException e) {
1986 LOG.warn("Failed getting all descriptors", e);
1987 }
1988 return descriptors == null?
1989 null: descriptors.values().toArray(new HTableDescriptor [] {});
1990 }
1991
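  /**
   * @return the average region load across the online region servers, as
   *   computed by the assignment manager.
   */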
1998 public double getAverageLoad() {
1999 return this.assignmentManager.getAverageLoad();
2000 }
2001
2002
2003
2004
2005 @Override
2006 public void offline(final byte[] regionName) throws IOException {
2007 Pair<HRegionInfo, ServerName> pair =
2008 MetaReader.getRegion(this.catalogTracker, regionName);
2009 if (pair == null) throw new UnknownRegionException(Bytes.toStringBinary(regionName));
2010 HRegionInfo hri = pair.getFirst();
2011 this.assignmentManager.regionOffline(hri);
2012 }
2013
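  /**
   * Utility for constructing an instance of the passed HMaster class through
   * its {@link Configuration} constructor, wrapping any construction failure
   * in a RuntimeException.
   */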
2020 public static HMaster constructMaster(Class<? extends HMaster> masterClass,
2021 final Configuration conf) {
2022 try {
2023 Constructor<? extends HMaster> c =
2024 masterClass.getConstructor(Configuration.class);
2025 return c.newInstance(conf);
2026 } catch (InvocationTargetException ite) {
2027 Throwable target = ite.getTargetException() != null?
2028 ite.getTargetException(): ite;
2029 if (target.getCause() != null) target = target.getCause();
2030 throw new RuntimeException("Failed construction of Master: " +
2031 masterClass.toString(), target);
2032 } catch (Exception e) {
2033 throw new RuntimeException("Failed construction of Master: " +
2034 masterClass.toString() + ((e.getCause() != null)?
2035 e.getCause().getMessage(): ""), e);
2036 }
2037 }
2038
2039
2040
2041
2042 public static void main(String [] args) throws Exception {
2043 VersionInfo.logVersion();
2044 new HMasterCommandLine(HMaster.class).doMain(args);
2045 }
2046
2047
2048
2049
2050 @SuppressWarnings("deprecation")
2051 void registerMBean() {
2052 MXBeanImpl mxBeanInfo = MXBeanImpl.init(this);
2053 MBeanUtil.registerMBean("Master", "Master", mxBeanInfo);
2054 LOG.info("Registered HMaster MXBean");
2055 }
2056
2057
2058
2059
2060
2061 public HFileCleaner getHFileCleaner() {
2062 return this.hfileCleaner;
2063 }
2064
2065 private boolean isHealthCheckerConfigured() {
2066 String healthScriptLocation = this.conf.get(HConstants.HEALTH_SCRIPT_LOC);
2067 return org.apache.commons.lang.StringUtils.isNotBlank(healthScriptLocation);
2068 }
2069
2070
2071
2072
2073
2074 public SnapshotManager getSnapshotManagerForTesting() {
2075 return this.snapshotManager;
2076 }
2077
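  /**
   * Trigger a snapshot of the table described in the request.
   * @return the maximum time in milliseconds the master expects the snapshot
   *   to take
   */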
2083 @Override
2084 public long snapshot(final HSnapshotDescription request) throws IOException {
2085 LOG.debug("Submitting snapshot request for:" +
2086 SnapshotDescriptionUtils.toString(request.getProto()));
2087 try {
2088 this.snapshotManager.checkSnapshotSupport();
2089 } catch (UnsupportedOperationException e) {
2090 throw new IOException(e);
2091 }
2092
2093
2094 SnapshotDescription snapshot = SnapshotDescriptionUtils.validate(request.getProto(),
2095 this.conf);
2096
2097 snapshotManager.takeSnapshot(snapshot);
2098
2099
2100 long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(conf, snapshot.getType(),
2101 SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
2102 return waitTime;
2103 }
2104
2105
2106
2107
2108 @Override
2109 public List<HSnapshotDescription> getCompletedSnapshots() throws IOException {
2110 List<HSnapshotDescription> availableSnapshots = new ArrayList<HSnapshotDescription>();
2111 List<SnapshotDescription> snapshots = snapshotManager.getCompletedSnapshots();
2112
2113
2114 for (SnapshotDescription snapshot: snapshots) {
2115 availableSnapshots.add(new HSnapshotDescription(snapshot));
2116 }
2117
2118 return availableSnapshots;
2119 }
2120
2121
2122
2123
2124
2125
2126 @Override
2127 public void deleteSnapshot(final HSnapshotDescription request) throws IOException {
2128 try {
2129 this.snapshotManager.checkSnapshotSupport();
2130 } catch (UnsupportedOperationException e) {
2131 throw new IOException(e);
2132 }
2133
2134 snapshotManager.deleteSnapshot(request.getProto());
2135 }
2136
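  /**
   * Check whether the snapshot described in the request has completed.
   */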
2144 @Override
2145 public boolean isSnapshotDone(final HSnapshotDescription request) throws IOException {
2146 LOG.debug("Checking to see if snapshot from request:" +
2147 SnapshotDescriptionUtils.toString(request.getProto()) + " is done");
2148 return snapshotManager.isSnapshotDone(request.getProto());
2149 }
2150
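  /**
   * Execute a restore of the specified snapshot, after verifying that
   * snapshots are supported on this cluster.
   */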
2164 @Override
2165 public void restoreSnapshot(final HSnapshotDescription request) throws IOException {
2166 try {
2167 this.snapshotManager.checkSnapshotSupport();
2168 } catch (UnsupportedOperationException e) {
2169 throw new IOException(e);
2170 }
2171
2172 snapshotManager.restoreSnapshot(request.getProto());
2173 }
2174
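  /**
   * @return true once the table targeted by the given snapshot is no longer
   *   being restored.
   */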
2185 @Override
2186 public boolean isRestoreSnapshotDone(final HSnapshotDescription request) throws IOException {
2187 return !snapshotManager.isRestoringTable(request.getProto());
2188 }
2189 }
2190