/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
19 package org.apache.hadoop.hbase.master;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.Collections;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.NavigableMap;
31 import java.util.Set;
32 import java.util.TreeMap;
33 import java.util.concurrent.ConcurrentHashMap;
34 import java.util.concurrent.ConcurrentSkipListSet;
35 import java.util.concurrent.ThreadFactory;
36 import java.util.concurrent.TimeUnit;
37 import java.util.concurrent.atomic.AtomicBoolean;
38 import java.util.concurrent.atomic.AtomicInteger;
39 import java.util.concurrent.locks.Lock;
40 import java.util.concurrent.locks.ReentrantLock;
41
42 import org.apache.commons.logging.Log;
43 import org.apache.commons.logging.LogFactory;
44 import org.apache.hadoop.classification.InterfaceAudience;
45 import org.apache.hadoop.conf.Configuration;
46 import org.apache.hadoop.hbase.Chore;
47 import org.apache.hadoop.hbase.HConstants;
48 import org.apache.hadoop.hbase.HRegionInfo;
49 import org.apache.hadoop.hbase.RegionTransition;
50 import org.apache.hadoop.hbase.Server;
51 import org.apache.hadoop.hbase.ServerName;
52 import org.apache.hadoop.hbase.Stoppable;
53 import org.apache.hadoop.hbase.catalog.CatalogTracker;
54 import org.apache.hadoop.hbase.catalog.MetaReader;
55 import org.apache.hadoop.hbase.client.Result;
56 import org.apache.hadoop.hbase.exceptions.DeserializationException;
57 import org.apache.hadoop.hbase.exceptions.NotServingRegionException;
58 import org.apache.hadoop.hbase.exceptions.RegionAlreadyInTransitionException;
59 import org.apache.hadoop.hbase.exceptions.RegionServerStoppedException;
60 import org.apache.hadoop.hbase.exceptions.ServerNotRunningYetException;
61 import org.apache.hadoop.hbase.exceptions.TableNotFoundException;
62 import org.apache.hadoop.hbase.executor.EventHandler;
63 import org.apache.hadoop.hbase.executor.EventType;
64 import org.apache.hadoop.hbase.executor.ExecutorService;
65 import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
66 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
67 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
68 import org.apache.hadoop.hbase.master.handler.MergedRegionHandler;
69 import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
70 import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
71 import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
72 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
73 import org.apache.hadoop.hbase.util.KeyLocker;
74 import org.apache.hadoop.hbase.util.Pair;
75 import org.apache.hadoop.hbase.util.Threads;
76 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
77 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
78 import org.apache.hadoop.hbase.zookeeper.ZKTable;
79 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
80 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
81 import org.apache.hadoop.ipc.RemoteException;
82 import org.apache.zookeeper.AsyncCallback;
83 import org.apache.zookeeper.KeeperException;
84 import org.apache.zookeeper.KeeperException.NoNodeException;
85 import org.apache.zookeeper.KeeperException.NodeExistsException;
86 import org.apache.zookeeper.data.Stat;
87
88 import com.google.common.base.Preconditions;
89 import com.google.common.collect.LinkedHashMultimap;
90

/**
 * Manages and performs region assignment.
 * <p>
 * Monitors ZooKeeper for events related to regions in transition, and handles
 * existing regions in transition during master failover.
 */
98 @InterfaceAudience.Private
99 public class AssignmentManager extends ZooKeeperListener {
100 private static final Log LOG = LogFactory.getLog(AssignmentManager.class);
101
102 public static final ServerName HBCK_CODE_SERVERNAME = new ServerName(HConstants.HBCK_CODE_NAME,
103 -1, -1L);
104
105 protected final Server server;
106
107 private ServerManager serverManager;
108
109 private CatalogTracker catalogTracker;
110
111 protected final TimeoutMonitor timeoutMonitor;
112
113 private final TimerUpdater timerUpdater;
114
115 private LoadBalancer balancer;
116
117 private final TableLockManager tableLockManager;
118
119 final private KeyLocker<String> locker = new KeyLocker<String>();
120
  /**
   * Map of regions to reopen after a table schema change; key is the encoded
   * region name, value is the region info.
   */
125 private final Map <String, HRegionInfo> regionsToReopen;
126
  /**
   * Maximum number of attempts made when assigning or unassigning a region
   * before giving up; see the assign and unassign methods below.
   */
131 private final int maximumAttempts;
132
  /**
   * Plans for region movement; key is the encoded region name. Access is
   * synchronized on the map itself.
   */
137 final NavigableMap<String, RegionPlan> regionPlans =
138 new TreeMap<String, RegionPlan>();
139
140 private final ZKTable zkTable;
141
  /**
   * Servers whose regions-in-transition timers need updating; only populated
   * when timeout monitoring is enabled.
   */
146 private final ConcurrentSkipListSet<ServerName> serversInUpdatingTimer;
147
148 private final ExecutorService executorService;
149
150
151 private java.util.concurrent.ExecutorService threadPoolExecutorService;
152
153
154 private final java.util.concurrent.ExecutorService zkEventWorkers;
155
156 private List<EventType> ignoreStatesRSOffline = Arrays.asList(
157 EventType.RS_ZK_REGION_FAILED_OPEN, EventType.RS_ZK_REGION_CLOSED);
158
159
160 MetricsMaster metricsMaster;
161
162 private final RegionStates regionStates;
163
  /**
   * Thresholds (region count and server count) above which startup/failover
   * assignment is handled in bulk rather than one region at a time.
   */
168 private final int bulkAssignThresholdRegions;
169 private final int bulkAssignThresholdServers;
170
  /**
   * Whether a bulk assign waits until all regions have been assigned.
   */
174 private final boolean bulkAssignWaitTillAllAssigned;
175
  /**
   * Set when the failover cleanup (recovery of region states and regions in
   * transition) has completed. Processing of dead servers queued during
   * startup is deferred until this flag is set, so that re-assignment runs
   * against fully rebuilt region states.
   */
184 protected final AtomicBoolean failoverCleanupDone = new AtomicBoolean(false);
185
  // Whether timeout-based assignment management (TimeoutMonitor/TimerUpdater) is enabled.
187 private final boolean tomActivated;
188
  /**
   * Tracks the number of consecutive failed open attempts per region. Once the
   * count reaches {@link #maximumAttempts}, the region is marked FAILED_OPEN
   * and is no longer retried automatically.
   */
196 private final ConcurrentHashMap<String, AtomicInteger>
197 failedOpenTracker = new ConcurrentHashMap<String, AtomicInteger>();
198
  /**
   * Constructs a new assignment manager.
   *
   * @param server instance of the server (master) this manager runs inside
   * @param serverManager used to talk to and track region servers
   * @param catalogTracker tracker of the catalog (META) region
   * @param balancer load balancer used to compute assignment plans
   * @param service executor service used to run event handlers
   * @param metricsMaster master metrics
   * @param tableLockManager manager of table read/write locks
   * @throws KeeperException
   * @throws IOException
   */
209 public AssignmentManager(Server server, ServerManager serverManager,
210 CatalogTracker catalogTracker, final LoadBalancer balancer,
211 final ExecutorService service, MetricsMaster metricsMaster,
212 final TableLockManager tableLockManager) throws KeeperException, IOException {
213 super(server.getZooKeeper());
214 this.server = server;
215 this.serverManager = serverManager;
216 this.catalogTracker = catalogTracker;
217 this.executorService = service;
218 this.regionsToReopen = Collections.synchronizedMap
219 (new HashMap<String, HRegionInfo> ());
220 Configuration conf = server.getConfiguration();
221 this.tomActivated = conf.getBoolean("hbase.assignment.timeout.management", false);
222 if (tomActivated){
223 this.serversInUpdatingTimer = new ConcurrentSkipListSet<ServerName>();
224 this.timeoutMonitor = new TimeoutMonitor(
225 conf.getInt("hbase.master.assignment.timeoutmonitor.period", 30000),
226 server, serverManager,
227 conf.getInt("hbase.master.assignment.timeoutmonitor.timeout", 600000));
228 this.timerUpdater = new TimerUpdater(conf.getInt(
229 "hbase.master.assignment.timerupdater.period", 10000), server);
230 Threads.setDaemonThreadRunning(timerUpdater.getThread(),
231 server.getServerName() + ".timerUpdater");
232 } else {
233 this.serversInUpdatingTimer = null;
234 this.timeoutMonitor = null;
235 this.timerUpdater = null;
236 }
237 this.zkTable = new ZKTable(this.watcher);
238 this.maximumAttempts =
239 this.server.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10);
240 this.balancer = balancer;
241 int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
242 this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
243 maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("hbase-am"));
244 this.metricsMaster = metricsMaster;
245 this.regionStates = new RegionStates(server, serverManager);
246
247 this.bulkAssignWaitTillAllAssigned =
248 conf.getBoolean("hbase.bulk.assignment.waittillallassigned", false);
249 this.bulkAssignThresholdRegions = conf.getInt("hbase.bulk.assignment.threshold.regions", 7);
250 this.bulkAssignThresholdServers = conf.getInt("hbase.bulk.assignment.threshold.servers", 3);
251
252 int workers = conf.getInt("hbase.assignment.zkevent.workers", 20);
253 ThreadFactory threadFactory = Threads.newDaemonThreadFactory("hbase-am-zkevent-worker");
254 zkEventWorkers = Threads.getBoundedCachedThreadPool(workers, 60L,
255 TimeUnit.SECONDS, threadFactory);
256 this.tableLockManager = tableLockManager;
257 }
258
259 void startTimeOutMonitor() {
260 if (tomActivated) {
261 Threads.setDaemonThreadRunning(timeoutMonitor.getThread(), server.getServerName()
262 + ".timeoutMonitor");
263 }
264 }
265
  /**
   * @return instance of ZKTable used to track table state in ZooKeeper
   */
269 public ZKTable getZKTable() {
270
271
272 return this.zkTable;
273 }
274
  /**
   * @return the in-memory region states kept by this assignment manager
   */
281 public RegionStates getRegionStates() {
282 return regionStates;
283 }
284
285 public RegionPlan getRegionReopenPlan(HRegionInfo hri) {
286 return new RegionPlan(hri, null, regionStates.getRegionServerOfRegion(hri));
287 }
288
  /**
   * Add a plan for the given region.
   * @param encodedName encoded region name
   * @param plan the plan to register
   */
294 public void addPlan(String encodedName, RegionPlan plan) {
295 synchronized (regionPlans) {
296 regionPlans.put(encodedName, plan);
297 }
298 }
299
  /**
   * Add a map of region plans.
   */
303 public void addPlans(Map<String, RegionPlan> plans) {
304 synchronized (regionPlans) {
305 regionPlans.putAll(plans);
306 }
307 }
308
  /**
   * Set the list of regions that will be reopened because of a table schema
   * update.
   *
   * @param regions list of regions that should be tracked for reopen
   */
316 public void setRegionsToReopen(List <HRegionInfo> regions) {
317 for(HRegionInfo hri : regions) {
318 regionsToReopen.put(hri.getEncodedName(), hri);
319 }
320 }
321
  /**
   * Used to check how many of a table's regions still need to be reopened
   * after a schema change.
   *
   * @param tableName table being reopened
   * @return pair of (regions still pending reopen, total regions of the table)
   * @throws IOException
   */
329 public Pair<Integer, Integer> getReopenStatus(byte[] tableName)
330 throws IOException {
331 List <HRegionInfo> hris =
332 MetaReader.getTableRegions(this.server.getCatalogTracker(), tableName, true);
333 Integer pending = 0;
334 for (HRegionInfo hri : hris) {
335 String name = hri.getEncodedName();
336
337 if (regionsToReopen.containsKey(name)
338 || regionStates.isRegionInTransition(name)) {
339 pending++;
340 }
341 }
342 return new Pair<Integer, Integer>(pending, hris.size());
343 }
344
  /**
   * @return true if the failover cleanup performed on master startup has
   *         completed
   */
350 public boolean isFailoverCleanupDone() {
351 return failoverCleanupDone.get();
352 }
353
  /**
   * Mark the failover cleanup as done and process any dead servers that were
   * queued while the cleanup was in progress.
   */
358 void failoverCleanupDone() {
359 failoverCleanupDone.set(true);
360 serverManager.processQueuedDeadServers();
361 }
362
  /**
   * Called on master startup. Rebuilds the in-memory region assignments from
   * META, then either assigns all user regions (clean startup) or recovers
   * dead servers and regions in transition (failover), and finally finishes
   * any interrupted disable/enable table operations.
   */
370 void joinCluster() throws IOException,
371 KeeperException, InterruptedException {
372
373
374
375
376
377
378
379
380
381
382 Map<ServerName, List<HRegionInfo>> deadServers = rebuildUserRegions();
383
384
385
386
387 processDeadServersAndRegionsInTransition(deadServers);
388
389 recoverTableInDisablingState();
390 recoverTableInEnablingState();
391 }
392
  /**
   * Process the list of dead servers and all regions currently in transition
   * in ZooKeeper. Used by the master joining a cluster: if this looks like a
   * clean cluster startup (no dead servers, no assigned user regions and no
   * RIT znodes), all user regions are simply assigned; otherwise failover
   * processing recovers the lost regions.
   *
   * @param deadServers map of dead servers and the regions they were carrying
   * @throws KeeperException
   * @throws IOException
   * @throws InterruptedException
   */
404 void processDeadServersAndRegionsInTransition(
405 final Map<ServerName, List<HRegionInfo>> deadServers)
406 throws KeeperException, IOException, InterruptedException {
407 List<String> nodes = ZKUtil.listChildrenNoWatch(watcher,
408 watcher.assignmentZNode);
409
410 if (nodes == null) {
411 String errorMessage = "Failed to get the children from ZK";
412 server.abort(errorMessage, new IOException(errorMessage));
413 return;
414 }
415
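    // Any dead servers already queued means a previous master was active:
    // treat this startup as a failover.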
416 boolean failover = !serverManager.getDeadServers().isEmpty();
417
418 if (!failover) {
419
420
421 Map<HRegionInfo, ServerName> regions = regionStates.getRegionAssignments();
422 for (Map.Entry<HRegionInfo, ServerName> e: regions.entrySet()) {
423 if (!e.getKey().isMetaTable() && e.getValue() != null) {
424 LOG.debug("Found " + e + " out on cluster");
425 failover = true;
426 break;
427 }
428 if (nodes.contains(e.getKey().getEncodedName())) {
429 LOG.debug("Found " + e.getKey().getRegionNameAsString() + " in RITs");
430
431 failover = true;
432 break;
433 }
434 }
435 }
436
437
438 if (failover) {
439 LOG.info("Found regions out on cluster or in RIT; failover");
440
441
442 processDeadServersAndRecoverLostRegions(deadServers);
443 } else {
444
445 LOG.info("Clean cluster startup. Assigning userregions");
446 assignAllUserRegions();
447 }
448 }
449
  /**
   * If the given region is in transition, process it and block until it has
   * been assigned (is no longer in transition) or the master stops.
   *
   * @param hri region to process
   * @return true if the region was in transition when this was called
   * @throws InterruptedException
   * @throws KeeperException
   * @throws IOException
   */
461 boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
462 throws InterruptedException, KeeperException, IOException {
    boolean inTransition = processRegionInTransition(hri.getEncodedName(), hri);
    if (!inTransition) return inTransition;
465 LOG.debug("Waiting on " + HRegionInfo.prettyPrint(hri.getEncodedName()));
466 while (!this.server.isStopped() &&
467 this.regionStates.isRegionInTransition(hri.getEncodedName())) {
468
469
470 this.regionStates.waitForUpdate(100);
471 }
    return inTransition;
473 }
474
  /**
   * Process failover of the given region: read its znode under the assignment
   * node and, if present, handle the recorded transition.
   *
   * @param encodedRegionName encoded name of the region
   * @param regionInfo region info, or null to look it up from the region states
   * @return true if the region was in transition and has been processed
   * @throws KeeperException
   * @throws IOException
   */
484 boolean processRegionInTransition(final String encodedRegionName,
485 final HRegionInfo regionInfo) throws KeeperException, IOException {
486
487
488
489
490 Lock lock = locker.acquireLock(encodedRegionName);
491 try {
492 Stat stat = new Stat();
493 byte [] data = ZKAssign.getDataAndWatch(watcher, encodedRegionName, stat);
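      // If the znode no longer exists, the region is not in transition.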
494 if (data == null) return false;
495 RegionTransition rt;
496 try {
497 rt = RegionTransition.parseFrom(data);
498 } catch (DeserializationException e) {
499 LOG.warn("Failed parse znode data", e);
500 return false;
501 }
502 HRegionInfo hri = regionInfo;
503 if (hri == null) {
504 hri = regionStates.getRegionInfo(rt.getRegionName());
505 if (hri == null) return false;
506 }
507 processRegionsInTransition(rt, hri, stat.getVersion());
508 return true;
509 } finally {
510 lock.unlock();
511 }
512 }
513
  /**
   * Handle a single region in transition found during master failover, based
   * on the event type recorded in its znode.
   *
   * @param rt region transition read from ZooKeeper
   * @param regionInfo the region concerned
   * @param expectedVersion expected version of the region znode
   * @throws KeeperException
   */
521 void processRegionsInTransition(
522 final RegionTransition rt, final HRegionInfo regionInfo,
523 final int expectedVersion) throws KeeperException {
524 EventType et = rt.getEventType();
525
526 final ServerName sn = rt.getServerName();
527 String encodedRegionName = regionInfo.getEncodedName();
528 LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + et);
529
530
531 if (regionStates.isRegionInTransition(encodedRegionName)) {
532
533 return;
534 }
535 switch (et) {
536 case M_ZK_REGION_CLOSING:
537
538
539 if (!serverManager.isServerOnline(sn)) {
540
541
542 forceOffline(regionInfo, rt);
543 } else {
544
545
546 regionStates.updateRegionState(rt, RegionState.State.CLOSING);
547 final RegionState rs = regionStates.getRegionState(regionInfo);
548 this.executorService.submit(
549 new EventHandler(server, EventType.M_MASTER_RECOVERY) {
550 @Override
551 public void process() throws IOException {
552 ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
553 try {
554 unassign(regionInfo, rs, expectedVersion, sn, true, null);
555 } finally {
556 lock.unlock();
557 }
558 }
559 });
560 }
561 break;
562
563 case RS_ZK_REGION_CLOSED:
564 case RS_ZK_REGION_FAILED_OPEN:
565
566 addToRITandCallClose(regionInfo, RegionState.State.CLOSED, rt);
567 break;
568
569 case M_ZK_REGION_OFFLINE:
570
571
572 if (!serverManager.isServerOnline(sn)) {
573
574 addToRITandCallClose(regionInfo, RegionState.State.OFFLINE, rt);
575 } else {
576
577 regionStates.updateRegionState(rt, RegionState.State.PENDING_OPEN);
578 final RegionState rs = regionStates.getRegionState(regionInfo);
579 this.executorService.submit(
580 new EventHandler(server, EventType.M_MASTER_RECOVERY) {
581 @Override
582 public void process() throws IOException {
583 ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
584 try {
585 assign(rs, false, false);
586 } finally {
587 lock.unlock();
588 }
589 }
590 });
591 }
592 break;
593
594 case RS_ZK_REGION_OPENING:
595 if (!serverManager.isServerOnline(sn)) {
596 forceOffline(regionInfo, rt);
597 } else {
598 regionStates.updateRegionState(rt, RegionState.State.OPENING);
599 }
600 break;
601
602 case RS_ZK_REGION_OPENED:
603 if (!serverManager.isServerOnline(sn)) {
604 forceOffline(regionInfo, rt);
605 } else {
606
607
608
609 regionStates.updateRegionState(rt, RegionState.State.OPEN);
610 new OpenedRegionHandler(server, this, regionInfo, sn, expectedVersion).process();
611 }
612 break;
613 case RS_ZK_REGION_SPLITTING:
614 if (!serverManager.isServerOnline(sn)) {
615
616
617
618 LOG.warn("Processed region " + regionInfo.getEncodedName() + " in state : " + et +
619 " on a dead regionserver: " + sn + " doing nothing");
620 } else {
621 LOG.info("Processed region " + regionInfo.getEncodedName() + " in state : " +
622 et + " nothing to do.");
623
624
625 }
626 break;
627 case RS_ZK_REGION_SPLIT:
628 if (!serverManager.isServerOnline(sn)) {
629 forceOffline(regionInfo, rt);
630 } else {
631 LOG.info("Processed region " + regionInfo.getEncodedName() + " in state : " +
632 et + " nothing to do.");
633
634
635 }
636 break;
637 case RS_ZK_REGION_MERGING:
638
639 LOG.info("Processed region " + regionInfo.getEncodedName()
640 + " in state : " + et + " nothing to do.");
641 break;
642 case RS_ZK_REGION_MERGE:
643 if (!serverManager.isServerOnline(sn)) {
644
645 LOG.warn("Processed region " + regionInfo.getEncodedName()
646 + " in state : " + et + " on a dead regionserver: " + sn
647 + " doing nothing");
648 } else {
649 LOG.info("Processed region " + regionInfo.getEncodedName() + " in state : " +
650 et + " nothing to do.");
651
652
653 }
654 break;
655 default:
656 throw new IllegalStateException("Received region in state :" + et + " is not valid.");
657 }
658 }
659
  /**
   * Force the region OFFLINE in ZooKeeper and run the closed-region handler.
   * Used when the server that was carrying the region is dead.
   *
   * @param hri region to force offline
   * @param oldRt the last transition seen for this region
   * @throws KeeperException
   */
669 private void forceOffline(final HRegionInfo hri, final RegionTransition oldRt)
670 throws KeeperException {
671
672
673 LOG.debug("RIT " + hri.getEncodedName() + " in state=" + oldRt.getEventType() +
674 " was on deadserver; forcing offline");
675 ZKAssign.createOrForceNodeOffline(this.watcher, hri, oldRt.getServerName());
676 addToRITandCallClose(hri, RegionState.State.OFFLINE, oldRt);
677 }
678
  /**
   * Record the region in the given state in regions-in-transition and run the
   * closed-region handler so the region is re-assigned (or left offline if its
   * table is disabled).
   */
686 private void addToRITandCallClose(final HRegionInfo hri,
687 final RegionState.State state, final RegionTransition oldData) {
688 regionStates.updateRegionState(oldData, state);
689 new ClosedRegionHandler(this.server, this, hri).process();
690 }
691
  /**
   * When a region is closed, it should be removed from the regionsToReopen map.
   * @param hri HRegionInfo of the closed region
   */
696 public void removeClosedRegion(HRegionInfo hri) {
697 if (regionsToReopen.remove(hri.getEncodedName()) != null) {
698 LOG.debug("Removed region from reopening regions because it was closed");
699 }
700 }
701
  /**
   * Handles the various states an unassigned znode can be in.
   * <p>
   * Called when a state change is suspected for an unassigned node. Deals with
   * skipped transitions (for example getting a CLOSED event without ever
   * seeing CLOSING).
   *
   * @param rt region transition read from the znode
   * @param expectedVersion znode version at which the transition was read
   */
712 private void handleRegion(final RegionTransition rt, int expectedVersion) {
713 if (rt == null) {
714 LOG.warn("Unexpected NULL input for RegionTransition rt");
715 return;
716 }
717 final ServerName sn = rt.getServerName();
718
719 if (sn.equals(HBCK_CODE_SERVERNAME)) {
720 handleHBCK(rt);
721 return;
722 }
723 final long createTime = rt.getCreateTime();
724 final byte[] regionName = rt.getRegionName();
725 String encodedName = HRegionInfo.encodeRegionName(regionName);
726 String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
727
728 if (!serverManager.isServerOnline(sn)
729 && !ignoreStatesRSOffline.contains(rt.getEventType())) {
730 LOG.warn("Attempted to handle region transition for server but " +
731 "server is not online: " + prettyPrintedRegionName);
732 return;
733 }
734
735 RegionState regionState =
736 regionStates.getRegionTransitionState(encodedName);
737 long startTime = System.currentTimeMillis();
738 if (LOG.isDebugEnabled()) {
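      // Flag transitions whose znode was created more than 15 seconds ago as late.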
739 boolean lateEvent = createTime < (startTime - 15000);
740 LOG.debug("Handling transition=" + rt.getEventType() +
741 ", server=" + sn + ", region=" +
742 (prettyPrintedRegionName == null ? "null" : prettyPrintedRegionName) +
743 (lateEvent ? ", which is more than 15 seconds late" : "") +
744 ", current state from region state map =" + regionState);
745 }
746
747
748 if (rt.getEventType() == EventType.M_ZK_REGION_OFFLINE) {
749 return;
750 }
751
752
753 Lock lock = locker.acquireLock(encodedName);
754 try {
755 RegionState latestState =
756 regionStates.getRegionTransitionState(encodedName);
757 if ((regionState == null && latestState != null)
758 || (regionState != null && latestState == null)
759 || (regionState != null && latestState != null
760 && latestState.getState() != regionState.getState())) {
761 LOG.warn("Region state changed from " + regionState + " to "
762 + latestState + ", while acquiring lock");
763 }
764 long waitedTime = System.currentTimeMillis() - startTime;
765 if (waitedTime > 5000) {
766 LOG.warn("Took " + waitedTime + "ms to acquire the lock");
767 }
768 regionState = latestState;
769 switch (rt.getEventType()) {
770 case RS_ZK_REGION_SPLITTING:
771 if (!isInStateForSplitting(regionState)) break;
772 regionStates.updateRegionState(rt, RegionState.State.SPLITTING);
773 break;
774
775 case RS_ZK_REGION_SPLIT:
776
777 if (!isInStateForSplitting(regionState)) break;
778
779 if (regionState == null) {
780 regionState = regionStates.updateRegionState(rt,
781 RegionState.State.SPLITTING);
782
783 String message = "Received SPLIT for region " + prettyPrintedRegionName +
784 " from server " + sn;
785
786 if (regionState == null) {
787 LOG.warn(message + " but it doesn't exist anymore," +
788 " probably already processed its split");
789 break;
790 }
791 LOG.info(message +
792 " but region was not first in SPLITTING state; continuing");
793 }
794
795 byte [] payload = rt.getPayload();
796 List<HRegionInfo> daughters;
797 try {
798 daughters = HRegionInfo.parseDelimitedFrom(payload, 0, payload.length);
799 } catch (IOException e) {
800 LOG.error("Dropped split! Failed reading split payload for " +
801 prettyPrintedRegionName);
802 break;
803 }
804 assert daughters.size() == 2;
805
806 if (!this.serverManager.isServerOnline(sn)) {
807 LOG.error("Dropped split! ServerName=" + sn + " unknown.");
808 break;
809 }
810
811 this.executorService.submit(new SplitRegionHandler(server, this,
812 regionState.getRegion(), sn, daughters));
813 break;
814
815 case RS_ZK_REGION_MERGING:
816
817
818 break;
819
820 case RS_ZK_REGION_MERGE:
821
822 if (!this.serverManager.isServerOnline(sn)) {
823 LOG.error("Dropped merge! ServerName=" + sn + " unknown.");
824 break;
825 }
826
827 byte[] payloadOfMerge = rt.getPayload();
828 List<HRegionInfo> mergeRegions;
829 try {
830 mergeRegions = HRegionInfo.parseDelimitedFrom(payloadOfMerge, 0,
831 payloadOfMerge.length);
832 } catch (IOException e) {
833 LOG.error("Dropped merge! Failed reading merge payload for " +
834 prettyPrintedRegionName);
835 break;
836 }
837 assert mergeRegions.size() == 3;
838
839 this.executorService.submit(new MergedRegionHandler(server, this, sn,
840 mergeRegions));
841 break;
842
843 case M_ZK_REGION_CLOSING:
844
845
846 if (regionState != null
847 && !regionState.isPendingCloseOrClosingOnServer(sn)) {
848 LOG.warn("Received CLOSING for region " + prettyPrintedRegionName
849 + " from server " + sn + " but region was in the state " + regionState
850 + " and not in expected PENDING_CLOSE or CLOSING states,"
851 + " or not on the expected server");
852 return;
853 }
854
855 regionStates.updateRegionState(rt, RegionState.State.CLOSING);
856 break;
857
858 case RS_ZK_REGION_CLOSED:
859
860 if (regionState != null
861 && !regionState.isPendingCloseOrClosingOnServer(sn)) {
862 LOG.warn("Received CLOSED for region " + prettyPrintedRegionName
863 + " from server " + sn + " but region was in the state " + regionState
864 + " and not in expected PENDING_CLOSE or CLOSING states,"
865 + " or not on the expected server");
866 return;
867 }
          // Handle CLOSED by assigning elsewhere or stopping if a disable.
          // Update the region state first; what follows will fail if the
          // region is not in the expected CLOSED state.
871 regionState = regionStates.updateRegionState(rt, RegionState.State.CLOSED);
872 if (regionState != null) {
873 removeClosedRegion(regionState.getRegion());
874 this.executorService.submit(new ClosedRegionHandler(server,
875 this, regionState.getRegion()));
876 }
877 break;
878
879 case RS_ZK_REGION_FAILED_OPEN:
880 if (regionState != null
881 && !regionState.isPendingOpenOrOpeningOnServer(sn)) {
882 LOG.warn("Received FAILED_OPEN for region " + prettyPrintedRegionName
883 + " from server " + sn + " but region was in the state " + regionState
884 + " and not in expected PENDING_OPEN or OPENING states,"
885 + " or not on the expected server");
886 return;
887 }
888
889 regionState = regionStates.updateRegionState(rt, RegionState.State.CLOSED);
890
891
892 if (regionState != null) {
893 AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
894 if (failedOpenCount == null) {
895 failedOpenCount = new AtomicInteger();
              // No need for putIfAbsent or extra synchronization: this whole
              // block holds the lock on the encoded region name, and
              // failedOpenTracker is only updated while that lock is held.
899 failedOpenTracker.put(encodedName, failedOpenCount);
900 }
901 if (failedOpenCount.incrementAndGet() >= maximumAttempts) {
902 regionStates.updateRegionState(
903 regionState.getRegion(), RegionState.State.FAILED_OPEN);
            // Remove the tracking info to save memory; this also resets the
            // failure count for any later assignment attempt.
906 failedOpenTracker.remove(encodedName);
907 } else {
908 getRegionPlan(regionState.getRegion(), sn, true);
909 this.executorService.submit(new ClosedRegionHandler(server,
910 this, regionState.getRegion()));
911 }
912 }
913 break;
914
915 case RS_ZK_REGION_OPENING:
916
917
918 if (regionState != null
919 && !regionState.isPendingOpenOrOpeningOnServer(sn)) {
920 LOG.warn("Received OPENING for region " + prettyPrintedRegionName
921 + " from server " + sn + " but region was in the state " + regionState
922 + " and not in expected PENDING_OPEN or OPENING states,"
923 + " or not on the expected server");
924 return;
925 }
926
927 regionStates.updateRegionState(rt, RegionState.State.OPENING);
928 break;
929
930 case RS_ZK_REGION_OPENED:
931
932 if (regionState != null
933 && !regionState.isPendingOpenOrOpeningOnServer(sn)) {
934 LOG.warn("Received OPENED for region " + prettyPrintedRegionName
935 + " from server " + sn + " but region was in the state " + regionState
936 + " and not in expected PENDING_OPEN or OPENING states,"
937 + " or not on the expected server");
            // Close it without updating the internal region states, so as not
            // to create double assignments in unlucky scenarios.
941 unassign(regionState.getRegion(), null, -1, null, false, sn);
942 return;
943 }
944
945 regionState = regionStates.updateRegionState(rt, RegionState.State.OPEN);
946 if (regionState != null) {
947 failedOpenTracker.remove(encodedName);
948 this.executorService.submit(new OpenedRegionHandler(
949 server, this, regionState.getRegion(), sn, expectedVersion));
950 }
951 break;
952
953 default:
954 throw new IllegalStateException("Received event is not valid.");
955 }
956 } finally {
957 lock.unlock();
958 }
959 }
960
  /**
   * @param rs current state of the region, possibly null
   * @return true if the region is in a state from which going to SPLITTING is
   *         valid (unknown, already SPLITTING, or a PENDING_CLOSE we convert)
   */
966 private boolean isInStateForSplitting(final RegionState rs) {
967 if (rs == null) return true;
968 if (rs.isSplitting()) return true;
969 if (convertPendingCloseToSplitting(rs)) return true;
970 LOG.warn("Dropped region split! Not in state good for SPLITTING; rs=" + rs);
971 return false;
972 }
973
  /**
   * If the passed region state is PENDING_CLOSE, clean up the pending close
   * and convert the state to SPLITTING instead: the master asked for a close
   * at the same time the region server started a split, and the split wins.
   *
   * @return true if the state was converted from PENDING_CLOSE to SPLITTING
   */
983 private boolean convertPendingCloseToSplitting(final RegionState rs) {
984 if (!rs.isPendingClose()) return false;
985 LOG.debug("Converting PENDING_CLOSE to SPLITTING; rs=" + rs);
986 regionStates.updateRegionState(
987 rs.getRegion(), RegionState.State.SPLITTING);
988
989
990 clearRegionPlan(rs.getRegion());
991 return true;
992 }
993
  /**
   * Handle a ZK unassigned node transition triggered by the HBCK repair tool.
   * <p>
   * This is handled in a separate code path because it breaks the normal rules.
   * @param rt region transition written by HBCK
   */
1000 private void handleHBCK(RegionTransition rt) {
1001 String encodedName = HRegionInfo.encodeRegionName(rt.getRegionName());
1002 LOG.info("Handling HBCK triggered transition=" + rt.getEventType() +
1003 ", server=" + rt.getServerName() + ", region=" +
1004 HRegionInfo.prettyPrint(encodedName));
1005 RegionState regionState = regionStates.getRegionTransitionState(encodedName);
1006 switch (rt.getEventType()) {
1007 case M_ZK_REGION_OFFLINE:
1008 HRegionInfo regionInfo;
1009 if (regionState != null) {
1010 regionInfo = regionState.getRegion();
1011 } else {
1012 try {
1013 byte [] name = rt.getRegionName();
1014 Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(catalogTracker, name);
1015 regionInfo = p.getFirst();
1016 } catch (IOException e) {
1017 LOG.info("Exception reading META doing HBCK repair operation", e);
1018 return;
1019 }
1020 }
1021 LOG.info("HBCK repair is triggering assignment of region=" +
1022 regionInfo.getRegionNameAsString());
1023
1024 assign(regionInfo, false);
1025 break;
1026
1027 default:
1028 LOG.warn("Received unexpected region state from HBCK: " + rt.toString());
1029 break;
1030 }
1031
1032 }
1033
  /**
   * New unassigned node has been created.
   *
   * <p>This happens when a region server begins the OPENING or CLOSING of a
   * region by creating an unassigned node.
   *
   * <p>When this happens we must:
   * <ol>
   *   <li>Watch the node for further events</li>
   *   <li>Read and handle the state in the node</li>
   * </ol>
   */
1048 @Override
1049 public void nodeCreated(String path) {
1050 handleAssignmentEvent(path);
1051 }
  /**
   * Existing unassigned node has had its data changed.
   *
   * <p>This happens when a region server transitions the node, for example
   * from OFFLINE to OPENING, or from OPENING to OPENED.
   *
   * <p>When this happens we must:
   * <ol>
   *   <li>Watch the node for further events</li>
   *   <li>Read and handle the state in the node</li>
   * </ol>
   */
1065 @Override
1066 public void nodeDataChanged(String path) {
1067 handleAssignmentEvent(path);
1068 }
1069
  // We do not want two events on the same region to be handled simultaneously,
  // so we track the regions with an event currently in progress and keep a
  // waiting list of runnables for busy regions.
1074 private final Set<String> regionsInProgress = new HashSet<String>();
1075
1076
1077 private final LinkedHashMultimap <String, RegionRunnable>
1078 zkEventWorkerWaitingList = LinkedHashMultimap.create();
1079
  /**
   * A specific runnable that works only on a single region.
   */
1083 private static interface RegionRunnable extends Runnable{
1084
1085
1086
1087 public String getRegionName();
1088 }
1089
  /**
   * Submit a task to the ZK event worker pool, making sure that at most one
   * task runs per region at any time; tasks for a busy region are queued and
   * resubmitted once the running task completes.
   */
1094 protected void zkEventWorkersSubmit(final RegionRunnable regRunnable) {
1095
1096 synchronized (regionsInProgress) {
1097
1098
1099 if (regionsInProgress.contains(regRunnable.getRegionName())) {
1100 synchronized (zkEventWorkerWaitingList){
1101 zkEventWorkerWaitingList.put(regRunnable.getRegionName(), regRunnable);
1102 }
1103 return;
1104 }
1105
1106
1107 regionsInProgress.add(regRunnable.getRegionName());
1108 zkEventWorkers.submit(new Runnable() {
1109 @Override
1110 public void run() {
1111 try {
1112 regRunnable.run();
1113 } finally {
1114
1115
1116 synchronized (regionsInProgress) {
1117 regionsInProgress.remove(regRunnable.getRegionName());
1118 synchronized (zkEventWorkerWaitingList) {
1119 java.util.Set<RegionRunnable> waiting = zkEventWorkerWaitingList.get(
1120 regRunnable.getRegionName());
1121 if (!waiting.isEmpty()) {
1122
1123 RegionRunnable toSubmit = waiting.iterator().next();
1124 zkEventWorkerWaitingList.remove(toSubmit.getRegionName(), toSubmit);
1125 zkEventWorkersSubmit(toSubmit);
1126 }
1127 }
1128 }
1129 }
1130 }
1131 });
1132 }
1133 }
1134
1135 @Override
1136 public void nodeDeleted(final String path) {
1137 if (path.startsWith(watcher.assignmentZNode)) {
1138 final String regionName = ZKAssign.getRegionName(watcher, path);
1139 zkEventWorkersSubmit(new RegionRunnable() {
1140 @Override
1141 public String getRegionName() {
1142 return regionName;
1143 }
1144
1145 @Override
1146 public void run() {
1147 Lock lock = locker.acquireLock(regionName);
1148 try {
1149 RegionState rs = regionStates.getRegionTransitionState(regionName);
1150 if (rs == null) return;
1151
1152 HRegionInfo regionInfo = rs.getRegion();
1153 if (rs.isSplit()) {
1154 LOG.debug("Ephemeral node deleted, regionserver crashed?, " +
1155 "clearing from RIT; rs=" + rs);
1156 regionOffline(rs.getRegion());
1157 } else {
1158 String regionNameStr = regionInfo.getRegionNameAsString();
1159 LOG.debug("The znode of region " + regionNameStr
1160 + " has been deleted.");
1161 if (rs.isOpened()) {
1162 ServerName serverName = rs.getServerName();
1163 regionOnline(regionInfo, serverName);
1164 LOG.info("The master has opened the region "
1165 + regionNameStr + " that was online on " + serverName);
1166 boolean disabled = getZKTable().isDisablingOrDisabledTable(
1167 regionInfo.getTableNameAsString());
1168 if (!serverManager.isServerOnline(serverName) && !disabled) {
1169 LOG.info("Opened region " + regionNameStr
1170 + "but the region server is offline, reassign the region");
1171 assign(regionInfo, true);
1172 } else if (disabled) {
1173
1174 LOG.info("Opened region " + regionNameStr
1175 + "but this table is disabled, triggering close of region");
1176 unassign(regionInfo);
1177 }
1178 }
1179 }
1180 } finally {
1181 lock.unlock();
1182 }
1183 }
1184 });
1185 }
1186 }
1187
  /**
   * The children of the assignment znode changed: new unassigned nodes have
   * been created (for example a region server starting a SPLIT).
   *
   * <p>When this happens we must:
   * <ol>
   *   <li>Watch the assignment znode for further children-changed events</li>
   *   <li>Watch all new children for changed events</li>
   * </ol>
   */
1200 @Override
1201 public void nodeChildrenChanged(String path) {
1202 if (path.equals(watcher.assignmentZNode)) {
1203 zkEventWorkers.submit(new Runnable() {
1204 @Override
1205 public void run() {
1206 try {
1207
1208 List<String> children =
1209 ZKUtil.listChildrenAndWatchForNewChildren(
1210 watcher, watcher.assignmentZNode);
1211 if (children != null) {
1212 Stat stat = new Stat();
1213 for (String child : children) {
1214
1215
1216
1217 if (!regionStates.isRegionInTransition(child)) {
1218 stat.setVersion(0);
1219 byte[] data = ZKAssign.getDataAndWatch(watcher,
1220 ZKUtil.joinZNode(watcher.assignmentZNode, child), stat);
1221 if (data != null && stat.getVersion() > 0) {
1222 try {
1223 RegionTransition rt = RegionTransition.parseFrom(data);
1224
1225
1226 if (rt.getEventType() == EventType.RS_ZK_REGION_SPLITTING) {
1227 handleRegion(rt, stat.getVersion());
1228 }
1229 } catch (DeserializationException de) {
1230 LOG.error("error getting data for " + child, de);
1231 }
1232 }
1233 }
1234 }
1235 }
1236 } catch (KeeperException e) {
1237 server.abort("Unexpected ZK exception reading unassigned children", e);
1238 }
1239 }
1240 });
1241 }
1242 }
1243
  /**
   * Marks the region as online. Removes it from regions in transition and
   * updates the in-memory assignment.
   * <p>
   * Used when a region has been successfully opened on a region server.
   */
1252 void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1253 if (!serverManager.isServerOnline(sn)) {
1254 LOG.warn("A region was opened on a dead server, ServerName=" +
1255 sn + ", region=" + regionInfo.getEncodedName());
1256 }
1257
1258 regionStates.regionOnline(regionInfo, sn);
1259
1260
1261 clearRegionPlan(regionInfo);
1262
1263 addToServersInUpdatingTimer(sn);
1264 }
1265
  /**
   * Handle a ZK event on a node under the assignment znode: read the region
   * transition data and process it on a ZK event worker.
   * @param path full path of the znode
   */
1274 private void handleAssignmentEvent(final String path) {
1275 if (path.startsWith(watcher.assignmentZNode)) {
1276 final String regionName = ZKAssign.getRegionName(watcher, path);
1277
1278 zkEventWorkersSubmit(new RegionRunnable() {
1279 @Override
1280 public String getRegionName() {
1281 return regionName;
1282 }
1283
1284 @Override
1285 public void run() {
1286 try {
1287 Stat stat = new Stat();
1288 byte [] data = ZKAssign.getDataAndWatch(watcher, path, stat);
1289 if (data == null) return;
1290
1291 RegionTransition rt = RegionTransition.parseFrom(data);
1292 handleRegion(rt, stat.getVersion());
1293 } catch (KeeperException e) {
1294 server.abort("Unexpected ZK exception reading unassigned node data", e);
1295 } catch (DeserializationException e) {
1296 server.abort("Unexpected exception deserializing node data", e);
1297 }
1298 }
1299 });
1300 }
1301 }
1302
  /**
   * Add the server to the set of servers whose region-in-transition timers
   * need updating. Only used when the timeout monitor is active.
   */
1308 private void addToServersInUpdatingTimer(final ServerName sn) {
1309 if (tomActivated){
1310 this.serversInUpdatingTimer.add(sn);
1311 }
1312 }
1313
  /**
   * Touch timers for all regions in transition that have the passed
   * <code>sn</code> as the destination of their plan.
   * <p>
   * Call this whenever a server checks in, so that regions heading to a
   * slow-to-respond server are not prematurely timed out.
   */
1327 private void updateTimers(final ServerName sn) {
1328 Preconditions.checkState(tomActivated);
1329 if (sn == null) return;
1330
1331
1332
1333
1334
1335 List<Map.Entry<String, RegionPlan>> rps;
1336 synchronized(this.regionPlans) {
1337 rps = new ArrayList<Map.Entry<String, RegionPlan>>(regionPlans.entrySet());
1338 }
1339
1340 for (Map.Entry<String, RegionPlan> e : rps) {
1341 if (e.getValue() != null && e.getKey() != null && sn.equals(e.getValue().getDestination())) {
1342 RegionState regionState = regionStates.getRegionTransitionState(e.getKey());
1343 if (regionState != null) {
1344 regionState.updateTimestampToNow();
1345 }
1346 }
1347 }
1348 }
1349
  /**
   * Marks the region as offline. Removes it from regions in transition and
   * removes the in-memory assignment information.
   * <p>
   * Used when a region has been closed and should remain closed.
   */
1357 public void regionOffline(final HRegionInfo regionInfo) {
1358 regionStates.regionOffline(regionInfo);
1359 removeClosedRegion(regionInfo);
1360
1361 clearRegionPlan(regionInfo);
1362 }
1363
1364 public void offlineDisabledRegion(HRegionInfo regionInfo) {
1365
1366 LOG.debug("Table being disabled so deleting ZK node and removing from " +
1367 "regions in transition, skipping assignment of region " +
1368 regionInfo.getRegionNameAsString());
1369 try {
1370 if (!ZKAssign.deleteClosedNode(watcher, regionInfo.getEncodedName())) {
1371
1372 ZKAssign.deleteOfflineNode(watcher, regionInfo.getEncodedName());
1373 }
1374 } catch (KeeperException.NoNodeException nne) {
1375 LOG.debug("Tried to delete closed node for " + regionInfo + " but it " +
1376 "does not exist so just offlining");
1377 } catch (KeeperException e) {
1378 this.server.abort("Error deleting CLOSED node in ZK", e);
1379 }
1380 regionOffline(regionInfo);
1381 }
1382
  /**
   * Assigns the specified region.
   * <p>
   * If a RegionPlan is available with a valid destination then it will be used
   * to determine what server the region is assigned to. If no RegionPlan is
   * available, the region will be assigned to a random available server.
   * <p>
   * Updates the RegionState and sends the OPEN RPC.
   * <p>
   * This will only succeed if the region is in transition and in a CLOSED or
   * OFFLINE state, or not in transition at all, and of course the chosen
   * server has to be up and running.
   *
   * @param region region to be assigned
   * @param setOfflineInZK whether the znode should be forced to OFFLINE before
   *          assigning the region
   */
1403 public void assign(HRegionInfo region, boolean setOfflineInZK) {
1404 assign(region, setOfflineInZK, false);
1405 }
  /**
   * Use care with forceNewPlan: it could cause double assignment.
   */
1410 public void assign(HRegionInfo region,
1411 boolean setOfflineInZK, boolean forceNewPlan) {
1412 if (!setOfflineInZK && isDisabledorDisablingRegionInRIT(region)) {
1413 return;
1414 }
1415 if (this.serverManager.isClusterShutdown()) {
1416 LOG.info("Cluster shutdown is set; skipping assign of " +
1417 region.getRegionNameAsString());
1418 return;
1419 }
1420 String encodedName = region.getEncodedName();
1421 Lock lock = locker.acquireLock(encodedName);
1422 try {
1423 RegionState state = forceRegionStateToOffline(region, forceNewPlan);
1424 if (state != null) {
1425 assign(state, setOfflineInZK, forceNewPlan);
1426 }
1427 } finally {
1428 lock.unlock();
1429 }
1430 }
1431
  /**
   * Bulk assign regions to <code>destination</code>.
   * @param destination server the regions should be assigned to
   * @param regions regions to assign
   * @return true unless the operation was aborted (master stopping or the
   *         destination server unreachable)
   */
1438 boolean assign(final ServerName destination,
1439 final List<HRegionInfo> regions) {
1440 int regionCount = regions.size();
1441 if (regionCount == 0) {
1442 return true;
1443 }
1444 LOG.debug("Bulk assigning " + regionCount + " region(s) to " +
1445 destination.toString());
1446
1447 Set<String> encodedNames = new HashSet<String>(regionCount);
1448 for (HRegionInfo region : regions) {
1449 encodedNames.add(region.getEncodedName());
1450 }
1451
1452 List<HRegionInfo> failedToOpenRegions = new ArrayList<HRegionInfo>();
1453 Map<String, Lock> locks = locker.acquireLocks(encodedNames);
1454 try {
1455 AtomicInteger counter = new AtomicInteger(0);
1456 Map<String, Integer> offlineNodesVersions = new ConcurrentHashMap<String, Integer>();
1457 OfflineCallback cb = new OfflineCallback(
1458 watcher, destination, counter, offlineNodesVersions);
1459 Map<String, RegionPlan> plans = new HashMap<String, RegionPlan>(regions.size());
1460 List<RegionState> states = new ArrayList<RegionState>(regions.size());
1461 for (HRegionInfo region : regions) {
1462 String encodedRegionName = region.getEncodedName();
1463 RegionState state = forceRegionStateToOffline(region, true);
1464 if (state != null && asyncSetOfflineInZooKeeper(state, cb, destination)) {
1465 RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
1466 plans.put(encodedRegionName, plan);
1467 states.add(state);
1468 } else {
1469 LOG.warn("failed to force region state to offline or "
1470 + "failed to set it offline in ZK, will reassign later: " + region);
1471 failedToOpenRegions.add(region);
1472 Lock lock = locks.remove(encodedRegionName);
1473 lock.unlock();
1474 }
1475 }
1476
1477
1478 int total = states.size();
1479 for (int oldCounter = 0; !server.isStopped();) {
1480 int count = counter.get();
1481 if (oldCounter != count) {
1482 LOG.info(destination.toString() + " unassigned znodes=" + count +
1483 " of total=" + total);
1484 oldCounter = count;
1485 }
1486 if (count >= total) break;
1487 Threads.sleep(5);
1488 }
1489
1490 if (server.isStopped()) {
1491 return false;
1492 }
1493
1494
1495
1496 this.addPlans(plans);
1497
1498 List<Pair<HRegionInfo, Integer>> regionOpenInfos =
1499 new ArrayList<Pair<HRegionInfo, Integer>>(states.size());
1500 for (RegionState state: states) {
1501 HRegionInfo region = state.getRegion();
1502 String encodedRegionName = region.getEncodedName();
1503 Integer nodeVersion = offlineNodesVersions.get(encodedRegionName);
1504 if (nodeVersion == null || nodeVersion == -1) {
1505 LOG.warn("failed to offline in zookeeper: " + region);
1506 failedToOpenRegions.add(region);
1507 Lock lock = locks.remove(encodedRegionName);
1508 lock.unlock();
1509 } else {
1510 regionStates.updateRegionState(region,
1511 RegionState.State.PENDING_OPEN, destination);
1512 regionOpenInfos.add(new Pair<HRegionInfo, Integer>(
1513 region, nodeVersion));
1514 }
1515 }
1516
1517
1518 try {
1519
1520
1521 long maxWaitTime = System.currentTimeMillis() +
1522 this.server.getConfiguration().
1523 getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1524 for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
1525 try {
1526 List<RegionOpeningState> regionOpeningStateList = serverManager
1527 .sendRegionOpen(destination, regionOpenInfos);
1528 if (regionOpeningStateList == null) {
1529
1530 return false;
1531 }
1532 for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
1533 RegionOpeningState openingState = regionOpeningStateList.get(k);
1534 if (openingState != RegionOpeningState.OPENED) {
1535 HRegionInfo region = regionOpenInfos.get(k).getFirst();
1536 if (openingState == RegionOpeningState.ALREADY_OPENED) {
1537 processAlreadyOpenedRegion(region, destination);
1538 } else if (openingState == RegionOpeningState.FAILED_OPENING) {
1539
1540 failedToOpenRegions.add(region);
1541 } else {
1542 LOG.warn("THIS SHOULD NOT HAPPEN: unknown opening state "
1543 + openingState + " in assigning region " + region);
1544 }
1545 }
1546 }
1547 break;
1548 } catch (IOException e) {
1549 if (e instanceof RemoteException) {
1550 e = ((RemoteException)e).unwrapRemoteException();
1551 }
1552 if (e instanceof RegionServerStoppedException) {
1553 LOG.warn("The region server was shut down, ", e);
1554
1555 return false;
1556 } else if (e instanceof ServerNotRunningYetException) {
1557 long now = System.currentTimeMillis();
1558 if (now < maxWaitTime) {
1559 LOG.debug("Server is not yet up; waiting up to " +
1560 (maxWaitTime - now) + "ms", e);
1561 Thread.sleep(100);
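                // Do not count the wait for the server to come up as a failed attempt.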
1562 i--;
1563 continue;
1564 }
1565 } else if (e instanceof java.net.SocketTimeoutException
1566 && this.serverManager.isServerOnline(destination)) {
1567
1568
1569
1570
1571 if (LOG.isDebugEnabled()) {
1572 LOG.debug("Bulk assigner openRegion() to " + destination
1573 + " has timed out, but the regions might"
1574 + " already be opened on it.", e);
1575 }
1576 continue;
1577 }
1578 throw e;
1579 }
1580 }
1581 } catch (IOException e) {
1582
1583 LOG.info("Unable to communicate with the region server in order" +
1584 " to assign regions", e);
1585 return false;
1586 } catch (InterruptedException e) {
1587 throw new RuntimeException(e);
1588 }
1589 } finally {
1590 for (Lock lock : locks.values()) {
1591 lock.unlock();
1592 }
1593 }
1594
1595 if (!failedToOpenRegions.isEmpty()) {
1596 for (HRegionInfo region : failedToOpenRegions) {
1597 invokeAssign(region);
1598 }
1599 }
1600 LOG.debug("Bulk assigning done for " + destination.toString());
1601 return true;
1602 }
  /**
   * Send a CLOSE RPC for the region to the server hosting it, retrying up to
   * the maximum number of attempts. If the server is no longer online, the
   * CLOSING/CLOSED znode is deleted (when transitionInZK is set) and the
   * region is simply marked offline in memory.
   */
1614 private void unassign(final HRegionInfo region,
1615 final RegionState state, final int versionOfClosingNode,
1616 final ServerName dest, final boolean transitionInZK,
1617 final ServerName src) {
1618 ServerName server = src;
1619 if (state != null) {
1620 server = state.getServerName();
1621 }
1622 for (int i = 1; i <= this.maximumAttempts; i++) {
1623
1624 if (!serverManager.isServerOnline(server)) {
1625 if (transitionInZK) {
1626
1627 deleteClosingOrClosedNode(region);
1628 }
1629 if (state != null) {
1630 regionOffline(region);
1631 }
1632 return;
1633 }
1634 try {
1635
1636 if (serverManager.sendRegionClose(server, region,
1637 versionOfClosingNode, dest, transitionInZK)) {
1638 LOG.debug("Sent CLOSE to " + server + " for region " +
1639 region.getRegionNameAsString());
1640 return;
1641 }
1642
1643
1644 LOG.warn("Server " + server + " region CLOSE RPC returned false for " +
1645 region.getRegionNameAsString());
1646 } catch (Throwable t) {
1647 if (t instanceof RemoteException) {
1648 t = ((RemoteException)t).unwrapRemoteException();
1649 }
1650 if (t instanceof NotServingRegionException
1651 || t instanceof RegionServerStoppedException) {
1652 if (transitionInZK) {
1653 deleteClosingOrClosedNode(region);
1654 }
1655 if (state != null) {
1656 regionOffline(region);
1657 }
1658 return;
1659 } else if (state != null
1660 && t instanceof RegionAlreadyInTransitionException) {
1661
1662 LOG.debug("update " + state + " the timestamp.");
1663 state.updateTimestampToNow();
1664 }
1665 LOG.info("Server " + server + " returned " + t + " for "
1666 + region.getRegionNameAsString() + ", try=" + i
1667 + " of " + this.maximumAttempts, t);
1668
1669 }
1670 }
1671
1672 if (!tomActivated && state != null) {
1673 regionStates.updateRegionState(region, RegionState.State.FAILED_CLOSE);
1674 }
1675 }
  /**
   * Force the in-memory state of the region to OFFLINE so it can be assigned,
   * unassigning it first if needed.
   * @return the resulting region state, or null if the region should not be
   *         assigned right now (for example it is already in transition)
   */
1680 private RegionState forceRegionStateToOffline(
1681 final HRegionInfo region, final boolean forceNewPlan) {
1682 RegionState state = regionStates.getRegionState(region);
1683 if (state == null) {
1684 LOG.warn("Assigning a region not in region states: " + region);
1685 state = regionStates.createRegionState(region);
1686 } else {
1687 switch (state.getState()) {
1688 case OPEN:
1689 case OPENING:
1690 case PENDING_OPEN:
1691 if (!forceNewPlan) {
1692 LOG.debug("Attempting to assign region " +
1693 region + " but it is already in transition: " + state);
1694 return null;
1695 }
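        // Intentional fall through: with forceNewPlan set, first unassign the
        // region (handled by the CLOSING/PENDING_CLOSE/FAILED_CLOSE cases).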
1696 case CLOSING:
1697 case PENDING_CLOSE:
1698 case FAILED_CLOSE:
1699 unassign(region, state, -1, null, false, null);
1700 state = regionStates.getRegionState(region);
1701 if (state.isOffline()) break;
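        // Intentional fall through: force the state to OFFLINE below.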
1702 case FAILED_OPEN:
1703 case CLOSED:
1704 LOG.debug("Forcing OFFLINE; was=" + state);
1705 state = regionStates.updateRegionState(
1706 region, RegionState.State.OFFLINE);
1707 case OFFLINE:
1708 break;
1709 default:
1710 LOG.error("Trying to assign region " + region
1711 + ", which is in state " + state);
1712 return null;
1713 }
1714 }
1715 return state;
1716 }
  /**
   * Assign the region represented by the passed state to the destination
   * chosen by the region plan.
   * <p>
   * Caller must hold the lock on the passed <code>state</code> region.
   */
1724 private void assign(RegionState state,
1725 final boolean setOfflineInZK, final boolean forceNewPlan) {
1726 RegionState currentState = state;
1727 int versionOfOfflineNode = -1;
1728 RegionPlan plan = null;
1729 long maxRegionServerStartupWaitTime = -1;
1730 HRegionInfo region = state.getRegion();
1731 RegionOpeningState regionOpenState;
1732 for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
1733 if (plan == null) {
1734 plan = getRegionPlan(region, forceNewPlan);
1735 }
1736 if (plan == null) {
1737 LOG.warn("Unable to determine a plan to assign " + region);
1738 if (tomActivated){
1739 this.timeoutMonitor.setAllRegionServersOffline(true);
1740 } else {
1741 regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
1742 }
1743 return;
1744 }
1745 if (setOfflineInZK && versionOfOfflineNode == -1) {
1746
1747
1748 versionOfOfflineNode = setOfflineInZooKeeper(currentState, plan.getDestination());
1749 if (versionOfOfflineNode != -1) {
1750 if (isDisabledorDisablingRegionInRIT(region)) {
1751 return;
1752 }
1753
1754
1755
1756
1757
1758
1759 String tableName = region.getTableNameAsString();
1760 if (!zkTable.isEnablingTable(tableName) && !zkTable.isEnabledTable(tableName)) {
1761 LOG.debug("Setting table " + tableName + " to ENABLED state.");
1762 setEnabledTable(tableName);
1763 }
1764 }
1765 }
1766 if (setOfflineInZK && versionOfOfflineNode == -1) {
1767 LOG.info("Unable to set offline in ZooKeeper to assign " + region);
1768
1769
1770
1771
1772 if (!server.isAborted()) {
1773 continue;
1774 }
1775 }
1776 if (this.server.isStopped() || this.server.isAborted()) {
1777 LOG.debug("Server stopped/aborted; skipping assign of " + region);
1778 return;
1779 }
1780 LOG.info("Assigning region " + region.getRegionNameAsString() +
1781 " to " + plan.getDestination().toString());
1782
1783 currentState = regionStates.updateRegionState(region,
1784 RegionState.State.PENDING_OPEN, plan.getDestination());
1785
1786 boolean needNewPlan;
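      // Whether the next attempt should go to a different server.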
1787 final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() +
1788 " to " + plan.getDestination();
1789 try {
1790 regionOpenState = serverManager.sendRegionOpen(
1791 plan.getDestination(), region, versionOfOfflineNode);
1792
1793 if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
1794
1795 needNewPlan = true;
1796 LOG.warn(assignMsg + ", regionserver says 'FAILED_OPENING', " +
1797 " trying to assign elsewhere instead; " +
1798 "try=" + i + " of " + this.maximumAttempts);
1799 } else {
1800
1801 if (regionOpenState == RegionOpeningState.ALREADY_OPENED) {
1802 processAlreadyOpenedRegion(region, plan.getDestination());
1803 }
1804 return;
1805 }
1806
1807 } catch (Throwable t) {
1808 if (t instanceof RemoteException) {
1809 t = ((RemoteException) t).unwrapRemoteException();
1810 }
        // Should we wait a little before retrying? Yes if the server is just
        // starting up, and yes if the region is already in transition on the
        // server: we want the region opened without risking a double
        // assignment.
1815 boolean hold = (t instanceof RegionAlreadyInTransitionException ||
1816 t instanceof ServerNotRunningYetException);
        // A socket timeout with the server still online means the openRegion
        // call may have been accepted and only the response was lost, so retry
        // the same server rather than risk a double assignment elsewhere.
1823 boolean retry = !hold && (t instanceof java.net.SocketTimeoutException
1824 && this.serverManager.isServerOnline(plan.getDestination()));
1825
1826
1827 if (hold) {
1828 LOG.warn(assignMsg + ", waiting a little before trying on the same region server " +
1829 "try=" + i + " of " + this.maximumAttempts, t);
1830
1831 if (maxRegionServerStartupWaitTime < 0) {
1832 maxRegionServerStartupWaitTime = EnvironmentEdgeManager.currentTimeMillis() +
1833 this.server.getConfiguration().
1834 getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1835 }
1836 try {
1837 long now = EnvironmentEdgeManager.currentTimeMillis();
1838 if (now < maxRegionServerStartupWaitTime) {
1839 LOG.debug("Server is not yet up; waiting up to " +
1840 (maxRegionServerStartupWaitTime - now) + "ms", t);
1841 Thread.sleep(100);
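              // Do not count the wait for the server to come up as a failed attempt.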
1842 i--;
1843 needNewPlan = false;
1844 } else {
1845 LOG.debug("Server is not up for a while; try a new one", t);
1846 needNewPlan = true;
1847 }
1848 } catch (InterruptedException ie) {
1849 LOG.warn("Failed to assign "
1850 + region.getRegionNameAsString() + " since interrupted", ie);
1851 Thread.currentThread().interrupt();
1852 if (!tomActivated) {
1853 regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
1854 }
1855 return;
1856 }
1857 } else if (retry) {
1858 needNewPlan = false;
1859 LOG.warn(assignMsg + ", trying to assign to the same region server " +
1860 "try=" + i + " of " + this.maximumAttempts, t);
1861 } else {
1862 needNewPlan = true;
1863 LOG.warn(assignMsg + ", trying to assign elsewhere instead;" +
1864 " try=" + i + " of " + this.maximumAttempts, t);
1865 }
1866 }
1867
1868 if (i == this.maximumAttempts) {
          // This is the last attempt; skip computing a new plan and let the
          // loop exit.
1871 continue;
1872 }
1873
1874
1875
1876
1877 if (needNewPlan) {
1878
1879
1880
1881
1882 RegionPlan newPlan = getRegionPlan(region, true);
1883
1884 if (newPlan == null) {
1885 if (tomActivated) {
1886 this.timeoutMonitor.setAllRegionServersOffline(true);
1887 } else {
1888 regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
1889 }
1890 LOG.warn("Unable to find a viable location to assign region " +
1891 region.getRegionNameAsString());
1892 return;
1893 }
1894
1895 if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
          // The new plan points somewhere else: drop the plan we failed to
          // execute and transition the region back to OFFLINE so it can be
          // offered to the new destination.
1899 currentState = regionStates.updateRegionState(region, RegionState.State.OFFLINE);
1900 versionOfOfflineNode = -1;
1901 plan = newPlan;
1902 }
1903 }
1904 }
1905
1906 if (!tomActivated) {
1907 regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
1908 }
1909 }
1910
1911 private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
1912
1913
1914
1915 LOG.debug("ALREADY_OPENED region " + region.getRegionNameAsString()
1916 + " to " + sn);
1917 String encodedRegionName = region.getEncodedName();
1918 try {
1919 ZKAssign.deleteOfflineNode(watcher, encodedRegionName);
1920 } catch (KeeperException.NoNodeException e) {
1921 if (LOG.isDebugEnabled()) {
1922 LOG.debug("The unassigned node " + encodedRegionName
1923 + " does not exist.");
1924 }
1925 } catch (KeeperException e) {
1926 server.abort(
1927 "Error deleting OFFLINED node in ZK for transition ZK node ("
1928 + encodedRegionName + ")", e);
1929 }
1930
1931 regionStates.regionOnline(region, sn);
1932 }
1933
1934 private boolean isDisabledorDisablingRegionInRIT(final HRegionInfo region) {
1935 String tableName = region.getTableNameAsString();
1936 boolean disabled = this.zkTable.isDisabledTable(tableName);
1937 if (disabled || this.zkTable.isDisablingTable(tableName)) {
1938 LOG.info("Table " + tableName + (disabled ? " disabled;" : " disabling;") +
1939 " skipping assign of " + region.getRegionNameAsString());
1940 offlineDisabledRegion(region);
1941 return true;
1942 }
1943 return false;
1944 }
1945
  /**
   * Set the region as OFFLINE up in ZooKeeper.
   *
   * @return the version of the OFFLINE znode if setting it was successful,
   *         -1 otherwise
   */
1953 private int setOfflineInZooKeeper(final RegionState state, final ServerName destination) {
1954 if (!state.isClosed() && !state.isOffline()) {
1955 String msg = "Unexpected state : " + state + " .. Cannot transit it to OFFLINE.";
1956 this.server.abort(msg, new IllegalStateException(msg));
1957 return -1;
1958 }
1959 regionStates.updateRegionState(state.getRegion(),
1960 RegionState.State.OFFLINE);
1961 int versionOfOfflineNode;
1962 try {
1963
1964 versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(watcher,
1965 state.getRegion(), destination);
1966 if (versionOfOfflineNode == -1) {
1967 LOG.warn("Attempted to create/force node into OFFLINE state before "
1968 + "completing assignment but failed to do so for " + state);
1969 return -1;
1970 }
1971 } catch (KeeperException e) {
1972 server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
1973 return -1;
1974 }
1975 return versionOfOfflineNode;
1976 }
  /**
   * @param region the region to make a plan for
   * @param forceNewPlan if true, ignore any existing plan and compute a new one
   * @return plan for the region, or null if there are no servers to assign to
   */
1983 private RegionPlan getRegionPlan(final HRegionInfo region,
1984 final boolean forceNewPlan) {
1985 return getRegionPlan(region, null, forceNewPlan);
1986 }
  /**
   * @param region the region to make a plan for
   * @param serverToExclude server to exclude from the candidate destinations
   *          (we know it is bad); pass null to consider all servers
   * @param forceNewPlan if true, ignore any existing plan and compute a new one
   * @return plan for the region (creating one if none exists), or null if
   *         there are no servers to assign to
   */
1997 private RegionPlan getRegionPlan(final HRegionInfo region,
1998 final ServerName serverToExclude, final boolean forceNewPlan) {
1999
2000 final String encodedName = region.getEncodedName();
2001 final List<ServerName> destServers =
2002 serverManager.createDestinationServersList(serverToExclude);
2003
2004 if (destServers.isEmpty()){
2005 LOG.warn("Can't move the region " + encodedName +
2006 ", there is no destination server available.");
2007 return null;
2008 }
2009
2010 RegionPlan randomPlan = null;
2011 boolean newPlan = false;
2012 RegionPlan existingPlan;
2013
2014 synchronized (this.regionPlans) {
2015 existingPlan = this.regionPlans.get(encodedName);
2016
2017 if (existingPlan != null && existingPlan.getDestination() != null) {
2018 LOG.debug("Found an existing plan for " + region.getRegionNameAsString()
2019 + " destination server is " + existingPlan.getDestination() +
2020 " accepted as a dest server = " + destServers.contains(existingPlan.getDestination()));
2021 }
2022
2023 if (forceNewPlan
2024 || existingPlan == null
2025 || existingPlan.getDestination() == null
2026 || !destServers.contains(existingPlan.getDestination())) {
2027 newPlan = true;
2028 randomPlan = new RegionPlan(region, null,
2029 balancer.randomAssignment(region, destServers));
2030 this.regionPlans.put(encodedName, randomPlan);
2031 }
2032 }
2033
2034 if (newPlan) {
2035 if (randomPlan.getDestination() == null) {
2036 LOG.warn("Can't find a destination for region" + encodedName);
2037 return null;
2038 }
2039 LOG.debug("No previous transition plan was found (or we are ignoring " +
2040 "an existing plan) for " + region.getRegionNameAsString() +
2041 " so generated a random one; " + randomPlan + "; " +
2042 serverManager.countOfRegionServers() +
2043 " (online=" + serverManager.getOnlineServers().size() +
2044 ", available=" + destServers.size() + ") available servers" +
2045 ", forceNewPlan=" + forceNewPlan);
2046 return randomPlan;
2047 }
2048 LOG.debug("Using pre-existing plan for region " +
2049 region.getRegionNameAsString() + "; plan=" + existingPlan);
2050 return existingPlan;
2051 }
  /**
   * Unassign a list of regions, one at a time, waiting for each region to
   * leave transition before moving on to the next. Regions already in
   * transition are skipped. Sleeps hbase.bulk.waitbetween.reopen ms between
   * regions if configured.
   */
2061 public void unassign(List<HRegionInfo> regions) {
2062 int waitTime = this.server.getConfiguration().getInt(
2063 "hbase.bulk.waitbetween.reopen", 0);
2064 for (HRegionInfo region : regions) {
2065 if (regionStates.isRegionInTransition(region))
2066 continue;
2067 unassign(region, false);
2068 while (regionStates.isRegionInTransition(region)) {
2069 try {
2070 Thread.sleep(10);
2071 } catch (InterruptedException e) {
2072
2073 }
2074 }
2075 if (waitTime > 0)
2076 try {
2077 Thread.sleep(waitTime);
2078 } catch (InterruptedException e) {
2079
2080 }
2081 }
2082 }
  /**
   * Unassigns the specified region.
   * <p>
   * Updates the RegionState and sends the CLOSE RPC unless the region is being
   * split or merged by the region server, in which case the unassign silently
   * does nothing because the region being unassigned no longer exists.
   * <p>
   * If a RegionPlan is already set, it will remain.
   *
   * @param region region to be unassigned
   */
2097 public void unassign(HRegionInfo region) {
2098 unassign(region, false);
2099 }
  /**
   * Unassigns the specified region.
   * <p>
   * Updates the RegionState and sends the CLOSE RPC unless the region is being
   * split or merged by the region server, in which case the unassign silently
   * does nothing because the region being unassigned no longer exists.
   * <p>
   * If a RegionPlan is already set, it will remain.
   *
   * @param region region to be unassigned
   * @param force if true, send the CLOSE RPC even if the region is already
   *          closing
   * @param dest server the region should be closed toward, or null for no
   *          preference
   */
2116 public void unassign(HRegionInfo region, boolean force, ServerName dest) {
2117
2118 LOG.debug("Starting unassignment of region " +
2119 region.getRegionNameAsString() + " (offlining)");
2120
2121 String encodedName = region.getEncodedName();
2122
2123 int versionOfClosingNode = -1;
2124
2125
2126 ReentrantLock lock = locker.acquireLock(encodedName);
2127 RegionState state = regionStates.getRegionTransitionState(encodedName);
2128 try {
2129 if (state == null) {
2130
2131 try {
2132 state = regionStates.getRegionState(region);
2133 if (state == null || state.getServerName() == null) {
            // We don't know where the region is; just mark it offline. There
            // is no server to send a CLOSE RPC to.
2136 regionOffline(region);
2137 return;
2138 }
2139 versionOfClosingNode = ZKAssign.createNodeClosing(
2140 watcher, region, state.getServerName());
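          // createNodeClosing returns the version of the znode it created;
          // -1 is treated here as "the CLOSING node could not be created".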
2141 if (versionOfClosingNode == -1) {
2142 LOG.debug("Attempting to unassign region " +
2143 region.getRegionNameAsString() + " but ZK closing node "
2144 + "can't be created.");
2145 return;
2146 }
2147 } catch (KeeperException e) {
2148 if (e instanceof NodeExistsException) {
          // The znode already exists: most likely the region server started a
          // split or merge before our CLOSING node could be created. Check the
          // node's state before giving up.
2153 NodeExistsException nee = (NodeExistsException)e;
2154 String path = nee.getPath();
2155 try {
2156 if (isSplitOrSplittingOrMergeOrMerging(path)) {
              LOG.debug(path + " is SPLIT, SPLITTING, MERGE or MERGING; skipping unassign " +
                "because the region no longer exists -- it has been split or merged");
2159 return;
2160 }
2161 } catch (KeeperException.NoNodeException ke) {
2162 LOG.warn("Failed getData on SPLITTING/SPLIT at " + path +
2163 "; presuming split and that the region to unassign, " +
2164 encodedName + ", no longer exists -- confirm", ke);
2165 return;
2166 } catch (KeeperException ke) {
2167 LOG.error("Unexpected zk state", ke);
2168 } catch (DeserializationException de) {
2169 LOG.error("Failed parse", de);
2170 }
2171 }
2172
2173 server.abort("Unexpected ZK exception creating node CLOSING", e);
2174 return;
2175 }
2176 state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
2177 } else if (state.isFailedOpen()) {
        // The region never opened; just mark it offline.
2179 regionOffline(region);
2180 return;
2181 } else if (force && (state.isPendingClose()
2182 || state.isClosing() || state.isFailedClose())) {
2183 LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() +
2184 " which is already " + state.getState() +
2185 " but forcing to send a CLOSE RPC again ");
2186 if (state.isFailedClose()) {
2187 state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
2188 }
2189 state.updateTimestampToNow();
2190 } else {
2191 LOG.debug("Attempting to unassign region " +
2192 region.getRegionNameAsString() + " but it is " +
2193 "already in transition (" + state.getState() + ", force=" + force + ")");
2194 return;
2195 }
2196
2197 unassign(region, state, versionOfClosingNode, dest, true, null);
2198 } finally {
2199 lock.unlock();
2200 }
2201 }
2202
2203 public void unassign(HRegionInfo region, boolean force){
2204 unassign(region, force, null);
2205 }
2206

  /**
   * Deletes the CLOSING or CLOSED znode for the given region, if one exists.
   */
2210 public void deleteClosingOrClosedNode(HRegionInfo region) {
2211 String encodedName = region.getEncodedName();
2212 try {
2213 if (!ZKAssign.deleteNode(watcher, encodedName,
2214 EventType.M_ZK_REGION_CLOSING)) {
2215 boolean deleteNode = ZKAssign.deleteNode(watcher,
2216 encodedName, EventType.RS_ZK_REGION_CLOSED);
        // deleteNode is false if the CLOSED node could not be deleted either.
        if (!deleteNode) {
          LOG.error("Failed to delete the CLOSED node for the region "
            + encodedName);
2222 }
2223 }
2224 } catch (NoNodeException e) {
2225 LOG.debug("CLOSING/CLOSED node for the region " + encodedName
2226 + " already deleted");
2227 } catch (KeeperException ke) {
2228 server.abort(
2229 "Unexpected ZK exception deleting node CLOSING/CLOSED for the region "
2230 + encodedName, ke);
2231 }
2232 }
2233

  /**
   * @param path znode path to check
   * @return true if the znode at the given path is in the SPLIT, SPLITTING,
   *         MERGE or MERGING state
   * @throws KeeperException if the znode cannot be read
   * @throws DeserializationException if the znode data cannot be parsed
   */
2240 private boolean isSplitOrSplittingOrMergeOrMerging(final String path)
2241 throws KeeperException, DeserializationException {
2242 boolean result = false;
2243
2244
2245 byte [] data = ZKAssign.getData(watcher, path);
2246 if (data == null) return false;
2247 RegionTransition rt = RegionTransition.parseFrom(data);
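    // All split/merge transition states are treated alike: the region is going
    // away (or is already gone), so the caller should skip it.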
2248 switch (rt.getEventType()) {
2249 case RS_ZK_REGION_SPLIT:
2250 case RS_ZK_REGION_SPLITTING:
2251 case RS_ZK_REGION_MERGE:
2252 case RS_ZK_REGION_MERGING:
2253 result = true;
2254 break;
2255 default:
2256 break;
2257 }
2258 return result;
2259 }
2260

  /**
   * Waits until the specified region has been assigned.
   * <p>
   * Returns immediately if the region is already assigned; otherwise blocks
   * until it is assigned, the open fails, or the server is stopped.
   * @return true if the region was assigned, false otherwise
   * @throws InterruptedException
   */
2269 public boolean waitForAssignment(HRegionInfo regionInfo)
2270 throws InterruptedException {
2271 while (!regionStates.isRegionAssigned(regionInfo)) {
2272 if (regionStates.isRegionFailedToOpen(regionInfo)
2273 || this.server.isStopped()) {
2274 return false;
2275 }
      // We should be notified of every update, but wait with a bound anyway to
      // lower the impact of a missed notification or a race.
2280 regionStates.waitForUpdate(100);
2281 }
2282 return true;
2283 }
2284

  /**
   * Assigns the META region.
   * <p>
   * Clears the current META location from ZooKeeper first, then assigns META
   * afresh. Intended for use when META is not currently being served.
   * @throws KeeperException
   */
2295 public void assignMeta() throws KeeperException {
2296 MetaRegionTracker.deleteMetaLocation(this.watcher);
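    // With the old location cleared from ZooKeeper, force a fresh assignment
    // of the META region.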
2297 assign(HRegionInfo.FIRST_META_REGIONINFO, true);
2298 }
2299

  /**
   * Assigns the given regions, trying to retain each region's previous
   * assignment where possible.
   * @throws InterruptedException
   * @throws IOException
   */
2308 public void assign(Map<HRegionInfo, ServerName> regions)
2309 throws IOException, InterruptedException {
2310 if (regions == null || regions.isEmpty()) {
2311 return;
2312 }
2313 List<ServerName> servers = serverManager.createDestinationServersList();
2314 if (servers == null || servers.isEmpty()) {
2315 throw new IOException("Found no destination server to assign region(s)");
2316 }
2317
2318
2319 Map<ServerName, List<HRegionInfo>> bulkPlan =
2320 balancer.retainAssignment(regions, servers);
2321
2322 assign(regions.size(), servers.size(),
2323 "retainAssignment=true", bulkPlan);
2324 }
2325

  /**
   * Assigns the given regions round-robin across the available servers.
   * @throws InterruptedException
   * @throws IOException
   */
2334 public void assign(List<HRegionInfo> regions)
2335 throws IOException, InterruptedException {
2336 if (regions == null || regions.isEmpty()) {
2337 return;
2338 }
2339
2340 List<ServerName> servers = serverManager.createDestinationServersList();
2341 if (servers == null || servers.isEmpty()) {
2342 throw new IOException("Found no destination server to assign region(s)");
2343 }
2344
2345
2346 Map<ServerName, List<HRegionInfo>> bulkPlan
2347 = balancer.roundRobinAssignment(regions, servers);
2348
2349 assign(regions.size(), servers.size(),
2350 "round-robin=true", bulkPlan);
2351 }
2352
2353 private void assign(int regions, int totalServers,
2354 String message, Map<ServerName, List<HRegionInfo>> bulkPlan)
2355 throws InterruptedException, IOException {
2356
2357 int servers = bulkPlan.size();
2358 if (servers == 1 || (regions < bulkAssignThresholdRegions
2359 && servers < bulkAssignThresholdServers)) {
      // Assign serially; for this few regions and servers the bulk assigner is
      // not worth its overhead.
      LOG.info("Not using bulk assign since we are assigning only "
        + regions + " region(s) to " + servers + " server(s)");
2365
2366 for (Map.Entry<ServerName, List<HRegionInfo>> plan: bulkPlan.entrySet()) {
2367 assign(plan.getKey(), plan.getValue());
2368 }
2369 } else {
2370 LOG.info("Bulk assigning " + regions + " region(s) across "
2371 + totalServers + " server(s), " + message);
2372
2373
2374 BulkAssigner ba = new GeneralBulkAssigner(
2375 this.server, bulkPlan, this, bulkAssignWaitTillAllAssigned);
2376 ba.bulkAssign();
2377 LOG.info("Bulk assigning done");
2378 }
2379 }
2380

  /**
   * Assigns all user regions found in the catalog table. Used during cluster
   * startup.
   * <p>
   * Depending on "hbase.master.startup.retainassign", regions are either
   * assigned back to their previous locations or distributed round-robin.
   * @throws IOException
   * @throws InterruptedException
   * @throws KeeperException
   */
2391 private void assignAllUserRegions()
2392 throws IOException, InterruptedException, KeeperException {
2393
2394 ZKAssign.deleteAllNodes(watcher);
2395 ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
2396 this.watcher.assignmentZNode);
2397 failoverCleanupDone();

    // Collect tables that are DISABLED, DISABLING or ENABLING; their regions
    // are excluded from the startup assignment done here.
2402 Set<String> disabledOrDisablingOrEnabling = ZKTable.getDisabledOrDisablingTables(watcher);
2403 disabledOrDisablingOrEnabling.addAll(ZKTable.getEnablingTables(watcher));
2404
2405 Map<HRegionInfo, ServerName> allRegions = MetaReader.fullScan(
2406 catalogTracker, disabledOrDisablingOrEnabling, true);
2407 if (allRegions == null || allRegions.isEmpty()) return;
2408
2409
2410 boolean retainAssignment = server.getConfiguration().
2411 getBoolean("hbase.master.startup.retainassign", true);
2412
2413 if (retainAssignment) {
2414 assign(allRegions);
2415 } else {
2416 List<HRegionInfo> regions = new ArrayList<HRegionInfo>(allRegions.keySet());
2417 assign(regions);
2418 }
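    // Make sure the tables of the regions assigned above are marked ENABLED in
    // ZooKeeper.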
2419
2420 for (HRegionInfo hri : allRegions.keySet()) {
2421 String tableName = hri.getTableNameAsString();
2422 if (!zkTable.isEnabledTable(tableName)) {
2423 setEnabledTable(tableName);
2424 }
2425 }
2426 }
2427

  /**
   * Waits until there are no regions in transition.
   * @param timeout how long to wait, in milliseconds
   * @return true if no regions are in transition when this returns
   * @throws InterruptedException
   */
2434 boolean waitUntilNoRegionsInTransition(final long timeout)
2435 throws InterruptedException {
    // Note that regions may go back into transition immediately after this
    // returns; a true result only guarantees that there was a moment with no
    // regions in transition from the master's in-memory point of view.
2442 final long endTime = System.currentTimeMillis() + timeout;
2443
2444 while (!this.server.isStopped() && regionStates.isRegionsInTransition()
2445 && endTime > System.currentTimeMillis()) {
2446 regionStates.waitForUpdate(100);
2447 }
2448
2449 return !regionStates.isRegionsInTransition();
2450 }
2451

  /**
   * Rebuilds the in-memory region states from a full scan of the catalog
   * table.
   * @return map of servers that are not online to the regions they were
   *         hosting according to the catalog table
   * @throws IOException
   * @throws KeeperException
   */
2461 Map<ServerName, List<HRegionInfo>> rebuildUserRegions() throws IOException, KeeperException {
2462 Set<String> enablingTables = ZKTable.getEnablingTables(watcher);
2463 Set<String> disabledOrEnablingTables = ZKTable.getDisabledTables(watcher);
2464 disabledOrEnablingTables.addAll(enablingTables);
2465 Set<String> disabledOrDisablingOrEnabling = ZKTable.getDisablingTables(watcher);
2466 disabledOrDisablingOrEnabling.addAll(disabledOrEnablingTables);
2467
2468
2469 List<Result> results = MetaReader.fullScan(this.catalogTracker);
2470
2471 Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
2472
2473 Map<ServerName, List<HRegionInfo>> offlineServers =
2474 new TreeMap<ServerName, List<HRegionInfo>>();
2475
2476 for (Result result : results) {
2477 Pair<HRegionInfo, ServerName> region = HRegionInfo.getHRegionInfoAndServerName(result);
2478 if (region == null) continue;
2479 HRegionInfo regionInfo = region.getFirst();
2480 ServerName regionLocation = region.getSecond();
2481 if (regionInfo == null) continue;
2482 regionStates.createRegionState(regionInfo);
2483 String tableName = regionInfo.getTableNameAsString();
2484 if (regionLocation == null) {
        // A null location usually means table creation did not finish: the
        // region was added to the catalog but never assigned (for example the
        // master restarted mid-createTable). In that case the table should
        // still be in ENABLING state, so warn if it is not.
2495 if (!enablingTables.contains(tableName)) {
2496 LOG.warn("Region " + regionInfo.getEncodedName() +
2497 " has null regionLocation." + " But its table " + tableName +
2498 " isn't in ENABLING state.");
2499 }
2500 } else if (!onlineServers.contains(regionLocation)) {
2501
2502 List<HRegionInfo> offlineRegions = offlineServers.get(regionLocation);
2503 if (offlineRegions == null) {
2504 offlineRegions = new ArrayList<HRegionInfo>(1);
2505 offlineServers.put(regionLocation, offlineRegions);
2506 }
2507 offlineRegions.add(regionInfo);
2508
2509
2510 if (!disabledOrDisablingOrEnabling.contains(tableName)
2511 && !getZKTable().isEnabledTable(tableName)) {
2512 setEnabledTable(tableName);
2513 }
2514 } else {
2515
2516 if (regionInfo.isOffline() && regionInfo.isSplit()) {
2517 String node = ZKAssign.getNodeName(this.watcher, regionInfo
2518 .getEncodedName());
2519 Stat stat = new Stat();
2520 byte[] data = ZKUtil.getDataNoWatch(this.watcher, node, stat);
2521
2522 if (data == null) {
2523 LOG.debug("Region " + regionInfo.getRegionNameAsString()
2524 + " split is completed. Hence need not add to regions list");
2525 continue;
2526 }
2527 }
2528
2529
2530 if (!disabledOrEnablingTables.contains(tableName)) {
2531 regionStates.regionOnline(regionInfo, regionLocation);
2532 }
2533
2534
2535 if (!disabledOrDisablingOrEnabling.contains(tableName)
2536 && !getZKTable().isEnabledTable(tableName)) {
2537 setEnabledTable(tableName);
2538 }
2539 }
2540 }
2541 return offlineServers;
2542 }
2543

  /**
   * Recovers tables left in DISABLING state, typically because the master
   * restarted or switched mid-disable, by resuming the disable.
   * @throws KeeperException
   * @throws TableNotFoundException
   * @throws IOException
   */
2552 private void recoverTableInDisablingState()
2553 throws KeeperException, TableNotFoundException, IOException {
2554 Set<String> disablingTables = ZKTable.getDisablingTables(watcher);
2555 if (disablingTables.size() != 0) {
2556 for (String tableName : disablingTables) {
2557
2558 LOG.info("The table " + tableName
2559 + " is in DISABLING state. Hence recovering by moving the table"
2560 + " to DISABLED state.");
2561 new DisableTableHandler(this.server, tableName.getBytes(), catalogTracker,
2562 this, tableLockManager, true).prepare().process();
2563 }
2564 }
2565 }
2566

  /**
   * Recovers tables left in ENABLING state, typically because the master
   * restarted or switched mid-enable, by resuming the enable.
   * @throws KeeperException
   * @throws TableNotFoundException
   * @throws IOException
   */
2575 private void recoverTableInEnablingState()
2576 throws KeeperException, TableNotFoundException, IOException {
2577 Set<String> enablingTables = ZKTable.getEnablingTables(watcher);
2578 if (enablingTables.size() != 0) {
2579 for (String tableName : enablingTables) {
2580
2581 LOG.info("The table " + tableName
2582 + " is in ENABLING state. Hence recovering by moving the table"
2583 + " to ENABLED state.");
2584
2585
2586 new EnableTableHandler(this.server, tableName.getBytes(),
2587 catalogTracker, this, tableLockManager, true).prepare().process();
2588 }
2589 }
2590 }
2591

  /**
   * Processes servers found dead during the startup catalog scan, plus any
   * region-in-transition znodes left in ZooKeeper.
   * <p>
   * Used during failover to recover regions that belonged to RegionServers
   * which died while there was no active master, or that were left in
   * transition.
   * @param deadServers servers that are not online, mapped to the regions they
   *          were hosting; may be null
   * @throws IOException
   * @throws KeeperException
   */
2607 private void processDeadServersAndRecoverLostRegions(
2608 Map<ServerName, List<HRegionInfo>> deadServers)
2609 throws IOException, KeeperException {
2610 if (deadServers != null) {
2611 for (Map.Entry<ServerName, List<HRegionInfo>> server: deadServers.entrySet()) {
2612 ServerName serverName = server.getKey();
2613 if (!serverManager.isServerDead(serverName)) {
2614 serverManager.expireServer(serverName);
2615 }
2616 }
2617 }
2618 List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(
2619 this.watcher, this.watcher.assignmentZNode);
2620 if (!nodes.isEmpty()) {
2621 for (String encodedRegionName : nodes) {
2622 processRegionInTransition(encodedRegionName, null);
2623 }
2624 }

    // All known dead servers and regions in transition have been processed;
    // declare failover cleanup complete.
2630 failoverCleanupDone();
2631 }
2632

  /**
   * Updates the regions-in-transition metrics: the total count, how many have
   * been in transition longer than the configured threshold, and the age of
   * the oldest region in transition.
   */
2640 public void updateRegionsInTransitionMetrics() {
2641 long currentTime = System.currentTimeMillis();
2642 int totalRITs = 0;
2643 int totalRITsOverThreshold = 0;
2644 long oldestRITTime = 0;
2645 int ritThreshold = this.server.getConfiguration().
2646 getInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 60000);
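    // Walk the current regions in transition, tracking the total count, how
    // many have exceeded the threshold, and the age of the oldest one.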
2647 for (RegionState state: regionStates.getRegionsInTransition().values()) {
2648 totalRITs++;
2649 long ritTime = currentTime - state.getStamp();
2650 if (ritTime > ritThreshold) {
2651 totalRITsOverThreshold++;
2652 }
2653 if (oldestRITTime < ritTime) {
2654 oldestRITTime = ritTime;
2655 }
2656 }
2657 if (this.metricsMaster != null) {
2658 this.metricsMaster.updateRITOldestAge(oldestRITTime);
2659 this.metricsMaster.updateRITCount(totalRITs);
2660 this.metricsMaster.updateRITCountOverThreshold(totalRITsOverThreshold);
2661 }
2662 }
2663

  /**
   * Removes the region plan, if any, for the specified region.
   */
2667 void clearRegionPlan(final HRegionInfo region) {
2668 synchronized (this.regionPlans) {
2669 this.regionPlans.remove(region.getEncodedName());
2670 }
2671 }
2672

  /**
   * Waits until the given region is no longer in transition, or until the
   * server is stopped.
   * @throws IOException
   * @throws InterruptedException
   */
2678 public void waitOnRegionToClearRegionsInTransition(final HRegionInfo hri)
2679 throws IOException, InterruptedException {
2680 if (!regionStates.isRegionInTransition(hri)) return;
    // The timeout monitor also watches regions in transition; here we simply
    // poll until the region leaves transition or the server is stopped.
    while (!this.server.isStopped() && regionStates.isRegionInTransition(hri)) {
      LOG.info("Waiting on " + hri.getRegionNameAsString()
        + " to clear regions-in-transition");
      regionStates.waitForUpdate(100);
2687 }
2688 if (this.server.isStopped()) {
2689 LOG.info("Giving up wait on regions in " +
2690 "transition because stoppable.isStopped is set");
2691 }
2692 }
2693

  /**
   * Chore that updates the timers of regions in transition hosted by the
   * servers queued in serversInUpdatingTimer.
   */
2698 public class TimerUpdater extends Chore {
2699
2700 public TimerUpdater(final int period, final Stoppable stopper) {
2701 super("AssignmentTimerUpdater", period, stopper);
2702 }
2703
2704 @Override
2705 protected void chore() {
2706 Preconditions.checkState(tomActivated);
2707 ServerName serverToUpdateTimer = null;
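      // Walk serversInUpdatingTimer in ascending order, updating the timers
      // for each server and removing it once handled.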
2708 while (!serversInUpdatingTimer.isEmpty() && !stopper.isStopped()) {
2709 if (serverToUpdateTimer == null) {
2710 serverToUpdateTimer = serversInUpdatingTimer.first();
2711 } else {
2712 serverToUpdateTimer = serversInUpdatingTimer
2713 .higher(serverToUpdateTimer);
2714 }
2715 if (serverToUpdateTimer == null) {
2716 break;
2717 }
2718 updateTimers(serverToUpdateTimer);
2719 serversInUpdatingTimer.remove(serverToUpdateTimer);
2720 }
2721 }
2722 }
2723

  /**
   * Chore that checks for timed-out region transition operations.
   */
2727 public class TimeoutMonitor extends Chore {
2728 private boolean allRegionServersOffline = false;
2729 private ServerManager serverManager;
2730 private final int timeout;
2731

    /**
     * Creates a periodic monitor that checks for timed-out region transition
     * operations and retries them.
     * @param period how often to run, in milliseconds
     * @param stopper when the stopper reports stopped, the chore exits cleanly
     * @param serverManager used to check which servers are online
     * @param timeout how long a region may sit in a single transition state,
     *          in milliseconds, before the monitor acts on it
     */
2741 public TimeoutMonitor(final int period, final Stoppable stopper,
2742 ServerManager serverManager,
2743 final int timeout) {
2744 super("AssignmentTimeoutMonitor", period, stopper);
2745 this.timeout = timeout;
2746 this.serverManager = serverManager;
2747 }
2748
2749 private synchronized void setAllRegionServersOffline(
2750 boolean allRegionServersOffline) {
2751 this.allRegionServersOffline = allRegionServersOffline;
2752 }
2753
2754 @Override
2755 protected void chore() {
2756 Preconditions.checkState(tomActivated);
2757 boolean noRSAvailable = this.serverManager.createDestinationServersList().isEmpty();
2758
2759
2760 long now = System.currentTimeMillis();
2761
2762
2763 for (String regionName : regionStates.getRegionsInTransition().keySet()) {
2764 RegionState regionState = regionStates.getRegionTransitionState(regionName);
2765 if (regionState == null) continue;
2766
2767 if (regionState.getStamp() + timeout <= now) {
2768
2769 actOnTimeOut(regionState);
2770 } else if (this.allRegionServersOffline && !noRSAvailable) {
2771 RegionPlan existingPlan = regionPlans.get(regionName);
2772 if (existingPlan == null
2773 || !this.serverManager.isServerOnline(existingPlan
2774 .getDestination())) {
            // Some region servers just came back online; retry the assignment
            // right away instead of waiting for the full timeout.
2777 actOnTimeOut(regionState);
2778 }
2779 }
2780 }
2781 setAllRegionServersOffline(noRSAvailable);
2782 }
2783
2784 private void actOnTimeOut(RegionState regionState) {
2785 HRegionInfo regionInfo = regionState.getRegion();
2786 LOG.info("Regions in transition timed out: " + regionState);
2787
2788 switch (regionState.getState()) {
2789 case CLOSED:
2790 LOG.info("Region " + regionInfo.getEncodedName()
2791 + " has been CLOSED for too long, waiting on queued "
2792 + "ClosedRegionHandler to run or server shutdown");
        // Reset the timestamp so the region does not keep timing out while it
        // waits for the queued ClosedRegionHandler.
2794 regionState.updateTimestampToNow();
2795 break;
2796 case OFFLINE:
2797 LOG.info("Region has been OFFLINE for too long, " + "reassigning "
2798 + regionInfo.getRegionNameAsString() + " to a random server");
2799 invokeAssign(regionInfo);
2800 break;
2801 case PENDING_OPEN:
2802 LOG.info("Region has been PENDING_OPEN for too "
2803 + "long, reassigning region=" + regionInfo.getRegionNameAsString());
2804 invokeAssign(regionInfo);
2805 break;
2806 case OPENING:
2807 processOpeningState(regionInfo);
2808 break;
2809 case OPEN:
2810 LOG.error("Region has been OPEN for too long, " +
2811 "we don't know where region was opened so can't do anything");
2812 regionState.updateTimestampToNow();
2813 break;
2814
2815 case PENDING_CLOSE:
2816 LOG.info("Region has been PENDING_CLOSE for too "
2817 + "long, running forced unassign again on region="
2818 + regionInfo.getRegionNameAsString());
2819 invokeUnassign(regionInfo);
2820 break;
2821 case CLOSING:
2822 LOG.info("Region has been CLOSING for too " +
2823 "long, this should eventually complete or the server will " +
2824 "expire, send RPC again");
2825 invokeUnassign(regionInfo);
2826 break;
2827
2828 case SPLIT:
2829 case SPLITTING:
2830 case FAILED_OPEN:
2831 case FAILED_CLOSE:
2832 break;
2833
2834 default:
2835 throw new IllegalStateException("Received event is not valid.");
2836 }
2837 }
2838 }
2839
2840 private void processOpeningState(HRegionInfo regionInfo) {
2841 LOG.info("Region has been OPENING for too long, reassigning region="
2842 + regionInfo.getRegionNameAsString());
2843
2844 try {
2845 String node = ZKAssign.getNodeName(watcher, regionInfo.getEncodedName());
2846 Stat stat = new Stat();
2847 byte [] data = ZKAssign.getDataNoWatch(watcher, node, stat);
2848 if (data == null) {
2849 LOG.warn("Data is null, node " + node + " no longer exists");
2850 return;
2851 }
2852 RegionTransition rt = RegionTransition.parseFrom(data);
2853 EventType et = rt.getEventType();
2854 if (et == EventType.RS_ZK_REGION_OPENED) {
2855 LOG.debug("Region has transitioned to OPENED, allowing "
2856 + "watched event handlers to process");
2857 return;
2858 } else if (et != EventType.RS_ZK_REGION_OPENING && et != EventType.RS_ZK_REGION_FAILED_OPEN ) {
2859 LOG.warn("While timing out a region, found ZK node in unexpected state: " + et);
2860 return;
2861 }
2862 invokeAssign(regionInfo);
2863 } catch (KeeperException ke) {
2864 LOG.error("Unexpected ZK exception timing out CLOSING region", ke);
2865 } catch (DeserializationException e) {
2866 LOG.error("Unexpected exception parsing CLOSING region", e);
2867 }
2868 }
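
  // invokeAssign/invokeUnassign hand the work off to the executor service,
  // presumably so the thread that detected the timeout is not tied up by the
  // assignment RPCs.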
2869
2870 void invokeAssign(HRegionInfo regionInfo) {
2871 threadPoolExecutorService.submit(new AssignCallable(this, regionInfo));
2872 }
2873
2874 private void invokeUnassign(HRegionInfo regionInfo) {
2875 threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
2876 }
2877
2878 public boolean isCarryingMeta(ServerName serverName) {
2879 return isCarryingRegion(serverName, HRegionInfo.FIRST_META_REGIONINFO);
2880 }
2881

  /**
   * Checks whether the given server is carrying the specified region.
   * <p>
   * Region locations are tracked in more than one place and can briefly
   * disagree. The ZooKeeper unassigned node, when present, has the most recent
   * information and is consulted first; otherwise the assignment manager's
   * in-memory state is used.
   * @return true if the server appears to be carrying the region
   */
2892 private boolean isCarryingRegion(ServerName serverName, HRegionInfo hri) {
2893 RegionTransition rt = null;
2894 try {
2895 byte [] data = ZKAssign.getData(watcher, hri.getEncodedName());
      // A null result is fine here; it just means there is no unassigned znode.
2897 rt = data == null? null: RegionTransition.parseFrom(data);
2898 } catch (KeeperException e) {
2899 server.abort("Exception reading unassigned node for region=" + hri.getEncodedName(), e);
2900 } catch (DeserializationException e) {
2901 server.abort("Exception parsing unassigned node for region=" + hri.getEncodedName(), e);
2902 }
2903
2904 ServerName addressFromZK = rt != null? rt.getServerName(): null;
2905 if (addressFromZK != null) {
2906
2907 boolean matchZK = addressFromZK.equals(serverName);
      LOG.debug("Based on ZK, current region=" + hri.getRegionNameAsString() +
        " is on server=" + addressFromZK +
        ", server being checked: " + serverName);
2911 return matchZK;
2912 }
2913
2914 ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
2915 boolean matchAM = (addressFromAM != null &&
2916 addressFromAM.equals(serverName));
    LOG.debug("Based on AM, current region=" + hri.getRegionNameAsString() +
      " is on server=" + (addressFromAM != null ? addressFromAM : "null") +
      ", server being checked: " + serverName);
2920
2921 return matchAM;
2922 }
2923

  /**
   * Processes the shutdown of the given server: drops region plans targeting
   * it and returns the regions in transition that were being opened on it.
   * @param sn server that went down
   */
2929 public List<HRegionInfo> processServerShutdown(final ServerName sn) {
2930
2931 synchronized (this.regionPlans) {
2932 for (Iterator <Map.Entry<String, RegionPlan>> i =
2933 this.regionPlans.entrySet().iterator(); i.hasNext();) {
2934 Map.Entry<String, RegionPlan> e = i.next();
2935 ServerName otherSn = e.getValue().getDestination();
2936
2937 if (otherSn != null && otherSn.equals(sn)) {
2938
2939 i.remove();
2940 }
2941 }
2942 }
2943 List<HRegionInfo> regions = regionStates.serverOffline(sn);
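    // serverOffline() returns the regions that were in transition on the dead
    // server; keep only those still pending open or opening there.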
2944 for (Iterator<HRegionInfo> it = regions.iterator(); it.hasNext(); ) {
2945 HRegionInfo hri = it.next();
2946 String encodedName = hri.getEncodedName();

      // Lock the region since its state may be updated below.
2949 Lock lock = locker.acquireLock(encodedName);
2950 try {
2951 RegionState regionState =
2952 regionStates.getRegionTransitionState(encodedName);
2953 if (regionState == null
2954 || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
2955 LOG.info("Skip region " + hri
2956 + " since it is not opening on the dead server any more: " + sn);
2957 it.remove();
2958 } else {
          try {
            // Delete the region's znode if it still exists.
            ZKAssign.deleteNodeFailSilent(watcher, hri);
2962 } catch (KeeperException ke) {
2963 server.abort("Unexpected ZK exception deleting node " + hri, ke);
2964 }
2965
2966 regionStates.updateRegionState(hri, RegionState.State.CLOSED);
2967 }
2968 } finally {
2969 lock.unlock();
2970 }
2971 }
2972 return regions;
2973 }
2974

  /**
   * Updates in-memory state after a region server reports a completed split.
   * @param sn server that reported the split
   * @param parent region that was split
   * @param a first daughter region
   * @param b second daughter region
   */
2982 public void handleSplitReport(final ServerName sn, final HRegionInfo parent,
2983 final HRegionInfo a, final HRegionInfo b) {
2984 regionOffline(parent);
2985 regionOnline(a, sn);
2986 regionOnline(b, sn);

    // The region may have been splitting while the table was being disabled;
    // make sure the daughters are closed in that case. This does not race with
    // the region server since the split report arrives only after the split
    // transaction has completed.
2992 if (this.zkTable.isDisablingOrDisabledTable(
2993 parent.getTableNameAsString())) {
2994 unassign(a);
2995 unassign(b);
2996 }
2997 }
2998

  /**
   * Updates in-memory state after a region server reports a completed merge.
   * @param sn server that reported the merge
   * @param merged the resulting merged region
   * @param a first parent region
   * @param b second parent region
   */
3006 public void handleRegionsMergeReport(final ServerName sn,
3007 final HRegionInfo merged, final HRegionInfo a, final HRegionInfo b) {
3008 regionOffline(a);
3009 regionOffline(b);
3010 regionOnline(merged, sn);

    // The regions may have been merging while the table was being disabled;
    // make sure the merged region is closed in that case. This does not race
    // with the region server since the merge report arrives only after the
    // merge transaction has completed.
3016 if (this.zkTable.isDisablingOrDisabledTable(merged.getTableNameAsString())) {
3017 unassign(merged);
3018 }
3019 }
3020

  /**
   * Registers the given plan and unassigns the region so it can be moved to
   * the plan's destination.
   */
3024 public void balance(final RegionPlan plan) {
3025 synchronized (this.regionPlans) {
3026 this.regionPlans.put(plan.getRegionName(), plan);
3027 }
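    // Unassign using the plan's destination so that, once the region closes,
    // it can be reopened on the server chosen by the balancer.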
3028 unassign(plan.getRegionInfo(), false, plan.getDestination());
3029 }
3030
3031 public void stop() {
3032 if (tomActivated){
3033 this.timeoutMonitor.interrupt();
3034 this.timerUpdater.interrupt();
3035 }
3036 }
3037

  /**
   * Shuts down the executor services used for assignments and ZK events.
   */
3041 public void shutdown() {
    // Immediate shutdown: drop any ZK event work that is still queued.
3043 synchronized (zkEventWorkerWaitingList){
3044 zkEventWorkerWaitingList.clear();
3045 }
3046 threadPoolExecutorService.shutdownNow();
3047 zkEventWorkers.shutdownNow();
3048 }
3049
3050 protected void setEnabledTable(String tableName) {
3051 try {
3052 this.zkTable.setEnabledTable(tableName);
3053 } catch (KeeperException e) {
      // Abort: without a consistent table state in ZooKeeper we cannot continue.
      String errorMsg = "Unable to ensure that the table " + tableName
        + " will be enabled because of a ZooKeeper issue";
3057 LOG.error(errorMsg);
3058 this.server.abort(errorMsg, e);
3059 }
3060 }
3061

  /**
   * Marks the region OFFLINE in memory and asynchronously creates its OFFLINE
   * znode in ZooKeeper.
   * @param state current state; must be CLOSED or OFFLINE
   * @return true if the asynchronous znode creation was started
   */
3068 private boolean asyncSetOfflineInZooKeeper(final RegionState state,
3069 final AsyncCallback.StringCallback cb, final ServerName destination) {
3070 if (!state.isClosed() && !state.isOffline()) {
3071 this.server.abort("Unexpected state trying to OFFLINE; " + state,
3072 new IllegalStateException());
3073 return false;
3074 }
3075 regionStates.updateRegionState(
3076 state.getRegion(), RegionState.State.OFFLINE);
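    // Create the OFFLINE znode asynchronously; the RegionState is passed along
    // as the callback context.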
3077 try {
3078 ZKAssign.asyncCreateNodeOffline(watcher, state.getRegion(),
3079 destination, cb, state);
3080 } catch (KeeperException e) {
3081 if (e instanceof NodeExistsException) {
3082 LOG.warn("Node for " + state.getRegion() + " already exists");
3083 } else {
3084 server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
3085 }
3086 return false;
3087 }
3088 return true;
3089 }
3090 }