/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 package org.apache.hadoop.hbase.master;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.Collections;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.NavigableMap;
31 import java.util.Set;
32 import java.util.TreeMap;
33 import java.util.concurrent.ConcurrentHashMap;
34 import java.util.concurrent.ConcurrentSkipListSet;
35 import java.util.concurrent.CopyOnWriteArrayList;
36 import java.util.concurrent.ThreadFactory;
37 import java.util.concurrent.TimeUnit;
38 import java.util.concurrent.atomic.AtomicBoolean;
39 import java.util.concurrent.atomic.AtomicInteger;
40 import java.util.concurrent.locks.Lock;
41 import java.util.concurrent.locks.ReentrantLock;
42
43 import org.apache.commons.logging.Log;
44 import org.apache.commons.logging.LogFactory;
45 import org.apache.hadoop.classification.InterfaceAudience;
46 import org.apache.hadoop.conf.Configuration;
47 import org.apache.hadoop.hbase.Chore;
48 import org.apache.hadoop.hbase.HBaseIOException;
49 import org.apache.hadoop.hbase.HConstants;
50 import org.apache.hadoop.hbase.HRegionInfo;
51 import org.apache.hadoop.hbase.NotServingRegionException;
52 import org.apache.hadoop.hbase.RegionTransition;
53 import org.apache.hadoop.hbase.Server;
54 import org.apache.hadoop.hbase.ServerName;
55 import org.apache.hadoop.hbase.Stoppable;
56 import org.apache.hadoop.hbase.TableName;
57 import org.apache.hadoop.hbase.TableNotFoundException;
58 import org.apache.hadoop.hbase.catalog.CatalogTracker;
59 import org.apache.hadoop.hbase.catalog.MetaReader;
60 import org.apache.hadoop.hbase.client.Result;
61 import org.apache.hadoop.hbase.exceptions.DeserializationException;
62 import org.apache.hadoop.hbase.executor.EventHandler;
63 import org.apache.hadoop.hbase.executor.EventType;
64 import org.apache.hadoop.hbase.executor.ExecutorService;
65 import org.apache.hadoop.hbase.ipc.RpcClient;
66 import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException;
68 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
69 import org.apache.hadoop.hbase.master.RegionState.State;
70 import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
71 import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
72 import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
73 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
74 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
75 import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
76 import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException;
77 import org.apache.hadoop.hbase.regionserver.RegionMergeTransaction;
78 import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
79 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
80 import org.apache.hadoop.hbase.regionserver.SplitTransaction;
81 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
82 import org.apache.hadoop.hbase.util.KeyLocker;
83 import org.apache.hadoop.hbase.util.Pair;
84 import org.apache.hadoop.hbase.util.PairOfSameType;
85 import org.apache.hadoop.hbase.util.Threads;
86 import org.apache.hadoop.hbase.util.Triple;
87 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
88 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
89 import org.apache.hadoop.hbase.zookeeper.ZKTable;
90 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
91 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
92 import org.apache.hadoop.ipc.RemoteException;
93 import org.apache.zookeeper.AsyncCallback;
94 import org.apache.zookeeper.KeeperException;
95 import org.apache.zookeeper.KeeperException.NoNodeException;
96 import org.apache.zookeeper.KeeperException.NodeExistsException;
97 import org.apache.zookeeper.data.Stat;
98
99 import com.google.common.base.Preconditions;
100 import com.google.common.collect.LinkedHashMultimap;
/**
 * Manages and performs region assignment.
 * <p>
 * Monitors ZooKeeper for events related to regions in transition, and handles
 * existing regions in transition during master failover.
 */
109 @InterfaceAudience.Private
110 public class AssignmentManager extends ZooKeeperListener {
111 private static final Log LOG = LogFactory.getLog(AssignmentManager.class);
112
113 public static final ServerName HBCK_CODE_SERVERNAME = ServerName.valueOf(HConstants.HBCK_CODE_NAME,
114 -1, -1L);
115
116 public static final String ASSIGNMENT_TIMEOUT = "hbase.master.assignment.timeoutmonitor.timeout";
117 public static final int DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT = 600000;
118 public static final String ASSIGNMENT_TIMEOUT_MANAGEMENT = "hbase.assignment.timeout.management";
119 public static final boolean DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT = false;
120
121 public static final String ALREADY_IN_TRANSITION_WAITTIME
122 = "hbase.assignment.already.intransition.waittime";
123 public static final int DEFAULT_ALREADY_IN_TRANSITION_WAITTIME = 60000;
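// The constants above are configuration keys with their defaults. A minimal
// sketch (not part of the original source) of overriding them programmatically,
// for example in a test; the key names are the ones defined in this class:
//
//   Configuration conf = HBaseConfiguration.create();
//   conf.setBoolean(ASSIGNMENT_TIMEOUT_MANAGEMENT, true);
//   conf.setInt(ASSIGNMENT_TIMEOUT, 300000);             // 5 minutes
//   conf.setInt(ALREADY_IN_TRANSITION_WAITTIME, 30000);  // 30 seconds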
124
125 protected final Server server;
126
127 private ServerManager serverManager;
128
129 private boolean shouldAssignRegionsWithFavoredNodes;
130
131 private CatalogTracker catalogTracker;
132
133 protected final TimeoutMonitor timeoutMonitor;
134
135 private final TimerUpdater timerUpdater;
136
137 private LoadBalancer balancer;
138
139 private final MetricsAssignmentManager metricsAssignmentManager;
140
141 private final TableLockManager tableLockManager;
142
143 private AtomicInteger numRegionsOpened = new AtomicInteger(0);
144
145 final private KeyLocker<String> locker = new KeyLocker<String>();
146
/**
 * Map of regions to reopen after the schema of a table is changed. Key -
 * encoded region name, value - HRegionInfo
 */
151 private final Map <String, HRegionInfo> regionsToReopen;
152
/**
 * The maximum number of times we will attempt assigning or unassigning a
 * region before giving up.
 */
157 private final int maximumAttempts;
158
/**
 * Map from the encoded name of a region to be created by a merge to the
 * pair of regions being merged.
 */
162 private final Map<String, PairOfSameType<HRegionInfo>> mergingRegions
163 = new HashMap<String, PairOfSameType<HRegionInfo>>();
164
/**
 * The sleep time for which the assignment will wait before retrying in case
 * of hbase:meta assignment failure due to lack of availability of a region
 * plan.
 */
169 private final long sleepTimeBeforeRetryingMetaAssignment;
170
/**
 * Plans for region movement. Key: the encoded region name; value: the plan.
 * All access to this map must be synchronized on the map itself.
 */
175 final NavigableMap<String, RegionPlan> regionPlans =
176 new TreeMap<String, RegionPlan>();
177
178 private final ZKTable zkTable;
179
/**
 * Servers whose in-transition region timers need refreshing; only used when
 * the timeout monitor is activated, otherwise this set stays null.
 */
184 private final ConcurrentSkipListSet<ServerName> serversInUpdatingTimer;
185
186 private final ExecutorService executorService;
187
188
189 private Map<HRegionInfo, AtomicBoolean> closedRegionHandlerCalled = null;
190
191
192 private Map<HRegionInfo, AtomicBoolean> openedRegionHandlerCalled = null;
193
194
195 private java.util.concurrent.ExecutorService threadPoolExecutorService;
196
197
198 private final java.util.concurrent.ExecutorService zkEventWorkers;
199
200 private List<EventType> ignoreStatesRSOffline = Arrays.asList(
201 EventType.RS_ZK_REGION_FAILED_OPEN, EventType.RS_ZK_REGION_CLOSED);
202
203 private final RegionStates regionStates;
204
205
206
207
208
209 private final int bulkAssignThresholdRegions;
210 private final int bulkAssignThresholdServers;
211
212
213
214
215 private final boolean bulkAssignWaitTillAllAssigned;
216
/**
 * Indicator that AssignmentManager has recovered the region states so
 * that ServerShutdownHandler can be fully enabled and re-assign regions
 * of dead servers. So that when re-assignment happens, AssignmentManager
 * has proper region states.
 */
225 protected final AtomicBoolean failoverCleanupDone = new AtomicBoolean(false);
226
227
228 private final boolean tomActivated;
229
/**
 * A map to track the count a region fails to open in a row, so that if a
 * region fails to open too many times, it can be marked FAILED_OPEN instead
 * of being retried forever.
 */
237 private final ConcurrentHashMap<String, AtomicInteger>
238 failedOpenTracker = new ConcurrentHashMap<String, AtomicInteger>();
239
/**
 * For testing only!  Set to true to skip handling of split.
 */
243 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="MS_SHOULD_BE_FINAL")
244 public static boolean TEST_SKIP_SPLIT_HANDLING = false;
245
246
247 private List<AssignmentListener> listeners = new CopyOnWriteArrayList<AssignmentListener>();
248
/**
 * Constructs a new assignment manager.
 *
 * @param server the server hosting this assignment manager (the active master)
 * @param serverManager server manager for the associated master
 * @param catalogTracker tracker of the hbase:meta location
 * @param balancer the load balancer used to build assignment plans
 * @param service executor service used to run event handlers
 * @param metricsMaster master metrics
 * @param tableLockManager the table lock manager
 * @throws KeeperException
 * @throws IOException
 */
259 public AssignmentManager(Server server, ServerManager serverManager,
260 CatalogTracker catalogTracker, final LoadBalancer balancer,
261 final ExecutorService service, MetricsMaster metricsMaster,
262 final TableLockManager tableLockManager) throws KeeperException, IOException {
263 super(server.getZooKeeper());
264 this.server = server;
265 this.serverManager = serverManager;
266 this.catalogTracker = catalogTracker;
267 this.executorService = service;
268 this.regionsToReopen = Collections.synchronizedMap
269 (new HashMap<String, HRegionInfo> ());
270 Configuration conf = server.getConfiguration();
271
272 this.shouldAssignRegionsWithFavoredNodes = conf.getClass(
273 HConstants.HBASE_MASTER_LOADBALANCER_CLASS, Object.class).equals(
274 FavoredNodeLoadBalancer.class);
275 this.tomActivated = conf.getBoolean(
276 ASSIGNMENT_TIMEOUT_MANAGEMENT, DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT);
277 if (tomActivated){
278 this.serversInUpdatingTimer = new ConcurrentSkipListSet<ServerName>();
279 this.timeoutMonitor = new TimeoutMonitor(
280 conf.getInt("hbase.master.assignment.timeoutmonitor.period", 30000),
281 server, serverManager,
282 conf.getInt(ASSIGNMENT_TIMEOUT, DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT));
283 this.timerUpdater = new TimerUpdater(conf.getInt(
284 "hbase.master.assignment.timerupdater.period", 10000), server);
285 Threads.setDaemonThreadRunning(timerUpdater.getThread(),
286 server.getServerName() + ".timerUpdater");
287 } else {
288 this.serversInUpdatingTimer = null;
289 this.timeoutMonitor = null;
290 this.timerUpdater = null;
291 }
292 this.zkTable = new ZKTable(this.watcher);
293
294 this.maximumAttempts = Math.max(1,
295 this.server.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10));
296 this.sleepTimeBeforeRetryingMetaAssignment = this.server.getConfiguration().getLong(
297 "hbase.meta.assignment.retry.sleeptime", 1000l);
298 this.balancer = balancer;
299 int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
300 this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
301 maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("AM."));
302 this.regionStates = new RegionStates(server, serverManager);
303
304 this.bulkAssignWaitTillAllAssigned =
305 conf.getBoolean("hbase.bulk.assignment.waittillallassigned", false);
306 this.bulkAssignThresholdRegions = conf.getInt("hbase.bulk.assignment.threshold.regions", 7);
307 this.bulkAssignThresholdServers = conf.getInt("hbase.bulk.assignment.threshold.servers", 3);
308
309 int workers = conf.getInt("hbase.assignment.zkevent.workers", 20);
310 ThreadFactory threadFactory = Threads.newDaemonThreadFactory("AM.ZK.Worker");
311 zkEventWorkers = Threads.getBoundedCachedThreadPool(workers, 60L,
312 TimeUnit.SECONDS, threadFactory);
313 this.tableLockManager = tableLockManager;
314
315 this.metricsAssignmentManager = new MetricsAssignmentManager();
316 }
317
318 void startTimeOutMonitor() {
319 if (tomActivated) {
320 Threads.setDaemonThreadRunning(timeoutMonitor.getThread(), server.getServerName()
321 + ".timeoutMonitor");
322 }
323 }
324
/**
 * Add the listener to the notification list.
 * @param listener the AssignmentListener to register
 */
329 public void registerListener(final AssignmentListener listener) {
330 this.listeners.add(listener);
331 }
332
/**
 * Remove the listener from the notification list.
 * @param listener the AssignmentListener to unregister
 * @return true if the listener was registered and has now been removed
 */
337 public boolean unregisterListener(final AssignmentListener listener) {
338 return this.listeners.remove(listener);
339 }
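// A minimal usage sketch for the listener hooks above. The callback names
// below (regionOpened/regionClosed) are assumed from the notification calls
// made elsewhere in this class; consult AssignmentListener for the exact
// interface before relying on this:
//
//   AssignmentListener listener = new AssignmentListener() {
//     @Override
//     public void regionOpened(HRegionInfo regionInfo, ServerName serverName) {
//       LOG.info("Opened " + regionInfo.getRegionNameAsString() + " on " + serverName);
//     }
//     @Override
//     public void regionClosed(HRegionInfo regionInfo) {
//       LOG.info("Closed " + regionInfo.getRegionNameAsString());
//     }
//   };
//   assignmentManager.registerListener(listener);
//   ...
//   assignmentManager.unregisterListener(listener);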
340
/**
 * @return instance of ZKTable
 */
344 public ZKTable getZKTable() {
345
346
347 return this.zkTable;
348 }
349
/**
 * @return instance of RegionStates: the in-memory region states kept up to
 * date by this assignment manager
 */
356 public RegionStates getRegionStates() {
357 return regionStates;
358 }
359
360 public RegionPlan getRegionReopenPlan(HRegionInfo hri) {
361 return new RegionPlan(hri, null, regionStates.getRegionServerOfRegion(hri));
362 }
363
364
365
366
367
368
369 public void addPlan(String encodedName, RegionPlan plan) {
370 synchronized (regionPlans) {
371 regionPlans.put(encodedName, plan);
372 }
373 }
374
375
376
377
378 public void addPlans(Map<String, RegionPlan> plans) {
379 synchronized (regionPlans) {
380 regionPlans.putAll(plans);
381 }
382 }
383
/**
 * Set the list of regions that will be reopened
 * because of an update in table schema.
 *
 * @param regions list of regions that should be tracked for reopen
 */
391 public void setRegionsToReopen(List <HRegionInfo> regions) {
392 for(HRegionInfo hri : regions) {
393 regionsToReopen.put(hri.getEncodedName(), hri);
394 }
395 }
396
/**
 * Used by the client to identify if all regions have the schema updates.
 *
 * @param tableName the table being altered
 * @return Pair whose first element is the number of regions still pending
 *         reopen and second is the total number of regions of the table
 * @throws IOException if hbase:meta cannot be read
 */
404 public Pair<Integer, Integer> getReopenStatus(TableName tableName)
405 throws IOException {
406 List <HRegionInfo> hris =
407 MetaReader.getTableRegions(this.server.getCatalogTracker(), tableName, true);
408 Integer pending = 0;
409 for (HRegionInfo hri : hris) {
410 String name = hri.getEncodedName();
411
412 if (regionsToReopen.containsKey(name)
413 || regionStates.isRegionInTransition(name)) {
414 pending++;
415 }
416 }
417 return new Pair<Integer, Integer>(pending, hris.size());
418 }
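// Example (a sketch, not original code) of how a caller might turn the Pair
// returned above into a progress figure; "tableName" is a hypothetical
// TableName in scope:
//
//   Pair<Integer, Integer> status = assignmentManager.getReopenStatus(tableName);
//   int pending = status.getFirst();
//   int total = status.getSecond();
//   LOG.info("Reopen progress: " + (total - pending) + "/" + total + " regions done");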
419
/**
 * Used to check if the failover cleanup is done.
 * @return true if failover cleanup is done; false otherwise
 */
425 public boolean isFailoverCleanupDone() {
426 return failoverCleanupDone.get();
427 }
428
/**
 * To avoid racing with the assignment manager, external entities may need to
 * lock a region before operating on it.
 * @param encodedName the encoded region name
 * @return the acquired lock, already held by the caller
 */
433 public Lock acquireRegionLock(final String encodedName) {
434 return locker.acquireLock(encodedName);
435 }
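// The returned lock is already held by the caller; a typical pattern (sketch)
// mirrors the internal usage in this class:
//
//   Lock lock = assignmentManager.acquireRegionLock(hri.getEncodedName());
//   try {
//     // inspect or mutate state for this region while no other assignment
//     // work can race on the same encoded name
//   } finally {
//     lock.unlock();
//   }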
436
/**
 * Now that failover cleanup is completed, notify the server manager to
 * process any queued-up dead servers.
 */
441 void failoverCleanupDone() {
442 failoverCleanupDone.set(true);
443 serverManager.processQueuedDeadServers();
444 }
445
/**
 * Called on startup.
 * Figures whether this is a fresh cluster start or we are joining an extant
 * running cluster. Rebuilds the user region states from hbase:meta and then
 * processes dead servers and regions in transition accordingly.
 * @throws IOException
 * @throws KeeperException
 * @throws InterruptedException
 */
453 void joinCluster() throws IOException,
454 KeeperException, InterruptedException {
455
456
457
458
459
460
461
462
463
464
465 Map<ServerName, List<HRegionInfo>> deadServers = rebuildUserRegions();
// This method will assign all user regions if a clean server startup, or
// it will reconstruct master state and clean up any leftovers from a
// previous master process.
470 processDeadServersAndRegionsInTransition(deadServers);
471
472 recoverTableInDisablingState();
473 recoverTableInEnablingState();
474 }
475
/**
 * Process all regions that are in transition in zookeeper and also
 * processes the list of dead servers by scanning hbase:meta.
 * Used by the master joining a cluster.  If we figure this is a clean
 * cluster startup, all user regions are assigned instead.
 * @param deadServers map of dead servers and the regions they were carrying;
 *                    can be null
 * @throws KeeperException
 * @throws IOException
 * @throws InterruptedException
 */
487 void processDeadServersAndRegionsInTransition(
488 final Map<ServerName, List<HRegionInfo>> deadServers)
489 throws KeeperException, IOException, InterruptedException {
490 List<String> nodes = ZKUtil.listChildrenNoWatch(watcher,
491 watcher.assignmentZNode);
492
493 if (nodes == null) {
494 String errorMessage = "Failed to get the children from ZK";
495 server.abort(errorMessage, new IOException(errorMessage));
496 return;
497 }
498
499 boolean failover = (!serverManager.getDeadServers().isEmpty() || !serverManager
500 .getRequeuedDeadServers().isEmpty());
501
502 if (!failover) {
503
504 Map<HRegionInfo, ServerName> regions = regionStates.getRegionAssignments();
505 for (HRegionInfo hri: regions.keySet()) {
506 if (!hri.isMetaTable()) {
507 LOG.debug("Found " + hri + " out on cluster");
508 failover = true;
509 break;
510 }
511 }
512 if (!failover) {
513
514 for (String encodedName: nodes) {
515 RegionState state = regionStates.getRegionState(encodedName);
516 if (state != null && !state.getRegion().isMetaRegion()) {
517 LOG.debug("Found " + state.getRegion().getRegionNameAsString() + " in RITs");
518 failover = true;
519 break;
520 }
521 }
522 }
523 }
524
525
526 if (failover) {
527 LOG.info("Found regions out on cluster or in RIT; presuming failover");
528
529
530 processDeadServersAndRecoverLostRegions(deadServers);
531 } else {
532
LOG.info("Clean cluster startup. Assigning user regions");
534 assignAllUserRegions();
535 }
536 }
537
/**
 * If the region is up in zk in transition, then do fixup and block and wait
 * until the region is assigned and out of transition.  Used on startup for
 * catalog regions.
 * @param hri region to look for
 * @return true if we processed a region in transition, false if the region
 *         was not up in zk in transition
 * @throws InterruptedException
 * @throws KeeperException
 * @throws IOException
 */
549 boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
550 throws InterruptedException, KeeperException, IOException {
551 String encodedRegionName = hri.getEncodedName();
552 if (!processRegionInTransition(encodedRegionName, hri)) {
553 return false;
554 }
555 LOG.debug("Waiting on " + HRegionInfo.prettyPrint(encodedRegionName));
556 while (!this.server.isStopped() &&
557 this.regionStates.isRegionInTransition(encodedRegionName)) {
558 RegionState state = this.regionStates.getRegionTransitionState(encodedRegionName);
559 if (state == null || !serverManager.isServerOnline(state.getServerName())) {
560
561
562
563 break;
564 }
565 this.regionStates.waitForUpdate(100);
566 }
567 return true;
568 }
569
/**
 * Process failover of the new master for region <code>encodedRegionName</code>
 * up in zookeeper.
 * @param encodedRegionName region to process failover for
 * @param regionInfo if null we'll go get it from the meta table
 * @return true if we processed <code>regionInfo</code> as a region in transition
 * @throws KeeperException
 * @throws IOException
 */
579 boolean processRegionInTransition(final String encodedRegionName,
580 final HRegionInfo regionInfo) throws KeeperException, IOException {
581
582
583
584
585 Lock lock = locker.acquireLock(encodedRegionName);
586 try {
587 Stat stat = new Stat();
588 byte [] data = ZKAssign.getDataAndWatch(watcher, encodedRegionName, stat);
589 if (data == null) return false;
590 RegionTransition rt;
591 try {
592 rt = RegionTransition.parseFrom(data);
593 } catch (DeserializationException e) {
LOG.warn("Failed to parse znode data", e);
595 return false;
596 }
597 HRegionInfo hri = regionInfo;
598 if (hri == null) {
599
600
601
602
603
604 hri = regionStates.getRegionInfo(rt.getRegionName());
605 EventType et = rt.getEventType();
606 if (hri == null && et != EventType.RS_ZK_REGION_MERGING
607 && et != EventType.RS_ZK_REQUEST_REGION_MERGE) {
608 LOG.warn("Couldn't find the region in recovering " + rt);
609 return false;
610 }
611 }
612 return processRegionsInTransition(
613 rt, hri, stat.getVersion());
614 } finally {
615 lock.unlock();
616 }
617 }
618
/**
 * Process the region transition already read from zookeeper for a region
 * found in transition during master failover. The caller must hold the lock
 * on the region.
 * @return true if the region was handled as a region in transition,
 *         false otherwise
 * @throws KeeperException
 */
627 boolean processRegionsInTransition(
628 final RegionTransition rt, final HRegionInfo regionInfo,
629 final int expectedVersion) throws KeeperException {
630 EventType et = rt.getEventType();
631
632 final ServerName sn = rt.getServerName();
633 final byte[] regionName = rt.getRegionName();
634 final String encodedName = HRegionInfo.encodeRegionName(regionName);
635 final String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
636 LOG.info("Processing " + prettyPrintedRegionName + " in state: " + et);
637
638 if (regionStates.isRegionInTransition(encodedName)) {
639 LOG.info("Processed region " + prettyPrintedRegionName + " in state: "
640 + et + ", does nothing since the region is already in transition "
641 + regionStates.getRegionTransitionState(encodedName));
642
643 return true;
644 }
645 if (!serverManager.isServerOnline(sn)) {
646
647
648
649 LOG.debug("RIT " + encodedName + " in state=" + rt.getEventType() +
650 " was on deadserver; forcing offline");
651 if (regionStates.isRegionOnline(regionInfo)) {
652
653
654
655 regionStates.regionOffline(regionInfo);
656 sendRegionClosedNotification(regionInfo);
657 }
658
659 regionStates.updateRegionState(regionInfo, State.OFFLINE, sn);
660
661 if (regionInfo.isMetaRegion()) {
662
663
664 MetaRegionTracker.setMetaLocation(watcher, sn);
665 } else {
666
667
668 regionStates.setLastRegionServerOfRegion(sn, encodedName);
669
670 if (!serverManager.isServerDead(sn)) {
671 serverManager.expireServer(sn);
672 }
673 }
674 return false;
675 }
676 switch (et) {
677 case M_ZK_REGION_CLOSING:
678
679
680 final RegionState rsClosing = regionStates.updateRegionState(rt, State.CLOSING);
681 this.executorService.submit(
682 new EventHandler(server, EventType.M_MASTER_RECOVERY) {
683 @Override
684 public void process() throws IOException {
685 ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
686 try {
687 unassign(regionInfo, rsClosing, expectedVersion, null, true, null);
688 if (regionStates.isRegionOffline(regionInfo)) {
689 assign(regionInfo, true);
690 }
691 } finally {
692 lock.unlock();
693 }
694 }
695 });
696 break;
697
698 case RS_ZK_REGION_CLOSED:
699 case RS_ZK_REGION_FAILED_OPEN:
700
701 regionStates.updateRegionState(regionInfo, State.CLOSED, sn);
702 invokeAssign(regionInfo);
703 break;
704
705 case M_ZK_REGION_OFFLINE:
706
707 regionStates.updateRegionState(rt, State.PENDING_OPEN);
708 final RegionState rsOffline = regionStates.getRegionState(regionInfo);
709 this.executorService.submit(
710 new EventHandler(server, EventType.M_MASTER_RECOVERY) {
711 @Override
712 public void process() throws IOException {
713 ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
714 try {
715 RegionPlan plan = new RegionPlan(regionInfo, null, sn);
716 addPlan(encodedName, plan);
717 assign(rsOffline, false, false);
718 } finally {
719 lock.unlock();
720 }
721 }
722 });
723 break;
724
725 case RS_ZK_REGION_OPENING:
726 regionStates.updateRegionState(rt, State.OPENING);
727 break;
728
729 case RS_ZK_REGION_OPENED:
730
731
732
733 regionStates.updateRegionState(rt, State.OPEN);
734 new OpenedRegionHandler(server, this, regionInfo, sn, expectedVersion).process();
735 break;
736 case RS_ZK_REQUEST_REGION_SPLIT:
737 case RS_ZK_REGION_SPLITTING:
738 case RS_ZK_REGION_SPLIT:
739
740
741
742 regionStates.regionOnline(regionInfo, sn);
743 regionStates.updateRegionState(rt, State.SPLITTING);
744 if (!handleRegionSplitting(
745 rt, encodedName, prettyPrintedRegionName, sn)) {
746 deleteSplittingNode(encodedName, sn);
747 }
748 break;
749 case RS_ZK_REQUEST_REGION_MERGE:
750 case RS_ZK_REGION_MERGING:
751 case RS_ZK_REGION_MERGED:
752 if (!handleRegionMerging(
753 rt, encodedName, prettyPrintedRegionName, sn)) {
754 deleteMergingNode(encodedName, sn);
755 }
756 break;
757 default:
throw new IllegalStateException("Received region in state " + et + ", which is not valid.");
759 }
760 LOG.info("Processed region " + prettyPrintedRegionName + " in state "
761 + et + ", on " + (serverManager.isServerOnline(sn) ? "" : "dead ")
762 + "server: " + sn);
763 return true;
764 }
765
/**
 * When a region is closed, it should be removed from the regionsToReopen map.
 * @param hri HRegionInfo of the region which was closed
 */
770 public void removeClosedRegion(HRegionInfo hri) {
771 if (regionsToReopen.remove(hri.getEncodedName()) != null) {
772 LOG.debug("Removed region from reopening regions because it was closed");
773 }
774 }
775
/**
 * Handles various states an unassigned node can be in.
 * <p>
 * Method is called when a state change is suspected for an unassigned node.
 * <p>
 * This deals with skipped transitions (we got a CLOSED but didn't see CLOSING
 * yet).
 * @param rt region transition read from the znode
 * @param expectedVersion the expected version of the znode
 */
786 void handleRegion(final RegionTransition rt, int expectedVersion) {
787 if (rt == null) {
788 LOG.warn("Unexpected NULL input for RegionTransition rt");
789 return;
790 }
791 final ServerName sn = rt.getServerName();
792
793 if (sn.equals(HBCK_CODE_SERVERNAME)) {
794 handleHBCK(rt);
795 return;
796 }
797 final long createTime = rt.getCreateTime();
798 final byte[] regionName = rt.getRegionName();
799 String encodedName = HRegionInfo.encodeRegionName(regionName);
800 String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
801
802 if (!serverManager.isServerOnline(sn)
803 && !ignoreStatesRSOffline.contains(rt.getEventType())) {
804 LOG.warn("Attempted to handle region transition for server but " +
805 "it is not online: " + prettyPrintedRegionName + ", " + rt);
806 return;
807 }
808
809 RegionState regionState =
810 regionStates.getRegionState(encodedName);
811 long startTime = System.currentTimeMillis();
812 if (LOG.isDebugEnabled()) {
813 boolean lateEvent = createTime < (startTime - 15000);
814 LOG.debug("Handling " + rt.getEventType() +
815 ", server=" + sn + ", region=" +
816 (prettyPrintedRegionName == null ? "null" : prettyPrintedRegionName) +
817 (lateEvent ? ", which is more than 15 seconds late" : "") +
818 ", current_state=" + regionState);
819 }
820
821
822 if (rt.getEventType() == EventType.M_ZK_REGION_OFFLINE) {
823 return;
824 }
825
826
827 Lock lock = locker.acquireLock(encodedName);
828 try {
829 RegionState latestState =
830 regionStates.getRegionState(encodedName);
831 if ((regionState == null && latestState != null)
832 || (regionState != null && latestState == null)
833 || (regionState != null && latestState != null
834 && latestState.getState() != regionState.getState())) {
835 LOG.warn("Region state changed from " + regionState + " to "
836 + latestState + ", while acquiring lock");
837 }
838 long waitedTime = System.currentTimeMillis() - startTime;
839 if (waitedTime > 5000) {
840 LOG.warn("Took " + waitedTime + "ms to acquire the lock");
841 }
842 regionState = latestState;
843 switch (rt.getEventType()) {
844 case RS_ZK_REQUEST_REGION_SPLIT:
845 case RS_ZK_REGION_SPLITTING:
846 case RS_ZK_REGION_SPLIT:
847 if (!handleRegionSplitting(
848 rt, encodedName, prettyPrintedRegionName, sn)) {
849 deleteSplittingNode(encodedName, sn);
850 }
851 break;
852
853 case RS_ZK_REQUEST_REGION_MERGE:
854 case RS_ZK_REGION_MERGING:
855 case RS_ZK_REGION_MERGED:
856
857
858 if (!handleRegionMerging(
859 rt, encodedName, prettyPrintedRegionName, sn)) {
860 deleteMergingNode(encodedName, sn);
861 }
862 break;
863
864 case M_ZK_REGION_CLOSING:
865
866
867 if (regionState == null
868 || !regionState.isPendingCloseOrClosingOnServer(sn)) {
869 LOG.warn("Received CLOSING for " + prettyPrintedRegionName
870 + " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
871 + regionStates.getRegionState(encodedName));
872 return;
873 }
874
875 regionStates.updateRegionState(rt, State.CLOSING);
876 break;
877
878 case RS_ZK_REGION_CLOSED:
879
880 if (regionState == null
881 || !regionState.isPendingCloseOrClosingOnServer(sn)) {
882 LOG.warn("Received CLOSED for " + prettyPrintedRegionName
883 + " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
884 + regionStates.getRegionState(encodedName));
885 return;
886 }
887
888
889
890 new ClosedRegionHandler(server, this, regionState.getRegion()).process();
891 updateClosedRegionHandlerTracker(regionState.getRegion());
892 break;
893
894 case RS_ZK_REGION_FAILED_OPEN:
895 if (regionState == null
896 || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
897 LOG.warn("Received FAILED_OPEN for " + prettyPrintedRegionName
898 + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
899 + regionStates.getRegionState(encodedName));
900 return;
901 }
902 AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
903 if (failedOpenCount == null) {
904 failedOpenCount = new AtomicInteger();
905
906
907
908 failedOpenTracker.put(encodedName, failedOpenCount);
909 }
910 if (failedOpenCount.incrementAndGet() >= maximumAttempts) {
911 regionStates.updateRegionState(rt, State.FAILED_OPEN);
912
913
914 failedOpenTracker.remove(encodedName);
915 } else {
916
917 regionState = regionStates.updateRegionState(rt, State.CLOSED);
918 if (regionState != null) {
919
920
921 try {
922 getRegionPlan(regionState.getRegion(), sn, true);
923 new ClosedRegionHandler(server, this, regionState.getRegion()).process();
924 } catch (HBaseIOException e) {
925 LOG.warn("Failed to get region plan", e);
926 }
927 }
928 }
929 break;
930
931 case RS_ZK_REGION_OPENING:
932
933
934 if (regionState == null
935 || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
936 LOG.warn("Received OPENING for " + prettyPrintedRegionName
937 + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
938 + regionStates.getRegionState(encodedName));
939 return;
940 }
941
942 regionStates.updateRegionState(rt, State.OPENING);
943 break;
944
945 case RS_ZK_REGION_OPENED:
946
947 if (regionState == null
948 || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
949 LOG.warn("Received OPENED for " + prettyPrintedRegionName
950 + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
951 + regionStates.getRegionState(encodedName));
952
953 if (regionState != null) {
954
955
956
957 unassign(regionState.getRegion(), null, -1, null, false, sn);
958 }
959 return;
960 }
961
962 regionState = regionStates.updateRegionState(rt, State.OPEN);
963 if (regionState != null) {
964 failedOpenTracker.remove(encodedName);
965 new OpenedRegionHandler(
966 server, this, regionState.getRegion(), sn, expectedVersion).process();
967 updateOpenedRegionHandlerTracker(regionState.getRegion());
968 }
969 break;
970
971 default:
972 throw new IllegalStateException("Received event is not valid.");
973 }
974 } finally {
975 lock.unlock();
976 }
977 }
978
979
980 boolean wasClosedHandlerCalled(HRegionInfo hri) {
981 AtomicBoolean b = closedRegionHandlerCalled.get(hri);
982
983
984
985 return b == null ? false : b.compareAndSet(true, false);
986 }
987
988
989 boolean wasOpenedHandlerCalled(HRegionInfo hri) {
990 AtomicBoolean b = openedRegionHandlerCalled.get(hri);
991
992
993
994 return b == null ? false : b.compareAndSet(true, false);
995 }
996
997
998 void initializeHandlerTrackers() {
999 closedRegionHandlerCalled = new HashMap<HRegionInfo, AtomicBoolean>();
1000 openedRegionHandlerCalled = new HashMap<HRegionInfo, AtomicBoolean>();
1001 }
1002
1003 void updateClosedRegionHandlerTracker(HRegionInfo hri) {
1004 if (closedRegionHandlerCalled != null) {
1005 closedRegionHandlerCalled.put(hri, new AtomicBoolean(true));
1006 }
1007 }
1008
1009 void updateOpenedRegionHandlerTracker(HRegionInfo hri) {
1010 if (openedRegionHandlerCalled != null) {
1011 openedRegionHandlerCalled.put(hri, new AtomicBoolean(true));
1012 }
1013 }
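// These trackers are test hooks: a unit test is expected to call
// initializeHandlerTrackers() first, then poll wasClosedHandlerCalled()/
// wasOpenedHandlerCalled(), which reset the flag on a successful read.
// Sketch (assuming a test has a handle "am" on this AssignmentManager and a
// region "hri" it is watching):
//
//   am.initializeHandlerTrackers();
//   // ... trigger a region close via ZK ...
//   while (!am.wasClosedHandlerCalled(hri)) {
//     Thread.sleep(100);
//   }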
1014
/**
 * Pushes the favored nodes of the given regions into hbase:meta, if the
 * favored node load balancer is in use; otherwise this is a no-op.
 */
1020 void processFavoredNodes(List<HRegionInfo> regions) throws IOException {
1021 if (!shouldAssignRegionsWithFavoredNodes) return;
1022
1023
1024 Map<HRegionInfo, List<ServerName>> regionToFavoredNodes =
1025 new HashMap<HRegionInfo, List<ServerName>>();
1026 for (HRegionInfo region : regions) {
1027 regionToFavoredNodes.put(region,
1028 ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region));
1029 }
1030 FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(regionToFavoredNodes, catalogTracker);
1031 }
1032
/**
 * Handle a ZK unassigned node transition triggered by the HBCK repair tool.
 * <p>
 * This is handled in a separate code path because it breaks the normal rules.
 * @param rt region transition posted by HBCK
 */
1039 private void handleHBCK(RegionTransition rt) {
1040 String encodedName = HRegionInfo.encodeRegionName(rt.getRegionName());
1041 LOG.info("Handling HBCK triggered transition=" + rt.getEventType() +
1042 ", server=" + rt.getServerName() + ", region=" +
1043 HRegionInfo.prettyPrint(encodedName));
1044 RegionState regionState = regionStates.getRegionTransitionState(encodedName);
1045 switch (rt.getEventType()) {
1046 case M_ZK_REGION_OFFLINE:
1047 HRegionInfo regionInfo;
1048 if (regionState != null) {
1049 regionInfo = regionState.getRegion();
1050 } else {
1051 try {
1052 byte [] name = rt.getRegionName();
1053 Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(catalogTracker, name);
1054 regionInfo = p.getFirst();
1055 } catch (IOException e) {
1056 LOG.info("Exception reading hbase:meta doing HBCK repair operation", e);
1057 return;
1058 }
1059 }
1060 LOG.info("HBCK repair is triggering assignment of region=" +
1061 regionInfo.getRegionNameAsString());
1062
1063 assign(regionInfo, false);
1064 break;
1065
1066 default:
1067 LOG.warn("Received unexpected region state from HBCK: " + rt.toString());
1068 break;
1069 }
1070
1071 }
1072
/**
 * New unassigned node has been created.
 *
 * <p>This happens when an RS begins the OPENING, SPLITTING or CLOSING of a
 * region by creating an unassigned node.
 *
 * <p>When this happens we must:
 * <ol>
 *   <li>Watch the node for further events</li>
 *   <li>Read and handle the state in the node</li>
 * </ol>
 */
1087 @Override
1088 public void nodeCreated(String path) {
1089 handleAssignmentEvent(path);
1090 }
1091
/**
 * Existing unassigned node has had data changed.
 *
 * <p>This happens when an RS transitions from OFFLINE to OPENING, or between
 * OPENING/OPENED and CLOSING/CLOSED.
 *
 * <p>When this happens we must:
 * <ol>
 *   <li>Watch the node for further events</li>
 *   <li>Read and handle the state in the node</li>
 * </ol>
 */
1104 @Override
1105 public void nodeDataChanged(String path) {
1106 handleAssignmentEvent(path);
1107 }
1108
// We don't want to have two events on the same region managed simultaneously.
// For this reason, we need to wait if an event on the same region is currently in progress.
// So we track the region names of the events in progress, and we keep a waiting list.
1113 private final Set<String> regionsInProgress = new HashSet<String>();
// In a LinkedHashMultimap, insertion order is preserved when iterating, so the
// waiting events for a region are handled in the order we received them.
1116 private final LinkedHashMultimap <String, RegionRunnable>
1117 zkEventWorkerWaitingList = LinkedHashMultimap.create();
1118
/**
 * A specific runnable that works only on a region.
 */
1122 private interface RegionRunnable extends Runnable{
/**
 * @return the name of the region this runnable works on
 */
1126 String getRegionName();
1127 }
1128
/**
 * Submit a task, making sure there is only one task at a time for a given
 * region, and that tasks for the same region run in submission order.
 */
1133 protected void zkEventWorkersSubmit(final RegionRunnable regRunnable) {
1134
1135 synchronized (regionsInProgress) {
1136
1137
1138 if (regionsInProgress.contains(regRunnable.getRegionName())) {
1139 synchronized (zkEventWorkerWaitingList){
1140 zkEventWorkerWaitingList.put(regRunnable.getRegionName(), regRunnable);
1141 }
1142 return;
1143 }
1144
1145
1146 regionsInProgress.add(regRunnable.getRegionName());
1147 zkEventWorkers.submit(new Runnable() {
1148 @Override
1149 public void run() {
1150 try {
1151 regRunnable.run();
1152 } finally {
1153
1154
1155 synchronized (regionsInProgress) {
1156 regionsInProgress.remove(regRunnable.getRegionName());
1157 synchronized (zkEventWorkerWaitingList) {
1158 java.util.Set<RegionRunnable> waiting = zkEventWorkerWaitingList.get(
1159 regRunnable.getRegionName());
1160 if (!waiting.isEmpty()) {
1161
1162 RegionRunnable toSubmit = waiting.iterator().next();
1163 zkEventWorkerWaitingList.remove(toSubmit.getRegionName(), toSubmit);
1164 zkEventWorkersSubmit(toSubmit);
1165 }
1166 }
1167 }
1168 }
1169 }
1170 });
1171 }
1172 }
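// Usage sketch for the worker-submission helper above: events for the same
// region are serialized, events for different regions may run concurrently.
//
//   zkEventWorkersSubmit(new RegionRunnable() {
//     @Override
//     public String getRegionName() {
//       return regionName; // encoded region name the event applies to
//     }
//     @Override
//     public void run() {
//       // handle the ZK event for this region
//     }
//   });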
1173
1174 @Override
1175 public void nodeDeleted(final String path) {
1176 if (path.startsWith(watcher.assignmentZNode)) {
1177 final String regionName = ZKAssign.getRegionName(watcher, path);
1178 zkEventWorkersSubmit(new RegionRunnable() {
1179 @Override
1180 public String getRegionName() {
1181 return regionName;
1182 }
1183
1184 @Override
1185 public void run() {
1186 Lock lock = locker.acquireLock(regionName);
1187 try {
1188 RegionState rs = regionStates.getRegionTransitionState(regionName);
1189 if (rs == null) {
1190 rs = regionStates.getRegionState(regionName);
1191 if (rs == null || !rs.isMergingNew()) {
1192
1193 return;
1194 }
1195 }
1196
1197 HRegionInfo regionInfo = rs.getRegion();
1198 String regionNameStr = regionInfo.getRegionNameAsString();
1199 LOG.debug("Znode " + regionNameStr + " deleted, state: " + rs);
1200 boolean disabled = getZKTable().isDisablingOrDisabledTable(regionInfo.getTable());
1201 ServerName serverName = rs.getServerName();
1202 if (serverManager.isServerOnline(serverName)) {
1203 if (rs.isOnServer(serverName)
1204 && (rs.isOpened() || rs.isSplitting())) {
1205 regionOnline(regionInfo, serverName);
1206 if (disabled) {
1207
LOG.info("Opened " + regionNameStr
  + " but this table is disabled, triggering close of region");
1210 unassign(regionInfo);
1211 }
1212 } else if (rs.isMergingNew()) {
1213 synchronized (regionStates) {
1214 String p = regionInfo.getEncodedName();
1215 PairOfSameType<HRegionInfo> regions = mergingRegions.get(p);
1216 if (regions != null) {
1217 onlineMergingRegion(disabled, regions.getFirst(), serverName);
1218 onlineMergingRegion(disabled, regions.getSecond(), serverName);
1219 }
1220 }
1221 }
1222 }
1223 } finally {
1224 lock.unlock();
1225 }
1226 }
1227
1228 private void onlineMergingRegion(boolean disabled,
1229 final HRegionInfo hri, final ServerName serverName) {
1230 RegionState regionState = regionStates.getRegionState(hri);
1231 if (regionState != null && regionState.isMerging()
1232 && regionState.isOnServer(serverName)) {
1233 regionOnline(regionState.getRegion(), serverName);
1234 if (disabled) {
1235 unassign(hri);
1236 }
1237 }
1238 }
1239 });
1240 }
1241 }
1242
/**
 * Children of the assignment znode have changed: new unassigned nodes may
 * have been created.
 *
 * <p>When this happens we must:
 * <ol>
 *   <li>Watch the assignment znode for further children-changed events</li>
 *   <li>Fetch and watch all new children so later data changes are seen</li>
 * </ol>
 */
1255 @Override
1256 public void nodeChildrenChanged(String path) {
1257 if (path.equals(watcher.assignmentZNode)) {
1258 zkEventWorkers.submit(new Runnable() {
1259 @Override
1260 public void run() {
1261 try {
1262
1263 List<String> children =
1264 ZKUtil.listChildrenAndWatchForNewChildren(
1265 watcher, watcher.assignmentZNode);
1266 if (children != null) {
1267 Stat stat = new Stat();
1268 for (String child : children) {
1269
1270
1271
1272 if (!regionStates.isRegionInTransition(child)) {
1273 ZKAssign.getDataAndWatch(watcher, child, stat);
1274 }
1275 }
1276 }
1277 } catch (KeeperException e) {
1278 server.abort("Unexpected ZK exception reading unassigned children", e);
1279 }
1280 }
1281 });
1282 }
1283 }
1284
/**
 * Marks the region as online.  Removes it from regions in transition and
 * updates the in-memory assignment information.
 * <p>
 * Used when a region has been successfully opened on a region server.
 * @param regionInfo the region that was opened
 * @param sn the server the region was opened on
 */
1293 void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1294 numRegionsOpened.incrementAndGet();
1295 regionStates.regionOnline(regionInfo, sn);
1296
1297
1298 clearRegionPlan(regionInfo);
1299
1300 addToServersInUpdatingTimer(sn);
1301 balancer.regionOnline(regionInfo, sn);
1302
1303
1304 sendRegionOpenedNotification(regionInfo, sn);
1305 }
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315 private void handleAssignmentEvent(final String path) {
1316 if (path.startsWith(watcher.assignmentZNode)) {
1317 final String regionName = ZKAssign.getRegionName(watcher, path);
1318
1319 zkEventWorkersSubmit(new RegionRunnable() {
1320 @Override
1321 public String getRegionName() {
1322 return regionName;
1323 }
1324
1325 @Override
1326 public void run() {
1327 try {
1328 Stat stat = new Stat();
1329 byte [] data = ZKAssign.getDataAndWatch(watcher, path, stat);
1330 if (data == null) return;
1331
1332 RegionTransition rt = RegionTransition.parseFrom(data);
1333 handleRegion(rt, stat.getVersion());
1334 } catch (KeeperException e) {
1335 server.abort("Unexpected ZK exception reading unassigned node data", e);
1336 } catch (DeserializationException e) {
1337 server.abort("Unexpected exception deserializing node data", e);
1338 }
1339 }
1340 });
1341 }
1342 }
1343
1344
1345
1346
1347
1348
1349 private void addToServersInUpdatingTimer(final ServerName sn) {
1350 if (tomActivated){
1351 this.serversInUpdatingTimer.add(sn);
1352 }
1353 }
1354
/**
 * Touch timers for all regions in transition that have the passed
 * <code>sn</code> in common. Call this method whenever a server checks in.
 * Doing so helps the case where a new region server has just joined and has
 * been handed many regions to open: refreshing the timers on each successful
 * open keeps the remaining regions from timing out while they wait their turn.
 * @param sn the destination server whose in-transition regions get refreshed
 */
1368 private void updateTimers(final ServerName sn) {
1369 Preconditions.checkState(tomActivated);
1370 if (sn == null) return;
1371
1372
1373
1374
1375
1376 List<Map.Entry<String, RegionPlan>> rps;
1377 synchronized(this.regionPlans) {
1378 rps = new ArrayList<Map.Entry<String, RegionPlan>>(regionPlans.entrySet());
1379 }
1380
1381 for (Map.Entry<String, RegionPlan> e : rps) {
1382 if (e.getValue() != null && e.getKey() != null && sn.equals(e.getValue().getDestination())) {
1383 RegionState regionState = regionStates.getRegionTransitionState(e.getKey());
1384 if (regionState != null) {
1385 regionState.updateTimestampToNow();
1386 }
1387 }
1388 }
1389 }
1390
/**
 * Marks the region as offline.  Removes it from regions in transition and
 * removes the in-memory assignment information.
 * <p>
 * Used when a region has been closed and should remain closed.
 * @param regionInfo the region to mark offline
 */
1398 public void regionOffline(final HRegionInfo regionInfo) {
1399 regionOffline(regionInfo, null);
1400 }
1401
1402 public void offlineDisabledRegion(HRegionInfo regionInfo) {
1403
1404 LOG.debug("Table being disabled so deleting ZK node and removing from " +
1405 "regions in transition, skipping assignment of region " +
1406 regionInfo.getRegionNameAsString());
1407 String encodedName = regionInfo.getEncodedName();
1408 deleteNodeInStates(encodedName, "closed", null,
1409 EventType.RS_ZK_REGION_CLOSED, EventType.M_ZK_REGION_OFFLINE);
1410 regionOffline(regionInfo);
1411 }
1412
/**
 * Assigns the specified region.
 * <p>
 * If a RegionPlan is available with a valid destination then it will be used
 * to determine what server the region is assigned to.  If no RegionPlan is
 * available, the region will be assigned to a random available server.
 * <p>
 * Updates the RegionState and sends the OPEN RPC.
 * <p>
 * This will only succeed if the region is in transition and in a CLOSED or
 * OFFLINE state, or not in transition (in memory, not zk), and of course the
 * chosen server is up and running (it may have just crashed!).
 *
 * @param region the region to be assigned
 * @param setOfflineInZK whether the ZK node should be created/transitioned to
 *                       an OFFLINE state before assigning the region
 */
1433 public void assign(HRegionInfo region, boolean setOfflineInZK) {
1434 assign(region, setOfflineInZK, false);
1435 }
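// A common call path (sketch); "hri" is a hypothetical HRegionInfo already
// known to RegionStates:
//
//   assignmentManager.assign(hri, true);        // reuse an existing plan if any
//   assignmentManager.assign(hri, true, true);  // force a new plan (new target server)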
1436
/**
 * Use care with forceNewPlan. It could cause double assignment.
 */
1440 public void assign(HRegionInfo region,
1441 boolean setOfflineInZK, boolean forceNewPlan) {
1442 if (isDisabledorDisablingRegionInRIT(region)) {
1443 return;
1444 }
1445 if (this.serverManager.isClusterShutdown()) {
1446 LOG.info("Cluster shutdown is set; skipping assign of " +
1447 region.getRegionNameAsString());
1448 return;
1449 }
1450 String encodedName = region.getEncodedName();
1451 Lock lock = locker.acquireLock(encodedName);
1452 try {
1453 RegionState state = forceRegionStateToOffline(region, forceNewPlan);
1454 if (state != null) {
1455 if (regionStates.wasRegionOnDeadServer(encodedName)) {
1456 LOG.info("Skip assigning " + region.getRegionNameAsString()
1457 + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName)
1458 + " is dead but not processed yet");
1459 return;
1460 }
1461 assign(state, setOfflineInZK, forceNewPlan);
1462 }
1463 } finally {
1464 lock.unlock();
1465 }
1466 }
1467
/**
 * Bulk assign regions to <code>destination</code>.
 * @param destination the server to assign the regions to
 * @param regions the regions to assign
 * @return true if successful
 */
1474 boolean assign(final ServerName destination, final List<HRegionInfo> regions) {
1475 long startTime = EnvironmentEdgeManager.currentTimeMillis();
1476 try {
1477 int regionCount = regions.size();
1478 if (regionCount == 0) {
1479 return true;
1480 }
1481 LOG.debug("Assigning " + regionCount + " region(s) to " + destination.toString());
1482 Set<String> encodedNames = new HashSet<String>(regionCount);
1483 for (HRegionInfo region : regions) {
1484 encodedNames.add(region.getEncodedName());
1485 }
1486
1487 List<HRegionInfo> failedToOpenRegions = new ArrayList<HRegionInfo>();
1488 Map<String, Lock> locks = locker.acquireLocks(encodedNames);
1489 try {
1490 AtomicInteger counter = new AtomicInteger(0);
1491 Map<String, Integer> offlineNodesVersions = new ConcurrentHashMap<String, Integer>();
1492 OfflineCallback cb = new OfflineCallback(
1493 watcher, destination, counter, offlineNodesVersions);
1494 Map<String, RegionPlan> plans = new HashMap<String, RegionPlan>(regions.size());
1495 List<RegionState> states = new ArrayList<RegionState>(regions.size());
1496 for (HRegionInfo region : regions) {
1497 String encodedName = region.getEncodedName();
1498 if (!isDisabledorDisablingRegionInRIT(region)) {
1499 RegionState state = forceRegionStateToOffline(region, false);
1500 boolean onDeadServer = false;
1501 if (state != null) {
1502 if (regionStates.wasRegionOnDeadServer(encodedName)) {
1503 LOG.info("Skip assigning " + region.getRegionNameAsString()
1504 + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName)
1505 + " is dead but not processed yet");
1506 onDeadServer = true;
1507 } else if (asyncSetOfflineInZooKeeper(state, cb, destination)) {
1508 RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
1509 plans.put(encodedName, plan);
1510 states.add(state);
1511 continue;
1512 }
1513 }
1514
1515 if (!onDeadServer) {
1516 LOG.info("failed to force region state to offline or "
1517 + "failed to set it offline in ZK, will reassign later: " + region);
1518 failedToOpenRegions.add(region);
1519 }
1520 }
1521
1522
1523 Lock lock = locks.remove(encodedName);
1524 lock.unlock();
1525 }
1526
1527
1528 int total = states.size();
1529 for (int oldCounter = 0; !server.isStopped();) {
1530 int count = counter.get();
1531 if (oldCounter != count) {
1532 LOG.info(destination.toString() + " unassigned znodes=" + count +
1533 " of total=" + total);
1534 oldCounter = count;
1535 }
1536 if (count >= total) break;
1537 Threads.sleep(5);
1538 }
1539
1540 if (server.isStopped()) {
1541 return false;
1542 }
1543
1544
1545
1546 this.addPlans(plans);
1547
1548 List<Triple<HRegionInfo, Integer, List<ServerName>>> regionOpenInfos =
1549 new ArrayList<Triple<HRegionInfo, Integer, List<ServerName>>>(states.size());
1550 for (RegionState state: states) {
1551 HRegionInfo region = state.getRegion();
1552 String encodedRegionName = region.getEncodedName();
1553 Integer nodeVersion = offlineNodesVersions.get(encodedRegionName);
1554 if (nodeVersion == null || nodeVersion == -1) {
1555 LOG.warn("failed to offline in zookeeper: " + region);
1556 failedToOpenRegions.add(region);
1557 Lock lock = locks.remove(encodedRegionName);
1558 lock.unlock();
1559 } else {
1560 regionStates.updateRegionState(
1561 region, State.PENDING_OPEN, destination);
1562 List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
1563 if (this.shouldAssignRegionsWithFavoredNodes) {
1564 favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
1565 }
1566 regionOpenInfos.add(new Triple<HRegionInfo, Integer, List<ServerName>>(
1567 region, nodeVersion, favoredNodes));
1568 }
1569 }
1570
1571
1572 try {
1573
1574
1575 long maxWaitTime = System.currentTimeMillis() +
1576 this.server.getConfiguration().
1577 getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1578 for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
1579 try {
1580 List<RegionOpeningState> regionOpeningStateList = serverManager
1581 .sendRegionOpen(destination, regionOpenInfos);
1582 if (regionOpeningStateList == null) {
1583
1584 return false;
1585 }
1586 for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
1587 RegionOpeningState openingState = regionOpeningStateList.get(k);
1588 if (openingState != RegionOpeningState.OPENED) {
1589 HRegionInfo region = regionOpenInfos.get(k).getFirst();
1590 if (openingState == RegionOpeningState.ALREADY_OPENED) {
1591 processAlreadyOpenedRegion(region, destination);
1592 } else if (openingState == RegionOpeningState.FAILED_OPENING) {
1593
1594 failedToOpenRegions.add(region);
1595 } else {
1596 LOG.warn("THIS SHOULD NOT HAPPEN: unknown opening state "
1597 + openingState + " in assigning region " + region);
1598 }
1599 }
1600 }
1601 break;
1602 } catch (IOException e) {
1603 if (e instanceof RemoteException) {
1604 e = ((RemoteException)e).unwrapRemoteException();
1605 }
1606 if (e instanceof RegionServerStoppedException) {
1607 LOG.warn("The region server was shut down, ", e);
1608
1609 return false;
1610 } else if (e instanceof ServerNotRunningYetException) {
1611 long now = System.currentTimeMillis();
1612 if (now < maxWaitTime) {
1613 LOG.debug("Server is not yet up; waiting up to " +
1614 (maxWaitTime - now) + "ms", e);
1615 Thread.sleep(100);
1616 i--;
1617 continue;
1618 }
1619 } else if (e instanceof java.net.SocketTimeoutException
1620 && this.serverManager.isServerOnline(destination)) {
1621
1622
1623
1624
1625 if (LOG.isDebugEnabled()) {
1626 LOG.debug("Bulk assigner openRegion() to " + destination
1627 + " has timed out, but the regions might"
1628 + " already be opened on it.", e);
1629 }
1630 continue;
1631 }
1632 throw e;
1633 }
1634 }
1635 } catch (IOException e) {
1636
1637 LOG.info("Unable to communicate with " + destination
1638 + " in order to assign regions, ", e);
1639 return false;
1640 } catch (InterruptedException e) {
1641 throw new RuntimeException(e);
1642 }
1643 } finally {
1644 for (Lock lock : locks.values()) {
1645 lock.unlock();
1646 }
1647 }
1648
1649 if (!failedToOpenRegions.isEmpty()) {
1650 for (HRegionInfo region : failedToOpenRegions) {
1651 if (!regionStates.isRegionOnline(region)) {
1652 invokeAssign(region);
1653 }
1654 }
1655 }
1656 LOG.debug("Bulk assigning done for " + destination);
1657 return true;
1658 } finally {
1659 metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTimeMillis() - startTime);
1660 }
1661 }
1662
/**
 * Send a CLOSE RPC if the server is online, otherwise offline the region.
 *
 * The RPC is sent to the server found in the region state, if one is passed
 * in; otherwise it is sent to the <code>src</code> server specified. If no
 * region state is passed in, region states are not updated at all; the RPC
 * is simply sent. This is useful for cleanup without disturbing region
 * states (see handleRegion, region opened on an unexpected server, for an
 * example).
 */
1673 private void unassign(final HRegionInfo region,
1674 final RegionState state, final int versionOfClosingNode,
1675 final ServerName dest, final boolean transitionInZK,
1676 final ServerName src) {
1677 ServerName server = src;
1678 if (state != null) {
1679 server = state.getServerName();
1680 }
1681 long maxWaitTime = -1;
1682 for (int i = 1; i <= this.maximumAttempts; i++) {
1683 if (this.server.isStopped() || this.server.isAborted()) {
1684 LOG.debug("Server stopped/aborted; skipping unassign of " + region);
1685 return;
1686 }
1687
1688 if (!serverManager.isServerOnline(server)) {
1689 LOG.debug("Offline " + region.getRegionNameAsString()
1690 + ", no need to unassign since it's on a dead server: " + server);
1691 if (transitionInZK) {
1692
1693 deleteClosingOrClosedNode(region, server);
1694 }
1695 if (state != null) {
1696 regionOffline(region);
1697 }
1698 return;
1699 }
1700 try {
1701
1702 if (serverManager.sendRegionClose(server, region,
1703 versionOfClosingNode, dest, transitionInZK)) {
1704 LOG.debug("Sent CLOSE to " + server + " for region " +
1705 region.getRegionNameAsString());
1706 if (!transitionInZK && state != null) {
1707
1708
1709 unassign(region, state, versionOfClosingNode,
1710 dest, transitionInZK,src);
1711 }
1712 return;
1713 }
1714
1715
1716 LOG.warn("Server " + server + " region CLOSE RPC returned false for " +
1717 region.getRegionNameAsString());
1718 } catch (Throwable t) {
1719 if (t instanceof RemoteException) {
1720 t = ((RemoteException)t).unwrapRemoteException();
1721 }
1722 boolean logRetries = true;
1723 if (t instanceof NotServingRegionException
1724 || t instanceof RegionServerStoppedException
1725 || t instanceof ServerNotRunningYetException) {
1726 LOG.debug("Offline " + region.getRegionNameAsString()
1727 + ", it's not any more on " + server, t);
1728 if (transitionInZK) {
1729 deleteClosingOrClosedNode(region, server);
1730 }
1731 if (state != null) {
1732 regionOffline(region);
1733 }
1734 return;
1735 } else if ((t instanceof FailedServerException) || (state != null &&
1736 t instanceof RegionAlreadyInTransitionException)) {
1737 long sleepTime = 0;
1738 Configuration conf = this.server.getConfiguration();
1739 if(t instanceof FailedServerException) {
1740 sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
1741 RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
1742 } else {
1743
LOG.debug("Updating the timestamp for " + state);
1745 state.updateTimestampToNow();
1746 if (maxWaitTime < 0) {
1747 maxWaitTime =
1748 EnvironmentEdgeManager.currentTimeMillis()
1749 + conf.getLong(ALREADY_IN_TRANSITION_WAITTIME,
1750 DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
1751 }
1752 long now = EnvironmentEdgeManager.currentTimeMillis();
1753 if (now < maxWaitTime) {
1754 LOG.debug("Region is already in transition; "
1755 + "waiting up to " + (maxWaitTime - now) + "ms", t);
1756 sleepTime = 100;
1757 i--;
1758 logRetries = false;
1759 }
1760 }
1761 try {
1762 if (sleepTime > 0) {
1763 Thread.sleep(sleepTime);
1764 }
1765 } catch (InterruptedException ie) {
1766 LOG.warn("Failed to unassign "
1767 + region.getRegionNameAsString() + " since interrupted", ie);
1768 Thread.currentThread().interrupt();
1769 if (!tomActivated && state != null) {
1770 regionStates.updateRegionState(region, State.FAILED_CLOSE);
1771 }
1772 return;
1773 }
1774 }
1775
1776 if (logRetries) {
1777 LOG.info("Server " + server + " returned " + t + " for "
1778 + region.getRegionNameAsString() + ", try=" + i
1779 + " of " + this.maximumAttempts, t);
1780
1781 }
1782 }
1783 }
1784
1785 if (!tomActivated && state != null) {
1786 regionStates.updateRegionState(region, State.FAILED_CLOSE);
1787 }
1788 }
1789
/**
 * Forces the in-memory region state to OFFLINE so the region can be assigned;
 * returns null if the region should not be assigned right now (for example it
 * is already in transition and forceNewPlan is false, or it was on a dead
 * server that has not been processed yet).
 */
1793 private RegionState forceRegionStateToOffline(
1794 final HRegionInfo region, final boolean forceNewPlan) {
1795 RegionState state = regionStates.getRegionState(region);
1796 if (state == null) {
1797 LOG.warn("Assigning a region not in region states: " + region);
1798 state = regionStates.createRegionState(region);
1799 }
1800
1801 ServerName sn = state.getServerName();
1802 if (forceNewPlan && LOG.isDebugEnabled()) {
1803 LOG.debug("Force region state offline " + state);
1804 }
1805
1806 switch (state.getState()) {
1807 case OPEN:
1808 case OPENING:
1809 case PENDING_OPEN:
1810 case CLOSING:
1811 case PENDING_CLOSE:
1812 if (!forceNewPlan) {
1813 LOG.debug("Skip assigning " +
1814 region + ", it is already " + state);
1815 return null;
1816 }
1817 case FAILED_CLOSE:
1818 case FAILED_OPEN:
1819 unassign(region, state, -1, null, false, null);
1820 state = regionStates.getRegionState(region);
1821 if (state.isFailedClose()) {
1822
1823
1824 LOG.info("Skip assigning " +
1825 region + ", we couldn't close it: " + state);
1826 return null;
1827 }
1828 case OFFLINE:
1829
1830
1831
1832
1833
1834 if (regionStates.isServerDeadAndNotProcessed(sn)
1835 && wasRegionOnDeadServerByMeta(region, sn)) {
1836 LOG.info("Skip assigning " + region.getRegionNameAsString()
1837 + ", it is on a dead but not processed yet server");
1838 return null;
1839 }
1840 case CLOSED:
1841 break;
1842 default:
1843 LOG.error("Trying to assign region " + region
1844 + ", which is " + state);
1845 return null;
1846 }
1847 return state;
1848 }
1849
1850 private boolean wasRegionOnDeadServerByMeta(
1851 final HRegionInfo region, final ServerName sn) {
1852 try {
1853 if (region.isMetaRegion()) {
1854 ServerName server = catalogTracker.getMetaLocation();
1855 return regionStates.isServerDeadAndNotProcessed(server);
1856 }
1857 while (!server.isStopped()) {
1858 try {
1859 catalogTracker.waitForMeta();
1860 Pair<HRegionInfo, ServerName> r =
1861 MetaReader.getRegion(catalogTracker, region.getRegionName());
1862 ServerName server = r == null ? null : r.getSecond();
1863 return regionStates.isServerDeadAndNotProcessed(server);
1864 } catch (IOException ioe) {
1865 LOG.info("Received exception accessing hbase:meta during force assign "
1866 + region.getRegionNameAsString() + ", retrying", ioe);
1867 }
1868 }
1869 } catch (InterruptedException e) {
1870 Thread.currentThread().interrupt();
1871 LOG.info("Interrupted accessing hbase:meta", e);
1872 }
1873
1874 return regionStates.isServerDeadAndNotProcessed(sn);
1875 }
1876
/**
 * Caller must hold the lock on the passed <code>state</code> object.
 * @param state the region state to assign
 * @param setOfflineInZK whether to (re)create the OFFLINE znode first
 * @param forceNewPlan whether to force a new region plan
 */
1883 private void assign(RegionState state,
1884 final boolean setOfflineInZK, final boolean forceNewPlan) {
1885 long startTime = EnvironmentEdgeManager.currentTimeMillis();
1886 try {
1887 Configuration conf = server.getConfiguration();
1888 RegionState currentState = state;
1889 int versionOfOfflineNode = -1;
1890 RegionPlan plan = null;
1891 long maxWaitTime = -1;
1892 HRegionInfo region = state.getRegion();
1893 RegionOpeningState regionOpenState;
1894 Throwable previousException = null;
1895 for (int i = 1; i <= maximumAttempts; i++) {
1896 if (server.isStopped() || server.isAborted()) {
1897 LOG.info("Skip assigning " + region.getRegionNameAsString()
1898 + ", the server is stopped/aborted");
1899 return;
1900 }
1901 if (plan == null) {
1902 try {
1903 plan = getRegionPlan(region, forceNewPlan);
1904 } catch (HBaseIOException e) {
1905 LOG.warn("Failed to get region plan", e);
1906 }
1907 }
1908 if (plan == null) {
1909 LOG.warn("Unable to determine a plan to assign " + region);
1910 if (tomActivated){
1911 this.timeoutMonitor.setAllRegionServersOffline(true);
1912 } else {
1913 if (region.isMetaRegion()) {
1914 try {
1915 Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment);
1916 if (i == maximumAttempts) i = 1;
1917 continue;
1918 } catch (InterruptedException e) {
1919 LOG.error("Got exception while waiting for hbase:meta assignment");
1920 Thread.currentThread().interrupt();
1921 }
1922 }
1923 regionStates.updateRegionState(region, State.FAILED_OPEN);
1924 }
1925 return;
1926 }
1927 if (setOfflineInZK && versionOfOfflineNode == -1) {
1928
1929
1930 versionOfOfflineNode = setOfflineInZooKeeper(currentState, plan.getDestination());
1931 if (versionOfOfflineNode != -1) {
1932 if (isDisabledorDisablingRegionInRIT(region)) {
1933 return;
1934 }
1935
1936
1937
1938
1939
1940
1941 TableName tableName = region.getTable();
1942 if (!zkTable.isEnablingTable(tableName) && !zkTable.isEnabledTable(tableName)) {
1943 LOG.debug("Setting table " + tableName + " to ENABLED state.");
1944 setEnabledTable(tableName);
1945 }
1946 }
1947 }
1948 if (setOfflineInZK && versionOfOfflineNode == -1) {
1949 LOG.info("Unable to set offline in ZooKeeper to assign " + region);
1950
1951
1952
1953
1954 if (!server.isAborted()) {
1955 continue;
1956 }
1957 }
1958 LOG.info("Assigning " + region.getRegionNameAsString() +
1959 " to " + plan.getDestination().toString());
1960
1961 currentState = regionStates.updateRegionState(region,
1962 State.PENDING_OPEN, plan.getDestination());
1963
1964 boolean needNewPlan;
1965 final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() +
1966 " to " + plan.getDestination();
1967 try {
1968 List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
1969 if (this.shouldAssignRegionsWithFavoredNodes) {
1970 favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
1971 }
1972 regionOpenState = serverManager.sendRegionOpen(
1973 plan.getDestination(), region, versionOfOfflineNode, favoredNodes);
1974
1975 if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
1976
1977 needNewPlan = true;
1978 LOG.warn(assignMsg + ", regionserver says 'FAILED_OPENING', " +
1979 " trying to assign elsewhere instead; " +
1980 "try=" + i + " of " + this.maximumAttempts);
1981 } else {
1982
1983 if (regionOpenState == RegionOpeningState.ALREADY_OPENED) {
1984 processAlreadyOpenedRegion(region, plan.getDestination());
1985 }
1986 return;
1987 }
1988
1989 } catch (Throwable t) {
1990 if (t instanceof RemoteException) {
1991 t = ((RemoteException) t).unwrapRemoteException();
1992 }
1993 previousException = t;
1994
1995
1996
1997
1998 boolean hold = (t instanceof RegionAlreadyInTransitionException ||
1999 t instanceof ServerNotRunningYetException);
2000
2001
2002
2003
2004
2005
2006 boolean retry = !hold && (t instanceof java.net.SocketTimeoutException
2007 && this.serverManager.isServerOnline(plan.getDestination()));
2008
2009
2010 if (hold) {
2011 LOG.warn(assignMsg + ", waiting a little before trying on the same region server " +
2012 "try=" + i + " of " + this.maximumAttempts, t);
2013
2014 if (maxWaitTime < 0) {
2015 if (t instanceof RegionAlreadyInTransitionException) {
2016 maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
2017 + this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
2018 DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
2019 } else {
2020 maxWaitTime = this.server.getConfiguration().
2021 getLong("hbase.regionserver.rpc.startup.waittime", 60000);
2022 }
2023 }
2024 try {
2025 needNewPlan = false;
2026 long now = EnvironmentEdgeManager.currentTimeMillis();
2027 if (now < maxWaitTime) {
2028 LOG.debug("Server is not yet up or region is already in transition; "
2029 + "waiting up to " + (maxWaitTime - now) + "ms", t);
2030 Thread.sleep(100);
2031 i--;
2032 } else if (!(t instanceof RegionAlreadyInTransitionException)) {
2033 LOG.debug("Server is not up for a while; try a new one", t);
2034 needNewPlan = true;
2035 }
2036 } catch (InterruptedException ie) {
2037 LOG.warn("Failed to assign "
2038 + region.getRegionNameAsString() + " since interrupted", ie);
2039 Thread.currentThread().interrupt();
2040 if (!tomActivated) {
2041 regionStates.updateRegionState(region, State.FAILED_OPEN);
2042 }
2043 return;
2044 }
2045 } else if (retry) {
2046 needNewPlan = false;
2047 LOG.warn(assignMsg + ", trying to assign to the same region server " +
2048 "try=" + i + " of " + this.maximumAttempts, t);
2049 } else {
2050 needNewPlan = true;
2051 LOG.warn(assignMsg + ", trying to assign elsewhere instead;" +
2052 " try=" + i + " of " + this.maximumAttempts, t);
2053 }
2054 }
2055
2056 if (i == this.maximumAttempts) {
// This was the last attempt; skip replanning and let the loop exit so
// the failure handling below runs.
2059 continue;
2060 }
// Decide whether to keep retrying the current plan or to pick a new
// destination for the next attempt.
2065 if (needNewPlan) {
// Force a new plan. This may still pick the same destination since the
// previous server is not explicitly excluded; it could be the only
// server available.
2070 RegionPlan newPlan = null;
2071 try {
2072 newPlan = getRegionPlan(region, true);
2073 } catch (HBaseIOException e) {
2074 LOG.warn("Failed to get region plan", e);
2075 }
2076 if (newPlan == null) {
2077 if (tomActivated) {
2078 this.timeoutMonitor.setAllRegionServersOffline(true);
2079 } else {
2080 regionStates.updateRegionState(region, State.FAILED_OPEN);
2081 }
2082 LOG.warn("Unable to find a viable location to assign region " +
2083 region.getRegionNameAsString());
2084 return;
2085 }
2086
2087 if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
// Switching to a different server: reset the region to OFFLINE and
// forget the old ZK offline node version before using the new plan.
2091 currentState = regionStates.updateRegionState(region, State.OFFLINE);
2092 versionOfOfflineNode = -1;
2093 plan = newPlan;
2094 } else if(plan.getDestination().equals(newPlan.getDestination()) &&
2095 previousException instanceof FailedServerException) {
2096 try {
2097 LOG.info("Trying to re-assign " + region.getRegionNameAsString() +
2098 " to the same failed server.");
2099 Thread.sleep(1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
2100 RpcClient.FAILED_SERVER_EXPIRY_DEFAULT));
2101 } catch (InterruptedException ie) {
2102 LOG.warn("Failed to assign "
2103 + region.getRegionNameAsString() + " since interrupted", ie);
2104 Thread.currentThread().interrupt();
2105 if (!tomActivated) {
2106 regionStates.updateRegionState(region, State.FAILED_OPEN);
2107 }
2108 return;
2109 }
2110 }
2111 }
2112 }
// Ran out of attempts; mark the region FAILED_OPEN unless the timeout
// monitor is active and will handle it.
2114 if (!tomActivated) {
2115 regionStates.updateRegionState(region, State.FAILED_OPEN);
2116 }
2117 } finally {
2118 metricsAssignmentManager.updateAssignmentTime(EnvironmentEdgeManager.currentTimeMillis() - startTime);
2119 }
2120 }
2121
2122 private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
// The region server reports the region as already open on it: remove the
// stale OFFLINE znode and mark the region online in memory.
2126 LOG.debug("ALREADY_OPENED " + region.getRegionNameAsString()
2127 + " to " + sn);
2128 String encodedName = region.getEncodedName();
2129 deleteNodeInStates(encodedName, "offline", sn, EventType.M_ZK_REGION_OFFLINE);
2130 regionStates.regionOnline(region, sn);
2131 }
2132
2133 private boolean isDisabledorDisablingRegionInRIT(final HRegionInfo region) {
2134 TableName tableName = region.getTable();
2135 boolean disabled = this.zkTable.isDisabledTable(tableName);
2136 if (disabled || this.zkTable.isDisablingTable(tableName)) {
2137 LOG.info("Table " + tableName + (disabled ? " disabled;" : " disabling;") +
2138 " skipping assign of " + region.getRegionNameAsString());
2139 offlineDisabledRegion(region);
2140 return true;
2141 }
2142 return false;
2143 }
2144
2145
2146
2147
2148
2149
2150
2151
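/**
 * Set the region as OFFLINE in memory and up in ZooKeeper, in preparation
 * for assignment to the given destination server.
 *
 * @return the version of the offline znode if it was set successfully,
 *         -1 otherwise (the master aborts on unexpected ZK failures)
 */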
2152 private int setOfflineInZooKeeper(final RegionState state, final ServerName destination) {
2153 if (!state.isClosed() && !state.isOffline()) {
2154 String msg = "Unexpected state : " + state + " .. Cannot transit it to OFFLINE.";
2155 this.server.abort(msg, new IllegalStateException(msg));
2156 return -1;
2157 }
2158 regionStates.updateRegionState(state.getRegion(), State.OFFLINE);
2159 int versionOfOfflineNode;
2160 try {
2161
2162 versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(watcher,
2163 state.getRegion(), destination);
2164 if (versionOfOfflineNode == -1) {
2165 LOG.warn("Attempted to create/force node into OFFLINE state before "
2166 + "completing assignment but failed to do so for " + state);
2167 return -1;
2168 }
2169 } catch (KeeperException e) {
2170 server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
2171 return -1;
2172 }
2173 return versionOfOfflineNode;
2174 }
2175
2176
2177
2178
2179
2180
2181 private RegionPlan getRegionPlan(final HRegionInfo region,
2182 final boolean forceNewPlan) throws HBaseIOException {
2183 return getRegionPlan(region, null, forceNewPlan);
2184 }
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
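/**
 * Returns the assignment plan for a region: an existing plan is reused if
 * its destination is still a valid candidate, otherwise (or when
 * forceNewPlan is set) the balancer picks a random destination, excluding
 * serverToExclude.
 *
 * @return the plan to use, or null if no destination server is available
 */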
2195 private RegionPlan getRegionPlan(final HRegionInfo region,
2196 final ServerName serverToExclude, final boolean forceNewPlan) throws HBaseIOException {
2197
2198 final String encodedName = region.getEncodedName();
2199 final List<ServerName> destServers =
2200 serverManager.createDestinationServersList(serverToExclude);
2201
2202 if (destServers.isEmpty()){
2203 LOG.warn("Can't move " + encodedName +
2204 ", there is no destination server available.");
2205 return null;
2206 }
2207
2208 RegionPlan randomPlan = null;
2209 boolean newPlan = false;
2210 RegionPlan existingPlan;
2211
2212 synchronized (this.regionPlans) {
2213 existingPlan = this.regionPlans.get(encodedName);
2214
2215 if (existingPlan != null && existingPlan.getDestination() != null) {
2216 LOG.debug("Found an existing plan for " + region.getRegionNameAsString()
2217 + " destination server is " + existingPlan.getDestination() +
2218 " accepted as a dest server = " + destServers.contains(existingPlan.getDestination()));
2219 }
2220
2221 if (forceNewPlan
2222 || existingPlan == null
2223 || existingPlan.getDestination() == null
2224 || !destServers.contains(existingPlan.getDestination())) {
2225 newPlan = true;
2226 randomPlan = new RegionPlan(region, null,
2227 balancer.randomAssignment(region, destServers));
2228 if (!region.isMetaTable() && shouldAssignRegionsWithFavoredNodes) {
2229 List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
2230 regions.add(region);
2231 try {
2232 processFavoredNodes(regions);
2233 } catch (IOException ie) {
2234 LOG.warn("Ignoring exception in processFavoredNodes " + ie);
2235 }
2236 }
2237 this.regionPlans.put(encodedName, randomPlan);
2238 }
2239 }
2240
2241 if (newPlan) {
2242 if (randomPlan.getDestination() == null) {
2243 LOG.warn("Can't find a destination for " + encodedName);
2244 return null;
2245 }
2246 LOG.debug("No previous transition plan found (or ignoring " +
2247 "an existing plan) for " + region.getRegionNameAsString() +
2248 "; generated random plan=" + randomPlan + "; " +
2249 serverManager.countOfRegionServers() +
2250 " (online=" + serverManager.getOnlineServers().size() +
2251 ", available=" + destServers.size() + ") available servers" +
2252 ", forceNewPlan=" + forceNewPlan);
2253 return randomPlan;
2254 }
2255 LOG.debug("Using pre-existing plan for " +
2256 region.getRegionNameAsString() + "; plan=" + existingPlan);
2257 return existingPlan;
2258 }
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
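/**
 * Unassigns the specified region. Same as {@link #unassign(HRegionInfo, boolean)}
 * with force set to false, so no extra CLOSE RPC is sent if the region is
 * already being closed.
 */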
2273 public void unassign(HRegionInfo region) {
2274 unassign(region, false);
2275 }
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
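/**
 * Unassigns the specified region, transitioning it to PENDING_CLOSE and
 * creating the CLOSING znode before asking the hosting server to close it.
 *
 * @param force if true, a CLOSE RPC is sent again even when the region is
 *          already PENDING_CLOSE or CLOSING
 * @param dest if not null, the server the region should be moved to once
 *          it has been closed
 */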
2292 public void unassign(HRegionInfo region, boolean force, ServerName dest) {
2293
2294 LOG.debug("Starting unassign of " + region.getRegionNameAsString()
2295 + " (offlining), current state: " + regionStates.getRegionState(region));
2296
2297 String encodedName = region.getEncodedName();
2298
2299 int versionOfClosingNode = -1;
// Work on the region only while holding its lock so that concurrent
// assign/unassign calls cannot interleave.
2302 ReentrantLock lock = locker.acquireLock(encodedName);
2303 RegionState state = regionStates.getRegionTransitionState(encodedName);
2304 boolean reassign = true;
2305 try {
2306 if (state == null) {
// The region is not in transition; check whether it can be unassigned
// at all before creating the CLOSING znode.
2309 state = regionStates.getRegionState(encodedName);
2310 if (state != null && state.isUnassignable()) {
2311 LOG.info("Attempting to unassign " + state + ", ignored");
2312
2313 return;
2314 }
2315
2316 try {
2317 if (state == null || state.getServerName() == null) {
// The region is not tracked at all, or has no server recorded; there is
// nothing to close, so just mark it offline.
LOG.warn("Attempting to unassign a region not in RegionStates: "
+ region.getRegionNameAsString() + ", offlined");
2322 regionOffline(region);
2323 return;
2324 }
2325 versionOfClosingNode = ZKAssign.createNodeClosing(
2326 watcher, region, state.getServerName());
2327 if (versionOfClosingNode == -1) {
2328 LOG.info("Attempting to unassign " +
2329 region.getRegionNameAsString() + " but ZK closing node "
2330 + "can't be created.");
2331 reassign = false;
2332 return;
2333 }
2334 } catch (KeeperException e) {
2335 if (e instanceof NodeExistsException) {
// The unassigned znode already exists; if the region server has moved it
// into a split or merge transition, the region is going away and there is
// nothing left to unassign.
2340 NodeExistsException nee = (NodeExistsException)e;
2341 String path = nee.getPath();
2342 try {
2343 if (isSplitOrSplittingOrMergedOrMerging(path)) {
LOG.debug(path + " is SPLIT or SPLITTING or MERGED or MERGING; " +
"skipping unassign because the region no longer exists -- it has been split or merged");
2346 reassign = false;
2347 return;
2348 }
2349 } catch (KeeperException.NoNodeException ke) {
2350 LOG.warn("Failed getData on SPLITTING/SPLIT at " + path +
2351 "; presuming split and that the region to unassign, " +
2352 encodedName + ", no longer exists -- confirm", ke);
2353 return;
2354 } catch (KeeperException ke) {
2355 LOG.error("Unexpected zk state", ke);
2356 } catch (DeserializationException de) {
2357 LOG.error("Failed parse", de);
2358 }
2359 }
2360
2361 server.abort("Unexpected ZK exception creating node CLOSING", e);
2362 reassign = false;
2363 return;
2364 }
2365 state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
2366 } else if (state.isFailedOpen()) {
2367
2368 regionOffline(region);
2369 return;
2370 } else if (force && state.isPendingCloseOrClosing()) {
2371 LOG.debug("Attempting to unassign " + region.getRegionNameAsString() +
2372 " which is already " + state.getState() +
2373 " but forcing to send a CLOSE RPC again ");
2374 if (state.isFailedClose()) {
2375 state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
2376 }
2377 state.updateTimestampToNow();
2378 } else {
2379 LOG.debug("Attempting to unassign " +
2380 region.getRegionNameAsString() + " but it is " +
2381 "already in transition (" + state.getState() + ", force=" + force + ")");
2382 return;
2383 }
2384
2385 unassign(region, state, versionOfClosingNode, dest, true, null);
2386 } finally {
2387 lock.unlock();
2388
2389
2390 if (reassign && regionStates.isRegionOffline(region)) {
2391 assign(region, true);
2392 }
2393 }
2394 }
2395
2396 public void unassign(HRegionInfo region, boolean force){
2397 unassign(region, force, null);
2398 }
2399
2400
2401
2402
2403 public void deleteClosingOrClosedNode(HRegionInfo region, ServerName sn) {
2404 String encodedName = region.getEncodedName();
2405 deleteNodeInStates(encodedName, "closing", sn, EventType.M_ZK_REGION_CLOSING,
2406 EventType.RS_ZK_REGION_CLOSED);
2407 }
2408
2409
2410
2411
2412
2413
2414
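/**
 * @return true if the znode at the given path holds a split- or
 *         merge-related region transition (SPLIT, SPLITTING, MERGED,
 *         MERGING or the corresponding requests), false otherwise
 */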
2415 private boolean isSplitOrSplittingOrMergedOrMerging(final String path)
2416 throws KeeperException, DeserializationException {
2417 boolean result = false;
2418
2419
2420 byte [] data = ZKAssign.getData(watcher, path);
2421 if (data == null) {
2422 LOG.info("Node " + path + " is gone");
2423 return false;
2424 }
2425 RegionTransition rt = RegionTransition.parseFrom(data);
2426 switch (rt.getEventType()) {
2427 case RS_ZK_REQUEST_REGION_SPLIT:
2428 case RS_ZK_REGION_SPLIT:
2429 case RS_ZK_REGION_SPLITTING:
2430 case RS_ZK_REQUEST_REGION_MERGE:
2431 case RS_ZK_REGION_MERGED:
2432 case RS_ZK_REGION_MERGING:
2433 result = true;
2434 break;
2435 default:
2436 LOG.info("Node " + path + " is in " + rt.getEventType());
2437 break;
2438 }
2439 return result;
2440 }
2441
2442
2443
2444
2445
2446
2447 public int getNumRegionsOpened() {
2448 return numRegionsOpened.get();
2449 }
2450
2451
2452
2453
2454
2455
2456
2457
2458
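/**
 * Waits until the specified region has been assigned, i.e. is online in
 * this manager's region states.
 *
 * <p>Illustrative usage (names are placeholders):
 * <pre>
 *   assignmentManager.assign(hri, true);
 *   assignmentManager.waitForAssignment(hri);
 * </pre>
 *
 * @return true once the region is online; false if the region ends up in
 *         FAILED_OPEN or the master is stopped while waiting
 */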
2459 public boolean waitForAssignment(HRegionInfo regionInfo)
2460 throws InterruptedException {
2461 while (!regionStates.isRegionOnline(regionInfo)) {
2462 if (regionStates.isRegionInState(regionInfo, State.FAILED_OPEN)
2463 || this.server.isStopped()) {
2464 return false;
2465 }
// A state change should wake us up via notification, but wake up
// periodically anyway so a missed update cannot leave us waiting forever.
2470 regionStates.waitForUpdate(100);
2471 }
2472 return true;
2473 }
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485 public void assignMeta() throws KeeperException {
2486 MetaRegionTracker.deleteMetaLocation(this.watcher);
2487 assign(HRegionInfo.FIRST_META_REGIONINFO, true);
2488 }
2489
2490
2491
2492
2493
2494
2495
2496
2497
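/**
 * Assigns the given regions, asking the balancer to keep each region on
 * the server it was previously assigned to (retainAssignment).
 *
 * @throws IOException if no destination servers are available
 */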
2498 public void assign(Map<HRegionInfo, ServerName> regions)
2499 throws IOException, InterruptedException {
2500 if (regions == null || regions.isEmpty()) {
2501 return;
2502 }
2503 List<ServerName> servers = serverManager.createDestinationServersList();
2504 if (servers == null || servers.isEmpty()) {
2505 throw new IOException("Found no destination server to assign region(s)");
2506 }
2507
2508
2509 Map<ServerName, List<HRegionInfo>> bulkPlan =
2510 balancer.retainAssignment(regions, servers);
2511
2512 assign(regions.size(), servers.size(),
2513 "retainAssignment=true", bulkPlan);
2514 }
2515
2516
2517
2518
2519
2520
2521
2522
2523
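/**
 * Assigns the given regions round-robin across the currently available
 * servers.
 *
 * @throws IOException if no destination servers are available
 */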
2524 public void assign(List<HRegionInfo> regions)
2525 throws IOException, InterruptedException {
2526 if (regions == null || regions.isEmpty()) {
2527 return;
2528 }
2529
2530 List<ServerName> servers = serverManager.createDestinationServersList();
2531 if (servers == null || servers.isEmpty()) {
2532 throw new IOException("Found no destination server to assign region(s)");
2533 }
2534
2535
2536 Map<ServerName, List<HRegionInfo>> bulkPlan
2537 = balancer.roundRobinAssignment(regions, servers);
2538 processFavoredNodes(regions);
2539
2540 assign(regions.size(), servers.size(),
2541 "round-robin=true", bulkPlan);
2542 }
2543
2544 private void assign(int regions, int totalServers,
2545 String message, Map<ServerName, List<HRegionInfo>> bulkPlan)
2546 throws InterruptedException, IOException {
2547
2548 int servers = bulkPlan.size();
2549 if (servers == 1 || (regions < bulkAssignThresholdRegions
2550 && servers < bulkAssignThresholdServers)) {
// Assign directly rather than going through the bulk assigner when the
// plan is small; this avoids the overhead of a dedicated thread pool for
// just a few regions.
2554 if (LOG.isTraceEnabled()) {
2555 LOG.trace("Not using bulk assignment since we are assigning only " + regions +
2556 " region(s) to " + servers + " server(s)");
2557 }
2558 for (Map.Entry<ServerName, List<HRegionInfo>> plan: bulkPlan.entrySet()) {
2559 if (!assign(plan.getKey(), plan.getValue())) {
2560 for (HRegionInfo region: plan.getValue()) {
2561 if (!regionStates.isRegionOnline(region)) {
2562 invokeAssign(region);
2563 }
2564 }
2565 }
2566 }
2567 } else {
2568 LOG.info("Bulk assigning " + regions + " region(s) across "
2569 + totalServers + " server(s), " + message);
2570
2571
2572 BulkAssigner ba = new GeneralBulkAssigner(
2573 this.server, bulkPlan, this, bulkAssignWaitTillAllAssigned);
2574 ba.bulkAssign();
2575 LOG.info("Bulk assigning done");
2576 }
2577 }
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
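/**
 * Assigns all user regions on cluster startup, either retaining the
 * assignments recorded in hbase:meta or redistributing them round-robin,
 * depending on the "hbase.master.startup.retainassign" setting.
 */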
2589 private void assignAllUserRegions()
2590 throws IOException, InterruptedException, KeeperException {
2591
2592 ZKAssign.deleteAllNodes(watcher);
2593 ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
2594 this.watcher.assignmentZNode);
2595 failoverCleanupDone();
// Tables that are disabled, disabling or enabling are excluded from the
// assignment snapshot taken below.
2600 Set<TableName> disabledOrDisablingOrEnabling = ZKTable.getDisabledOrDisablingTables(watcher);
2601 disabledOrDisablingOrEnabling.addAll(ZKTable.getEnablingTables(watcher));
2602
2603 Map<HRegionInfo, ServerName> allRegions;
2604 SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment =
2605 new SnapshotOfRegionAssignmentFromMeta(catalogTracker, disabledOrDisablingOrEnabling, true);
2606 snapshotOfRegionAssignment.initialize();
2607 allRegions = snapshotOfRegionAssignment.getRegionToRegionServerMap();
2608 if (allRegions == null || allRegions.isEmpty()) return;
2609
2610
2611 boolean retainAssignment = server.getConfiguration().
2612 getBoolean("hbase.master.startup.retainassign", true);
2613
2614 if (retainAssignment) {
2615 assign(allRegions);
2616 } else {
2617 List<HRegionInfo> regions = new ArrayList<HRegionInfo>(allRegions.keySet());
2618 assign(regions);
2619 }
2620
2621 for (HRegionInfo hri : allRegions.keySet()) {
2622 TableName tableName = hri.getTable();
2623 if (!zkTable.isEnabledTable(tableName)) {
2624 setEnabledTable(tableName);
2625 }
2626 }
2627 }
2628
2629
2630
2631
2632
2633
2634
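/**
 * Waits until no regions are in transition or the timeout elapses.
 *
 * @return true if there were no regions in transition when this returned
 */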
2635 boolean waitUntilNoRegionsInTransition(final long timeout)
2636 throws InterruptedException {
2637
2638
2639
2640
2641
2642
2643 final long endTime = System.currentTimeMillis() + timeout;
2644
2645 while (!this.server.isStopped() && regionStates.isRegionsInTransition()
2646 && endTime > System.currentTimeMillis()) {
2647 regionStates.waitForUpdate(100);
2648 }
2649
2650 return !regionStates.isRegionsInTransition();
2651 }
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
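/**
 * Rebuilds the in-memory region states from a full scan of hbase:meta,
 * typically during master startup or failover.
 *
 * @return a map of servers that are no longer online to the regions they
 *         were hosting, so those regions can be recovered
 */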
2662 Map<ServerName, List<HRegionInfo>> rebuildUserRegions() throws IOException, KeeperException {
2663 Set<TableName> enablingTables = ZKTable.getEnablingTables(watcher);
2664 Set<TableName> disabledOrEnablingTables = ZKTable.getDisabledTables(watcher);
2665 disabledOrEnablingTables.addAll(enablingTables);
2666 Set<TableName> disabledOrDisablingOrEnabling = ZKTable.getDisablingTables(watcher);
2667 disabledOrDisablingOrEnabling.addAll(disabledOrEnablingTables);
2668
2669
2670 List<Result> results = MetaReader.fullScan(this.catalogTracker);
2671
2672 Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
2673
2674 Map<ServerName, List<HRegionInfo>> offlineServers =
2675 new TreeMap<ServerName, List<HRegionInfo>>();
2676
2677 for (Result result : results) {
2678 Pair<HRegionInfo, ServerName> region = HRegionInfo.getHRegionInfoAndServerName(result);
2679 if (region == null) continue;
2680 HRegionInfo regionInfo = region.getFirst();
2681 ServerName regionLocation = region.getSecond();
2682 if (regionInfo == null) continue;
2683 regionStates.createRegionState(regionInfo);
2684 if (regionStates.isRegionInState(regionInfo, State.SPLIT)) {
2685
2686
2687 LOG.debug("Region " + regionInfo.getRegionNameAsString()
2688 + " split is completed. Hence need not add to regions list");
2689 continue;
2690 }
2691 TableName tableName = regionInfo.getTable();
2692 if (regionLocation == null) {
// A null location usually means the region was created but never
// assigned, which is expected while its table is still being enabled.
// Warn if the table is not in ENABLING state, since that is unexpected.
2703 if (!enablingTables.contains(tableName)) {
2704 LOG.warn("Region " + regionInfo.getEncodedName() +
2705 " has null regionLocation." + " But its table " + tableName +
2706 " isn't in ENABLING state.");
2707 }
2708 } else if (!onlineServers.contains(regionLocation)) {
2709
2710 List<HRegionInfo> offlineRegions = offlineServers.get(regionLocation);
2711 if (offlineRegions == null) {
2712 offlineRegions = new ArrayList<HRegionInfo>(1);
2713 offlineServers.put(regionLocation, offlineRegions);
2714 }
2715 offlineRegions.add(regionInfo);
2716
2717
2718 if (!disabledOrDisablingOrEnabling.contains(tableName)
2719 && !getZKTable().isEnabledTable(tableName)) {
2720 setEnabledTable(tableName);
2721 }
2722 } else {
2723
2724
2725 if (!disabledOrEnablingTables.contains(tableName)) {
2726 regionStates.updateRegionState(regionInfo, State.OPEN, regionLocation);
2727 regionStates.regionOnline(regionInfo, regionLocation);
2728 balancer.regionOnline(regionInfo, regionLocation);
2729 }
2730
2731
2732 if (!disabledOrDisablingOrEnabling.contains(tableName)
2733 && !getZKTable().isEnabledTable(tableName)) {
2734 setEnabledTable(tableName);
2735 }
2736 }
2737 }
2738 return offlineServers;
2739 }
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749 private void recoverTableInDisablingState()
2750 throws KeeperException, TableNotFoundException, IOException {
2751 Set<TableName> disablingTables = ZKTable.getDisablingTables(watcher);
2752 if (disablingTables.size() != 0) {
2753 for (TableName tableName : disablingTables) {
2754
2755 LOG.info("The table " + tableName
2756 + " is in DISABLING state. Hence recovering by moving the table"
2757 + " to DISABLED state.");
2758 new DisableTableHandler(this.server, tableName, catalogTracker,
2759 this, tableLockManager, true).prepare().process();
2760 }
2761 }
2762 }
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772 private void recoverTableInEnablingState()
2773 throws KeeperException, TableNotFoundException, IOException {
2774 Set<TableName> enablingTables = ZKTable.getEnablingTables(watcher);
2775 if (enablingTables.size() != 0) {
2776 for (TableName tableName : enablingTables) {
2777
2778 LOG.info("The table " + tableName
2779 + " is in ENABLING state. Hence recovering by moving the table"
2780 + " to ENABLED state.");
2781
2782
2783 EnableTableHandler eth = new EnableTableHandler(this.server, tableName,
2784 catalogTracker, this, tableLockManager, true);
2785 try {
2786 eth.prepare();
2787 } catch (TableNotFoundException e) {
2788 LOG.warn("Table " + tableName + " not found in hbase:meta to recover.");
2789 continue;
2790 }
2791 eth.process();
2792 }
2793 }
2794 }
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
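/**
 * Expires any dead servers found while rebuilding region states, processes
 * the unassigned znodes still present in ZooKeeper, and then marks
 * failover cleanup as done.
 */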
2811 private void processDeadServersAndRecoverLostRegions(
2812 Map<ServerName, List<HRegionInfo>> deadServers)
2813 throws IOException, KeeperException {
2814 if (deadServers != null) {
2815 for (Map.Entry<ServerName, List<HRegionInfo>> server: deadServers.entrySet()) {
2816 ServerName serverName = server.getKey();
2817
2818 regionStates.setLastRegionServerOfRegions(serverName, server.getValue());
2819 if (!serverManager.isServerDead(serverName)) {
2820 serverManager.expireServer(serverName);
2821 }
2822 }
2823 }
2824 List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(
2825 this.watcher, this.watcher.assignmentZNode);
2826 if (!nodes.isEmpty()) {
2827 for (String encodedRegionName : nodes) {
2828 processRegionInTransition(encodedRegionName, null);
2829 }
2830 }
// All regions-in-transition znodes found have been submitted for
// processing; failover cleanup can now be considered complete.
2836 failoverCleanupDone();
2837 }
2838
2839
2840
2841
2842
2843
2844
2845
2846 public void updateRegionsInTransitionMetrics() {
2847 long currentTime = System.currentTimeMillis();
2848 int totalRITs = 0;
2849 int totalRITsOverThreshold = 0;
2850 long oldestRITTime = 0;
2851 int ritThreshold = this.server.getConfiguration().
2852 getInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 60000);
2853 for (RegionState state: regionStates.getRegionsInTransition().values()) {
2854 totalRITs++;
2855 long ritTime = currentTime - state.getStamp();
2856 if (ritTime > ritThreshold) {
2857 totalRITsOverThreshold++;
2858 }
2859 if (oldestRITTime < ritTime) {
2860 oldestRITTime = ritTime;
2861 }
2862 }
2863 if (this.metricsAssignmentManager != null) {
2864 this.metricsAssignmentManager.updateRITOldestAge(oldestRITTime);
2865 this.metricsAssignmentManager.updateRITCount(totalRITs);
2866 this.metricsAssignmentManager.updateRITCountOverThreshold(totalRITsOverThreshold);
2867 }
2868 }
2869
2870
2871
2872
2873 void clearRegionPlan(final HRegionInfo region) {
2874 synchronized (this.regionPlans) {
2875 this.regionPlans.remove(region.getEncodedName());
2876 }
2877 }
2878
2879
2880
2881
2882
2883
2884 public void waitOnRegionToClearRegionsInTransition(final HRegionInfo hri)
2885 throws IOException, InterruptedException {
2886 waitOnRegionToClearRegionsInTransition(hri, -1L);
2887 }
2888
2889
2890
2891
2892
2893
2894
2895
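/**
 * Waits for the given region to leave the regions-in-transition set.
 *
 * @param timeOut maximum time to wait in milliseconds; a non-positive
 *          value means wait indefinitely
 * @return true if the region cleared transition, false on timeout or if
 *         the master is stopped
 */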
2896 public boolean waitOnRegionToClearRegionsInTransition(final HRegionInfo hri, long timeOut)
2897 throws InterruptedException {
2898 if (!regionStates.isRegionInTransition(hri)) return true;
2899 long end = (timeOut <= 0) ? Long.MAX_VALUE : EnvironmentEdgeManager.currentTimeMillis()
2900 + timeOut;
2901
2902
2903 LOG.info("Waiting for " + hri.getEncodedName() +
2904 " to leave regions-in-transition, timeOut=" + timeOut + " ms.");
2905 while (!this.server.isStopped() && regionStates.isRegionInTransition(hri)) {
2906 regionStates.waitForUpdate(100);
2907 if (EnvironmentEdgeManager.currentTimeMillis() > end) {
2908 LOG.info("Timed out on waiting for " + hri.getEncodedName() + " to be assigned.");
2909 return false;
2910 }
2911 }
2912 if (this.server.isStopped()) {
2913 LOG.info("Giving up wait on regions in transition because stoppable.isStopped is set");
2914 return false;
2915 }
2916 return true;
2917 }
2918
2919
2920
2921
2922
2923 public class TimerUpdater extends Chore {
2924
2925 public TimerUpdater(final int period, final Stoppable stopper) {
2926 super("AssignmentTimerUpdater", period, stopper);
2927 }
2928
2929 @Override
2930 protected void chore() {
2931 Preconditions.checkState(tomActivated);
2932 ServerName serverToUpdateTimer = null;
2933 while (!serversInUpdatingTimer.isEmpty() && !stopper.isStopped()) {
2934 if (serverToUpdateTimer == null) {
2935 serverToUpdateTimer = serversInUpdatingTimer.first();
2936 } else {
2937 serverToUpdateTimer = serversInUpdatingTimer
2938 .higher(serverToUpdateTimer);
2939 }
2940 if (serverToUpdateTimer == null) {
2941 break;
2942 }
2943 updateTimers(serverToUpdateTimer);
2944 serversInUpdatingTimer.remove(serverToUpdateTimer);
2945 }
2946 }
2947 }
2948
2949
2950
2951
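/**
 * Periodically checks regions that have been in transition longer than the
 * configured timeout and takes corrective action (reassign, re-send close,
 * etc.) based on their current state. Only runs when the timeout monitor
 * is activated ({@code tomActivated}).
 */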
2952 public class TimeoutMonitor extends Chore {
2953 private boolean allRegionServersOffline = false;
2954 private ServerManager serverManager;
2955 private final int timeout;
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966 public TimeoutMonitor(final int period, final Stoppable stopper,
2967 ServerManager serverManager,
2968 final int timeout) {
2969 super("AssignmentTimeoutMonitor", period, stopper);
2970 this.timeout = timeout;
2971 this.serverManager = serverManager;
2972 }
2973
2974 private synchronized void setAllRegionServersOffline(
2975 boolean allRegionServersOffline) {
2976 this.allRegionServersOffline = allRegionServersOffline;
2977 }
2978
2979 @Override
2980 protected void chore() {
2981 Preconditions.checkState(tomActivated);
2982 boolean noRSAvailable = this.serverManager.createDestinationServersList().isEmpty();
2983
2984
2985 long now = System.currentTimeMillis();
2986
2987
2988 for (String regionName : regionStates.getRegionsInTransition().keySet()) {
2989 RegionState regionState = regionStates.getRegionTransitionState(regionName);
2990 if (regionState == null) continue;
2991
2992 if (regionState.getStamp() + timeout <= now) {
2993
2994 actOnTimeOut(regionState);
2995 } else if (this.allRegionServersOffline && !noRSAvailable) {
2996 RegionPlan existingPlan = regionPlans.get(regionName);
2997 if (existingPlan == null
2998 || !this.serverManager.isServerOnline(existingPlan
2999 .getDestination())) {
3000
3001
3002 actOnTimeOut(regionState);
3003 }
3004 }
3005 }
3006 setAllRegionServersOffline(noRSAvailable);
3007 }
3008
3009 private void actOnTimeOut(RegionState regionState) {
3010 HRegionInfo regionInfo = regionState.getRegion();
3011 LOG.info("Regions in transition timed out: " + regionState);
3012
3013 switch (regionState.getState()) {
3014 case CLOSED:
3015 LOG.info("Region " + regionInfo.getEncodedName()
3016 + " has been CLOSED for too long, waiting on queued "
3017 + "ClosedRegionHandler to run or server shutdown");
3018
3019 regionState.updateTimestampToNow();
3020 break;
3021 case OFFLINE:
3022 LOG.info("Region has been OFFLINE for too long, " + "reassigning "
3023 + regionInfo.getRegionNameAsString() + " to a random server");
3024 invokeAssign(regionInfo);
3025 break;
3026 case PENDING_OPEN:
3027 LOG.info("Region has been PENDING_OPEN for too "
3028 + "long, reassigning region=" + regionInfo.getRegionNameAsString());
3029 invokeAssign(regionInfo);
3030 break;
3031 case OPENING:
3032 processOpeningState(regionInfo);
3033 break;
3034 case OPEN:
3035 LOG.error("Region has been OPEN for too long, " +
3036 "we don't know where region was opened so can't do anything");
3037 regionState.updateTimestampToNow();
3038 break;
3039
3040 case PENDING_CLOSE:
3041 LOG.info("Region has been PENDING_CLOSE for too "
3042 + "long, running forced unassign again on region="
3043 + regionInfo.getRegionNameAsString());
3044 invokeUnassign(regionInfo);
3045 break;
3046 case CLOSING:
3047 LOG.info("Region has been CLOSING for too " +
3048 "long, this should eventually complete or the server will " +
3049 "expire, send RPC again");
3050 invokeUnassign(regionInfo);
3051 break;
3052
3053 case SPLIT:
3054 case SPLITTING:
3055 case FAILED_OPEN:
3056 case FAILED_CLOSE:
3057 case MERGING:
3058 break;
3059
3060 default:
3061 throw new IllegalStateException("Received event is not valid.");
3062 }
3063 }
3064 }
3065
3066 private void processOpeningState(HRegionInfo regionInfo) {
3067 LOG.info("Region has been OPENING for too long, reassigning region="
3068 + regionInfo.getRegionNameAsString());
3069
3070 try {
3071 String node = ZKAssign.getNodeName(watcher, regionInfo.getEncodedName());
3072 Stat stat = new Stat();
3073 byte [] data = ZKAssign.getDataNoWatch(watcher, node, stat);
3074 if (data == null) {
3075 LOG.warn("Data is null, node " + node + " no longer exists");
3076 return;
3077 }
3078 RegionTransition rt = RegionTransition.parseFrom(data);
3079 EventType et = rt.getEventType();
3080 if (et == EventType.RS_ZK_REGION_OPENED) {
3081 LOG.debug("Region has transitioned to OPENED, allowing "
3082 + "watched event handlers to process");
3083 return;
3084 } else if (et != EventType.RS_ZK_REGION_OPENING && et != EventType.RS_ZK_REGION_FAILED_OPEN ) {
3085 LOG.warn("While timing out a region, found ZK node in unexpected state: " + et);
3086 return;
3087 }
3088 invokeAssign(regionInfo);
} catch (KeeperException ke) {
LOG.error("Unexpected ZK exception timing out OPENING region", ke);
} catch (DeserializationException e) {
LOG.error("Unexpected exception parsing OPENING region", e);
}
3094 }
3095
3096 void invokeAssign(HRegionInfo regionInfo) {
3097 threadPoolExecutorService.submit(new AssignCallable(this, regionInfo));
3098 }
3099
3100 private void invokeUnassign(HRegionInfo regionInfo) {
3101 threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
3102 }
3103
3104 public boolean isCarryingMeta(ServerName serverName) {
3105 return isCarryingRegion(serverName, HRegionInfo.FIRST_META_REGIONINFO);
3106 }
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
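/**
 * Checks whether the given server is believed to be carrying the region,
 * preferring the server recorded in the region's unassigned znode (if the
 * region is in transition) over the in-memory assignment.
 */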
3118 private boolean isCarryingRegion(ServerName serverName, HRegionInfo hri) {
3119 RegionTransition rt = null;
3120 try {
3121 byte [] data = ZKAssign.getData(watcher, hri.getEncodedName());
3122
3123 rt = data == null? null: RegionTransition.parseFrom(data);
3124 } catch (KeeperException e) {
3125 server.abort("Exception reading unassigned node for region=" + hri.getEncodedName(), e);
3126 } catch (DeserializationException e) {
3127 server.abort("Exception parsing unassigned node for region=" + hri.getEncodedName(), e);
3128 }
3129
3130 ServerName addressFromZK = rt != null? rt.getServerName(): null;
3131 if (addressFromZK != null) {
3132
3133 boolean matchZK = addressFromZK.equals(serverName);
3134 LOG.debug("Checking region=" + hri.getRegionNameAsString() + ", zk server=" + addressFromZK +
3135 " current=" + serverName + ", matches=" + matchZK);
3136 return matchZK;
3137 }
3138
3139 ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
3140 boolean matchAM = (addressFromAM != null &&
3141 addressFromAM.equals(serverName));
3142 LOG.debug("based on AM, current region=" + hri.getRegionNameAsString() +
3143 " is on server=" + (addressFromAM != null ? addressFromAM : "null") +
3144 " server being checked: " + serverName);
3145
3146 return matchAM;
3147 }
3148
3149
3150
3151
3152
3153
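/**
 * Cleans up state for a server that has died: drops region plans targeting
 * it, removes stale znodes, and marks its regions OFFLINE so they can be
 * reassigned.
 *
 * @return the regions the dead server was carrying that still need to be
 *         reassigned
 */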
3154 public List<HRegionInfo> processServerShutdown(final ServerName sn) {
3155
3156 synchronized (this.regionPlans) {
3157 for (Iterator <Map.Entry<String, RegionPlan>> i =
3158 this.regionPlans.entrySet().iterator(); i.hasNext();) {
3159 Map.Entry<String, RegionPlan> e = i.next();
3160 ServerName otherSn = e.getValue().getDestination();
3161
3162 if (otherSn != null && otherSn.equals(sn)) {
3163
3164 i.remove();
3165 }
3166 }
3167 }
3168 List<HRegionInfo> regions = regionStates.serverOffline(watcher, sn);
3169 for (Iterator<HRegionInfo> it = regions.iterator(); it.hasNext(); ) {
3170 HRegionInfo hri = it.next();
3171 String encodedName = hri.getEncodedName();
3172
3173
3174 Lock lock = locker.acquireLock(encodedName);
3175 try {
3176 RegionState regionState =
3177 regionStates.getRegionTransitionState(encodedName);
3178 if (regionState == null
3179 || (regionState.getServerName() != null && !regionState.isOnServer(sn))
3180 || !(regionState.isFailedClose() || regionState.isOffline()
3181 || regionState.isPendingOpenOrOpening())) {
3182 LOG.info("Skip " + regionState + " since it is not opening/failed_close"
3183 + " on the dead server any more: " + sn);
3184 it.remove();
3185 } else {
3186 try {
3187
3188 ZKAssign.deleteNodeFailSilent(watcher, hri);
3189 } catch (KeeperException ke) {
3190 server.abort("Unexpected ZK exception deleting node " + hri, ke);
3191 }
3192 if (zkTable.isDisablingOrDisabledTable(hri.getTable())) {
3193 regionStates.regionOffline(hri);
3194 it.remove();
3195 continue;
3196 }
3197
3198 regionStates.updateRegionState(hri, State.OFFLINE);
3199 }
3200 } finally {
3201 lock.unlock();
3202 }
3203 }
3204 return regions;
3205 }
3206
3207
3208
3209
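/**
 * Moves a region according to the given plan by unassigning it with the
 * plan's destination; the subsequent assignment then follows that plan.
 * Regions of disabled/disabling tables and regions that are not currently
 * online are ignored.
 *
 * <p>Illustrative usage (variable names are placeholders):
 * <pre>
 *   am.balance(new RegionPlan(hri, currentServer, destinationServer));
 * </pre>
 */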
3210 public void balance(final RegionPlan plan) {
3211 HRegionInfo hri = plan.getRegionInfo();
3212 TableName tableName = hri.getTable();
3213 if (zkTable.isDisablingOrDisabledTable(tableName)) {
3214 LOG.info("Ignored moving region of disabling/disabled table "
3215 + tableName);
3216 return;
3217 }
3218
3219
3220 String encodedName = hri.getEncodedName();
3221 ReentrantLock lock = locker.acquireLock(encodedName);
3222 try {
3223 if (!regionStates.isRegionOnline(hri)) {
3224 RegionState state = regionStates.getRegionState(encodedName);
3225 LOG.info("Ignored moving region not assigned: " + hri + ", "
3226 + (state == null ? "not in region states" : state));
3227 return;
3228 }
3229 synchronized (this.regionPlans) {
3230 this.regionPlans.put(plan.getRegionName(), plan);
3231 }
3232 unassign(hri, false, plan.getDestination());
3233 } finally {
3234 lock.unlock();
3235 }
3236 }
3237
3238 public void stop() {
3239 shutdown();
3240 if (tomActivated){
3241 this.timeoutMonitor.interrupt();
3242 this.timerUpdater.interrupt();
3243 }
3244 }
3245
3246
3247
3248
3249 public void shutdown() {
3250
3251 synchronized (zkEventWorkerWaitingList){
3252 zkEventWorkerWaitingList.clear();
3253 }
3254 threadPoolExecutorService.shutdownNow();
3255 zkEventWorkers.shutdownNow();
3256 }
3257
3258 protected void setEnabledTable(TableName tableName) {
3259 try {
3260 this.zkTable.setEnabledTable(tableName);
3261 } catch (KeeperException e) {
3262
3263 String errorMsg = "Unable to ensure that the table " + tableName
3264 + " will be" + " enabled because of a ZooKeeper issue";
3265 LOG.error(errorMsg);
3266 this.server.abort(errorMsg, e);
3267 }
3268 }
3269
3270
3271
3272
3273
3274
3275
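/**
 * Asynchronous variant of the offline-znode setup: marks the region
 * OFFLINE in memory and issues an async request to create the OFFLINE
 * znode, invoking the given callback when ZooKeeper responds.
 *
 * @return true if the async request was issued, false otherwise
 */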
3276 private boolean asyncSetOfflineInZooKeeper(final RegionState state,
3277 final AsyncCallback.StringCallback cb, final ServerName destination) {
3278 if (!state.isClosed() && !state.isOffline()) {
3279 this.server.abort("Unexpected state trying to OFFLINE; " + state,
3280 new IllegalStateException());
3281 return false;
3282 }
3283 regionStates.updateRegionState(state.getRegion(), State.OFFLINE);
3284 try {
3285 ZKAssign.asyncCreateNodeOffline(watcher, state.getRegion(),
3286 destination, cb, state);
3287 } catch (KeeperException e) {
3288 if (e instanceof NodeExistsException) {
3289 LOG.warn("Node for " + state.getRegion() + " already exists");
3290 } else {
3291 server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
3292 }
3293 return false;
3294 }
3295 return true;
3296 }
3297
3298 private boolean deleteNodeInStates(String encodedName,
3299 String desc, ServerName sn, EventType... types) {
3300 try {
3301 for (EventType et: types) {
3302 if (ZKAssign.deleteNode(watcher, encodedName, et, sn)) {
3303 return true;
3304 }
3305 }
3306 LOG.info("Failed to delete the " + desc + " node for "
3307 + encodedName + ". The node type may not match");
3308 } catch (NoNodeException e) {
3309 if (LOG.isDebugEnabled()) {
3310 LOG.debug("The " + desc + " node for " + encodedName + " already deleted");
3311 }
3312 } catch (KeeperException ke) {
3313 server.abort("Unexpected ZK exception deleting " + desc
3314 + " node for the region " + encodedName, ke);
3315 }
3316 return false;
3317 }
3318
3319 private void deleteMergingNode(String encodedName, ServerName sn) {
3320 deleteNodeInStates(encodedName, "merging", sn, EventType.RS_ZK_REGION_MERGING,
3321 EventType.RS_ZK_REQUEST_REGION_MERGE, EventType.RS_ZK_REGION_MERGED);
3322 }
3323
3324 private void deleteSplittingNode(String encodedName, ServerName sn) {
3325 deleteNodeInStates(encodedName, "splitting", sn, EventType.RS_ZK_REGION_SPLITTING,
3326 EventType.RS_ZK_REQUEST_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT);
3327 }
3328
3329
3330
3331
3332
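/**
 * Handles a merge-related region transition reported via ZooKeeper:
 * validates the reporting server and region states, transitions the
 * pending_merge node to MERGING when requested, and updates in-memory
 * states (offlining the two parents and onlining the merged region once
 * MERGED is seen).
 *
 * @return true if the event was handled, false if it was dropped
 */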
3333 private boolean handleRegionMerging(final RegionTransition rt, final String encodedName,
3334 final String prettyPrintedRegionName, final ServerName sn) {
3335 if (!serverManager.isServerOnline(sn)) {
3336 LOG.warn("Dropped merging! ServerName=" + sn + " unknown.");
3337 return false;
3338 }
3339 byte [] payloadOfMerging = rt.getPayload();
3340 List<HRegionInfo> mergingRegions;
3341 try {
3342 mergingRegions = HRegionInfo.parseDelimitedFrom(
3343 payloadOfMerging, 0, payloadOfMerging.length);
3344 } catch (IOException e) {
3345 LOG.error("Dropped merging! Failed reading " + rt.getEventType()
3346 + " payload for " + prettyPrintedRegionName);
3347 return false;
3348 }
3349 assert mergingRegions.size() == 3;
3350 HRegionInfo p = mergingRegions.get(0);
3351 HRegionInfo hri_a = mergingRegions.get(1);
3352 HRegionInfo hri_b = mergingRegions.get(2);
3353
3354 RegionState rs_p = regionStates.getRegionState(p);
3355 RegionState rs_a = regionStates.getRegionState(hri_a);
3356 RegionState rs_b = regionStates.getRegionState(hri_b);
3357
3358 if (!((rs_a == null || rs_a.isOpenOrMergingOnServer(sn))
3359 && (rs_b == null || rs_b.isOpenOrMergingOnServer(sn))
3360 && (rs_p == null || rs_p.isOpenOrMergingNewOnServer(sn)))) {
3361 LOG.warn("Dropped merging! Not in state good for MERGING; rs_p="
3362 + rs_p + ", rs_a=" + rs_a + ", rs_b=" + rs_b);
3363 return false;
3364 }
3365
3366 EventType et = rt.getEventType();
3367 if (et == EventType.RS_ZK_REQUEST_REGION_MERGE) {
3368 try {
3369 if (RegionMergeTransaction.transitionMergingNode(watcher, p,
3370 hri_a, hri_b, sn, -1, EventType.RS_ZK_REQUEST_REGION_MERGE,
3371 EventType.RS_ZK_REGION_MERGING) == -1) {
3372 byte[] data = ZKAssign.getData(watcher, encodedName);
3373 EventType currentType = null;
3374 if (data != null) {
3375 RegionTransition newRt = RegionTransition.parseFrom(data);
3376 currentType = newRt.getEventType();
3377 }
3378 if (currentType == null || (currentType != EventType.RS_ZK_REGION_MERGED
3379 && currentType != EventType.RS_ZK_REGION_MERGING)) {
3380 LOG.warn("Failed to transition pending_merge node "
3381 + encodedName + " to merging, it's now " + currentType);
3382 return false;
3383 }
3384 }
3385 } catch (Exception e) {
3386 LOG.warn("Failed to transition pending_merge node "
3387 + encodedName + " to merging", e);
3388 return false;
3389 }
3390 }
3391
3392 synchronized (regionStates) {
3393 regionStates.updateRegionState(hri_a, State.MERGING);
3394 regionStates.updateRegionState(hri_b, State.MERGING);
3395 regionStates.updateRegionState(p, State.MERGING_NEW, sn);
3396
3397 if (et != EventType.RS_ZK_REGION_MERGED) {
3398 regionStates.regionOffline(p, State.MERGING_NEW);
3399 this.mergingRegions.put(encodedName,
3400 new PairOfSameType<HRegionInfo>(hri_a, hri_b));
3401 } else {
3402 this.mergingRegions.remove(encodedName);
3403 regionOffline(hri_a, State.MERGED);
3404 regionOffline(hri_b, State.MERGED);
3405 regionOnline(p, sn);
3406 }
3407 }
3408
3409 if (et == EventType.RS_ZK_REGION_MERGED) {
3410 LOG.debug("Handling MERGED event for " + encodedName + "; deleting node");
3411
3412 try {
3413 boolean successful = false;
3414 while (!successful) {
3415
3416
3417 successful = ZKAssign.deleteNode(watcher, encodedName,
3418 EventType.RS_ZK_REGION_MERGED, sn);
3419 }
3420 } catch (KeeperException e) {
3421 if (e instanceof NoNodeException) {
3422 String znodePath = ZKUtil.joinZNode(watcher.splitLogZNode, encodedName);
3423 LOG.debug("The znode " + znodePath + " does not exist. May be deleted already.");
3424 } else {
3425 server.abort("Error deleting MERGED node " + encodedName, e);
3426 }
3427 }
3428 LOG.info("Handled MERGED event; merged=" + p.getRegionNameAsString()
3429 + ", region_a=" + hri_a.getRegionNameAsString() + ", region_b="
3430 + hri_b.getRegionNameAsString() + ", on " + sn);
// The table may have been disabled while the merge was in progress;
// if so, close the newly merged region.
3433 if (zkTable.isDisablingOrDisabledTable(p.getTable())) {
3434 unassign(p);
3435 }
3436 }
3437 return true;
3438 }
3439
3440
3441
3442
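/**
 * Handles a split-related region transition reported via ZooKeeper:
 * validates the reporting server and region states, transitions the
 * pending_split node to SPLITTING when requested, and updates in-memory
 * states (offlining the parent and onlining the daughters once SPLIT is
 * seen).
 *
 * @return true if the event was handled, false if it was dropped
 */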
3443 private boolean handleRegionSplitting(final RegionTransition rt, final String encodedName,
3444 final String prettyPrintedRegionName, final ServerName sn) {
3445 if (!serverManager.isServerOnline(sn)) {
3446 LOG.warn("Dropped splitting! ServerName=" + sn + " unknown.");
3447 return false;
3448 }
3449 byte [] payloadOfSplitting = rt.getPayload();
3450 List<HRegionInfo> splittingRegions;
3451 try {
3452 splittingRegions = HRegionInfo.parseDelimitedFrom(
3453 payloadOfSplitting, 0, payloadOfSplitting.length);
3454 } catch (IOException e) {
3455 LOG.error("Dropped splitting! Failed reading " + rt.getEventType()
3456 + " payload for " + prettyPrintedRegionName);
3457 return false;
3458 }
3459 assert splittingRegions.size() == 2;
3460 HRegionInfo hri_a = splittingRegions.get(0);
3461 HRegionInfo hri_b = splittingRegions.get(1);
3462
3463 RegionState rs_p = regionStates.getRegionState(encodedName);
3464 RegionState rs_a = regionStates.getRegionState(hri_a);
3465 RegionState rs_b = regionStates.getRegionState(hri_b);
3466
3467 if (!((rs_p == null || rs_p.isOpenOrSplittingOnServer(sn))
3468 && (rs_a == null || rs_a.isOpenOrSplittingNewOnServer(sn))
3469 && (rs_b == null || rs_b.isOpenOrSplittingNewOnServer(sn)))) {
3470 LOG.warn("Dropped splitting! Not in state good for SPLITTING; rs_p="
3471 + rs_p + ", rs_a=" + rs_a + ", rs_b=" + rs_b);
3472 return false;
3473 }
3474
3475 if (rs_p == null) {
3476
3477 rs_p = regionStates.updateRegionState(rt, State.OPEN);
3478 if (rs_p == null) {
3479 LOG.warn("Received splitting for region " + prettyPrintedRegionName
3480 + " from server " + sn + " but it doesn't exist anymore,"
3481 + " probably already processed its split");
3482 return false;
3483 }
3484 regionStates.regionOnline(rs_p.getRegion(), sn);
3485 }
3486
3487 HRegionInfo p = rs_p.getRegion();
3488 EventType et = rt.getEventType();
3489 if (et == EventType.RS_ZK_REQUEST_REGION_SPLIT) {
3490 try {
3491 if (SplitTransaction.transitionSplittingNode(watcher, p,
3492 hri_a, hri_b, sn, -1, EventType.RS_ZK_REQUEST_REGION_SPLIT,
3493 EventType.RS_ZK_REGION_SPLITTING) == -1) {
3494 byte[] data = ZKAssign.getData(watcher, encodedName);
3495 EventType currentType = null;
3496 if (data != null) {
3497 RegionTransition newRt = RegionTransition.parseFrom(data);
3498 currentType = newRt.getEventType();
3499 }
3500 if (currentType == null || (currentType != EventType.RS_ZK_REGION_SPLIT
3501 && currentType != EventType.RS_ZK_REGION_SPLITTING)) {
3502 LOG.warn("Failed to transition pending_split node "
3503 + encodedName + " to splitting, it's now " + currentType);
3504 return false;
3505 }
3506 }
3507 } catch (Exception e) {
3508 LOG.warn("Failed to transition pending_split node "
3509 + encodedName + " to splitting", e);
3510 return false;
3511 }
3512 }
3513
3514 synchronized (regionStates) {
3515 regionStates.updateRegionState(hri_a, State.SPLITTING_NEW, sn);
3516 regionStates.updateRegionState(hri_b, State.SPLITTING_NEW, sn);
3517 regionStates.regionOffline(hri_a, State.SPLITTING_NEW);
3518 regionStates.regionOffline(hri_b, State.SPLITTING_NEW);
3519 regionStates.updateRegionState(rt, State.SPLITTING);
// Test hook: skip the remainder of split handling when
// TEST_SKIP_SPLIT_HANDLING is set.
3523 if (TEST_SKIP_SPLIT_HANDLING) {
3524 LOG.warn("Skipping split message, TEST_SKIP_SPLIT_HANDLING is set");
3525 return true;
3526 }
3527
3528 if (et == EventType.RS_ZK_REGION_SPLIT) {
3529 regionOffline(p, State.SPLIT);
3530 regionOnline(hri_a, sn);
3531 regionOnline(hri_b, sn);
3532 }
3533 }
3534
3535 if (et == EventType.RS_ZK_REGION_SPLIT) {
3536 LOG.debug("Handling SPLIT event for " + encodedName + "; deleting node");
3537
3538 try {
3539 boolean successful = false;
3540 while (!successful) {
3541
3542
3543 successful = ZKAssign.deleteNode(watcher, encodedName,
3544 EventType.RS_ZK_REGION_SPLIT, sn);
3545 }
3546 } catch (KeeperException e) {
3547 if (e instanceof NoNodeException) {
3548 String znodePath = ZKUtil.joinZNode(watcher.splitLogZNode, encodedName);
3549 LOG.debug("The znode " + znodePath + " does not exist. May be deleted already.");
3550 } else {
3551 server.abort("Error deleting SPLIT node " + encodedName, e);
3552 }
3553 }
3554 LOG.info("Handled SPLIT event; parent=" + p.getRegionNameAsString()
3555 + ", daughter a=" + hri_a.getRegionNameAsString() + ", daughter b="
3556 + hri_b.getRegionNameAsString() + ", on " + sn);
// The table may have been disabled while the split was in progress;
// if so, close the two daughter regions.
3559 if (zkTable.isDisablingOrDisabledTable(p.getTable())) {
3560 unassign(hri_a);
3561 unassign(hri_b);
3562 }
3563 }
3564 return true;
3565 }
3566
3567
3568
3569
3570
3571
3572 private void regionOffline(final HRegionInfo regionInfo, final State state) {
3573 regionStates.regionOffline(regionInfo, state);
3574 removeClosedRegion(regionInfo);
3575
3576 clearRegionPlan(regionInfo);
3577 balancer.regionOffline(regionInfo);
3578
3579
3580 sendRegionClosedNotification(regionInfo);
3581 }
3582
3583 private void sendRegionOpenedNotification(final HRegionInfo regionInfo,
3584 final ServerName serverName) {
3585 if (!this.listeners.isEmpty()) {
3586 for (AssignmentListener listener : this.listeners) {
3587 listener.regionOpened(regionInfo, serverName);
3588 }
3589 }
3590 }
3591
3592 private void sendRegionClosedNotification(final HRegionInfo regionInfo) {
3593 if (!this.listeners.isEmpty()) {
3594 for (AssignmentListener listener : this.listeners) {
3595 listener.regionClosed(regionInfo);
3596 }
3597 }
3598 }
3599
3600
3601
3602
3603 public LoadBalancer getBalancer() {
3604 return this.balancer;
3605 }
3606 }