1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.Collections;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.NavigableMap;
31 import java.util.Set;
32 import java.util.TreeMap;
33 import java.util.concurrent.ConcurrentHashMap;
34 import java.util.concurrent.ConcurrentSkipListSet;
35 import java.util.concurrent.ThreadFactory;
36 import java.util.concurrent.TimeUnit;
37 import java.util.concurrent.atomic.AtomicBoolean;
38 import java.util.concurrent.atomic.AtomicInteger;
39 import java.util.concurrent.locks.Lock;
40 import java.util.concurrent.locks.ReentrantLock;
41
42 import org.apache.commons.logging.Log;
43 import org.apache.commons.logging.LogFactory;
44 import org.apache.hadoop.classification.InterfaceAudience;
45 import org.apache.hadoop.conf.Configuration;
46 import org.apache.hadoop.hbase.Chore;
47 import org.apache.hadoop.hbase.HBaseIOException;
48 import org.apache.hadoop.hbase.HConstants;
49 import org.apache.hadoop.hbase.HRegionInfo;
50 import org.apache.hadoop.hbase.NotServingRegionException;
51 import org.apache.hadoop.hbase.RegionTransition;
52 import org.apache.hadoop.hbase.Server;
53 import org.apache.hadoop.hbase.ServerName;
54 import org.apache.hadoop.hbase.Stoppable;
55 import org.apache.hadoop.hbase.TableName;
56 import org.apache.hadoop.hbase.TableNotFoundException;
57 import org.apache.hadoop.hbase.catalog.CatalogTracker;
58 import org.apache.hadoop.hbase.catalog.MetaReader;
59 import org.apache.hadoop.hbase.client.Result;
60 import org.apache.hadoop.hbase.exceptions.DeserializationException;
61 import org.apache.hadoop.hbase.executor.EventHandler;
62 import org.apache.hadoop.hbase.executor.EventType;
63 import org.apache.hadoop.hbase.executor.ExecutorService;
64 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
65 import org.apache.hadoop.hbase.master.RegionState.State;
66 import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
67 import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
68 import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
69 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
70 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
71 import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
72 import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException;
73 import org.apache.hadoop.hbase.regionserver.RegionMergeTransaction;
74 import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
75 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
76 import org.apache.hadoop.hbase.regionserver.SplitTransaction;
77 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
78 import org.apache.hadoop.hbase.util.KeyLocker;
79 import org.apache.hadoop.hbase.util.Pair;
80 import org.apache.hadoop.hbase.util.PairOfSameType;
81 import org.apache.hadoop.hbase.util.Threads;
82 import org.apache.hadoop.hbase.util.Triple;
83 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
84 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
85 import org.apache.hadoop.hbase.zookeeper.ZKTable;
86 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
87 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
88 import org.apache.hadoop.ipc.RemoteException;
89 import org.apache.zookeeper.AsyncCallback;
90 import org.apache.zookeeper.KeeperException;
91 import org.apache.zookeeper.KeeperException.NoNodeException;
92 import org.apache.zookeeper.KeeperException.NodeExistsException;
93 import org.apache.zookeeper.data.Stat;
94
95 import com.google.common.base.Preconditions;
96 import com.google.common.collect.LinkedHashMultimap;
97
98
99
100
101
102
103
104
105 @InterfaceAudience.Private
106 public class AssignmentManager extends ZooKeeperListener {
107 private static final Log LOG = LogFactory.getLog(AssignmentManager.class);
108
109 public static final ServerName HBCK_CODE_SERVERNAME = ServerName.valueOf(HConstants.HBCK_CODE_NAME,
110 -1, -1L);
111
112 public static final String ASSIGNMENT_TIMEOUT = "hbase.master.assignment.timeoutmonitor.timeout";
113 public static final int DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT = 600000;
114 public static final String ASSIGNMENT_TIMEOUT_MANAGEMENT = "hbase.assignment.timeout.management";
115 public static final boolean DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT = false;
116
117 public static final String ALREADY_IN_TRANSITION_WAITTIME
118 = "hbase.assignment.already.intransition.waittime";
119 public static final int DEFAULT_ALREADY_IN_TRANSITION_WAITTIME = 60000;
120
121 protected final Server server;
122
123 private ServerManager serverManager;
124
125 private boolean shouldAssignRegionsWithFavoredNodes;
126
127 private CatalogTracker catalogTracker;
128
129 protected final TimeoutMonitor timeoutMonitor;
130
131 private final TimerUpdater timerUpdater;
132
133 private LoadBalancer balancer;
134
135 private final MetricsAssignmentManager metricsAssignmentManager;
136
137 private final TableLockManager tableLockManager;
138
139 private AtomicInteger numRegionsOpened = new AtomicInteger(0);
140
141 final private KeyLocker<String> locker = new KeyLocker<String>();
142
143
144
145
146
147 private final Map <String, HRegionInfo> regionsToReopen;
148
149
150
151
152
153 private final int maximumAttempts;
154
155
156
157
158 private final Map<String, PairOfSameType<HRegionInfo>> mergingRegions
159 = new HashMap<String, PairOfSameType<HRegionInfo>>();
160
161
162
163
164
165 private final long sleepTimeBeforeRetryingMetaAssignment;
166
167
168
169
170
171 final NavigableMap<String, RegionPlan> regionPlans =
172 new TreeMap<String, RegionPlan>();
173
174 private final ZKTable zkTable;
175
176
177
178
179
180 private final ConcurrentSkipListSet<ServerName> serversInUpdatingTimer;
181
182 private final ExecutorService executorService;
183
184
185 private Map<HRegionInfo, AtomicBoolean> closedRegionHandlerCalled = null;
186
187
188 private Map<HRegionInfo, AtomicBoolean> openedRegionHandlerCalled = null;
189
190
191 private java.util.concurrent.ExecutorService threadPoolExecutorService;
192
193
194 private final java.util.concurrent.ExecutorService zkEventWorkers;
195
196 private List<EventType> ignoreStatesRSOffline = Arrays.asList(
197 EventType.RS_ZK_REGION_FAILED_OPEN, EventType.RS_ZK_REGION_CLOSED);
198
199 private final RegionStates regionStates;
200
201
202
203
204
205 private final int bulkAssignThresholdRegions;
206 private final int bulkAssignThresholdServers;
207
208
209
210
211 private final boolean bulkAssignWaitTillAllAssigned;
212
213
214
215
216
217
218
219
220
221 protected final AtomicBoolean failoverCleanupDone = new AtomicBoolean(false);
222
223
224 private final boolean tomActivated;
225
226
227
228
229
230
231
232
233 private final ConcurrentHashMap<String, AtomicInteger>
234 failedOpenTracker = new ConcurrentHashMap<String, AtomicInteger>();
235
236
237
238
239 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="MS_SHOULD_BE_FINAL")
240 public static boolean TEST_SKIP_SPLIT_HANDLING = false;
241
242
243
244
245
246
247
248
249
250
251
252 public AssignmentManager(Server server, ServerManager serverManager,
253 CatalogTracker catalogTracker, final LoadBalancer balancer,
254 final ExecutorService service, MetricsMaster metricsMaster,
255 final TableLockManager tableLockManager) throws KeeperException, IOException {
256 super(server.getZooKeeper());
257 this.server = server;
258 this.serverManager = serverManager;
259 this.catalogTracker = catalogTracker;
260 this.executorService = service;
261 this.regionsToReopen = Collections.synchronizedMap
262 (new HashMap<String, HRegionInfo> ());
263 Configuration conf = server.getConfiguration();
264
265 this.shouldAssignRegionsWithFavoredNodes = conf.getClass(
266 HConstants.HBASE_MASTER_LOADBALANCER_CLASS, Object.class).equals(
267 FavoredNodeLoadBalancer.class);
268 this.tomActivated = conf.getBoolean(
269 ASSIGNMENT_TIMEOUT_MANAGEMENT, DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT);
270 if (tomActivated){
271 this.serversInUpdatingTimer = new ConcurrentSkipListSet<ServerName>();
272 this.timeoutMonitor = new TimeoutMonitor(
273 conf.getInt("hbase.master.assignment.timeoutmonitor.period", 30000),
274 server, serverManager,
275 conf.getInt(ASSIGNMENT_TIMEOUT, DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT));
276 this.timerUpdater = new TimerUpdater(conf.getInt(
277 "hbase.master.assignment.timerupdater.period", 10000), server);
278 Threads.setDaemonThreadRunning(timerUpdater.getThread(),
279 server.getServerName() + ".timerUpdater");
280 } else {
281 this.serversInUpdatingTimer = null;
282 this.timeoutMonitor = null;
283 this.timerUpdater = null;
284 }
285 this.zkTable = new ZKTable(this.watcher);
286
287 this.maximumAttempts = Math.max(1,
288 this.server.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10));
289 this.sleepTimeBeforeRetryingMetaAssignment = this.server.getConfiguration().getLong(
290 "hbase.meta.assignment.retry.sleeptime", 1000l);
291 this.balancer = balancer;
292 int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
293 this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
294 maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("AM."));
295 this.regionStates = new RegionStates(server, serverManager);
296
297 this.bulkAssignWaitTillAllAssigned =
298 conf.getBoolean("hbase.bulk.assignment.waittillallassigned", false);
299 this.bulkAssignThresholdRegions = conf.getInt("hbase.bulk.assignment.threshold.regions", 7);
300 this.bulkAssignThresholdServers = conf.getInt("hbase.bulk.assignment.threshold.servers", 3);
301
302 int workers = conf.getInt("hbase.assignment.zkevent.workers", 20);
303 ThreadFactory threadFactory = Threads.newDaemonThreadFactory("AM.ZK.Worker");
304 zkEventWorkers = Threads.getBoundedCachedThreadPool(workers, 60L,
305 TimeUnit.SECONDS, threadFactory);
306 this.tableLockManager = tableLockManager;
307
308 this.metricsAssignmentManager = new MetricsAssignmentManager();
309 }
310
311 void startTimeOutMonitor() {
312 if (tomActivated) {
313 Threads.setDaemonThreadRunning(timeoutMonitor.getThread(), server.getServerName()
314 + ".timeoutMonitor");
315 }
316 }
317
318
319
320
321 public ZKTable getZKTable() {
322
323
324 return this.zkTable;
325 }
326
327
328
329
330
331
332
333 public RegionStates getRegionStates() {
334 return regionStates;
335 }
336
337 public RegionPlan getRegionReopenPlan(HRegionInfo hri) {
338 return new RegionPlan(hri, null, regionStates.getRegionServerOfRegion(hri));
339 }
340
341
342
343
344
345
346 public void addPlan(String encodedName, RegionPlan plan) {
347 synchronized (regionPlans) {
348 regionPlans.put(encodedName, plan);
349 }
350 }
351
352
353
354
355 public void addPlans(Map<String, RegionPlan> plans) {
356 synchronized (regionPlans) {
357 regionPlans.putAll(plans);
358 }
359 }
360
361
362
363
364
365
366
367
368 public void setRegionsToReopen(List <HRegionInfo> regions) {
369 for(HRegionInfo hri : regions) {
370 regionsToReopen.put(hri.getEncodedName(), hri);
371 }
372 }
373
374
375
376
377
378
379
380
381 public Pair<Integer, Integer> getReopenStatus(TableName tableName)
382 throws IOException {
383 List <HRegionInfo> hris =
384 MetaReader.getTableRegions(this.server.getCatalogTracker(), tableName, true);
385 Integer pending = 0;
386 for (HRegionInfo hri : hris) {
387 String name = hri.getEncodedName();
388
389 if (regionsToReopen.containsKey(name)
390 || regionStates.isRegionInTransition(name)) {
391 pending++;
392 }
393 }
394 return new Pair<Integer, Integer>(pending, hris.size());
395 }
396
397
398
399
400
401
402 public boolean isFailoverCleanupDone() {
403 return failoverCleanupDone.get();
404 }
405
406
407
408
409
410 public Lock acquireRegionLock(final String encodedName) {
411 return locker.acquireLock(encodedName);
412 }
413
414
415
416
417
418 void failoverCleanupDone() {
419 failoverCleanupDone.set(true);
420 serverManager.processQueuedDeadServers();
421 }
422
423
424
425
426
427
428
429
430 void joinCluster() throws IOException,
431 KeeperException, InterruptedException {
432
433
434
435
436
437
438
439
440
441
442 Map<ServerName, List<HRegionInfo>> deadServers = rebuildUserRegions();
443
444
445
446
447 processDeadServersAndRegionsInTransition(deadServers);
448
449 recoverTableInDisablingState();
450 recoverTableInEnablingState();
451 }
452
453
454
455
456
457
458
459
460
461
462
463
464 void processDeadServersAndRegionsInTransition(
465 final Map<ServerName, List<HRegionInfo>> deadServers)
466 throws KeeperException, IOException, InterruptedException {
467 List<String> nodes = ZKUtil.listChildrenNoWatch(watcher,
468 watcher.assignmentZNode);
469
470 if (nodes == null) {
471 String errorMessage = "Failed to get the children from ZK";
472 server.abort(errorMessage, new IOException(errorMessage));
473 return;
474 }
475
476 boolean failover = (!serverManager.getDeadServers().isEmpty() || !serverManager
477 .getRequeuedDeadServers().isEmpty());
478
479 if (!failover) {
480
481 Map<HRegionInfo, ServerName> regions = regionStates.getRegionAssignments();
482 for (HRegionInfo hri: regions.keySet()) {
483 if (!hri.isMetaTable()) {
484 LOG.debug("Found " + hri + " out on cluster");
485 failover = true;
486 break;
487 }
488 }
489 if (!failover) {
490
491 for (String encodedName: nodes) {
492 RegionState state = regionStates.getRegionState(encodedName);
493 if (state != null && !state.getRegion().isMetaRegion()) {
494 LOG.debug("Found " + state.getRegion().getRegionNameAsString() + " in RITs");
495 failover = true;
496 break;
497 }
498 }
499 }
500 }
501
502
503 if (failover) {
504 LOG.info("Found regions out on cluster or in RIT; presuming failover");
505
506
507 processDeadServersAndRecoverLostRegions(deadServers);
508 } else {
509
510 LOG.info("Clean cluster startup. Assigning userregions");
511 assignAllUserRegions();
512 }
513 }
514
515
516
517
518
519
520
521
522
523
524
525
526 boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
527 throws InterruptedException, KeeperException, IOException {
528 String encodedRegionName = hri.getEncodedName();
529 if (!processRegionInTransition(encodedRegionName, hri)) {
530 return false;
531 }
532 LOG.debug("Waiting on " + HRegionInfo.prettyPrint(encodedRegionName));
533 while (!this.server.isStopped() &&
534 this.regionStates.isRegionInTransition(encodedRegionName)) {
535 RegionState state = this.regionStates.getRegionTransitionState(encodedRegionName);
536 if (state == null || !serverManager.isServerOnline(state.getServerName())) {
537
538
539
540 break;
541 }
542 this.regionStates.waitForUpdate(100);
543 }
544 return true;
545 }
546
547
548
549
550
551
552
553
554
555
556 boolean processRegionInTransition(final String encodedRegionName,
557 final HRegionInfo regionInfo) throws KeeperException, IOException {
558
559
560
561
562 Lock lock = locker.acquireLock(encodedRegionName);
563 try {
564 Stat stat = new Stat();
565 byte [] data = ZKAssign.getDataAndWatch(watcher, encodedRegionName, stat);
566 if (data == null) return false;
567 RegionTransition rt;
568 try {
569 rt = RegionTransition.parseFrom(data);
570 } catch (DeserializationException e) {
571 LOG.warn("Failed parse znode data", e);
572 return false;
573 }
574 HRegionInfo hri = regionInfo;
575 if (hri == null) {
576
577
578
579
580
581 hri = regionStates.getRegionInfo(rt.getRegionName());
582 EventType et = rt.getEventType();
583 if (hri == null && et != EventType.RS_ZK_REGION_MERGING
584 && et != EventType.RS_ZK_REQUEST_REGION_MERGE) {
585 LOG.warn("Couldn't find the region in recovering " + rt);
586 return false;
587 }
588 }
589 return processRegionsInTransition(
590 rt, hri, stat.getVersion());
591 } finally {
592 lock.unlock();
593 }
594 }
595
596
597
598
599
600
601
602
603
604 boolean processRegionsInTransition(
605 final RegionTransition rt, final HRegionInfo regionInfo,
606 final int expectedVersion) throws KeeperException {
607 EventType et = rt.getEventType();
608
609 final ServerName sn = rt.getServerName();
610 final byte[] regionName = rt.getRegionName();
611 final String encodedName = HRegionInfo.encodeRegionName(regionName);
612 final String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
613 LOG.info("Processing " + prettyPrintedRegionName + " in state: " + et);
614
615 if (regionStates.isRegionInTransition(encodedName)) {
616 LOG.info("Processed region " + prettyPrintedRegionName + " in state: "
617 + et + ", does nothing since the region is already in transition "
618 + regionStates.getRegionTransitionState(encodedName));
619
620 return true;
621 }
622 if (!serverManager.isServerOnline(sn)) {
623
624
625
626 LOG.debug("RIT " + encodedName + " in state=" + rt.getEventType() +
627 " was on deadserver; forcing offline");
628 if (regionStates.isRegionOnline(regionInfo)) {
629
630
631
632 regionStates.regionOffline(regionInfo);
633 }
634
635 regionStates.updateRegionState(regionInfo, State.OFFLINE, sn);
636
637 if (regionInfo.isMetaRegion()) {
638
639
640 MetaRegionTracker.setMetaLocation(watcher, sn);
641 } else {
642
643
644 regionStates.setLastRegionServerOfRegion(sn, encodedName);
645
646 if (!serverManager.isServerDead(sn)) {
647 serverManager.expireServer(sn);
648 }
649 }
650 return false;
651 }
652 switch (et) {
653 case M_ZK_REGION_CLOSING:
654
655
656 final RegionState rsClosing = regionStates.updateRegionState(rt, State.CLOSING);
657 this.executorService.submit(
658 new EventHandler(server, EventType.M_MASTER_RECOVERY) {
659 @Override
660 public void process() throws IOException {
661 ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
662 try {
663 unassign(regionInfo, rsClosing, expectedVersion, null, true, null);
664 if (regionStates.isRegionOffline(regionInfo)) {
665 assign(regionInfo, true);
666 }
667 } finally {
668 lock.unlock();
669 }
670 }
671 });
672 break;
673
674 case RS_ZK_REGION_CLOSED:
675 case RS_ZK_REGION_FAILED_OPEN:
676
677 regionStates.updateRegionState(regionInfo, State.CLOSED, sn);
678 invokeAssign(regionInfo);
679 break;
680
681 case M_ZK_REGION_OFFLINE:
682
683 regionStates.updateRegionState(rt, State.PENDING_OPEN);
684 final RegionState rsOffline = regionStates.getRegionState(regionInfo);
685 this.executorService.submit(
686 new EventHandler(server, EventType.M_MASTER_RECOVERY) {
687 @Override
688 public void process() throws IOException {
689 ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
690 try {
691 RegionPlan plan = new RegionPlan(regionInfo, null, sn);
692 addPlan(encodedName, plan);
693 assign(rsOffline, false, false);
694 } finally {
695 lock.unlock();
696 }
697 }
698 });
699 break;
700
701 case RS_ZK_REGION_OPENING:
702 regionStates.updateRegionState(rt, State.OPENING);
703 break;
704
705 case RS_ZK_REGION_OPENED:
706
707
708
709 regionStates.updateRegionState(rt, State.OPEN);
710 new OpenedRegionHandler(server, this, regionInfo, sn, expectedVersion).process();
711 break;
712 case RS_ZK_REQUEST_REGION_SPLIT:
713 case RS_ZK_REGION_SPLITTING:
714 case RS_ZK_REGION_SPLIT:
715
716
717
718 regionStates.regionOnline(regionInfo, sn);
719 regionStates.updateRegionState(rt, State.SPLITTING);
720 if (!handleRegionSplitting(
721 rt, encodedName, prettyPrintedRegionName, sn)) {
722 deleteSplittingNode(encodedName, sn);
723 }
724 break;
725 case RS_ZK_REQUEST_REGION_MERGE:
726 case RS_ZK_REGION_MERGING:
727 case RS_ZK_REGION_MERGED:
728 if (!handleRegionMerging(
729 rt, encodedName, prettyPrintedRegionName, sn)) {
730 deleteMergingNode(encodedName, sn);
731 }
732 break;
733 default:
734 throw new IllegalStateException("Received region in state:" + et + " is not valid.");
735 }
736 LOG.info("Processed region " + prettyPrintedRegionName + " in state "
737 + et + ", on " + (serverManager.isServerOnline(sn) ? "" : "dead ")
738 + "server: " + sn);
739 return true;
740 }
741
742
743
744
745
746 public void removeClosedRegion(HRegionInfo hri) {
747 if (regionsToReopen.remove(hri.getEncodedName()) != null) {
748 LOG.debug("Removed region from reopening regions because it was closed");
749 }
750 }
751
752
753
754
755
756
757
758
759
760
761
762 void handleRegion(final RegionTransition rt, int expectedVersion) {
763 if (rt == null) {
764 LOG.warn("Unexpected NULL input for RegionTransition rt");
765 return;
766 }
767 final ServerName sn = rt.getServerName();
768
769 if (sn.equals(HBCK_CODE_SERVERNAME)) {
770 handleHBCK(rt);
771 return;
772 }
773 final long createTime = rt.getCreateTime();
774 final byte[] regionName = rt.getRegionName();
775 String encodedName = HRegionInfo.encodeRegionName(regionName);
776 String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
777
778 if (!serverManager.isServerOnline(sn)
779 && !ignoreStatesRSOffline.contains(rt.getEventType())) {
780 LOG.warn("Attempted to handle region transition for server but " +
781 "it is not online: " + prettyPrintedRegionName + ", " + rt);
782 return;
783 }
784
785 RegionState regionState =
786 regionStates.getRegionState(encodedName);
787 long startTime = System.currentTimeMillis();
788 if (LOG.isDebugEnabled()) {
789 boolean lateEvent = createTime < (startTime - 15000);
790 LOG.debug("Handling " + rt.getEventType() +
791 ", server=" + sn + ", region=" +
792 (prettyPrintedRegionName == null ? "null" : prettyPrintedRegionName) +
793 (lateEvent ? ", which is more than 15 seconds late" : "") +
794 ", current_state=" + regionState);
795 }
796
797
798 if (rt.getEventType() == EventType.M_ZK_REGION_OFFLINE) {
799 return;
800 }
801
802
803 Lock lock = locker.acquireLock(encodedName);
804 try {
805 RegionState latestState =
806 regionStates.getRegionState(encodedName);
807 if ((regionState == null && latestState != null)
808 || (regionState != null && latestState == null)
809 || (regionState != null && latestState != null
810 && latestState.getState() != regionState.getState())) {
811 LOG.warn("Region state changed from " + regionState + " to "
812 + latestState + ", while acquiring lock");
813 }
814 long waitedTime = System.currentTimeMillis() - startTime;
815 if (waitedTime > 5000) {
816 LOG.warn("Took " + waitedTime + "ms to acquire the lock");
817 }
818 regionState = latestState;
819 switch (rt.getEventType()) {
820 case RS_ZK_REQUEST_REGION_SPLIT:
821 case RS_ZK_REGION_SPLITTING:
822 case RS_ZK_REGION_SPLIT:
823 if (!handleRegionSplitting(
824 rt, encodedName, prettyPrintedRegionName, sn)) {
825 deleteSplittingNode(encodedName, sn);
826 }
827 break;
828
829 case RS_ZK_REQUEST_REGION_MERGE:
830 case RS_ZK_REGION_MERGING:
831 case RS_ZK_REGION_MERGED:
832
833
834 if (!handleRegionMerging(
835 rt, encodedName, prettyPrintedRegionName, sn)) {
836 deleteMergingNode(encodedName, sn);
837 }
838 break;
839
840 case M_ZK_REGION_CLOSING:
841
842
843 if (regionState == null
844 || !regionState.isPendingCloseOrClosingOnServer(sn)) {
845 LOG.warn("Received CLOSING for " + prettyPrintedRegionName
846 + " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
847 + regionStates.getRegionState(encodedName));
848 return;
849 }
850
851 regionStates.updateRegionState(rt, State.CLOSING);
852 break;
853
854 case RS_ZK_REGION_CLOSED:
855
856 if (regionState == null
857 || !regionState.isPendingCloseOrClosingOnServer(sn)) {
858 LOG.warn("Received CLOSED for " + prettyPrintedRegionName
859 + " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
860 + regionStates.getRegionState(encodedName));
861 return;
862 }
863
864
865
866 new ClosedRegionHandler(server, this, regionState.getRegion()).process();
867 updateClosedRegionHandlerTracker(regionState.getRegion());
868 break;
869
870 case RS_ZK_REGION_FAILED_OPEN:
871 if (regionState == null
872 || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
873 LOG.warn("Received FAILED_OPEN for " + prettyPrintedRegionName
874 + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
875 + regionStates.getRegionState(encodedName));
876 return;
877 }
878 AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
879 if (failedOpenCount == null) {
880 failedOpenCount = new AtomicInteger();
881
882
883
884 failedOpenTracker.put(encodedName, failedOpenCount);
885 }
886 if (failedOpenCount.incrementAndGet() >= maximumAttempts) {
887 regionStates.updateRegionState(rt, State.FAILED_OPEN);
888
889
890 failedOpenTracker.remove(encodedName);
891 } else {
892
893 regionState = regionStates.updateRegionState(rt, State.CLOSED);
894 if (regionState != null) {
895
896
897 try {
898 getRegionPlan(regionState.getRegion(), sn, true);
899 new ClosedRegionHandler(server, this, regionState.getRegion()).process();
900 } catch (HBaseIOException e) {
901 LOG.warn("Failed to get region plan", e);
902 }
903 }
904 }
905 break;
906
907 case RS_ZK_REGION_OPENING:
908
909
910 if (regionState == null
911 || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
912 LOG.warn("Received OPENING for " + prettyPrintedRegionName
913 + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
914 + regionStates.getRegionState(encodedName));
915 return;
916 }
917
918 regionStates.updateRegionState(rt, State.OPENING);
919 break;
920
921 case RS_ZK_REGION_OPENED:
922
923 if (regionState == null
924 || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
925 LOG.warn("Received OPENED for " + prettyPrintedRegionName
926 + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
927 + regionStates.getRegionState(encodedName));
928
929 if (regionState != null) {
930
931
932
933 unassign(regionState.getRegion(), null, -1, null, false, sn);
934 }
935 return;
936 }
937
938 regionState = regionStates.updateRegionState(rt, State.OPEN);
939 if (regionState != null) {
940 failedOpenTracker.remove(encodedName);
941 new OpenedRegionHandler(
942 server, this, regionState.getRegion(), sn, expectedVersion).process();
943 updateOpenedRegionHandlerTracker(regionState.getRegion());
944 }
945 break;
946
947 default:
948 throw new IllegalStateException("Received event is not valid.");
949 }
950 } finally {
951 lock.unlock();
952 }
953 }
954
955
956 boolean wasClosedHandlerCalled(HRegionInfo hri) {
957 AtomicBoolean b = closedRegionHandlerCalled.get(hri);
958
959
960
961 return b == null ? false : b.compareAndSet(true, false);
962 }
963
964
965 boolean wasOpenedHandlerCalled(HRegionInfo hri) {
966 AtomicBoolean b = openedRegionHandlerCalled.get(hri);
967
968
969
970 return b == null ? false : b.compareAndSet(true, false);
971 }
972
973
974 void initializeHandlerTrackers() {
975 closedRegionHandlerCalled = new HashMap<HRegionInfo, AtomicBoolean>();
976 openedRegionHandlerCalled = new HashMap<HRegionInfo, AtomicBoolean>();
977 }
978
979 void updateClosedRegionHandlerTracker(HRegionInfo hri) {
980 if (closedRegionHandlerCalled != null) {
981 closedRegionHandlerCalled.put(hri, new AtomicBoolean(true));
982 }
983 }
984
985 void updateOpenedRegionHandlerTracker(HRegionInfo hri) {
986 if (openedRegionHandlerCalled != null) {
987 openedRegionHandlerCalled.put(hri, new AtomicBoolean(true));
988 }
989 }
990
991
992
993
994
995
996 void processFavoredNodes(List<HRegionInfo> regions) throws IOException {
997 if (!shouldAssignRegionsWithFavoredNodes) return;
998
999
1000 Map<HRegionInfo, List<ServerName>> regionToFavoredNodes =
1001 new HashMap<HRegionInfo, List<ServerName>>();
1002 for (HRegionInfo region : regions) {
1003 regionToFavoredNodes.put(region,
1004 ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region));
1005 }
1006 FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(regionToFavoredNodes, catalogTracker);
1007 }
1008
1009
1010
1011
1012
1013
1014
1015 private void handleHBCK(RegionTransition rt) {
1016 String encodedName = HRegionInfo.encodeRegionName(rt.getRegionName());
1017 LOG.info("Handling HBCK triggered transition=" + rt.getEventType() +
1018 ", server=" + rt.getServerName() + ", region=" +
1019 HRegionInfo.prettyPrint(encodedName));
1020 RegionState regionState = regionStates.getRegionTransitionState(encodedName);
1021 switch (rt.getEventType()) {
1022 case M_ZK_REGION_OFFLINE:
1023 HRegionInfo regionInfo;
1024 if (regionState != null) {
1025 regionInfo = regionState.getRegion();
1026 } else {
1027 try {
1028 byte [] name = rt.getRegionName();
1029 Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(catalogTracker, name);
1030 regionInfo = p.getFirst();
1031 } catch (IOException e) {
1032 LOG.info("Exception reading hbase:meta doing HBCK repair operation", e);
1033 return;
1034 }
1035 }
1036 LOG.info("HBCK repair is triggering assignment of region=" +
1037 regionInfo.getRegionNameAsString());
1038
1039 assign(regionInfo, false);
1040 break;
1041
1042 default:
1043 LOG.warn("Received unexpected region state from HBCK: " + rt.toString());
1044 break;
1045 }
1046
1047 }
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063 @Override
1064 public void nodeCreated(String path) {
1065 handleAssignmentEvent(path);
1066 }
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080 @Override
1081 public void nodeDataChanged(String path) {
1082 handleAssignmentEvent(path);
1083 }
1084
1085
1086
1087
1088
1089 private final Set<String> regionsInProgress = new HashSet<String>();
1090
1091
1092 private final LinkedHashMultimap <String, RegionRunnable>
1093 zkEventWorkerWaitingList = LinkedHashMultimap.create();
1094
1095
1096
1097
1098 private interface RegionRunnable extends Runnable{
1099
1100
1101
1102 String getRegionName();
1103 }
1104
1105
1106
1107
1108
1109 protected void zkEventWorkersSubmit(final RegionRunnable regRunnable) {
1110
1111 synchronized (regionsInProgress) {
1112
1113
1114 if (regionsInProgress.contains(regRunnable.getRegionName())) {
1115 synchronized (zkEventWorkerWaitingList){
1116 zkEventWorkerWaitingList.put(regRunnable.getRegionName(), regRunnable);
1117 }
1118 return;
1119 }
1120
1121
1122 regionsInProgress.add(regRunnable.getRegionName());
1123 zkEventWorkers.submit(new Runnable() {
1124 @Override
1125 public void run() {
1126 try {
1127 regRunnable.run();
1128 } finally {
1129
1130
1131 synchronized (regionsInProgress) {
1132 regionsInProgress.remove(regRunnable.getRegionName());
1133 synchronized (zkEventWorkerWaitingList) {
1134 java.util.Set<RegionRunnable> waiting = zkEventWorkerWaitingList.get(
1135 regRunnable.getRegionName());
1136 if (!waiting.isEmpty()) {
1137
1138 RegionRunnable toSubmit = waiting.iterator().next();
1139 zkEventWorkerWaitingList.remove(toSubmit.getRegionName(), toSubmit);
1140 zkEventWorkersSubmit(toSubmit);
1141 }
1142 }
1143 }
1144 }
1145 }
1146 });
1147 }
1148 }
1149
1150 @Override
1151 public void nodeDeleted(final String path) {
1152 if (path.startsWith(watcher.assignmentZNode)) {
1153 final String regionName = ZKAssign.getRegionName(watcher, path);
1154 zkEventWorkersSubmit(new RegionRunnable() {
1155 @Override
1156 public String getRegionName() {
1157 return regionName;
1158 }
1159
1160 @Override
1161 public void run() {
1162 Lock lock = locker.acquireLock(regionName);
1163 try {
1164 RegionState rs = regionStates.getRegionTransitionState(regionName);
1165 if (rs == null) {
1166 rs = regionStates.getRegionState(regionName);
1167 if (rs == null || !rs.isMergingNew()) {
1168
1169 return;
1170 }
1171 }
1172
1173 HRegionInfo regionInfo = rs.getRegion();
1174 String regionNameStr = regionInfo.getRegionNameAsString();
1175 LOG.debug("Znode " + regionNameStr + " deleted, state: " + rs);
1176 boolean disabled = getZKTable().isDisablingOrDisabledTable(regionInfo.getTable());
1177 ServerName serverName = rs.getServerName();
1178 if (serverManager.isServerOnline(serverName)) {
1179 if (rs.isOnServer(serverName)
1180 && (rs.isOpened() || rs.isSplitting())) {
1181 regionOnline(regionInfo, serverName);
1182 if (disabled) {
1183
1184 LOG.info("Opened " + regionNameStr
1185 + "but this table is disabled, triggering close of region");
1186 unassign(regionInfo);
1187 }
1188 } else if (rs.isMergingNew()) {
1189 synchronized (regionStates) {
1190 String p = regionInfo.getEncodedName();
1191 PairOfSameType<HRegionInfo> regions = mergingRegions.get(p);
1192 if (regions != null) {
1193 onlineMergingRegion(disabled, regions.getFirst(), serverName);
1194 onlineMergingRegion(disabled, regions.getSecond(), serverName);
1195 }
1196 }
1197 }
1198 }
1199 } finally {
1200 lock.unlock();
1201 }
1202 }
1203
1204 private void onlineMergingRegion(boolean disabled,
1205 final HRegionInfo hri, final ServerName serverName) {
1206 RegionState regionState = regionStates.getRegionState(hri);
1207 if (regionState != null && regionState.isMerging()
1208 && regionState.isOnServer(serverName)) {
1209 regionOnline(regionState.getRegion(), serverName);
1210 if (disabled) {
1211 unassign(hri);
1212 }
1213 }
1214 }
1215 });
1216 }
1217 }
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231 @Override
1232 public void nodeChildrenChanged(String path) {
1233 if (path.equals(watcher.assignmentZNode)) {
1234 zkEventWorkers.submit(new Runnable() {
1235 @Override
1236 public void run() {
1237 try {
1238
1239 List<String> children =
1240 ZKUtil.listChildrenAndWatchForNewChildren(
1241 watcher, watcher.assignmentZNode);
1242 if (children != null) {
1243 Stat stat = new Stat();
1244 for (String child : children) {
1245
1246
1247
1248 if (!regionStates.isRegionInTransition(child)) {
1249 ZKAssign.getDataAndWatch(watcher, child, stat);
1250 }
1251 }
1252 }
1253 } catch (KeeperException e) {
1254 server.abort("Unexpected ZK exception reading unassigned children", e);
1255 }
1256 }
1257 });
1258 }
1259 }
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269 void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1270 numRegionsOpened.incrementAndGet();
1271 regionStates.regionOnline(regionInfo, sn);
1272
1273
1274 clearRegionPlan(regionInfo);
1275
1276 addToServersInUpdatingTimer(sn);
1277 }
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287 private void handleAssignmentEvent(final String path) {
1288 if (path.startsWith(watcher.assignmentZNode)) {
1289 final String regionName = ZKAssign.getRegionName(watcher, path);
1290
1291 zkEventWorkersSubmit(new RegionRunnable() {
1292 @Override
1293 public String getRegionName() {
1294 return regionName;
1295 }
1296
1297 @Override
1298 public void run() {
1299 try {
1300 Stat stat = new Stat();
1301 byte [] data = ZKAssign.getDataAndWatch(watcher, path, stat);
1302 if (data == null) return;
1303
1304 RegionTransition rt = RegionTransition.parseFrom(data);
1305 handleRegion(rt, stat.getVersion());
1306 } catch (KeeperException e) {
1307 server.abort("Unexpected ZK exception reading unassigned node data", e);
1308 } catch (DeserializationException e) {
1309 server.abort("Unexpected exception deserializing node data", e);
1310 }
1311 }
1312 });
1313 }
1314 }
1315
1316
1317
1318
1319
1320
1321 private void addToServersInUpdatingTimer(final ServerName sn) {
1322 if (tomActivated){
1323 this.serversInUpdatingTimer.add(sn);
1324 }
1325 }
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340 private void updateTimers(final ServerName sn) {
1341 Preconditions.checkState(tomActivated);
1342 if (sn == null) return;
1343
1344
1345
1346
1347
1348 List<Map.Entry<String, RegionPlan>> rps;
1349 synchronized(this.regionPlans) {
1350 rps = new ArrayList<Map.Entry<String, RegionPlan>>(regionPlans.entrySet());
1351 }
1352
1353 for (Map.Entry<String, RegionPlan> e : rps) {
1354 if (e.getValue() != null && e.getKey() != null && sn.equals(e.getValue().getDestination())) {
1355 RegionState regionState = regionStates.getRegionTransitionState(e.getKey());
1356 if (regionState != null) {
1357 regionState.updateTimestampToNow();
1358 }
1359 }
1360 }
1361 }
1362
1363
1364
1365
1366
1367
1368
1369
1370 public void regionOffline(final HRegionInfo regionInfo) {
1371 regionOffline(regionInfo, null);
1372 }
1373
1374 public void offlineDisabledRegion(HRegionInfo regionInfo) {
1375
1376 LOG.debug("Table being disabled so deleting ZK node and removing from " +
1377 "regions in transition, skipping assignment of region " +
1378 regionInfo.getRegionNameAsString());
1379 String encodedName = regionInfo.getEncodedName();
1380 deleteNodeInStates(encodedName, "closed", null,
1381 EventType.RS_ZK_REGION_CLOSED, EventType.M_ZK_REGION_OFFLINE);
1382 regionOffline(regionInfo);
1383 }
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405 public void assign(HRegionInfo region, boolean setOfflineInZK) {
1406 assign(region, setOfflineInZK, false);
1407 }
1408
1409
1410
1411
1412 public void assign(HRegionInfo region,
1413 boolean setOfflineInZK, boolean forceNewPlan) {
1414 if (isDisabledorDisablingRegionInRIT(region)) {
1415 return;
1416 }
1417 if (this.serverManager.isClusterShutdown()) {
1418 LOG.info("Cluster shutdown is set; skipping assign of " +
1419 region.getRegionNameAsString());
1420 return;
1421 }
1422 String encodedName = region.getEncodedName();
1423 Lock lock = locker.acquireLock(encodedName);
1424 try {
1425 RegionState state = forceRegionStateToOffline(region, forceNewPlan);
1426 if (state != null) {
1427 if (regionStates.wasRegionOnDeadServer(encodedName)) {
1428 LOG.info("Skip assigning " + region.getRegionNameAsString()
1429 + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName)
1430 + " is dead but not processed yet");
1431 return;
1432 }
1433 assign(state, setOfflineInZK, forceNewPlan);
1434 }
1435 } finally {
1436 lock.unlock();
1437 }
1438 }
1439
1440
1441
1442
1443
1444
1445
1446 boolean assign(final ServerName destination, final List<HRegionInfo> regions) {
1447 long startTime = EnvironmentEdgeManager.currentTimeMillis();
1448 try {
1449 int regionCount = regions.size();
1450 if (regionCount == 0) {
1451 return true;
1452 }
1453 LOG.debug("Assigning " + regionCount + " region(s) to " + destination.toString());
1454 Set<String> encodedNames = new HashSet<String>(regionCount);
1455 for (HRegionInfo region : regions) {
1456 encodedNames.add(region.getEncodedName());
1457 }
1458
1459 List<HRegionInfo> failedToOpenRegions = new ArrayList<HRegionInfo>();
1460 Map<String, Lock> locks = locker.acquireLocks(encodedNames);
1461 try {
1462 AtomicInteger counter = new AtomicInteger(0);
1463 Map<String, Integer> offlineNodesVersions = new ConcurrentHashMap<String, Integer>();
1464 OfflineCallback cb = new OfflineCallback(
1465 watcher, destination, counter, offlineNodesVersions);
1466 Map<String, RegionPlan> plans = new HashMap<String, RegionPlan>(regions.size());
1467 List<RegionState> states = new ArrayList<RegionState>(regions.size());
1468 for (HRegionInfo region : regions) {
1469 String encodedName = region.getEncodedName();
1470 if (!isDisabledorDisablingRegionInRIT(region)) {
1471 RegionState state = forceRegionStateToOffline(region, false);
1472 boolean onDeadServer = false;
1473 if (state != null) {
1474 if (regionStates.wasRegionOnDeadServer(encodedName)) {
1475 LOG.info("Skip assigning " + region.getRegionNameAsString()
1476 + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName)
1477 + " is dead but not processed yet");
1478 onDeadServer = true;
1479 } else if (asyncSetOfflineInZooKeeper(state, cb, destination)) {
1480 RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
1481 plans.put(encodedName, plan);
1482 states.add(state);
1483 continue;
1484 }
1485 }
1486
1487 if (!onDeadServer) {
1488 LOG.info("failed to force region state to offline or "
1489 + "failed to set it offline in ZK, will reassign later: " + region);
1490 failedToOpenRegions.add(region);
1491 }
1492 }
1493
1494
1495 Lock lock = locks.remove(encodedName);
1496 lock.unlock();
1497 }
1498
1499
1500 int total = states.size();
1501 for (int oldCounter = 0; !server.isStopped();) {
1502 int count = counter.get();
1503 if (oldCounter != count) {
1504 LOG.info(destination.toString() + " unassigned znodes=" + count +
1505 " of total=" + total);
1506 oldCounter = count;
1507 }
1508 if (count >= total) break;
1509 Threads.sleep(5);
1510 }
1511
1512 if (server.isStopped()) {
1513 return false;
1514 }
1515
1516
1517
1518 this.addPlans(plans);
1519
1520 List<Triple<HRegionInfo, Integer, List<ServerName>>> regionOpenInfos =
1521 new ArrayList<Triple<HRegionInfo, Integer, List<ServerName>>>(states.size());
1522 for (RegionState state: states) {
1523 HRegionInfo region = state.getRegion();
1524 String encodedRegionName = region.getEncodedName();
1525 Integer nodeVersion = offlineNodesVersions.get(encodedRegionName);
1526 if (nodeVersion == null || nodeVersion == -1) {
1527 LOG.warn("failed to offline in zookeeper: " + region);
1528 failedToOpenRegions.add(region);
1529 Lock lock = locks.remove(encodedRegionName);
1530 lock.unlock();
1531 } else {
1532 regionStates.updateRegionState(
1533 region, State.PENDING_OPEN, destination);
1534 List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
1535 if (this.shouldAssignRegionsWithFavoredNodes) {
1536 favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
1537 }
1538 regionOpenInfos.add(new Triple<HRegionInfo, Integer, List<ServerName>>(
1539 region, nodeVersion, favoredNodes));
1540 }
1541 }
1542
1543
1544 try {
1545
1546
1547 long maxWaitTime = System.currentTimeMillis() +
1548 this.server.getConfiguration().
1549 getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1550 for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
1551 try {
1552 List<RegionOpeningState> regionOpeningStateList = serverManager
1553 .sendRegionOpen(destination, regionOpenInfos);
1554 if (regionOpeningStateList == null) {
1555
1556 return false;
1557 }
1558 for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
1559 RegionOpeningState openingState = regionOpeningStateList.get(k);
1560 if (openingState != RegionOpeningState.OPENED) {
1561 HRegionInfo region = regionOpenInfos.get(k).getFirst();
1562 if (openingState == RegionOpeningState.ALREADY_OPENED) {
1563 processAlreadyOpenedRegion(region, destination);
1564 } else if (openingState == RegionOpeningState.FAILED_OPENING) {
1565
1566 failedToOpenRegions.add(region);
1567 } else {
1568 LOG.warn("THIS SHOULD NOT HAPPEN: unknown opening state "
1569 + openingState + " in assigning region " + region);
1570 }
1571 }
1572 }
1573 break;
1574 } catch (IOException e) {
1575 if (e instanceof RemoteException) {
1576 e = ((RemoteException)e).unwrapRemoteException();
1577 }
1578 if (e instanceof RegionServerStoppedException) {
1579 LOG.warn("The region server was shut down, ", e);
1580
1581 return false;
1582 } else if (e instanceof ServerNotRunningYetException) {
1583 long now = System.currentTimeMillis();
1584 if (now < maxWaitTime) {
1585 LOG.debug("Server is not yet up; waiting up to " +
1586 (maxWaitTime - now) + "ms", e);
1587 Thread.sleep(100);
1588 i--;
1589 continue;
1590 }
1591 } else if (e instanceof java.net.SocketTimeoutException
1592 && this.serverManager.isServerOnline(destination)) {
1593
1594
1595
1596
1597 if (LOG.isDebugEnabled()) {
1598 LOG.debug("Bulk assigner openRegion() to " + destination
1599 + " has timed out, but the regions might"
1600 + " already be opened on it.", e);
1601 }
1602 continue;
1603 }
1604 throw e;
1605 }
1606 }
1607 } catch (IOException e) {
1608
1609 LOG.info("Unable to communicate with " + destination
1610 + " in order to assign regions, ", e);
1611 return false;
1612 } catch (InterruptedException e) {
1613 throw new RuntimeException(e);
1614 }
1615 } finally {
1616 for (Lock lock : locks.values()) {
1617 lock.unlock();
1618 }
1619 }
1620
1621 if (!failedToOpenRegions.isEmpty()) {
1622 for (HRegionInfo region : failedToOpenRegions) {
1623 if (!regionStates.isRegionOnline(region)) {
1624 invokeAssign(region);
1625 }
1626 }
1627 }
1628 LOG.debug("Bulk assigning done for " + destination);
1629 return true;
1630 } finally {
1631 metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTimeMillis() - startTime);
1632 }
1633 }
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645 private void unassign(final HRegionInfo region,
1646 final RegionState state, final int versionOfClosingNode,
1647 final ServerName dest, final boolean transitionInZK,
1648 final ServerName src) {
1649 ServerName server = src;
1650 if (state != null) {
1651 server = state.getServerName();
1652 }
1653 long maxWaitTime = -1;
1654 for (int i = 1; i <= this.maximumAttempts; i++) {
1655 if (this.server.isStopped() || this.server.isAborted()) {
1656 LOG.debug("Server stopped/aborted; skipping unassign of " + region);
1657 return;
1658 }
1659
1660 if (!serverManager.isServerOnline(server)) {
1661 LOG.debug("Offline " + region.getRegionNameAsString()
1662 + ", no need to unassign since it's on a dead server: " + server);
1663 if (transitionInZK) {
1664
1665 deleteClosingOrClosedNode(region, server);
1666 }
1667 if (state != null) {
1668 regionOffline(region);
1669 }
1670 return;
1671 }
1672 try {
1673
1674 if (serverManager.sendRegionClose(server, region,
1675 versionOfClosingNode, dest, transitionInZK)) {
1676 LOG.debug("Sent CLOSE to " + server + " for region " +
1677 region.getRegionNameAsString());
1678 if (!transitionInZK && state != null) {
1679
1680
1681 unassign(region, state, versionOfClosingNode,
1682 dest, transitionInZK,src);
1683 }
1684 return;
1685 }
1686
1687
1688 LOG.warn("Server " + server + " region CLOSE RPC returned false for " +
1689 region.getRegionNameAsString());
1690 } catch (Throwable t) {
1691 if (t instanceof RemoteException) {
1692 t = ((RemoteException)t).unwrapRemoteException();
1693 }
1694 if (t instanceof NotServingRegionException
1695 || t instanceof RegionServerStoppedException) {
1696 LOG.debug("Offline " + region.getRegionNameAsString()
1697 + ", it's not any more on " + server, t);
1698 if (transitionInZK) {
1699 deleteClosingOrClosedNode(region, server);
1700 }
1701 if (state != null) {
1702 regionOffline(region);
1703 }
1704 return;
1705 } else if (state != null
1706 && t instanceof RegionAlreadyInTransitionException) {
1707
1708 LOG.debug("update " + state + " the timestamp.");
1709 state.updateTimestampToNow();
1710 if (maxWaitTime < 0) {
1711 maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
1712 + this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
1713 DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
1714 }
1715 try {
1716 long now = EnvironmentEdgeManager.currentTimeMillis();
1717 if (now < maxWaitTime) {
1718 LOG.debug("Region is already in transition; "
1719 + "waiting up to " + (maxWaitTime - now) + "ms", t);
1720 Thread.sleep(100);
1721 i--;
1722 }
1723 } catch (InterruptedException ie) {
1724 LOG.warn("Failed to unassign "
1725 + region.getRegionNameAsString() + " since interrupted", ie);
1726 Thread.currentThread().interrupt();
1727 if (!tomActivated) {
1728 regionStates.updateRegionState(region, State.FAILED_CLOSE);
1729 }
1730 return;
1731 }
1732 } else {
1733 LOG.info("Server " + server + " returned " + t + " for "
1734 + region.getRegionNameAsString() + ", try=" + i
1735 + " of " + this.maximumAttempts, t);
1736
1737 }
1738 }
1739 }
1740
1741 if (!tomActivated && state != null) {
1742 regionStates.updateRegionState(region, State.FAILED_CLOSE);
1743 }
1744 }
1745
1746
1747
1748
1749 private RegionState forceRegionStateToOffline(
1750 final HRegionInfo region, final boolean forceNewPlan) {
1751 RegionState state = regionStates.getRegionState(region);
1752 if (state == null) {
1753 LOG.warn("Assigning a region not in region states: " + region);
1754 state = regionStates.createRegionState(region);
1755 }
1756
1757 ServerName sn = state.getServerName();
1758 if (forceNewPlan && LOG.isDebugEnabled()) {
1759 LOG.debug("Force region state offline " + state);
1760 }
1761
1762 switch (state.getState()) {
1763 case OPEN:
1764 case OPENING:
1765 case PENDING_OPEN:
1766 case CLOSING:
1767 case PENDING_CLOSE:
1768 if (!forceNewPlan) {
1769 LOG.debug("Skip assigning " +
1770 region + ", it is already " + state);
1771 return null;
1772 }
1773 case FAILED_CLOSE:
1774 case FAILED_OPEN:
1775 unassign(region, state, -1, null, false, null);
1776 state = regionStates.getRegionState(region);
1777 if (state.isFailedClose()) {
1778
1779
1780 LOG.info("Skip assigning " +
1781 region + ", we couldn't close it: " + state);
1782 return null;
1783 }
1784 case OFFLINE:
1785
1786
1787
1788
1789
1790 if (regionStates.isServerDeadAndNotProcessed(sn)
1791 && wasRegionOnDeadServerByMeta(region, sn)) {
1792 LOG.info("Skip assigning " + region.getRegionNameAsString()
1793 + ", it is on a dead but not processed yet server");
1794 return null;
1795 }
1796 case CLOSED:
1797 break;
1798 default:
1799 LOG.error("Trying to assign region " + region
1800 + ", which is " + state);
1801 return null;
1802 }
1803 return state;
1804 }
1805
1806 private boolean wasRegionOnDeadServerByMeta(
1807 final HRegionInfo region, final ServerName sn) {
1808 try {
1809 if (region.isMetaRegion()) {
1810 ServerName server = catalogTracker.getMetaLocation();
1811 return regionStates.isServerDeadAndNotProcessed(server);
1812 }
1813 while (!server.isStopped()) {
1814 try {
1815 catalogTracker.waitForMeta();
1816 Pair<HRegionInfo, ServerName> r =
1817 MetaReader.getRegion(catalogTracker, region.getRegionName());
1818 ServerName server = r == null ? null : r.getSecond();
1819 return regionStates.isServerDeadAndNotProcessed(server);
1820 } catch (IOException ioe) {
1821 LOG.info("Received exception accessing hbase:meta during force assign "
1822 + region.getRegionNameAsString() + ", retrying", ioe);
1823 }
1824 }
1825 } catch (InterruptedException e) {
1826 Thread.currentThread().interrupt();
1827 LOG.info("Interrupted accessing hbase:meta", e);
1828 }
1829
1830 return regionStates.isServerDeadAndNotProcessed(sn);
1831 }
1832
1833
1834
1835
1836
1837
1838
1839 private void assign(RegionState state,
1840 final boolean setOfflineInZK, final boolean forceNewPlan) {
1841 long startTime = EnvironmentEdgeManager.currentTimeMillis();
1842 try {
1843 RegionState currentState = state;
1844 int versionOfOfflineNode = -1;
1845 RegionPlan plan = null;
1846 long maxWaitTime = -1;
1847 HRegionInfo region = state.getRegion();
1848 RegionOpeningState regionOpenState;
1849 for (int i = 1; i <= maximumAttempts; i++) {
1850 if (server.isStopped() || server.isAborted()) {
1851 LOG.info("Skip assigning " + region.getRegionNameAsString()
1852 + ", the server is stopped/aborted");
1853 return;
1854 }
1855 if (plan == null) {
1856 try {
1857 plan = getRegionPlan(region, forceNewPlan);
1858 } catch (HBaseIOException e) {
1859 LOG.warn("Failed to get region plan", e);
1860 }
1861 }
1862 if (plan == null) {
1863 LOG.warn("Unable to determine a plan to assign " + region);
1864 if (tomActivated){
1865 this.timeoutMonitor.setAllRegionServersOffline(true);
1866 } else {
1867 if (region.isMetaRegion()) {
1868 try {
1869 Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment);
1870 if (i == maximumAttempts) i = 1;
1871 continue;
1872 } catch (InterruptedException e) {
1873 LOG.error("Got exception while waiting for hbase:meta assignment");
1874 Thread.currentThread().interrupt();
1875 }
1876 }
1877 regionStates.updateRegionState(region, State.FAILED_OPEN);
1878 }
1879 return;
1880 }
1881 if (setOfflineInZK && versionOfOfflineNode == -1) {
1882
1883
1884 versionOfOfflineNode = setOfflineInZooKeeper(currentState, plan.getDestination());
1885 if (versionOfOfflineNode != -1) {
1886 if (isDisabledorDisablingRegionInRIT(region)) {
1887 return;
1888 }
1889
1890
1891
1892
1893
1894
1895 TableName tableName = region.getTable();
1896 if (!zkTable.isEnablingTable(tableName) && !zkTable.isEnabledTable(tableName)) {
1897 LOG.debug("Setting table " + tableName + " to ENABLED state.");
1898 setEnabledTable(tableName);
1899 }
1900 }
1901 }
1902 if (setOfflineInZK && versionOfOfflineNode == -1) {
1903 LOG.info("Unable to set offline in ZooKeeper to assign " + region);
1904
1905
1906
1907
1908 if (!server.isAborted()) {
1909 continue;
1910 }
1911 }
1912 LOG.info("Assigning " + region.getRegionNameAsString() +
1913 " to " + plan.getDestination().toString());
1914
1915 currentState = regionStates.updateRegionState(region,
1916 State.PENDING_OPEN, plan.getDestination());
1917
1918 boolean needNewPlan;
1919 final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() +
1920 " to " + plan.getDestination();
1921 try {
1922 List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
1923 if (this.shouldAssignRegionsWithFavoredNodes) {
1924 favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
1925 }
1926 regionOpenState = serverManager.sendRegionOpen(
1927 plan.getDestination(), region, versionOfOfflineNode, favoredNodes);
1928
1929 if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
1930
1931 needNewPlan = true;
1932 LOG.warn(assignMsg + ", regionserver says 'FAILED_OPENING', " +
1933 " trying to assign elsewhere instead; " +
1934 "try=" + i + " of " + this.maximumAttempts);
1935 } else {
1936
1937 if (regionOpenState == RegionOpeningState.ALREADY_OPENED) {
1938 processAlreadyOpenedRegion(region, plan.getDestination());
1939 }
1940 return;
1941 }
1942
1943 } catch (Throwable t) {
1944 if (t instanceof RemoteException) {
1945 t = ((RemoteException) t).unwrapRemoteException();
1946 }
1947
1948
1949
1950
1951 boolean hold = (t instanceof RegionAlreadyInTransitionException ||
1952 t instanceof ServerNotRunningYetException);
1953
1954
1955
1956
1957
1958
1959 boolean retry = !hold && (t instanceof java.net.SocketTimeoutException
1960 && this.serverManager.isServerOnline(plan.getDestination()));
1961
1962
1963 if (hold) {
1964 LOG.warn(assignMsg + ", waiting a little before trying on the same region server " +
1965 "try=" + i + " of " + this.maximumAttempts, t);
1966
1967 if (maxWaitTime < 0) {
1968 if (t instanceof RegionAlreadyInTransitionException) {
1969 maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
1970 + this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
1971 DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
1972 } else {
1973 maxWaitTime = this.server.getConfiguration().
1974 getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1975 }
1976 }
1977 try {
1978 needNewPlan = false;
1979 long now = EnvironmentEdgeManager.currentTimeMillis();
1980 if (now < maxWaitTime) {
1981 LOG.debug("Server is not yet up or region is already in transition; "
1982 + "waiting up to " + (maxWaitTime - now) + "ms", t);
1983 Thread.sleep(100);
1984 i--;
1985 } else if (!(t instanceof RegionAlreadyInTransitionException)) {
1986 LOG.debug("Server is not up for a while; try a new one", t);
1987 needNewPlan = true;
1988 }
1989 } catch (InterruptedException ie) {
1990 LOG.warn("Failed to assign "
1991 + region.getRegionNameAsString() + " since interrupted", ie);
1992 Thread.currentThread().interrupt();
1993 if (!tomActivated) {
1994 regionStates.updateRegionState(region, State.FAILED_OPEN);
1995 }
1996 return;
1997 }
1998 } else if (retry) {
1999 needNewPlan = false;
2000 LOG.warn(assignMsg + ", trying to assign to the same region server " +
2001 "try=" + i + " of " + this.maximumAttempts, t);
2002 } else {
2003 needNewPlan = true;
2004 LOG.warn(assignMsg + ", trying to assign elsewhere instead;" +
2005 " try=" + i + " of " + this.maximumAttempts, t);
2006 }
2007 }
2008
2009 if (i == this.maximumAttempts) {
2010
2011
2012 continue;
2013 }
2014
2015
2016
2017
2018 if (needNewPlan) {
2019
2020
2021
2022
2023 RegionPlan newPlan = null;
2024 try {
2025 newPlan = getRegionPlan(region, true);
2026 } catch (HBaseIOException e) {
2027 LOG.warn("Failed to get region plan", e);
2028 }
2029 if (newPlan == null) {
2030 if (tomActivated) {
2031 this.timeoutMonitor.setAllRegionServersOffline(true);
2032 } else {
2033 regionStates.updateRegionState(region, State.FAILED_OPEN);
2034 }
2035 LOG.warn("Unable to find a viable location to assign region " +
2036 region.getRegionNameAsString());
2037 return;
2038 }
2039
2040 if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
2041
2042
2043
2044 currentState = regionStates.updateRegionState(region, State.OFFLINE);
2045 versionOfOfflineNode = -1;
2046 plan = newPlan;
2047 }
2048 }
2049 }
2050
2051 if (!tomActivated) {
2052 regionStates.updateRegionState(region, State.FAILED_OPEN);
2053 }
2054 } finally {
2055 metricsAssignmentManager.updateAssignmentTime(EnvironmentEdgeManager.currentTimeMillis() - startTime);
2056 }
2057 }
2058
2059 private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
2060
2061
2062
2063 LOG.debug("ALREADY_OPENED " + region.getRegionNameAsString()
2064 + " to " + sn);
2065 String encodedName = region.getEncodedName();
2066 deleteNodeInStates(encodedName, "offline", sn, EventType.M_ZK_REGION_OFFLINE);
2067 regionStates.regionOnline(region, sn);
2068 }
2069
2070 private boolean isDisabledorDisablingRegionInRIT(final HRegionInfo region) {
2071 TableName tableName = region.getTable();
2072 boolean disabled = this.zkTable.isDisabledTable(tableName);
2073 if (disabled || this.zkTable.isDisablingTable(tableName)) {
2074 LOG.info("Table " + tableName + (disabled ? " disabled;" : " disabling;") +
2075 " skipping assign of " + region.getRegionNameAsString());
2076 offlineDisabledRegion(region);
2077 return true;
2078 }
2079 return false;
2080 }
2081
2082 /**
2083  * Sets the region as OFFLINE up in ZooKeeper.
2084  *
2085  * @param state current state of the region
2086  * @param destination server the region is planned for
2087  * @return the version of the created OFFLINE znode, or -1 on failure
2088  */
2089 private int setOfflineInZooKeeper(final RegionState state, final ServerName destination) {
2090 if (!state.isClosed() && !state.isOffline()) {
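// Anything other than CLOSED or OFFLINE here indicates a bug; abort the master.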
2091 String msg = "Unexpected state : " + state + " .. Cannot transition it to OFFLINE.";
2092 this.server.abort(msg, new IllegalStateException(msg));
2093 return -1;
2094 }
2095 regionStates.updateRegionState(state.getRegion(), State.OFFLINE);
2096 int versionOfOfflineNode;
2097 try {
2098
2099 versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(watcher,
2100 state.getRegion(), destination);
2101 if (versionOfOfflineNode == -1) {
2102 LOG.warn("Attempted to create/force node into OFFLINE state before "
2103 + "completing assignment but failed to do so for " + state);
2104 return -1;
2105 }
2106 } catch (KeeperException e) {
2107 server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
2108 return -1;
2109 }
2110 return versionOfOfflineNode;
2111 }
2112
2113 /**
2114  * @param region the region to assign
2115  * @return Plan for the passed region (creating one if none exists), or null
2116  *         if there is no server available to assign to
2117  */
2118 private RegionPlan getRegionPlan(final HRegionInfo region,
2119 final boolean forceNewPlan) throws HBaseIOException {
2120 return getRegionPlan(region, null, forceNewPlan);
2121 }
2122
2123 /**
2124  * @param region the region to assign
2125  * @param serverToExclude server to exclude (we know it's bad); pass null if
2126  *        all servers are thought to be assignable
2127  * @param forceNewPlan if true, a new plan is generated even if one already
2128  *        exists
2129  * @return Plan for the passed region (creating one if none exists), or null
2130  *         if there is no server available to assign to
2131  */
2132 private RegionPlan getRegionPlan(final HRegionInfo region,
2133 final ServerName serverToExclude, final boolean forceNewPlan) throws HBaseIOException {
2134
2135 final String encodedName = region.getEncodedName();
2136 final List<ServerName> destServers =
2137 serverManager.createDestinationServersList(serverToExclude);
2138
2139 if (destServers.isEmpty()){
2140 LOG.warn("Can't move " + encodedName +
2141 ", there is no destination server available.");
2142 return null;
2143 }
2144
2145 RegionPlan randomPlan = null;
2146 boolean newPlan = false;
2147 RegionPlan existingPlan;
2148
2149 synchronized (this.regionPlans) {
2150 existingPlan = this.regionPlans.get(encodedName);
2151
2152 if (existingPlan != null && existingPlan.getDestination() != null) {
2153 LOG.debug("Found an existing plan for " + region.getRegionNameAsString()
2154 + " destination server is " + existingPlan.getDestination() +
2155 " accepted as a dest server = " + destServers.contains(existingPlan.getDestination()));
2156 }
2157
2158 if (forceNewPlan
2159 || existingPlan == null
2160 || existingPlan.getDestination() == null
2161 || !destServers.contains(existingPlan.getDestination())) {
2162 newPlan = true;
2163 randomPlan = new RegionPlan(region, null,
2164 balancer.randomAssignment(region, destServers));
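// When favored-node assignment is enabled, pre-compute favored nodes for this
// region; hbase:meta is excluded from favored-node handling.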
2165 if (!region.isMetaTable() && shouldAssignRegionsWithFavoredNodes) {
2166 List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
2167 regions.add(region);
2168 try {
2169 processFavoredNodes(regions);
2170 } catch (IOException ie) {
2171 LOG.warn("Ignoring exception in processFavoredNodes " + ie);
2172 }
2173 }
2174 this.regionPlans.put(encodedName, randomPlan);
2175 }
2176 }
2177
2178 if (newPlan) {
2179 if (randomPlan.getDestination() == null) {
2180 LOG.warn("Can't find a destination for " + encodedName);
2181 return null;
2182 }
2183 LOG.debug("No previous transition plan found (or ignoring " +
2184 "an existing plan) for " + region.getRegionNameAsString() +
2185 "; generated random plan=" + randomPlan + "; " +
2186 serverManager.countOfRegionServers() +
2187 " (online=" + serverManager.getOnlineServers().size() +
2188 ", available=" + destServers.size() + ") available servers" +
2189 ", forceNewPlan=" + forceNewPlan);
2190 return randomPlan;
2191 }
2192 LOG.debug("Using pre-existing plan for " +
2193 region.getRegionNameAsString() + "; plan=" + existingPlan);
2194 return existingPlan;
2195 }
2196
2197
2198 /**
2199  * Unassigns the specified region.
2200  * <p>
2201  * Updates the RegionState and sends the CLOSE RPC unless the region is being
2202  * split by the region server, in which case the unassign fails silently since
2203  * the region being unassigned is presumed to no longer exist.
2204  * <p>
2205  * If a RegionPlan is already set, it will remain.
2206  *
2207  * @param region region to be unassigned
2208  */
2209
2210 public void unassign(HRegionInfo region) {
2211 unassign(region, false);
2212 }
2213
2214
2215 /**
2216  * Unassigns the specified region.
2217  * <p>
2218  * Updates the RegionState and sends the CLOSE RPC unless the region is being
2219  * split by the region server, in which case the unassign fails silently since
2220  * the region being unassigned is presumed to no longer exist.
2221  * <p>
2222  * If a RegionPlan is already set, it will remain. If the region ends up
2223  * offline and reassignable, it is reassigned in the finally block.
2224  *
2225  * @param region region to be unassigned
2226  * @param force if the region should be closed even if it is already closing
2227  * @param dest the server the region is intended to move to afterwards, if any
2228  */
2229 public void unassign(HRegionInfo region, boolean force, ServerName dest) {
2230
2231 LOG.debug("Starting unassign of " + region.getRegionNameAsString()
2232 + " (offlining), current state: " + regionStates.getRegionState(region));
2233
2234 String encodedName = region.getEncodedName();
2235
2236 int versionOfClosingNode = -1;
2237 // We need a lock here as we're going to do a put later and we don't want
2238 // multiple states to be created concurrently.
2239 ReentrantLock lock = locker.acquireLock(encodedName);
2240 RegionState state = regionStates.getRegionTransitionState(encodedName);
2241 boolean reassign = true;
2242 try {
2243 if (state == null) {
2244 // Region is not in transition.
2245 // We can unassign it only if it's not SPLIT/MERGED.
2246 state = regionStates.getRegionState(encodedName);
2247 if (state != null && state.isUnassignable()) {
2248 LOG.info("Attempting to unassign " + state + ", ignored");
2249
2250 return;
2251 }
2252
2253 try {
2254 if (state == null || state.getServerName() == null) {
2255 // We don't know where the region is; offline it.
2256 // No need to send a CLOSE RPC.
2257 LOG.warn("Attempting to unassign a region not in RegionStates "
2258 + region.getRegionNameAsString() + ", offlined");
2259 regionOffline(region);
2260 return;
2261 }
2262 versionOfClosingNode = ZKAssign.createNodeClosing(
2263 watcher, region, state.getServerName());
2264 if (versionOfClosingNode == -1) {
2265 LOG.info("Attempting to unassign " +
2266 region.getRegionNameAsString() + " but ZK closing node "
2267 + "can't be created.");
2268 reassign = false;
2269 return;
2270 }
2271 } catch (KeeperException e) {
2272 if (e instanceof NodeExistsException) {
2273 // Handle the race between a master-initiated close and a region-server
2274 // initiated split or merge: the existing node may be in a SPLITTING/SPLIT
2275 // or MERGING/MERGED state, meaning the region server got its node up
2276 // before we could create our CLOSING node.
2277 NodeExistsException nee = (NodeExistsException)e;
2278 String path = nee.getPath();
2279 try {
2280 if (isSplitOrSplittingOrMergedOrMerging(path)) {
2281 LOG.debug(path + " is SPLIT or SPLITTING or MERGED or MERGING; " +
2282 "skipping unassign because the region no longer exists -- it was split or merged");
2283 reassign = false;
2284 return;
2285 }
2286 } catch (KeeperException.NoNodeException ke) {
2287 LOG.warn("Failed getData on SPLITTING/SPLIT at " + path +
2288 "; presuming split and that the region to unassign, " +
2289 encodedName + ", no longer exists -- confirm", ke);
2290 return;
2291 } catch (KeeperException ke) {
2292 LOG.error("Unexpected zk state", ke);
2293 } catch (DeserializationException de) {
2294 LOG.error("Failed parse", de);
2295 }
2296 }
2297
2298 server.abort("Unexpected ZK exception creating node CLOSING", e);
2299 reassign = false;
2300 return;
2301 }
2302 state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
2303 } else if (state.isFailedOpen()) {
2304
2305 regionOffline(region);
2306 return;
2307 } else if (force && state.isPendingCloseOrClosing()) {
2308 LOG.debug("Attempting to unassign " + region.getRegionNameAsString() +
2309 " which is already " + state.getState() +
2310 " but forcing to send a CLOSE RPC again ");
2311 if (state.isFailedClose()) {
2312 state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
2313 }
2314 state.updateTimestampToNow();
2315 } else {
2316 LOG.debug("Attempting to unassign " +
2317 region.getRegionNameAsString() + " but it is " +
2318 "already in transition (" + state.getState() + ", force=" + force + ")");
2319 return;
2320 }
2321
2322 unassign(region, state, versionOfClosingNode, dest, true, null);
2323 } finally {
2324 lock.unlock();
2325
2326
2327 if (reassign && regionStates.isRegionOffline(region)) {
2328 assign(region, true);
2329 }
2330 }
2331 }
2332
2333 public void unassign(HRegionInfo region, boolean force){
2334 unassign(region, force, null);
2335 }
2336
2337 /**
2338  * Deletes the CLOSING or CLOSED znode of the given region on the given server, if present.
2339  */
2340 public void deleteClosingOrClosedNode(HRegionInfo region, ServerName sn) {
2341 String encodedName = region.getEncodedName();
2342 deleteNodeInStates(encodedName, "closing", sn, EventType.M_ZK_REGION_CLOSING,
2343 EventType.RS_ZK_REGION_CLOSED);
2344 }
2345
2346 /**
2347  * @param path znode path to check
2348  * @return true if the znode is in a SPLIT, SPLITTING, MERGED or MERGING state
2349  * @throws KeeperException if the znode goes away in the meantime
2350  * @throws DeserializationException if the znode data cannot be parsed
2351  */
2352 private boolean isSplitOrSplittingOrMergedOrMerging(final String path)
2353 throws KeeperException, DeserializationException {
2354 boolean result = false;
2355 // This may fail if the SPLIT/SPLITTING/MERGED/MERGING znode gets
2356 // cleaned up before we can read its data.
2357 byte [] data = ZKAssign.getData(watcher, path);
2358 if (data == null) {
2359 LOG.info("Node " + path + " is gone");
2360 return false;
2361 }
2362 RegionTransition rt = RegionTransition.parseFrom(data);
2363 switch (rt.getEventType()) {
2364 case RS_ZK_REQUEST_REGION_SPLIT:
2365 case RS_ZK_REGION_SPLIT:
2366 case RS_ZK_REGION_SPLITTING:
2367 case RS_ZK_REQUEST_REGION_MERGE:
2368 case RS_ZK_REGION_MERGED:
2369 case RS_ZK_REGION_MERGING:
2370 result = true;
2371 break;
2372 default:
2373 LOG.info("Node " + path + " is in " + rt.getEventType());
2374 break;
2375 }
2376 return result;
2377 }
2378
2379 /**
2380  * Used by unit tests. Returns the number of regions opened so far in the
2381  * life of the master; increases by one every time the master opens a region.
2382  * @return the number of regions opened so far
2383  */
2384 public int getNumRegionsOpened() {
2385 return numRegionsOpened.get();
2386 }
2387
2388 /**
2389  * Waits until the specified region has completed assignment.
2390  * <p>
2391  * If the region is already assigned, returns immediately. Otherwise, blocks
2392  * until the region is assigned, the assignment fails, or the server stops.
2393  * @param regionInfo region to wait on assignment for
2394  * @return true if the region ends up online, false otherwise
2395  */
2396 public boolean waitForAssignment(HRegionInfo regionInfo)
2397 throws InterruptedException {
2398 while (!regionStates.isRegionOnline(regionInfo)) {
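// Give up if the assignment has permanently failed or the master is stopping.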
2399 if (regionStates.isRegionInState(regionInfo, State.FAILED_OPEN)
2400 || this.server.isStopped()) {
2401 return false;
2402 }
2403 // We should receive a notification on a state change, but use a timeout so
2404 // the condition is rechecked at a regular interval even if a notification
2405 // is missed.
2406
2407 regionStates.waitForUpdate(100);
2408 }
2409 return true;
2410 }
2411
2412 /**
2413  * Assigns the hbase:meta region.
2414  * <p>
2415  * Assumes that hbase:meta is currently closed and is not being actively
2416  * served by any region server.
2417  * <p>
2418  * Forcibly unsets the current meta region location in ZooKeeper and assigns
2419  * hbase:meta to a random region server.
2420  * @throws KeeperException
2421  */
2422 public void assignMeta() throws KeeperException {
2423 MetaRegionTracker.deleteMetaLocation(this.watcher);
2424 assign(HRegionInfo.FIRST_META_REGIONINFO, true);
2425 }
2426
2427 /**
2428  * Assigns the specified regions, retaining existing assignments if possible.
2429  * <p>
2430  * This is a synchronous call and will return once every region has been
2431  * assigned. If anything fails, an exception is thrown.
2432  * @throws InterruptedException
2433  * @throws IOException
2434  */
2435 public void assign(Map<HRegionInfo, ServerName> regions)
2436 throws IOException, InterruptedException {
2437 if (regions == null || regions.isEmpty()) {
2438 return;
2439 }
2440 List<ServerName> servers = serverManager.createDestinationServersList();
2441 if (servers == null || servers.isEmpty()) {
2442 throw new IOException("Found no destination server to assign region(s)");
2443 }
2444
2445
2446 Map<ServerName, List<HRegionInfo>> bulkPlan =
2447 balancer.retainAssignment(regions, servers);
2448
2449 assign(regions.size(), servers.size(),
2450 "retainAssignment=true", bulkPlan);
2451 }
2452
2453 /**
2454  * Assigns the specified regions round-robin across the available servers.
2455  * <p>
2456  * This is a synchronous call and will return once every region has been
2457  * assigned. If anything fails, an exception is thrown.
2458  * @throws InterruptedException
2459  * @throws IOException
2460  */
2461 public void assign(List<HRegionInfo> regions)
2462 throws IOException, InterruptedException {
2463 if (regions == null || regions.isEmpty()) {
2464 return;
2465 }
2466
2467 List<ServerName> servers = serverManager.createDestinationServersList();
2468 if (servers == null || servers.isEmpty()) {
2469 throw new IOException("Found no destination server to assign region(s)");
2470 }
2471
2472
2473 Map<ServerName, List<HRegionInfo>> bulkPlan
2474 = balancer.roundRobinAssignment(regions, servers);
2475 processFavoredNodes(regions);
2476
2477 assign(regions.size(), servers.size(),
2478 "round-robin=true", bulkPlan);
2479 }
2480
2481 private void assign(int regions, int totalServers,
2482 String message, Map<ServerName, List<HRegionInfo>> bulkPlan)
2483 throws InterruptedException, IOException {
2484
2485 int servers = bulkPlan.size();
2486 if (servers == 1 || (regions < bulkAssignThresholdRegions
2487 && servers < bulkAssignThresholdServers)) {
2488
2489 // Not using bulk assignment. This can be more efficient for a small cluster,
2490 // especially a mini cluster in tests, so that tests won't time out.
2491 if (LOG.isTraceEnabled()) {
2492 LOG.trace("Not using bulk assignment since we are assigning only " + regions +
2493 " region(s) to " + servers + " server(s)");
2494 }
2495 for (Map.Entry<ServerName, List<HRegionInfo>> plan: bulkPlan.entrySet()) {
2496 if (!assign(plan.getKey(), plan.getValue())) {
2497 for (HRegionInfo region: plan.getValue()) {
2498 if (!regionStates.isRegionOnline(region)) {
2499 invokeAssign(region);
2500 }
2501 }
2502 }
2503 }
2504 } else {
2505 LOG.info("Bulk assigning " + regions + " region(s) across "
2506 + totalServers + " server(s), " + message);
2507
2508
2509 BulkAssigner ba = new GeneralBulkAssigner(
2510 this.server, bulkPlan, this, bulkAssignWaitTillAllAssigned);
2511 ba.bulkAssign();
2512 LOG.info("Bulk assigning done");
2513 }
2514 }
2515
2516 /**
2517  * Assigns all user regions, if any exist. Used during cluster startup.
2518  * <p>
2519  * This is a synchronous call and will return once every region has been
2520  * assigned. If anything fails, an exception is thrown and the cluster
2521  * should be shut down.
2522  * @throws InterruptedException
2523  * @throws IOException
2524  * @throws KeeperException
2525  */
2526 private void assignAllUserRegions()
2527 throws IOException, InterruptedException, KeeperException {
2528
2529 ZKAssign.deleteAllNodes(watcher);
2530 ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
2531 this.watcher.assignmentZNode);
2532 failoverCleanupDone();
2533
2534 // Collect tables in DISABLED, DISABLING or ENABLING state; their regions are
2535 // excluded from the assignment snapshot below and are handled by the table
2536 // state recovery instead.
2537 Set<TableName> disabledOrDisablingOrEnabling = ZKTable.getDisabledOrDisablingTables(watcher);
2538 disabledOrDisablingOrEnabling.addAll(ZKTable.getEnablingTables(watcher));
2539
2540 Map<HRegionInfo, ServerName> allRegions;
2541 SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment =
2542 new SnapshotOfRegionAssignmentFromMeta(catalogTracker, disabledOrDisablingOrEnabling, true);
2543 snapshotOfRegionAssignment.initialize();
2544 allRegions = snapshotOfRegionAssignment.getRegionToRegionServerMap();
2545 if (allRegions == null || allRegions.isEmpty()) return;
2546
2547
2548 boolean retainAssignment = server.getConfiguration().
2549 getBoolean("hbase.master.startup.retainassign", true);
2550
2551 if (retainAssignment) {
2552 assign(allRegions);
2553 } else {
2554 List<HRegionInfo> regions = new ArrayList<HRegionInfo>(allRegions.keySet());
2555 assign(regions);
2556 }
2557
2558 for (HRegionInfo hri : allRegions.keySet()) {
2559 TableName tableName = hri.getTable();
2560 if (!zkTable.isEnabledTable(tableName)) {
2561 setEnabledTable(tableName);
2562 }
2563 }
2564 }
2565
2566 /**
2567  * Waits until no regions are in transition.
2568  * @param timeout how long to wait, in milliseconds
2569  * @return true if nothing is in transition when the wait ends
2570  * @throws InterruptedException
2571  */
2572 boolean waitUntilNoRegionsInTransition(final long timeout)
2573 throws InterruptedException {
2574 // Blocks until there are no regions in transition. It is possible that there
2575 // are regions in transition immediately after this returns, but it guarantees
2576 // that, if it returns without an exception, there was a period of time with
2577 // no regions in transition from the point of view of the in-memory state of
2578 // the master.
2579
2580 final long endTime = System.currentTimeMillis() + timeout;
2581
2582 while (!this.server.isStopped() && regionStates.isRegionsInTransition()
2583 && endTime > System.currentTimeMillis()) {
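// Poll the shared region-states monitor; each wake-up rechecks the condition.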
2584 regionStates.waitForUpdate(100);
2585 }
2586
2587 return !regionStates.isRegionsInTransition();
2588 }
2589
2590 /**
2591  * Rebuilds the list of user regions and their assignment information from a
2592  * full scan of hbase:meta, updating in-memory region states along the way.
2593  * @return a map of servers that are not online to the regions they were
2594  *         hosting, as recorded in hbase:meta
2595  * @throws IOException
2596  * @throws KeeperException
2597  */
2598
2599 Map<ServerName, List<HRegionInfo>> rebuildUserRegions() throws IOException, KeeperException {
2600 Set<TableName> enablingTables = ZKTable.getEnablingTables(watcher);
2601 Set<TableName> disabledOrEnablingTables = ZKTable.getDisabledTables(watcher);
2602 disabledOrEnablingTables.addAll(enablingTables);
2603 Set<TableName> disabledOrDisablingOrEnabling = ZKTable.getDisablingTables(watcher);
2604 disabledOrDisablingOrEnabling.addAll(disabledOrEnablingTables);
2605
2606
2607 List<Result> results = MetaReader.fullScan(this.catalogTracker);
2608
2609 Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
2610
2611 Map<ServerName, List<HRegionInfo>> offlineServers =
2612 new TreeMap<ServerName, List<HRegionInfo>>();
2613
2614 for (Result result : results) {
2615 Pair<HRegionInfo, ServerName> region = HRegionInfo.getHRegionInfoAndServerName(result);
2616 if (region == null) continue;
2617 HRegionInfo regionInfo = region.getFirst();
2618 ServerName regionLocation = region.getSecond();
2619 if (regionInfo == null) continue;
2620 regionStates.createRegionState(regionInfo);
2621 if (regionStates.isRegionInState(regionInfo, State.SPLIT)) {
2622 // The split is complete: the daughters carry the data, so the parent
2623 // does not need to be added to the regions list.
2624 LOG.debug("Region " + regionInfo.getRegionNameAsString()
2625 + " split is completed. Hence need not add to regions list");
2626 continue;
2627 }
2628 TableName tableName = regionInfo.getTable();
2629 if (regionLocation == null) {
2630 // regionLocation can be null if createTable didn't finish properly: the
2631 // master restarted while createTable was in progress, so some regions were
2632 // added to hbase:meta but never assigned. When this happens the region's
2633 // table should be in ENABLING state:
2634 // - it can't be ENABLED, since that is only set once all regions are assigned;
2635 // - it can't be DISABLING, since DISABLING is only reached from ENABLED when
2636 //   disableTable is called;
2637 // - it can't be DISABLED, since DISABLED is only reached from DISABLING.
2638 // Anything else is unexpected and is logged below.
2639
2640 if (!enablingTables.contains(tableName)) {
2641 LOG.warn("Region " + regionInfo.getEncodedName() +
2642 " has null regionLocation." + " But its table " + tableName +
2643 " isn't in ENABLING state.");
2644 }
2645 } else if (!onlineServers.contains(regionLocation)) {
2646
2647 List<HRegionInfo> offlineRegions = offlineServers.get(regionLocation);
2648 if (offlineRegions == null) {
2649 offlineRegions = new ArrayList<HRegionInfo>(1);
2650 offlineServers.put(regionLocation, offlineRegions);
2651 }
2652 offlineRegions.add(regionInfo);
2653 // Enable the table if it is not disabled, disabling or enabling;
2654 // this matters for rolling restarts.
2655 if (!disabledOrDisablingOrEnabling.contains(tableName)
2656 && !getZKTable().isEnabledTable(tableName)) {
2657 setEnabledTable(tableName);
2658 }
2659 } else {
2660 // Region is being served by an online server: mark it online in memory,
2661 // but only if its table is not disabled or enabling.
2662 if (!disabledOrEnablingTables.contains(tableName)) {
2663 regionStates.updateRegionState(regionInfo, State.OPEN, regionLocation);
2664 regionStates.regionOnline(regionInfo, regionLocation);
2665 }
2666 // Enable the table if it is not disabled, disabling or enabling;
2667 // this matters for rolling restarts.
2668 if (!disabledOrDisablingOrEnabling.contains(tableName)
2669 && !getZKTable().isEnabledTable(tableName)) {
2670 setEnabledTable(tableName);
2671 }
2672 }
2673 }
2674 return offlineServers;
2675 }
2676
2677 /**
2678  * Recovers tables that were not fully moved to DISABLED state. These tables
2679  * are in DISABLING state when the master restarted or switched.
2680  *
2681  * @throws KeeperException
2682  * @throws TableNotFoundException
2683  * @throws IOException
2684  */
2685 private void recoverTableInDisablingState()
2686 throws KeeperException, TableNotFoundException, IOException {
2687 Set<TableName> disablingTables = ZKTable.getDisablingTables(watcher);
2688 if (disablingTables.size() != 0) {
2689 for (TableName tableName : disablingTables) {
2690
2691 LOG.info("The table " + tableName
2692 + " is in DISABLING state. Hence recovering by moving the table"
2693 + " to DISABLED state.");
2694 new DisableTableHandler(this.server, tableName, catalogTracker,
2695 this, tableLockManager, true).prepare().process();
2696 }
2697 }
2698 }
2699
2700 /**
2701  * Recovers tables that were not fully moved to ENABLED state. These tables
2702  * are in ENABLING state when the master restarted or switched.
2703  *
2704  * @throws KeeperException
2705  * @throws TableNotFoundException
2706  * @throws IOException
2707  */
2708 private void recoverTableInEnablingState()
2709 throws KeeperException, TableNotFoundException, IOException {
2710 Set<TableName> enablingTables = ZKTable.getEnablingTables(watcher);
2711 if (enablingTables.size() != 0) {
2712 for (TableName tableName : enablingTables) {
2713
2714 LOG.info("The table " + tableName
2715 + " is in ENABLING state. Hence recovering by moving the table"
2716 + " to ENABLED state.");
2717 // Enable the table synchronously as part of master startup;
2718 // coprocessor hooks are not invoked from this path.
2719 EnableTableHandler eth = new EnableTableHandler(this.server, tableName,
2720 catalogTracker, this, tableLockManager, true);
2721 try {
2722 eth.prepare();
2723 } catch (TableNotFoundException e) {
2724 LOG.warn("Table " + tableName + " not found in hbase:meta to recover.");
2725 continue;
2726 }
2727 eth.process();
2728 }
2729 }
2730 }
2731
2732 /**
2733  * Processes the list of dead servers from the hbase:meta scan and the regions
2734  * in transition.
2735  * <p>
2736  * This is used during failover to recover regions that belonged to region
2737  * servers which died while there was no active master, or regions that were
2738  * in transition at that time.
2739  *
2740  * @param deadServers the map of dead servers to the regions they were
2741  *          hosting; may be null
2742  * @throws IOException
2743  * @throws KeeperException
2744  */
2745
2746
2747 private void processDeadServersAndRecoverLostRegions(
2748 Map<ServerName, List<HRegionInfo>> deadServers)
2749 throws IOException, KeeperException {
2750 if (deadServers != null) {
2751 for (Map.Entry<ServerName, List<HRegionInfo>> server: deadServers.entrySet()) {
2752 ServerName serverName = server.getKey();
2753
2754 regionStates.setLastRegionServerOfRegions(serverName, server.getValue());
2755 if (!serverManager.isServerDead(serverName)) {
2756 serverManager.expireServer(serverName);
2757 }
2758 }
2759 }
2760 List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(
2761 this.watcher, this.watcher.assignmentZNode);
2762 if (!nodes.isEmpty()) {
2763 for (String encodedRegionName : nodes) {
2764 processRegionInTransition(encodedRegionName, null);
2765 }
2766 }
2767
2768 // Now we can safely claim that failover cleanup is completed and enable
2769 // ServerShutdownHandler for further processing. The nodes in transition,
2770 // if any, are for regions unrelated to those dead servers and can be
2771 // processed in parallel to SSH.
2772 failoverCleanupDone();
2773 }
2774
2775 /**
2776  * Updates the regions-in-transition metrics.
2777  * This iterates over the regions-in-transition map without synchronization;
2778  * the iteration may see a slightly stale view, but that is better than
2779  * copying the map for metrics computation, as this method is invoked on a
2780  * frequent interval.
2781  */
2782 public void updateRegionsInTransitionMetrics() {
2783 long currentTime = System.currentTimeMillis();
2784 int totalRITs = 0;
2785 int totalRITsOverThreshold = 0;
2786 long oldestRITTime = 0;
2787 int ritThreshold = this.server.getConfiguration().
2788 getInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 60000);
2789 for (RegionState state: regionStates.getRegionsInTransition().values()) {
2790 totalRITs++;
2791 long ritTime = currentTime - state.getStamp();
2792 if (ritTime > ritThreshold) {
2793 totalRITsOverThreshold++;
2794 }
2795 if (oldestRITTime < ritTime) {
2796 oldestRITTime = ritTime;
2797 }
2798 }
2799 if (this.metricsAssignmentManager != null) {
2800 this.metricsAssignmentManager.updateRITOldestAge(oldestRITTime);
2801 this.metricsAssignmentManager.updateRITCount(totalRITs);
2802 this.metricsAssignmentManager.updateRITCountOverThreshold(totalRITsOverThreshold);
2803 }
2804 }
2805
2806 /**
2807  * @param region region whose plan we are to clear
2808  */
2809 void clearRegionPlan(final HRegionInfo region) {
2810 synchronized (this.regionPlans) {
2811 this.regionPlans.remove(region.getEncodedName());
2812 }
2813 }
2814
2815 /**
2816  * Waits on a region to clear regions-in-transition, with no timeout.
2817  * @param hri region to wait on
2818  * @throws InterruptedException
2819  */
2820 public void waitOnRegionToClearRegionsInTransition(final HRegionInfo hri)
2821 throws IOException, InterruptedException {
2822 waitOnRegionToClearRegionsInTransition(hri, -1L);
2823 }
2824
2825 /**
2826  * Waits on a region to clear regions-in-transition or until the timeout elapses.
2827  * @param hri region to wait on
2828  * @param timeOut milliseconds to wait; a non-positive value means no timeout
2829  * @return true if the region cleared regions-in-transition in time, false if
2830  *         it timed out or the server was stopped
2831  */
2832 public boolean waitOnRegionToClearRegionsInTransition(final HRegionInfo hri, long timeOut)
2833 throws InterruptedException {
2834 if (!regionStates.isRegionInTransition(hri)) return true;
2835 long end = (timeOut <= 0) ? Long.MAX_VALUE : EnvironmentEdgeManager.currentTimeMillis()
2836 + timeOut;
2837
2838
2839 LOG.info("Waiting for " + hri.getEncodedName() +
2840 " to leave regions-in-transition, timeOut=" + timeOut + " ms.");
2841 while (!this.server.isStopped() && regionStates.isRegionInTransition(hri)) {
2842 regionStates.waitForUpdate(100);
2843 if (EnvironmentEdgeManager.currentTimeMillis() > end) {
2844 LOG.info("Timed out on waiting for " + hri.getEncodedName() + " to be assigned.");
2845 return false;
2846 }
2847 }
2848 if (this.server.isStopped()) {
2849 LOG.info("Giving up wait on regions in transition because stoppable.isStopped is set");
2850 return false;
2851 }
2852 return true;
2853 }
2854
2855 /**
2856  * Periodic chore that updates timers for all regions in transition on the
2857  * servers queued in serversInUpdatingTimer.
2858  */
2859 public class TimerUpdater extends Chore {
2860
2861 public TimerUpdater(final int period, final Stoppable stopper) {
2862 super("AssignmentTimerUpdater", period, stopper);
2863 }
2864
2865 @Override
2866 protected void chore() {
2867 Preconditions.checkState(tomActivated);
2868 ServerName serverToUpdateTimer = null;
2869 while (!serversInUpdatingTimer.isEmpty() && !stopper.isStopped()) {
2870 if (serverToUpdateTimer == null) {
2871 serverToUpdateTimer = serversInUpdatingTimer.first();
2872 } else {
2873 serverToUpdateTimer = serversInUpdatingTimer
2874 .higher(serverToUpdateTimer);
2875 }
2876 if (serverToUpdateTimer == null) {
2877 break;
2878 }
2879 updateTimers(serverToUpdateTimer);
2880 serversInUpdatingTimer.remove(serverToUpdateTimer);
2881 }
2882 }
2883 }
2884
2885 /**
2886  * Monitor to check for timeouts on region transition operations.
2887  */
2888 public class TimeoutMonitor extends Chore {
2889 private boolean allRegionServersOffline = false;
2890 private ServerManager serverManager;
2891 private final int timeout;
2892
2893 /**
2894  * Creates a periodic monitor to check for timeouts on region transition
2895  * operations, retrying the transition when something does not happen within
2896  * the configured timeout.
2897  * @param period how often the monitor runs
2898  * @param stopper when {@link Stoppable#isStopped()} is true, the chore exits
2899  * @param serverManager used to check which region servers are online
2900  * @param timeout timeout, in milliseconds, for a region transition
2901  */
2902 public TimeoutMonitor(final int period, final Stoppable stopper,
2903 ServerManager serverManager,
2904 final int timeout) {
2905 super("AssignmentTimeoutMonitor", period, stopper);
2906 this.timeout = timeout;
2907 this.serverManager = serverManager;
2908 }
2909
2910 private synchronized void setAllRegionServersOffline(
2911 boolean allRegionServersOffline) {
2912 this.allRegionServersOffline = allRegionServersOffline;
2913 }
2914
2915 @Override
2916 protected void chore() {
2917 Preconditions.checkState(tomActivated);
2918 boolean noRSAvailable = this.serverManager.createDestinationServersList().isEmpty();
2919
2920
2921 long now = System.currentTimeMillis();
2922 // getRegionsInTransition() hands back a snapshot of the map, so no extra
2923 // locking is needed while we iterate it here.
2924 for (String regionName : regionStates.getRegionsInTransition().keySet()) {
2925 RegionState regionState = regionStates.getRegionTransitionState(regionName);
2926 if (regionState == null) continue;
2927
2928 if (regionState.getStamp() + timeout <= now) {
2929
2930 actOnTimeOut(regionState);
2931 } else if (this.allRegionServersOffline && !noRSAvailable) {
2932 RegionPlan existingPlan = regionPlans.get(regionName);
2933 if (existingPlan == null
2934 || !this.serverManager.isServerOnline(existingPlan
2935 .getDestination())) {
2936 // Some region servers just came back online after all were offline; start
2937 // the assignment right away instead of waiting out the full timeout.
2938 actOnTimeOut(regionState);
2939 }
2940 }
2941 }
2942 setAllRegionServersOffline(noRSAvailable);
2943 }
2944
2945 private void actOnTimeOut(RegionState regionState) {
2946 HRegionInfo regionInfo = regionState.getRegion();
2947 LOG.info("Regions in transition timed out: " + regionState);
2948
2949 switch (regionState.getState()) {
2950 case CLOSED:
2951 LOG.info("Region " + regionInfo.getEncodedName()
2952 + " has been CLOSED for too long, waiting on queued "
2953 + "ClosedRegionHandler to run or server shutdown");
2954
2955 regionState.updateTimestampToNow();
2956 break;
2957 case OFFLINE:
2958 LOG.info("Region has been OFFLINE for too long, " + "reassigning "
2959 + regionInfo.getRegionNameAsString() + " to a random server");
2960 invokeAssign(regionInfo);
2961 break;
2962 case PENDING_OPEN:
2963 LOG.info("Region has been PENDING_OPEN for too "
2964 + "long, reassigning region=" + regionInfo.getRegionNameAsString());
2965 invokeAssign(regionInfo);
2966 break;
2967 case OPENING:
2968 processOpeningState(regionInfo);
2969 break;
2970 case OPEN:
2971 LOG.error("Region has been OPEN for too long, " +
2972 "we don't know where region was opened so can't do anything");
2973 regionState.updateTimestampToNow();
2974 break;
2975
2976 case PENDING_CLOSE:
2977 LOG.info("Region has been PENDING_CLOSE for too "
2978 + "long, running forced unassign again on region="
2979 + regionInfo.getRegionNameAsString());
2980 invokeUnassign(regionInfo);
2981 break;
2982 case CLOSING:
2983 LOG.info("Region has been CLOSING for too " +
2984 "long, this should eventually complete or the server will " +
2985 "expire, send RPC again");
2986 invokeUnassign(regionInfo);
2987 break;
2988
2989 case SPLIT:
2990 case SPLITTING:
2991 case FAILED_OPEN:
2992 case FAILED_CLOSE:
2993 case MERGING:
2994 break;
2995
2996 default:
2997 throw new IllegalStateException("Received event is not valid.");
2998 }
2999 }
3000 }
3001
3002 private void processOpeningState(HRegionInfo regionInfo) {
3003 LOG.info("Region has been OPENING for too long, reassigning region="
3004 + regionInfo.getRegionNameAsString());
3005
3006 try {
3007 String node = ZKAssign.getNodeName(watcher, regionInfo.getEncodedName());
3008 Stat stat = new Stat();
3009 byte [] data = ZKAssign.getDataNoWatch(watcher, node, stat);
3010 if (data == null) {
3011 LOG.warn("Data is null, node " + node + " no longer exists");
3012 return;
3013 }
3014 RegionTransition rt = RegionTransition.parseFrom(data);
3015 EventType et = rt.getEventType();
3016 if (et == EventType.RS_ZK_REGION_OPENED) {
3017 LOG.debug("Region has transitioned to OPENED, allowing "
3018 + "watched event handlers to process");
3019 return;
3020 } else if (et != EventType.RS_ZK_REGION_OPENING && et != EventType.RS_ZK_REGION_FAILED_OPEN ) {
3021 LOG.warn("While timing out a region, found ZK node in unexpected state: " + et);
3022 return;
3023 }
3024 invokeAssign(regionInfo);
3025 } catch (KeeperException ke) {
3026 LOG.error("Unexpected ZK exception timing out CLOSING region", ke);
3027 } catch (DeserializationException e) {
3028 LOG.error("Unexpected exception parsing CLOSING region", e);
3029 }
3030 }
3031
3032 void invokeAssign(HRegionInfo regionInfo) {
3033 threadPoolExecutorService.submit(new AssignCallable(this, regionInfo));
3034 }
3035
3036 private void invokeUnassign(HRegionInfo regionInfo) {
3037 threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
3038 }
3039
3040 public boolean isCarryingMeta(ServerName serverName) {
3041 return isCarryingRegion(serverName, HRegionInfo.FIRST_META_REGIONINFO);
3042 }
3043
3044 /**
3045  * Checks whether the shutdown server carries the specified region.
3046  * <p>
3047  * Region locations are stored in several places and they aren't always
3048  * consistent. The ZooKeeper unassigned node has the most recent data, but it
3049  * may be deleted once the region is opened; the assignment manager's info can
3050  * be stale if the OpenedRegionHandler hasn't finished when the server
3051  * shutdown occurs. Prefer ZooKeeper and fall back to the in-memory state.
3052  * @return true if the given server is currently thought to host the region
3053  */
3054 private boolean isCarryingRegion(ServerName serverName, HRegionInfo hri) {
3055 RegionTransition rt = null;
3056 try {
3057 byte [] data = ZKAssign.getData(watcher, hri.getEncodedName());
3058
3059 rt = data == null? null: RegionTransition.parseFrom(data);
3060 } catch (KeeperException e) {
3061 server.abort("Exception reading unassigned node for region=" + hri.getEncodedName(), e);
3062 } catch (DeserializationException e) {
3063 server.abort("Exception parsing unassigned node for region=" + hri.getEncodedName(), e);
3064 }
3065
3066 ServerName addressFromZK = rt != null? rt.getServerName(): null;
3067 if (addressFromZK != null) {
3068
3069 boolean matchZK = addressFromZK.equals(serverName);
3070 LOG.debug("Checking region=" + hri.getRegionNameAsString() + ", zk server=" + addressFromZK +
3071 " current=" + serverName + ", matches=" + matchZK);
3072 return matchZK;
3073 }
3074
3075 ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
3076 boolean matchAM = (addressFromAM != null &&
3077 addressFromAM.equals(serverName));
3078 LOG.debug("based on AM, current region=" + hri.getRegionNameAsString() +
3079 " is on server=" + (addressFromAM != null ? addressFromAM : "null") +
3080 " server being checked: " + serverName);
3081
3082 return matchAM;
3083 }
3084
3085 /**
3086  * Processes a server shutdown, removing any assignments that reference it.
3087  * @param sn server that went down
3088  * @return list of regions in transition on this server
3089  */
3090 public List<HRegionInfo> processServerShutdown(final ServerName sn) {
3091
3092 synchronized (this.regionPlans) {
3093 for (Iterator <Map.Entry<String, RegionPlan>> i =
3094 this.regionPlans.entrySet().iterator(); i.hasNext();) {
3095 Map.Entry<String, RegionPlan> e = i.next();
3096 ServerName otherSn = e.getValue().getDestination();
3097
3098 if (otherSn != null && otherSn.equals(sn)) {
3099
3100 i.remove();
3101 }
3102 }
3103 }
3104 List<HRegionInfo> regions = regionStates.serverOffline(watcher, sn);
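// Walk the regions that were on the dead server and drop the ones that no
// longer need processing here.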
3105 for (Iterator<HRegionInfo> it = regions.iterator(); it.hasNext(); ) {
3106 HRegionInfo hri = it.next();
3107 String encodedName = hri.getEncodedName();
3108
3109
3110 Lock lock = locker.acquireLock(encodedName);
3111 try {
3112 RegionState regionState =
3113 regionStates.getRegionTransitionState(encodedName);
3114 if (regionState == null
3115 || (regionState.getServerName() != null && !regionState.isOnServer(sn))
3116 || !(regionState.isFailedClose() || regionState.isOffline()
3117 || regionState.isPendingOpenOrOpening())) {
3118 LOG.info("Skip " + regionState + " since it is not opening/failed_close"
3119 + " on the dead server any more: " + sn);
3120 it.remove();
3121 } else {
3122 try {
3123
3124 ZKAssign.deleteNodeFailSilent(watcher, hri);
3125 } catch (KeeperException ke) {
3126 server.abort("Unexpected ZK exception deleting node " + hri, ke);
3127 }
3128 if (zkTable.isDisablingOrDisabledTable(hri.getTable())) {
3129 regionStates.regionOffline(hri);
3130 it.remove();
3131 continue;
3132 }
3133
3134 regionStates.updateRegionState(hri, State.OFFLINE);
3135 }
3136 } finally {
3137 lock.unlock();
3138 }
3139 }
3140 return regions;
3141 }
3142
3143 /**
3144  * @param plan the region plan to execute
3145  */
3146 public void balance(final RegionPlan plan) {
3147 HRegionInfo hri = plan.getRegionInfo();
3148 TableName tableName = hri.getTable();
3149 if (zkTable.isDisablingOrDisabledTable(tableName)) {
3150 LOG.info("Ignored moving region of disabling/disabled table "
3151 + tableName);
3152 return;
3153 }
3154
3155
3156 String encodedName = hri.getEncodedName();
3157 ReentrantLock lock = locker.acquireLock(encodedName);
3158 try {
3159 if (!regionStates.isRegionOnline(hri)) {
3160 RegionState state = regionStates.getRegionState(encodedName);
3161 LOG.info("Ignored moving region not assigned: " + hri + ", "
3162 + (state == null ? "not in region states" : state));
3163 return;
3164 }
3165 synchronized (this.regionPlans) {
3166 this.regionPlans.put(plan.getRegionName(), plan);
3167 }
3168 unassign(hri, false, plan.getDestination());
3169 } finally {
3170 lock.unlock();
3171 }
3172 }
3173
3174 public void stop() {
3175 if (tomActivated){
3176 this.timeoutMonitor.interrupt();
3177 this.timerUpdater.interrupt();
3178 }
3179 }
3180
3181 /**
3182  * Shuts down the thread pools used for assignment and ZooKeeper event handling.
3183  */
3184 public void shutdown() {
3185
3186 synchronized (zkEventWorkerWaitingList){
3187 zkEventWorkerWaitingList.clear();
3188 }
3189 threadPoolExecutorService.shutdownNow();
3190 zkEventWorkers.shutdownNow();
3191 }
3192
3193 protected void setEnabledTable(TableName tableName) {
3194 try {
3195 this.zkTable.setEnabledTable(tableName);
3196 } catch (KeeperException e) {
3197
3198 String errorMsg = "Unable to ensure that the table " + tableName
3199 + " will be" + " enabled because of a ZooKeeper issue";
3200 LOG.error(errorMsg);
3201 this.server.abort(errorMsg, e);
3202 }
3203 }
3204
3205 /**
3206  * Sets the region as OFFLINE up in ZooKeeper asynchronously.
3207  * @param cb callback invoked when the znode creation completes
3208  * @param destination server the region is planned for
3209  * @return true if the request was issued, false otherwise (bad state or ZK failure)
3210  */
3211 private boolean asyncSetOfflineInZooKeeper(final RegionState state,
3212 final AsyncCallback.StringCallback cb, final ServerName destination) {
3213 if (!state.isClosed() && !state.isOffline()) {
3214 this.server.abort("Unexpected state trying to OFFLINE; " + state,
3215 new IllegalStateException());
3216 return false;
3217 }
3218 regionStates.updateRegionState(state.getRegion(), State.OFFLINE);
3219 try {
3220 ZKAssign.asyncCreateNodeOffline(watcher, state.getRegion(),
3221 destination, cb, state);
3222 } catch (KeeperException e) {
3223 if (e instanceof NodeExistsException) {
3224 LOG.warn("Node for " + state.getRegion() + " already exists");
3225 } else {
3226 server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
3227 }
3228 return false;
3229 }
3230 return true;
3231 }
3232
3233 private boolean deleteNodeInStates(String encodedName,
3234 String desc, ServerName sn, EventType... types) {
3235 try {
3236 for (EventType et: types) {
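// Try each expected transition type until the node is deleted under one of them.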
3237 if (ZKAssign.deleteNode(watcher, encodedName, et, sn)) {
3238 return true;
3239 }
3240 }
3241 LOG.info("Failed to delete the " + desc + " node for "
3242 + encodedName + ". The node type may not match");
3243 } catch (NoNodeException e) {
3244 if (LOG.isDebugEnabled()) {
3245 LOG.debug("The " + desc + " node for " + encodedName + " already deleted");
3246 }
3247 } catch (KeeperException ke) {
3248 server.abort("Unexpected ZK exception deleting " + desc
3249 + " node for the region " + encodedName, ke);
3250 }
3251 return false;
3252 }
3253
3254 private void deleteMergingNode(String encodedName, ServerName sn) {
3255 deleteNodeInStates(encodedName, "merging", sn, EventType.RS_ZK_REGION_MERGING,
3256 EventType.RS_ZK_REQUEST_REGION_MERGE, EventType.RS_ZK_REGION_MERGED);
3257 }
3258
3259 private void deleteSplittingNode(String encodedName, ServerName sn) {
3260 deleteNodeInStates(encodedName, "splitting", sn, EventType.RS_ZK_REGION_SPLITTING,
3261 EventType.RS_ZK_REQUEST_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT);
3262 }
3263
3264 /**
3265  * Handles a region-merging transition event reported through ZooKeeper,
3266  * updating the in-memory states of the parent and the two merging regions.
3267  */
3268 private boolean handleRegionMerging(final RegionTransition rt, final String encodedName,
3269 final String prettyPrintedRegionName, final ServerName sn) {
3270 if (!serverManager.isServerOnline(sn)) {
3271 LOG.warn("Dropped merging! ServerName=" + sn + " unknown.");
3272 return false;
3273 }
3274 byte [] payloadOfMerging = rt.getPayload();
3275 List<HRegionInfo> mergingRegions;
3276 try {
3277 mergingRegions = HRegionInfo.parseDelimitedFrom(
3278 payloadOfMerging, 0, payloadOfMerging.length);
3279 } catch (IOException e) {
3280 LOG.error("Dropped merging! Failed reading " + rt.getEventType()
3281 + " payload for " + prettyPrintedRegionName);
3282 return false;
3283 }
3284 assert mergingRegions.size() == 3;
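// The payload lists the merged parent region first, then the two regions being merged.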
3285 HRegionInfo p = mergingRegions.get(0);
3286 HRegionInfo hri_a = mergingRegions.get(1);
3287 HRegionInfo hri_b = mergingRegions.get(2);
3288
3289 RegionState rs_p = regionStates.getRegionState(p);
3290 RegionState rs_a = regionStates.getRegionState(hri_a);
3291 RegionState rs_b = regionStates.getRegionState(hri_b);
3292
3293 if (!((rs_a == null || rs_a.isOpenOrMergingOnServer(sn))
3294 && (rs_b == null || rs_b.isOpenOrMergingOnServer(sn))
3295 && (rs_p == null || rs_p.isOpenOrMergingNewOnServer(sn)))) {
3296 LOG.warn("Dropped merging! Not in state good for MERGING; rs_p="
3297 + rs_p + ", rs_a=" + rs_a + ", rs_b=" + rs_b);
3298 return false;
3299 }
3300
3301 EventType et = rt.getEventType();
3302 if (et == EventType.RS_ZK_REQUEST_REGION_MERGE) {
3303 try {
3304 if (RegionMergeTransaction.transitionMergingNode(watcher, p,
3305 hri_a, hri_b, sn, -1, EventType.RS_ZK_REQUEST_REGION_MERGE,
3306 EventType.RS_ZK_REGION_MERGING) == -1) {
3307 byte[] data = ZKAssign.getData(watcher, encodedName);
3308 EventType currentType = null;
3309 if (data != null) {
3310 RegionTransition newRt = RegionTransition.parseFrom(data);
3311 currentType = newRt.getEventType();
3312 }
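// If the znode has already advanced to MERGING or MERGED on its own, accept it;
// otherwise give up on this event.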
3313 if (currentType == null || (currentType != EventType.RS_ZK_REGION_MERGED
3314 && currentType != EventType.RS_ZK_REGION_MERGING)) {
3315 LOG.warn("Failed to transition pending_merge node "
3316 + encodedName + " to merging, it's now " + currentType);
3317 return false;
3318 }
3319 }
3320 } catch (Exception e) {
3321 LOG.warn("Failed to transition pending_merge node "
3322 + encodedName + " to merging", e);
3323 return false;
3324 }
3325 }
3326
3327 synchronized (regionStates) {
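// Update the in-memory states of the parent and both merging regions under the
// regionStates lock.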
3328 regionStates.updateRegionState(hri_a, State.MERGING);
3329 regionStates.updateRegionState(hri_b, State.MERGING);
3330 regionStates.updateRegionState(p, State.MERGING_NEW, sn);
3331
3332 if (et != EventType.RS_ZK_REGION_MERGED) {
3333 regionStates.regionOffline(p, State.MERGING_NEW);
3334 this.mergingRegions.put(encodedName,
3335 new PairOfSameType<HRegionInfo>(hri_a, hri_b));
3336 } else {
3337 this.mergingRegions.remove(encodedName);
3338 regionOffline(hri_a, State.MERGED);
3339 regionOffline(hri_b, State.MERGED);
3340 regionOnline(p, sn);
3341 }
3342 }
3343
3344 if (et == EventType.RS_ZK_REGION_MERGED) {
3345 LOG.debug("Handling MERGED event for " + encodedName + "; deleting node");
3346
3347 try {
3348 boolean successful = false;
3349 while (!successful) {
3350 // It's possible that the region server updates the znode between our read
3351 // and the delete, so it is safe to retry until the delete succeeds.
3352 successful = ZKAssign.deleteNode(watcher, encodedName,
3353 EventType.RS_ZK_REGION_MERGED, sn);
3354 }
3355 } catch (KeeperException e) {
3356 if (e instanceof NoNodeException) {
3357 String znodePath = ZKUtil.joinZNode(watcher.splitLogZNode, encodedName);
3358 LOG.debug("The znode " + znodePath + " does not exist. May be deleted already.");
3359 } else {
3360 server.abort("Error deleting MERGED node " + encodedName, e);
3361 }
3362 }
3363 LOG.info("Handled MERGED event; merged=" + p.getRegionNameAsString()
3364 + ", region_a=" + hri_a.getRegionNameAsString() + ", region_b="
3365 + hri_b.getRegionNameAsString() + ", on " + sn);
3366
3367
3368 if (zkTable.isDisablingOrDisabledTable(p.getTable())) {
3369 unassign(p);
3370 }
3371 }
3372 return true;
3373 }
3374
3375 /**
3376  * Handles a region-splitting transition event reported through ZooKeeper.
3377  */
3378 private boolean handleRegionSplitting(final RegionTransition rt, final String encodedName,
3379 final String prettyPrintedRegionName, final ServerName sn) {
3380 if (!serverManager.isServerOnline(sn)) {
3381 LOG.warn("Dropped splitting! ServerName=" + sn + " unknown.");
3382 return false;
3383 }
3384 byte [] payloadOfSplitting = rt.getPayload();
3385 List<HRegionInfo> splittingRegions;
3386 try {
3387 splittingRegions = HRegionInfo.parseDelimitedFrom(
3388 payloadOfSplitting, 0, payloadOfSplitting.length);
3389 } catch (IOException e) {
3390 LOG.error("Dropped splitting! Failed reading " + rt.getEventType()
3391 + " payload for " + prettyPrintedRegionName);
3392 return false;
3393 }
3394 assert splittingRegions.size() == 2;
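// The payload carries the two daughter regions created by the split.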
3395 HRegionInfo hri_a = splittingRegions.get(0);
3396 HRegionInfo hri_b = splittingRegions.get(1);
3397
3398 RegionState rs_p = regionStates.getRegionState(encodedName);
3399 RegionState rs_a = regionStates.getRegionState(hri_a);
3400 RegionState rs_b = regionStates.getRegionState(hri_b);
3401
3402 if (!((rs_p == null || rs_p.isOpenOrSplittingOnServer(sn))
3403 && (rs_a == null || rs_a.isOpenOrSplittingNewOnServer(sn))
3404 && (rs_b == null || rs_b.isOpenOrSplittingNewOnServer(sn)))) {
3405 LOG.warn("Dropped splitting! Not in state good for SPLITTING; rs_p="
3406 + rs_p + ", rs_a=" + rs_a + ", rs_b=" + rs_b);
3407 return false;
3408 }
3409
3410 if (rs_p == null) {
3411
3412 rs_p = regionStates.updateRegionState(rt, State.OPEN);
3413 if (rs_p == null) {
3414 LOG.warn("Received splitting for region " + prettyPrintedRegionName
3415 + " from server " + sn + " but it doesn't exist anymore,"
3416 + " probably already processed its split");
3417 return false;
3418 }
3419 regionStates.regionOnline(rs_p.getRegion(), sn);
3420 }
3421
3422 HRegionInfo p = rs_p.getRegion();
3423 EventType et = rt.getEventType();
3424 if (et == EventType.RS_ZK_REQUEST_REGION_SPLIT) {
3425 try {
3426 if (SplitTransaction.transitionSplittingNode(watcher, p,
3427 hri_a, hri_b, sn, -1, EventType.RS_ZK_REQUEST_REGION_SPLIT,
3428 EventType.RS_ZK_REGION_SPLITTING) == -1) {
3429 byte[] data = ZKAssign.getData(watcher, encodedName);
3430 EventType currentType = null;
3431 if (data != null) {
3432 RegionTransition newRt = RegionTransition.parseFrom(data);
3433 currentType = newRt.getEventType();
3434 }
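// If the znode has already advanced to SPLITTING or SPLIT on its own, accept it;
// otherwise give up on this event.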
3435 if (currentType == null || (currentType != EventType.RS_ZK_REGION_SPLIT
3436 && currentType != EventType.RS_ZK_REGION_SPLITTING)) {
3437 LOG.warn("Failed to transition pending_split node "
3438 + encodedName + " to splitting, it's now " + currentType);
3439 return false;
3440 }
3441 }
3442 } catch (Exception e) {
3443 LOG.warn("Failed to transition pending_split node "
3444 + encodedName + " to splitting", e);
3445 return false;
3446 }
3447 }
3448
3449 synchronized (regionStates) {
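// Register the daughters as new splitting regions and mark the parent SPLITTING
// under the regionStates lock.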
3450 regionStates.updateRegionState(hri_a, State.SPLITTING_NEW, sn);
3451 regionStates.updateRegionState(hri_b, State.SPLITTING_NEW, sn);
3452 regionStates.regionOffline(hri_a, State.SPLITTING_NEW);
3453 regionStates.regionOffline(hri_b, State.SPLITTING_NEW);
3454 regionStates.updateRegionState(rt, State.SPLITTING);
3455
3456 // The check below is for testing only: it is hard to inject faults here, so
3457 // tests set TEST_SKIP_SPLIT_HANDLING to make the master skip split handling.
3458 if (TEST_SKIP_SPLIT_HANDLING) {
3459 LOG.warn("Skipping split message, TEST_SKIP_SPLIT_HANDLING is set");
3460 return true;
3461 }
3462
3463 if (et == EventType.RS_ZK_REGION_SPLIT) {
3464 regionOffline(p, State.SPLIT);
3465 regionOnline(hri_a, sn);
3466 regionOnline(hri_b, sn);
3467 }
3468 }
3469
3470 if (et == EventType.RS_ZK_REGION_SPLIT) {
3471 LOG.debug("Handling SPLIT event for " + encodedName + "; deleting node");
3472
3473 try {
3474 boolean successful = false;
3475 while (!successful) {
3476 // It's possible that the region server updates the znode between our read
3477 // and the delete, so it is safe to retry until the delete succeeds.
3478 successful = ZKAssign.deleteNode(watcher, encodedName,
3479 EventType.RS_ZK_REGION_SPLIT, sn);
3480 }
3481 } catch (KeeperException e) {
3482 if (e instanceof NoNodeException) {
3483 String znodePath = ZKUtil.joinZNode(watcher.splitLogZNode, encodedName);
3484 LOG.debug("The znode " + znodePath + " does not exist. May be deleted already.");
3485 } else {
3486 server.abort("Error deleting SPLIT node " + encodedName, e);
3487 }
3488 }
3489 LOG.info("Handled SPLIT event; parent=" + p.getRegionNameAsString()
3490 + ", daughter a=" + hri_a.getRegionNameAsString() + ", daughter b="
3491 + hri_b.getRegionNameAsString() + ", on " + sn);
3492
3493
3494 if (zkTable.isDisablingOrDisabledTable(p.getTable())) {
3495 unassign(hri_a);
3496 unassign(hri_b);
3497 }
3498 }
3499 return true;
3500 }
3501
3502 /**
3503  * Marks a region offline. The new state should be the specified one if not
3504  * null; otherwise the new state is Offline. The specified state can only be
3505  * Split, Merged, Offline or null.
3506  */
3507 private void regionOffline(final HRegionInfo regionInfo, final State state) {
3508 regionStates.regionOffline(regionInfo, state);
3509 removeClosedRegion(regionInfo);
3510
3511 clearRegionPlan(regionInfo);
3512 }
3513
3514 /**
3515  * @return the load balancer instance in use
3516  */
3517 public LoadBalancer getBalancer() {
3518 return this.balancer;
3519 }
3520 }