/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 package org.apache.hadoop.hbase.master;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.Collections;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.NavigableMap;
31 import java.util.Set;
32 import java.util.TreeMap;
33 import java.util.concurrent.ConcurrentHashMap;
34 import java.util.concurrent.ConcurrentSkipListSet;
35 import java.util.concurrent.ThreadFactory;
36 import java.util.concurrent.TimeUnit;
37 import java.util.concurrent.atomic.AtomicBoolean;
38 import java.util.concurrent.atomic.AtomicInteger;
39 import java.util.concurrent.locks.Lock;
40 import java.util.concurrent.locks.ReentrantLock;
41
42 import org.apache.commons.logging.Log;
43 import org.apache.commons.logging.LogFactory;
44 import org.apache.hadoop.classification.InterfaceAudience;
45 import org.apache.hadoop.conf.Configuration;
46 import org.apache.hadoop.hbase.Chore;
47 import org.apache.hadoop.hbase.HBaseIOException;
48 import org.apache.hadoop.hbase.HConstants;
49 import org.apache.hadoop.hbase.HRegionInfo;
50 import org.apache.hadoop.hbase.NotServingRegionException;
51 import org.apache.hadoop.hbase.RegionTransition;
52 import org.apache.hadoop.hbase.Server;
53 import org.apache.hadoop.hbase.ServerName;
54 import org.apache.hadoop.hbase.Stoppable;
55 import org.apache.hadoop.hbase.TableName;
56 import org.apache.hadoop.hbase.TableNotFoundException;
57 import org.apache.hadoop.hbase.catalog.CatalogTracker;
58 import org.apache.hadoop.hbase.catalog.MetaReader;
59 import org.apache.hadoop.hbase.client.Result;
60 import org.apache.hadoop.hbase.exceptions.DeserializationException;
61 import org.apache.hadoop.hbase.executor.EventHandler;
62 import org.apache.hadoop.hbase.executor.EventType;
63 import org.apache.hadoop.hbase.executor.ExecutorService;
64 import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException;
65 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
66 import org.apache.hadoop.hbase.master.RegionState.State;
67 import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
68 import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
69 import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
70 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
71 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
72 import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
73 import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException;
74 import org.apache.hadoop.hbase.regionserver.RegionMergeTransaction;
75 import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
76 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
77 import org.apache.hadoop.hbase.regionserver.SplitTransaction;
78 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
79 import org.apache.hadoop.hbase.util.KeyLocker;
80 import org.apache.hadoop.hbase.util.Pair;
81 import org.apache.hadoop.hbase.util.PairOfSameType;
82 import org.apache.hadoop.hbase.util.Threads;
83 import org.apache.hadoop.hbase.util.Triple;
84 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
85 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
86 import org.apache.hadoop.hbase.zookeeper.ZKTable;
87 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
88 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
89 import org.apache.hadoop.ipc.RemoteException;
90 import org.apache.zookeeper.AsyncCallback;
91 import org.apache.zookeeper.KeeperException;
92 import org.apache.zookeeper.KeeperException.NoNodeException;
93 import org.apache.zookeeper.KeeperException.NodeExistsException;
94 import org.apache.zookeeper.data.Stat;
95
96 import com.google.common.base.Preconditions;
97 import com.google.common.collect.LinkedHashMultimap;
98
/**
 * Manages and performs region assignment.
 * <p>
 * Monitors ZooKeeper for events related to regions in transition.
 * <p>
 * Handles existing regions in transition during master failover.
 */
106 @InterfaceAudience.Private
107 public class AssignmentManager extends ZooKeeperListener {
108 private static final Log LOG = LogFactory.getLog(AssignmentManager.class);
109
110 public static final ServerName HBCK_CODE_SERVERNAME = ServerName.valueOf(HConstants.HBCK_CODE_NAME,
111 -1, -1L);
112
113 public static final String ASSIGNMENT_TIMEOUT = "hbase.master.assignment.timeoutmonitor.timeout";
114 public static final int DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT = 600000;
115 public static final String ASSIGNMENT_TIMEOUT_MANAGEMENT = "hbase.assignment.timeout.management";
116 public static final boolean DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT = false;
117
118 public static final String ALREADY_IN_TRANSITION_WAITTIME
119 = "hbase.assignment.already.intransition.waittime";
120 public static final int DEFAULT_ALREADY_IN_TRANSITION_WAITTIME = 60000;
121
122 protected final Server server;
123
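  // Used to check region server liveness, expire dead servers and send region open/close RPCs.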
124 private ServerManager serverManager;
125
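  // True when the configured load balancer is the FavoredNodeLoadBalancer.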
126 private boolean shouldAssignRegionsWithFavoredNodes;
127
128 private CatalogTracker catalogTracker;
129
130 protected final TimeoutMonitor timeoutMonitor;
131
132 private final TimerUpdater timerUpdater;
133
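  // Load balancer used to pick destination servers and build assignment plans.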
134 private LoadBalancer balancer;
135
136 private final MetricsAssignmentManager metricsAssignmentManager;
137
138 private final TableLockManager tableLockManager;
139
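  // Number of regions this assignment manager has brought online.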
140 private AtomicInteger numRegionsOpened = new AtomicInteger(0);
141
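  // Per-region locks, keyed by encoded region name, used to serialize operations on a region.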
142 final private KeyLocker<String> locker = new KeyLocker<String>();
143
  /**
   * Map of regions to reopen after the schema of a table is changed. Key -
   * encoded region name, value - HRegionInfo
   */
148 private final Map <String, HRegionInfo> regionsToReopen;
149
  /**
   * Maximum number of attempts to assign or unassign a region before
   * giving up.
   */
154 private final int maximumAttempts;
155
  /**
   * Map of the two regions being merged, keyed by the encoded name of the
   * new merged region.
   */
159 private final Map<String, PairOfSameType<HRegionInfo>> mergingRegions
160 = new HashMap<String, PairOfSameType<HRegionInfo>>();
161
  /**
   * The sleep time for which the assignment will wait before retrying in case
   * of hbase:meta assignment failure due to lack of availability of a region plan.
   */
166 private final long sleepTimeBeforeRetryingMetaAssignment;
167
  /**
   * Plans for region movement. Key is the encoded version of a region name.
   * All access to this map must be synchronized on the map itself.
   */
172 final NavigableMap<String, RegionPlan> regionPlans =
173 new TreeMap<String, RegionPlan>();
174
175 private final ZKTable zkTable;
176
  /**
   * Servers whose in-transition regions need their timers updated by the
   * {@link TimerUpdater}; only populated when timeout management is activated.
   */
181 private final ConcurrentSkipListSet<ServerName> serversInUpdatingTimer;
182
183 private final ExecutorService executorService;
184
  // For unit tests: tracks whether ClosedRegionHandler was invoked for a region.
186 private Map<HRegionInfo, AtomicBoolean> closedRegionHandlerCalled = null;
187
  // For unit tests: tracks whether OpenedRegionHandler was invoked for a region.
189 private Map<HRegionInfo, AtomicBoolean> openedRegionHandlerCalled = null;
190
  // Thread pool used to run assignment operations asynchronously.
192 private java.util.concurrent.ExecutorService threadPoolExecutorService;
193
  // Worker pool that processes ZooKeeper region transition events.
195 private final java.util.concurrent.ExecutorService zkEventWorkers;
196
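  // Region transition events that are still processed even when the reporting region server is offline.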
197 private List<EventType> ignoreStatesRSOffline = Arrays.asList(
198 EventType.RS_ZK_REGION_FAILED_OPEN, EventType.RS_ZK_REGION_CLOSED);
199
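  // In-memory view of region states and current assignments.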
200 private final RegionStates regionStates;
201
  /**
   * Thresholds (number of regions, number of servers) that control when
   * assignments are performed in bulk rather than one region at a time.
   */
206 private final int bulkAssignThresholdRegions;
207 private final int bulkAssignThresholdServers;
  /**
   * Whether a bulk assign call should wait until all regions have been
   * assigned before returning.
   */
212 private final boolean bulkAssignWaitTillAllAssigned;
213
  /**
   * Indicator that AssignmentManager has recovered the region states so
   * that ServerShutdownHandler can be fully enabled and re-assign regions
   * of dead servers. So that when re-assignment happens, AssignmentManager
   * has proper region states.
   *
   * Protected to ease testing.
   */
222 protected final AtomicBoolean failoverCleanupDone = new AtomicBoolean(false);
223
  // Whether timeout management (the TimeoutMonitor) is activated.
225 private final boolean tomActivated;
  /**
   * A map to track the count a region fails to open in a row.
   * So that if a region failed to open too many times, we can
   * stop the opening attempts for a while.
   */
234 private final ConcurrentHashMap<String, AtomicInteger>
235 failedOpenTracker = new ConcurrentHashMap<String, AtomicInteger>();
236
  /**
   * For testing only!  Set to true to skip handling of split.
   */
240 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="MS_SHOULD_BE_FINAL")
241 public static boolean TEST_SKIP_SPLIT_HANDLING = false;
  /**
   * Constructs a new assignment manager.
   *
   * @param server the server (master) this manager runs inside
   * @param serverManager used to check on and expire region servers
   * @param catalogTracker tracker of the hbase:meta location
   * @param balancer the load balancer used to compute assignment plans
   * @param service executor used to run event handlers
   * @param metricsMaster the master metrics
   * @param tableLockManager used to acquire table locks
   * @throws KeeperException
   * @throws IOException
   */
253 public AssignmentManager(Server server, ServerManager serverManager,
254 CatalogTracker catalogTracker, final LoadBalancer balancer,
255 final ExecutorService service, MetricsMaster metricsMaster,
256 final TableLockManager tableLockManager) throws KeeperException, IOException {
257 super(server.getZooKeeper());
258 this.server = server;
259 this.serverManager = serverManager;
260 this.catalogTracker = catalogTracker;
261 this.executorService = service;
262 this.regionsToReopen = Collections.synchronizedMap
263 (new HashMap<String, HRegionInfo> ());
264 Configuration conf = server.getConfiguration();
265
266 this.shouldAssignRegionsWithFavoredNodes = conf.getClass(
267 HConstants.HBASE_MASTER_LOADBALANCER_CLASS, Object.class).equals(
268 FavoredNodeLoadBalancer.class);
269 this.tomActivated = conf.getBoolean(
270 ASSIGNMENT_TIMEOUT_MANAGEMENT, DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT);
271 if (tomActivated){
272 this.serversInUpdatingTimer = new ConcurrentSkipListSet<ServerName>();
273 this.timeoutMonitor = new TimeoutMonitor(
274 conf.getInt("hbase.master.assignment.timeoutmonitor.period", 30000),
275 server, serverManager,
276 conf.getInt(ASSIGNMENT_TIMEOUT, DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT));
277 this.timerUpdater = new TimerUpdater(conf.getInt(
278 "hbase.master.assignment.timerupdater.period", 10000), server);
279 Threads.setDaemonThreadRunning(timerUpdater.getThread(),
280 server.getServerName() + ".timerUpdater");
281 } else {
282 this.serversInUpdatingTimer = null;
283 this.timeoutMonitor = null;
284 this.timerUpdater = null;
285 }
286 this.zkTable = new ZKTable(this.watcher);
287
288 this.maximumAttempts = Math.max(1,
289 this.server.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10));
290 this.sleepTimeBeforeRetryingMetaAssignment = this.server.getConfiguration().getLong(
291 "hbase.meta.assignment.retry.sleeptime", 1000l);
292 this.balancer = balancer;
293 int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
294 this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
295 maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("AM."));
296 this.regionStates = new RegionStates(server, serverManager);
297
298 this.bulkAssignWaitTillAllAssigned =
299 conf.getBoolean("hbase.bulk.assignment.waittillallassigned", false);
300 this.bulkAssignThresholdRegions = conf.getInt("hbase.bulk.assignment.threshold.regions", 7);
301 this.bulkAssignThresholdServers = conf.getInt("hbase.bulk.assignment.threshold.servers", 3);
302
303 int workers = conf.getInt("hbase.assignment.zkevent.workers", 20);
304 ThreadFactory threadFactory = Threads.newDaemonThreadFactory("AM.ZK.Worker");
305 zkEventWorkers = Threads.getBoundedCachedThreadPool(workers, 60L,
306 TimeUnit.SECONDS, threadFactory);
307 this.tableLockManager = tableLockManager;
308
309 this.metricsAssignmentManager = new MetricsAssignmentManager();
310 }
311
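  /**
   * Starts the timeout monitor thread, if timeout management is activated.
   */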
312 void startTimeOutMonitor() {
313 if (tomActivated) {
314 Threads.setDaemonThreadRunning(timeoutMonitor.getThread(), server.getServerName()
315 + ".timeoutMonitor");
316 }
317 }
318
  /**
   * @return Instance of ZKTable.
   */
322 public ZKTable getZKTable() {
323
324
325 return this.zkTable;
326 }
327
  /**
   * This SHOULD not be public. It is public now
   * because of some unit tests.
   *
   * TODO: make it package private and keep RegionStates in the master package
   */
334 public RegionStates getRegionStates() {
335 return regionStates;
336 }
337
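  /**
   * @return A plan that reopens the region on the server currently hosting it.
   */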
338 public RegionPlan getRegionReopenPlan(HRegionInfo hri) {
339 return new RegionPlan(hri, null, regionStates.getRegionServerOfRegion(hri));
340 }
341
  /**
   * Add a regionPlan for the specified region.
   * @param encodedName encoded name of the region
   * @param plan the plan to add
   */
347 public void addPlan(String encodedName, RegionPlan plan) {
348 synchronized (regionPlans) {
349 regionPlans.put(encodedName, plan);
350 }
351 }
352
  /**
   * Add a map of region plans.
   */
356 public void addPlans(Map<String, RegionPlan> plans) {
357 synchronized (regionPlans) {
358 regionPlans.putAll(plans);
359 }
360 }
361
  /**
   * Set the list of regions that will be reopened
   * because of an update in table schema
   *
   * @param regions
   *          list of regions that should be tracked for reopen
   */
369 public void setRegionsToReopen(List <HRegionInfo> regions) {
370 for(HRegionInfo hri : regions) {
371 regionsToReopen.put(hri.getEncodedName(), hri);
372 }
373 }
  /**
   * Used by the client to identify if all regions have the schema updates
   *
   * @param tableName the table being altered
   * @return Pair of (number of regions still to be reopened or in transition,
   *         total number of regions of the table)
   * @throws IOException
   */
382 public Pair<Integer, Integer> getReopenStatus(TableName tableName)
383 throws IOException {
384 List <HRegionInfo> hris =
385 MetaReader.getTableRegions(this.server.getCatalogTracker(), tableName, true);
386 Integer pending = 0;
387 for (HRegionInfo hri : hris) {
388 String name = hri.getEncodedName();
389
390 if (regionsToReopen.containsKey(name)
391 || regionStates.isRegionInTransition(name)) {
392 pending++;
393 }
394 }
395 return new Pair<Integer, Integer>(pending, hris.size());
396 }
  /**
   * Used by ServerShutdownHandler to make sure AssignmentManager has completed
   * the failover cleanup before re-assigning regions of dead servers. So that
   * when re-assignment happens, AssignmentManager has proper region states.
   */
403 public boolean isFailoverCleanupDone() {
404 return failoverCleanupDone.get();
405 }
  /**
   * To avoid racing with AM, external entities may need to lock a region,
   * for example, when SSH checks what regions to skip re-assigning.
   */
411 public Lock acquireRegionLock(final String encodedName) {
412 return locker.acquireLock(encodedName);
413 }
  /**
   * Now, failover cleanup is completed. Notify server manager to
   * process queued up dead servers processing, if any.
   */
419 void failoverCleanupDone() {
420 failoverCleanupDone.set(true);
421 serverManager.processQueuedDeadServers();
422 }
  /**
   * Called on startup.
   * Figures whether this is a fresh cluster start or a failover into an
   * already-running cluster, and recovers or assigns regions accordingly.
   * @throws IOException
   * @throws KeeperException
   * @throws InterruptedException
   */
431 void joinCluster() throws IOException,
432 KeeperException, InterruptedException {
433
434
435
436
437
438
439
440
441
442
443 Map<ServerName, List<HRegionInfo>> deadServers = rebuildUserRegions();
444
445
446
447
448 processDeadServersAndRegionsInTransition(deadServers);
449
450 recoverTableInDisablingState();
451 recoverTableInEnablingState();
452 }
  /**
   * Process all regions that are in transition in zookeeper and also
   * processes the list of dead servers.
   * Used by the master joining a cluster.  If we figure this is a clean
   * cluster startup, will assign all user regions.
   * @param deadServers
   *          Map of dead servers and their regions. Can be null.
   * @throws KeeperException
   * @throws IOException
   * @throws InterruptedException
   */
465 void processDeadServersAndRegionsInTransition(
466 final Map<ServerName, List<HRegionInfo>> deadServers)
467 throws KeeperException, IOException, InterruptedException {
468 List<String> nodes = ZKUtil.listChildrenNoWatch(watcher,
469 watcher.assignmentZNode);
470
471 if (nodes == null) {
472 String errorMessage = "Failed to get the children from ZK";
473 server.abort(errorMessage, new IOException(errorMessage));
474 return;
475 }
476
477 boolean failover = (!serverManager.getDeadServers().isEmpty() || !serverManager
478 .getRequeuedDeadServers().isEmpty());
479
480 if (!failover) {
481
482 Map<HRegionInfo, ServerName> regions = regionStates.getRegionAssignments();
483 for (HRegionInfo hri: regions.keySet()) {
484 if (!hri.isMetaTable()) {
485 LOG.debug("Found " + hri + " out on cluster");
486 failover = true;
487 break;
488 }
489 }
490 if (!failover) {
491
492 for (String encodedName: nodes) {
493 RegionState state = regionStates.getRegionState(encodedName);
494 if (state != null && !state.getRegion().isMetaRegion()) {
495 LOG.debug("Found " + state.getRegion().getRegionNameAsString() + " in RITs");
496 failover = true;
497 break;
498 }
499 }
500 }
501 }
502
503
504 if (failover) {
505 LOG.info("Found regions out on cluster or in RIT; presuming failover");
506
507
508 processDeadServersAndRecoverLostRegions(deadServers);
509 } else {
510
511 LOG.info("Clean cluster startup. Assigning userregions");
512 assignAllUserRegions();
513 }
514 }
515
  /**
   * If region is up in zk in transition, then do fixup and block and wait until
   * the region is assigned and out of transition.  Used on startup for
   * catalog regions.
   * @param hri Region to look for.
   * @return True if we processed a region in transition else false if region
   *         was not up in zk in transition.
   * @throws InterruptedException
   * @throws KeeperException
   * @throws IOException
   */
527 boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
528 throws InterruptedException, KeeperException, IOException {
529 String encodedRegionName = hri.getEncodedName();
530 if (!processRegionInTransition(encodedRegionName, hri)) {
531 return false;
532 }
533 LOG.debug("Waiting on " + HRegionInfo.prettyPrint(encodedRegionName));
534 while (!this.server.isStopped() &&
535 this.regionStates.isRegionInTransition(encodedRegionName)) {
536 RegionState state = this.regionStates.getRegionTransitionState(encodedRegionName);
537 if (state == null || !serverManager.isServerOnline(state.getServerName())) {
538
539
540
541 break;
542 }
543 this.regionStates.waitForUpdate(100);
544 }
545 return true;
546 }
547
  /**
   * Process failover of new master for region <code>encodedRegionName</code>
   * up in zookeeper.
   * @param encodedRegionName Region to process failover for.
   * @param regionInfo If null we'll go get it from meta table.
   * @return True if we processed <code>regionInfo</code> as a RIT.
   * @throws KeeperException
   * @throws IOException
   */
557 boolean processRegionInTransition(final String encodedRegionName,
558 final HRegionInfo regionInfo) throws KeeperException, IOException {
    // We need a lock here to ensure that we will not put the same region twice
    // It has no reason to be a lock shared with the other operations.
    // We can do the lock on the region only, instead of a global lock: what we want to ensure
    // is that we don't have two threads working on the same region.
563 Lock lock = locker.acquireLock(encodedRegionName);
564 try {
565 Stat stat = new Stat();
566 byte [] data = ZKAssign.getDataAndWatch(watcher, encodedRegionName, stat);
567 if (data == null) return false;
568 RegionTransition rt;
569 try {
570 rt = RegionTransition.parseFrom(data);
571 } catch (DeserializationException e) {
572 LOG.warn("Failed parse znode data", e);
573 return false;
574 }
575 HRegionInfo hri = regionInfo;
576 if (hri == null) {
577
578
579
580
581
582 hri = regionStates.getRegionInfo(rt.getRegionName());
583 EventType et = rt.getEventType();
584 if (hri == null && et != EventType.RS_ZK_REGION_MERGING
585 && et != EventType.RS_ZK_REQUEST_REGION_MERGE) {
586 LOG.warn("Couldn't find the region in recovering " + rt);
587 return false;
588 }
589 }
590 return processRegionsInTransition(
591 rt, hri, stat.getVersion());
592 } finally {
593 lock.unlock();
594 }
595 }
596
  /**
   * Processes a region-in-transition znode found during failover.
   * The caller must already hold the lock on the region.
   * @param rt the parsed region transition data
   * @param regionInfo the region involved, if known
   * @param expectedVersion version of the znode when it was read
   * @return true if the transition was processed
   * @throws KeeperException
   */
605 boolean processRegionsInTransition(
606 final RegionTransition rt, final HRegionInfo regionInfo,
607 final int expectedVersion) throws KeeperException {
608 EventType et = rt.getEventType();
609
610 final ServerName sn = rt.getServerName();
611 final byte[] regionName = rt.getRegionName();
612 final String encodedName = HRegionInfo.encodeRegionName(regionName);
613 final String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
614 LOG.info("Processing " + prettyPrintedRegionName + " in state: " + et);
615
616 if (regionStates.isRegionInTransition(encodedName)) {
617 LOG.info("Processed region " + prettyPrintedRegionName + " in state: "
618 + et + ", does nothing since the region is already in transition "
619 + regionStates.getRegionTransitionState(encodedName));
620
621 return true;
622 }
623 if (!serverManager.isServerOnline(sn)) {
      // It was transitioning on a dead server, so it's closed now.
      // Force to OFFLINE and put it in transition, but not assign it
      // since log splitting for the dead server is not done yet.
627 LOG.debug("RIT " + encodedName + " in state=" + rt.getEventType() +
628 " was on deadserver; forcing offline");
629 if (regionStates.isRegionOnline(regionInfo)) {
630
631
632
633 regionStates.regionOffline(regionInfo);
634 }
635
636 regionStates.updateRegionState(regionInfo, State.OFFLINE, sn);
637
638 if (regionInfo.isMetaRegion()) {
639
640
641 MetaRegionTracker.setMetaLocation(watcher, sn);
642 } else {
643
644
645 regionStates.setLastRegionServerOfRegion(sn, encodedName);
646
647 if (!serverManager.isServerDead(sn)) {
648 serverManager.expireServer(sn);
649 }
650 }
651 return false;
652 }
653 switch (et) {
654 case M_ZK_REGION_CLOSING:
      // Insert into RIT & resend the query to the region server: may be the previous master
      // died before sending the query the first time.
657 final RegionState rsClosing = regionStates.updateRegionState(rt, State.CLOSING);
658 this.executorService.submit(
659 new EventHandler(server, EventType.M_MASTER_RECOVERY) {
660 @Override
661 public void process() throws IOException {
662 ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
663 try {
664 unassign(regionInfo, rsClosing, expectedVersion, null, true, null);
665 if (regionStates.isRegionOffline(regionInfo)) {
666 assign(regionInfo, true);
667 }
668 } finally {
669 lock.unlock();
670 }
671 }
672 });
673 break;
674
675 case RS_ZK_REGION_CLOSED:
676 case RS_ZK_REGION_FAILED_OPEN:
677
678 regionStates.updateRegionState(regionInfo, State.CLOSED, sn);
679 invokeAssign(regionInfo);
680 break;
681
682 case M_ZK_REGION_OFFLINE:
683
684 regionStates.updateRegionState(rt, State.PENDING_OPEN);
685 final RegionState rsOffline = regionStates.getRegionState(regionInfo);
686 this.executorService.submit(
687 new EventHandler(server, EventType.M_MASTER_RECOVERY) {
688 @Override
689 public void process() throws IOException {
690 ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
691 try {
692 RegionPlan plan = new RegionPlan(regionInfo, null, sn);
693 addPlan(encodedName, plan);
694 assign(rsOffline, false, false);
695 } finally {
696 lock.unlock();
697 }
698 }
699 });
700 break;
701
702 case RS_ZK_REGION_OPENING:
703 regionStates.updateRegionState(rt, State.OPENING);
704 break;
705
706 case RS_ZK_REGION_OPENED:
      // Region is opened, insert into RIT and handle it
      // This could be done asynchronously, we would need then to acquire the lock in the
      //  handler.
710 regionStates.updateRegionState(rt, State.OPEN);
711 new OpenedRegionHandler(server, this, regionInfo, sn, expectedVersion).process();
712 break;
713 case RS_ZK_REQUEST_REGION_SPLIT:
714 case RS_ZK_REGION_SPLITTING:
715 case RS_ZK_REGION_SPLIT:
      // Splitting region should be online. We could have skipped it during
      // user region rebuilding since we may consider the split is completed.
      // Put it in SPLITTING state to avoid complications.
719 regionStates.regionOnline(regionInfo, sn);
720 regionStates.updateRegionState(rt, State.SPLITTING);
721 if (!handleRegionSplitting(
722 rt, encodedName, prettyPrintedRegionName, sn)) {
723 deleteSplittingNode(encodedName, sn);
724 }
725 break;
726 case RS_ZK_REQUEST_REGION_MERGE:
727 case RS_ZK_REGION_MERGING:
728 case RS_ZK_REGION_MERGED:
729 if (!handleRegionMerging(
730 rt, encodedName, prettyPrintedRegionName, sn)) {
731 deleteMergingNode(encodedName, sn);
732 }
733 break;
734 default:
        throw new IllegalStateException("Received region in state " + et + ", which is not valid.");
736 }
737 LOG.info("Processed region " + prettyPrintedRegionName + " in state "
738 + et + ", on " + (serverManager.isServerOnline(sn) ? "" : "dead ")
739 + "server: " + sn);
740 return true;
741 }
742
  /**
   * When a region is closed, it should be removed from the regionsToReopen
   * @param hri HRegionInfo of the region which was closed
   */
747 public void removeClosedRegion(HRegionInfo hri) {
748 if (regionsToReopen.remove(hri.getEncodedName()) != null) {
749 LOG.debug("Removed region from reopening regions because it was closed");
750 }
751 }
752
  /**
   * Handles various states an unassigned node can be in.
   * <p>
   * Method is called when a state change is suspected for an unassigned node.
   * <p>
   * This deals with skipped transitions (we got a CLOSED but didn't see CLOSING
   * yet).
   * @param rt region transition read from the znode
   * @param expectedVersion znode version the transition was read at
   */
763 void handleRegion(final RegionTransition rt, int expectedVersion) {
764 if (rt == null) {
765 LOG.warn("Unexpected NULL input for RegionTransition rt");
766 return;
767 }
768 final ServerName sn = rt.getServerName();
769
770 if (sn.equals(HBCK_CODE_SERVERNAME)) {
771 handleHBCK(rt);
772 return;
773 }
774 final long createTime = rt.getCreateTime();
775 final byte[] regionName = rt.getRegionName();
776 String encodedName = HRegionInfo.encodeRegionName(regionName);
777 String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
778
779 if (!serverManager.isServerOnline(sn)
780 && !ignoreStatesRSOffline.contains(rt.getEventType())) {
781 LOG.warn("Attempted to handle region transition for server but " +
782 "it is not online: " + prettyPrintedRegionName + ", " + rt);
783 return;
784 }
785
786 RegionState regionState =
787 regionStates.getRegionState(encodedName);
788 long startTime = System.currentTimeMillis();
789 if (LOG.isDebugEnabled()) {
790 boolean lateEvent = createTime < (startTime - 15000);
791 LOG.debug("Handling " + rt.getEventType() +
792 ", server=" + sn + ", region=" +
793 (prettyPrintedRegionName == null ? "null" : prettyPrintedRegionName) +
794 (lateEvent ? ", which is more than 15 seconds late" : "") +
795 ", current_state=" + regionState);
796 }
797
798
799 if (rt.getEventType() == EventType.M_ZK_REGION_OFFLINE) {
800 return;
801 }
802
803
804 Lock lock = locker.acquireLock(encodedName);
805 try {
806 RegionState latestState =
807 regionStates.getRegionState(encodedName);
808 if ((regionState == null && latestState != null)
809 || (regionState != null && latestState == null)
810 || (regionState != null && latestState != null
811 && latestState.getState() != regionState.getState())) {
812 LOG.warn("Region state changed from " + regionState + " to "
813 + latestState + ", while acquiring lock");
814 }
815 long waitedTime = System.currentTimeMillis() - startTime;
816 if (waitedTime > 5000) {
817 LOG.warn("Took " + waitedTime + "ms to acquire the lock");
818 }
819 regionState = latestState;
820 switch (rt.getEventType()) {
821 case RS_ZK_REQUEST_REGION_SPLIT:
822 case RS_ZK_REGION_SPLITTING:
823 case RS_ZK_REGION_SPLIT:
824 if (!handleRegionSplitting(
825 rt, encodedName, prettyPrintedRegionName, sn)) {
826 deleteSplittingNode(encodedName, sn);
827 }
828 break;
829
830 case RS_ZK_REQUEST_REGION_MERGE:
831 case RS_ZK_REGION_MERGING:
832 case RS_ZK_REGION_MERGED:
833
834
835 if (!handleRegionMerging(
836 rt, encodedName, prettyPrintedRegionName, sn)) {
837 deleteMergingNode(encodedName, sn);
838 }
839 break;
840
841 case M_ZK_REGION_CLOSING:
842
843
844 if (regionState == null
845 || !regionState.isPendingCloseOrClosingOnServer(sn)) {
846 LOG.warn("Received CLOSING for " + prettyPrintedRegionName
847 + " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
848 + regionStates.getRegionState(encodedName));
849 return;
850 }
851
852 regionStates.updateRegionState(rt, State.CLOSING);
853 break;
854
855 case RS_ZK_REGION_CLOSED:
856
857 if (regionState == null
858 || !regionState.isPendingCloseOrClosingOnServer(sn)) {
859 LOG.warn("Received CLOSED for " + prettyPrintedRegionName
860 + " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
861 + regionStates.getRegionState(encodedName));
862 return;
863 }
864
865
866
867 new ClosedRegionHandler(server, this, regionState.getRegion()).process();
868 updateClosedRegionHandlerTracker(regionState.getRegion());
869 break;
870
871 case RS_ZK_REGION_FAILED_OPEN:
872 if (regionState == null
873 || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
874 LOG.warn("Received FAILED_OPEN for " + prettyPrintedRegionName
875 + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
876 + regionStates.getRegionState(encodedName));
877 return;
878 }
879 AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
880 if (failedOpenCount == null) {
881 failedOpenCount = new AtomicInteger();
882
883
884
885 failedOpenTracker.put(encodedName, failedOpenCount);
886 }
887 if (failedOpenCount.incrementAndGet() >= maximumAttempts) {
888 regionStates.updateRegionState(rt, State.FAILED_OPEN);
889
890
891 failedOpenTracker.remove(encodedName);
892 } else {
893
894 regionState = regionStates.updateRegionState(rt, State.CLOSED);
895 if (regionState != null) {
896
897
898 try {
899 getRegionPlan(regionState.getRegion(), sn, true);
900 new ClosedRegionHandler(server, this, regionState.getRegion()).process();
901 } catch (HBaseIOException e) {
902 LOG.warn("Failed to get region plan", e);
903 }
904 }
905 }
906 break;
907
908 case RS_ZK_REGION_OPENING:
909
910
911 if (regionState == null
912 || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
913 LOG.warn("Received OPENING for " + prettyPrintedRegionName
914 + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
915 + regionStates.getRegionState(encodedName));
916 return;
917 }
918
919 regionStates.updateRegionState(rt, State.OPENING);
920 break;
921
922 case RS_ZK_REGION_OPENED:
923
924 if (regionState == null
925 || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
926 LOG.warn("Received OPENED for " + prettyPrintedRegionName
927 + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
928 + regionStates.getRegionState(encodedName));
929
930 if (regionState != null) {
931
932
933
934 unassign(regionState.getRegion(), null, -1, null, false, sn);
935 }
936 return;
937 }
938
939 regionState = regionStates.updateRegionState(rt, State.OPEN);
940 if (regionState != null) {
941 failedOpenTracker.remove(encodedName);
942 new OpenedRegionHandler(
943 server, this, regionState.getRegion(), sn, expectedVersion).process();
944 updateOpenedRegionHandlerTracker(regionState.getRegion());
945 }
946 break;
947
948 default:
949 throw new IllegalStateException("Received event is not valid.");
950 }
951 } finally {
952 lock.unlock();
953 }
954 }
955
956
957 boolean wasClosedHandlerCalled(HRegionInfo hri) {
958 AtomicBoolean b = closedRegionHandlerCalled.get(hri);
959
960
961
962 return b == null ? false : b.compareAndSet(true, false);
963 }
964
965
966 boolean wasOpenedHandlerCalled(HRegionInfo hri) {
967 AtomicBoolean b = openedRegionHandlerCalled.get(hri);
968
969
970
971 return b == null ? false : b.compareAndSet(true, false);
972 }
973
974
975 void initializeHandlerTrackers() {
976 closedRegionHandlerCalled = new HashMap<HRegionInfo, AtomicBoolean>();
977 openedRegionHandlerCalled = new HashMap<HRegionInfo, AtomicBoolean>();
978 }
979
980 void updateClosedRegionHandlerTracker(HRegionInfo hri) {
981 if (closedRegionHandlerCalled != null) {
982 closedRegionHandlerCalled.put(hri, new AtomicBoolean(true));
983 }
984 }
985
986 void updateOpenedRegionHandlerTracker(HRegionInfo hri) {
987 if (openedRegionHandlerCalled != null) {
988 openedRegionHandlerCalled.put(hri, new AtomicBoolean(true));
989 }
990 }
991
  /**
   * When the favored-node balancer is in use, persist the favored node list
   * of each of the passed regions to hbase:meta.
   * @param regions regions whose favored nodes should be recorded
   */
997 void processFavoredNodes(List<HRegionInfo> regions) throws IOException {
998 if (!shouldAssignRegionsWithFavoredNodes) return;
999
1000
1001 Map<HRegionInfo, List<ServerName>> regionToFavoredNodes =
1002 new HashMap<HRegionInfo, List<ServerName>>();
1003 for (HRegionInfo region : regions) {
1004 regionToFavoredNodes.put(region,
1005 ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region));
1006 }
1007 FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(regionToFavoredNodes, catalogTracker);
1008 }
1009
  /**
   * Handle a ZK unassigned node transition triggered by the HBCK repair tool.
   * <p>
   * This is handled via a separate code path because it breaks the normal rules.
   * @param rt region transition written by HBCK
   */
1016 private void handleHBCK(RegionTransition rt) {
1017 String encodedName = HRegionInfo.encodeRegionName(rt.getRegionName());
1018 LOG.info("Handling HBCK triggered transition=" + rt.getEventType() +
1019 ", server=" + rt.getServerName() + ", region=" +
1020 HRegionInfo.prettyPrint(encodedName));
1021 RegionState regionState = regionStates.getRegionTransitionState(encodedName);
1022 switch (rt.getEventType()) {
1023 case M_ZK_REGION_OFFLINE:
1024 HRegionInfo regionInfo;
1025 if (regionState != null) {
1026 regionInfo = regionState.getRegion();
1027 } else {
1028 try {
1029 byte [] name = rt.getRegionName();
1030 Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(catalogTracker, name);
1031 regionInfo = p.getFirst();
1032 } catch (IOException e) {
1033 LOG.info("Exception reading hbase:meta doing HBCK repair operation", e);
1034 return;
1035 }
1036 }
1037 LOG.info("HBCK repair is triggering assignment of region=" +
1038 regionInfo.getRegionNameAsString());
1039
1040 assign(regionInfo, false);
1041 break;
1042
1043 default:
1044 LOG.warn("Received unexpected region state from HBCK: " + rt.toString());
1045 break;
1046 }
1047
1048 }
1049
  /**
   * New unassigned node has been created.
   *
   * <p>This happens when an RS begins the OPENING or CLOSING of a region by
   * creating an unassigned node.
   *
   * <p>When this happens we must:
   * <ol>
   *   <li>Watch the node for further events</li>
   *   <li>Read and handle the state in the node</li>
   * </ol>
   */
1064 @Override
1065 public void nodeCreated(String path) {
1066 handleAssignmentEvent(path);
1067 }
  /**
   * Existing unassigned node has had its data changed.
   *
   * <p>This happens when an RS transitions from OFFLINE to OPENING, or between
   * OPENING and OPENED, or from CLOSING to CLOSED.
   *
   * <p>When this happens we must:
   * <ol>
   *   <li>Watch the node for further events</li>
   *   <li>Read and handle the state in the node</li>
   * </ol>
   */
1081 @Override
1082 public void nodeDataChanged(String path) {
1083 handleAssignmentEvent(path);
1084 }
1085
  // We don't want two events for the same region to be processed at the same
  // time, so we track the regions that currently have an event in progress
  // and queue any further events for them.
1090 private final Set<String> regionsInProgress = new HashSet<String>();
  // A LinkedHashMultimap keeps insertion order when retrieving the collection
  // back, so queued events for a region are processed in the order received.
1093 private final LinkedHashMultimap <String, RegionRunnable>
1094 zkEventWorkerWaitingList = LinkedHashMultimap.create();
1095
  /**
   * A specific runnable that works only on a region.
   */
1099 private interface RegionRunnable extends Runnable{
    /**
     * @return the name of the region this runnable works on.
     */
1103 String getRegionName();
1104 }
1105
  /**
   * Submit a region event worker, making sure that at most one task at a time
   * runs for any given region; other tasks for the same region are queued.
   */
1110 protected void zkEventWorkersSubmit(final RegionRunnable regRunnable) {
1111
1112 synchronized (regionsInProgress) {
1113
1114
1115 if (regionsInProgress.contains(regRunnable.getRegionName())) {
1116 synchronized (zkEventWorkerWaitingList){
1117 zkEventWorkerWaitingList.put(regRunnable.getRegionName(), regRunnable);
1118 }
1119 return;
1120 }
1121
1122
1123 regionsInProgress.add(regRunnable.getRegionName());
1124 zkEventWorkers.submit(new Runnable() {
1125 @Override
1126 public void run() {
1127 try {
1128 regRunnable.run();
1129 } finally {
1130
1131
1132 synchronized (regionsInProgress) {
1133 regionsInProgress.remove(regRunnable.getRegionName());
1134 synchronized (zkEventWorkerWaitingList) {
1135 java.util.Set<RegionRunnable> waiting = zkEventWorkerWaitingList.get(
1136 regRunnable.getRegionName());
1137 if (!waiting.isEmpty()) {
1138
1139 RegionRunnable toSubmit = waiting.iterator().next();
1140 zkEventWorkerWaitingList.remove(toSubmit.getRegionName(), toSubmit);
1141 zkEventWorkersSubmit(toSubmit);
1142 }
1143 }
1144 }
1145 }
1146 }
1147 });
1148 }
1149 }
1150
1151 @Override
1152 public void nodeDeleted(final String path) {
1153 if (path.startsWith(watcher.assignmentZNode)) {
1154 final String regionName = ZKAssign.getRegionName(watcher, path);
1155 zkEventWorkersSubmit(new RegionRunnable() {
1156 @Override
1157 public String getRegionName() {
1158 return regionName;
1159 }
1160
1161 @Override
1162 public void run() {
1163 Lock lock = locker.acquireLock(regionName);
1164 try {
1165 RegionState rs = regionStates.getRegionTransitionState(regionName);
1166 if (rs == null) {
1167 rs = regionStates.getRegionState(regionName);
1168 if (rs == null || !rs.isMergingNew()) {
1169
1170 return;
1171 }
1172 }
1173
1174 HRegionInfo regionInfo = rs.getRegion();
1175 String regionNameStr = regionInfo.getRegionNameAsString();
1176 LOG.debug("Znode " + regionNameStr + " deleted, state: " + rs);
1177 boolean disabled = getZKTable().isDisablingOrDisabledTable(regionInfo.getTable());
1178 ServerName serverName = rs.getServerName();
1179 if (serverManager.isServerOnline(serverName)) {
1180 if (rs.isOnServer(serverName)
1181 && (rs.isOpened() || rs.isSplitting())) {
1182 regionOnline(regionInfo, serverName);
1183 if (disabled) {
1184
                LOG.info("Opened " + regionNameStr
                  + " but this table is disabled, triggering close of region");
1187 unassign(regionInfo);
1188 }
1189 } else if (rs.isMergingNew()) {
1190 synchronized (regionStates) {
1191 String p = regionInfo.getEncodedName();
1192 PairOfSameType<HRegionInfo> regions = mergingRegions.get(p);
1193 if (regions != null) {
1194 onlineMergingRegion(disabled, regions.getFirst(), serverName);
1195 onlineMergingRegion(disabled, regions.getSecond(), serverName);
1196 }
1197 }
1198 }
1199 }
1200 } finally {
1201 lock.unlock();
1202 }
1203 }
1204
1205 private void onlineMergingRegion(boolean disabled,
1206 final HRegionInfo hri, final ServerName serverName) {
1207 RegionState regionState = regionStates.getRegionState(hri);
1208 if (regionState != null && regionState.isMerging()
1209 && regionState.isOnServer(serverName)) {
1210 regionOnline(regionState.getRegion(), serverName);
1211 if (disabled) {
1212 unassign(hri);
1213 }
1214 }
1215 }
1216 });
1217 }
1218 }
1219
  /**
   * Children of the unassigned znode have changed.
   *
   * <p>This happens when a region server creates new unassigned nodes, for
   * example when it begins a split.
   *
   * <p>When this happens we must:
   * <ol>
   *   <li>Watch the unassigned znode for further children changed events</li>
   *   <li>Watch all new children for changed events</li>
   * </ol>
   */
1232 @Override
1233 public void nodeChildrenChanged(String path) {
1234 if (path.equals(watcher.assignmentZNode)) {
1235 zkEventWorkers.submit(new Runnable() {
1236 @Override
1237 public void run() {
1238 try {
1239
1240 List<String> children =
1241 ZKUtil.listChildrenAndWatchForNewChildren(
1242 watcher, watcher.assignmentZNode);
1243 if (children != null) {
1244 Stat stat = new Stat();
1245 for (String child : children) {
1246
1247
1248
1249 if (!regionStates.isRegionInTransition(child)) {
1250 ZKAssign.getDataAndWatch(watcher, child, stat);
1251 }
1252 }
1253 }
1254 } catch (KeeperException e) {
1255 server.abort("Unexpected ZK exception reading unassigned children", e);
1256 }
1257 }
1258 });
1259 }
1260 }
1261
  /**
   * Marks the region as online.  Removes it from regions in transition,
   * updates the in-memory assignment, clears any region plan and, when the
   * timeout monitor is activated, schedules a timer update for the server.
   * @param regionInfo region that was opened
   * @param sn server the region opened on
   */
1270 void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1271 numRegionsOpened.incrementAndGet();
1272 regionStates.regionOnline(regionInfo, sn);
1273
1274
1275 clearRegionPlan(regionInfo);
1276
1277 addToServersInUpdatingTimer(sn);
1278 }
1279
  /**
   * Pass the assignment event to a worker for processing, keyed by region
   * name so that events for the same region are processed in order.
   * @param path znode path of the assignment event
   */
1288 private void handleAssignmentEvent(final String path) {
1289 if (path.startsWith(watcher.assignmentZNode)) {
1290 final String regionName = ZKAssign.getRegionName(watcher, path);
1291
1292 zkEventWorkersSubmit(new RegionRunnable() {
1293 @Override
1294 public String getRegionName() {
1295 return regionName;
1296 }
1297
1298 @Override
1299 public void run() {
1300 try {
1301 Stat stat = new Stat();
1302 byte [] data = ZKAssign.getDataAndWatch(watcher, path, stat);
1303 if (data == null) return;
1304
1305 RegionTransition rt = RegionTransition.parseFrom(data);
1306 handleRegion(rt, stat.getVersion());
1307 } catch (KeeperException e) {
1308 server.abort("Unexpected ZK exception reading unassigned node data", e);
1309 } catch (DeserializationException e) {
1310 server.abort("Unexpected exception deserializing node data", e);
1311 }
1312 }
1313 });
1314 }
1315 }
1316
  /**
   * Add the server to the set of servers whose region-in-transition timers
   * should be updated; a no-op unless the timeout monitor is activated.
   * @param sn server to add
   */
1322 private void addToServersInUpdatingTimer(final ServerName sn) {
1323 if (tomActivated){
1324 this.serversInUpdatingTimer.add(sn);
1325 }
1326 }
1327
  /**
   * Touch timers for all regions in transition that have the passed
   * <code>sn</code> in common.
   * Call this method whenever a server checks in.  Doing so helps the case where
   * a new regionserver has joined the cluster and has been given 1k regions to
   * open.  If this method is tickled every time a region reports in a
   * successful open, then the 1k-th region won't be timed out just because it is
   * sitting behind the open of 999 other regions.
   * @param sn ServerName of the server the regions are being opened on
   */
1341 private void updateTimers(final ServerName sn) {
1342 Preconditions.checkState(tomActivated);
1343 if (sn == null) return;
1344
1345
1346
1347
1348
1349 List<Map.Entry<String, RegionPlan>> rps;
1350 synchronized(this.regionPlans) {
1351 rps = new ArrayList<Map.Entry<String, RegionPlan>>(regionPlans.entrySet());
1352 }
1353
1354 for (Map.Entry<String, RegionPlan> e : rps) {
1355 if (e.getValue() != null && e.getKey() != null && sn.equals(e.getValue().getDestination())) {
1356 RegionState regionState = regionStates.getRegionTransitionState(e.getKey());
1357 if (regionState != null) {
1358 regionState.updateTimestampToNow();
1359 }
1360 }
1361 }
1362 }
1363
  /**
   * Marks the region as offline.  Removes it from regions in transition and
   * removes the in-memory assignment information.
   * <p>
   * Used when a region has been closed and should remain closed.
   * @param regionInfo region that was closed
   */
1371 public void regionOffline(final HRegionInfo regionInfo) {
1372 regionOffline(regionInfo, null);
1373 }
1374
1375 public void offlineDisabledRegion(HRegionInfo regionInfo) {
1376
1377 LOG.debug("Table being disabled so deleting ZK node and removing from " +
1378 "regions in transition, skipping assignment of region " +
1379 regionInfo.getRegionNameAsString());
1380 String encodedName = regionInfo.getEncodedName();
1381 deleteNodeInStates(encodedName, "closed", null,
1382 EventType.RS_ZK_REGION_CLOSED, EventType.M_ZK_REGION_OFFLINE);
1383 regionOffline(regionInfo);
1384 }
1385
  /**
   * Assigns the specified region.
   * <p>
   * If a RegionPlan is available with a valid destination then it will be used
   * to determine what server region is assigned to.  If no RegionPlan is
   * available, region will be assigned to a random available server.
   * <p>
   * Updates the RegionState and sends the OPEN RPC.
   * <p>
   * This will only succeed if the region is in transition and in a CLOSED or
   * OFFLINE state or not in transition, and of course, the chosen server is
   * up and running.
   *
   * @param region region to be assigned
   * @param setOfflineInZK whether the ZK node should be forced to OFFLINE
   *                       before assigning the region
   */
1406 public void assign(HRegionInfo region, boolean setOfflineInZK) {
1407 assign(region, setOfflineInZK, false);
1408 }
  /**
   * Use care with forceNewPlan. It could cause double assignment.
   */
1413 public void assign(HRegionInfo region,
1414 boolean setOfflineInZK, boolean forceNewPlan) {
1415 if (isDisabledorDisablingRegionInRIT(region)) {
1416 return;
1417 }
1418 if (this.serverManager.isClusterShutdown()) {
1419 LOG.info("Cluster shutdown is set; skipping assign of " +
1420 region.getRegionNameAsString());
1421 return;
1422 }
1423 String encodedName = region.getEncodedName();
1424 Lock lock = locker.acquireLock(encodedName);
1425 try {
1426 RegionState state = forceRegionStateToOffline(region, forceNewPlan);
1427 if (state != null) {
1428 if (regionStates.wasRegionOnDeadServer(encodedName)) {
1429 LOG.info("Skip assigning " + region.getRegionNameAsString()
1430 + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName)
1431 + " is dead but not processed yet");
1432 return;
1433 }
1434 assign(state, setOfflineInZK, forceNewPlan);
1435 }
1436 } finally {
1437 lock.unlock();
1438 }
1439 }
1440
  /**
   * Bulk assign regions to <code>destination</code>.
   * @param destination server to assign the regions to
   * @param regions regions to assign
   * @return true if the assignment was carried out, false if it was aborted
   */
1447 boolean assign(final ServerName destination, final List<HRegionInfo> regions) {
1448 long startTime = EnvironmentEdgeManager.currentTimeMillis();
1449 try {
1450 int regionCount = regions.size();
1451 if (regionCount == 0) {
1452 return true;
1453 }
1454 LOG.debug("Assigning " + regionCount + " region(s) to " + destination.toString());
1455 Set<String> encodedNames = new HashSet<String>(regionCount);
1456 for (HRegionInfo region : regions) {
1457 encodedNames.add(region.getEncodedName());
1458 }
1459
1460 List<HRegionInfo> failedToOpenRegions = new ArrayList<HRegionInfo>();
1461 Map<String, Lock> locks = locker.acquireLocks(encodedNames);
1462 try {
1463 AtomicInteger counter = new AtomicInteger(0);
1464 Map<String, Integer> offlineNodesVersions = new ConcurrentHashMap<String, Integer>();
1465 OfflineCallback cb = new OfflineCallback(
1466 watcher, destination, counter, offlineNodesVersions);
1467 Map<String, RegionPlan> plans = new HashMap<String, RegionPlan>(regions.size());
1468 List<RegionState> states = new ArrayList<RegionState>(regions.size());
1469 for (HRegionInfo region : regions) {
1470 String encodedName = region.getEncodedName();
1471 if (!isDisabledorDisablingRegionInRIT(region)) {
1472 RegionState state = forceRegionStateToOffline(region, false);
1473 boolean onDeadServer = false;
1474 if (state != null) {
1475 if (regionStates.wasRegionOnDeadServer(encodedName)) {
1476 LOG.info("Skip assigning " + region.getRegionNameAsString()
1477 + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName)
1478 + " is dead but not processed yet");
1479 onDeadServer = true;
1480 } else if (asyncSetOfflineInZooKeeper(state, cb, destination)) {
1481 RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
1482 plans.put(encodedName, plan);
1483 states.add(state);
1484 continue;
1485 }
1486 }
1487
1488 if (!onDeadServer) {
1489 LOG.info("failed to force region state to offline or "
1490 + "failed to set it offline in ZK, will reassign later: " + region);
1491 failedToOpenRegions.add(region);
1492 }
1493 }
1494
1495
1496 Lock lock = locks.remove(encodedName);
1497 lock.unlock();
1498 }
1499
1500
1501 int total = states.size();
1502 for (int oldCounter = 0; !server.isStopped();) {
1503 int count = counter.get();
1504 if (oldCounter != count) {
1505 LOG.info(destination.toString() + " unassigned znodes=" + count +
1506 " of total=" + total);
1507 oldCounter = count;
1508 }
1509 if (count >= total) break;
1510 Threads.sleep(5);
1511 }
1512
1513 if (server.isStopped()) {
1514 return false;
1515 }
1516
1517
1518
1519 this.addPlans(plans);
1520
1521 List<Triple<HRegionInfo, Integer, List<ServerName>>> regionOpenInfos =
1522 new ArrayList<Triple<HRegionInfo, Integer, List<ServerName>>>(states.size());
1523 for (RegionState state: states) {
1524 HRegionInfo region = state.getRegion();
1525 String encodedRegionName = region.getEncodedName();
1526 Integer nodeVersion = offlineNodesVersions.get(encodedRegionName);
1527 if (nodeVersion == null || nodeVersion == -1) {
1528 LOG.warn("failed to offline in zookeeper: " + region);
1529 failedToOpenRegions.add(region);
1530 Lock lock = locks.remove(encodedRegionName);
1531 lock.unlock();
1532 } else {
1533 regionStates.updateRegionState(
1534 region, State.PENDING_OPEN, destination);
1535 List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
1536 if (this.shouldAssignRegionsWithFavoredNodes) {
1537 favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
1538 }
1539 regionOpenInfos.add(new Triple<HRegionInfo, Integer, List<ServerName>>(
1540 region, nodeVersion, favoredNodes));
1541 }
1542 }
1543
1544
1545 try {
1546
1547
1548 long maxWaitTime = System.currentTimeMillis() +
1549 this.server.getConfiguration().
1550 getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1551 for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
1552 try {
1553 List<RegionOpeningState> regionOpeningStateList = serverManager
1554 .sendRegionOpen(destination, regionOpenInfos);
1555 if (regionOpeningStateList == null) {
1556
1557 return false;
1558 }
1559 for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
1560 RegionOpeningState openingState = regionOpeningStateList.get(k);
1561 if (openingState != RegionOpeningState.OPENED) {
1562 HRegionInfo region = regionOpenInfos.get(k).getFirst();
1563 if (openingState == RegionOpeningState.ALREADY_OPENED) {
1564 processAlreadyOpenedRegion(region, destination);
1565 } else if (openingState == RegionOpeningState.FAILED_OPENING) {
1566
1567 failedToOpenRegions.add(region);
1568 } else {
1569 LOG.warn("THIS SHOULD NOT HAPPEN: unknown opening state "
1570 + openingState + " in assigning region " + region);
1571 }
1572 }
1573 }
1574 break;
1575 } catch (IOException e) {
1576 if (e instanceof RemoteException) {
1577 e = ((RemoteException)e).unwrapRemoteException();
1578 }
1579 if (e instanceof RegionServerStoppedException) {
1580 LOG.warn("The region server was shut down, ", e);
1581
1582 return false;
1583 } else if (e instanceof ServerNotRunningYetException) {
1584 long now = System.currentTimeMillis();
1585 if (now < maxWaitTime) {
1586 LOG.debug("Server is not yet up; waiting up to " +
1587 (maxWaitTime - now) + "ms", e);
1588 Thread.sleep(100);
1589 i--;
1590 continue;
1591 }
1592 } else if (e instanceof java.net.SocketTimeoutException
1593 && this.serverManager.isServerOnline(destination)) {
1594
1595
1596
1597
1598 if (LOG.isDebugEnabled()) {
1599 LOG.debug("Bulk assigner openRegion() to " + destination
1600 + " has timed out, but the regions might"
1601 + " already be opened on it.", e);
1602 }
1603 continue;
1604 }
1605 throw e;
1606 }
1607 }
1608 } catch (IOException e) {
1609
1610 LOG.info("Unable to communicate with " + destination
1611 + " in order to assign regions, ", e);
1612 return false;
1613 } catch (InterruptedException e) {
1614 throw new RuntimeException(e);
1615 }
1616 } finally {
1617 for (Lock lock : locks.values()) {
1618 lock.unlock();
1619 }
1620 }
1621
1622 if (!failedToOpenRegions.isEmpty()) {
1623 for (HRegionInfo region : failedToOpenRegions) {
1624 if (!regionStates.isRegionOnline(region)) {
1625 invokeAssign(region);
1626 }
1627 }
1628 }
1629 LOG.debug("Bulk assigning done for " + destination);
1630 return true;
1631 } finally {
1632 metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTimeMillis() - startTime);
1633 }
1634 }
1635
  /**
   * Send CLOSE RPC if the server is online, otherwise, offline the region.
   *
   * The RPC will be sent only to the server found in the region state if it
   * is passed in, otherwise to the <code>src</code> server specified. If the
   * region state is not specified, the region state is not updated at all;
   * we just send the RPC call, which is useful for cleanup without touching
   * the region states.
   */
1646 private void unassign(final HRegionInfo region,
1647 final RegionState state, final int versionOfClosingNode,
1648 final ServerName dest, final boolean transitionInZK,
1649 final ServerName src) {
1650 ServerName server = src;
1651 if (state != null) {
1652 server = state.getServerName();
1653 }
1654 long maxWaitTime = -1;
1655 for (int i = 1; i <= this.maximumAttempts; i++) {
1656 if (this.server.isStopped() || this.server.isAborted()) {
1657 LOG.debug("Server stopped/aborted; skipping unassign of " + region);
1658 return;
1659 }
1660
1661 if (!serverManager.isServerOnline(server)) {
1662 LOG.debug("Offline " + region.getRegionNameAsString()
1663 + ", no need to unassign since it's on a dead server: " + server);
1664 if (transitionInZK) {
1665
1666 deleteClosingOrClosedNode(region, server);
1667 }
1668 if (state != null) {
1669 regionOffline(region);
1670 }
1671 return;
1672 }
1673 try {
1674
1675 if (serverManager.sendRegionClose(server, region,
1676 versionOfClosingNode, dest, transitionInZK)) {
1677 LOG.debug("Sent CLOSE to " + server + " for region " +
1678 region.getRegionNameAsString());
1679 if (!transitionInZK && state != null) {
1680
1681
1682 unassign(region, state, versionOfClosingNode,
1683 dest, transitionInZK,src);
1684 }
1685 return;
1686 }
1687
1688
1689 LOG.warn("Server " + server + " region CLOSE RPC returned false for " +
1690 region.getRegionNameAsString());
1691 } catch (Throwable t) {
1692 if (t instanceof RemoteException) {
1693 t = ((RemoteException)t).unwrapRemoteException();
1694 }
1695 if (t instanceof NotServingRegionException
1696 || t instanceof RegionServerStoppedException
1697 || t instanceof ServerNotRunningYetException
1698 || t instanceof FailedServerException) {
1699 LOG.debug("Offline " + region.getRegionNameAsString()
1700 + ", it's not any more on " + server, t);
1701 if (transitionInZK) {
1702 deleteClosingOrClosedNode(region, server);
1703 }
1704 if (state != null) {
1705 regionOffline(region);
1706 }
1707 return;
1708 } else if (state != null
1709 && t instanceof RegionAlreadyInTransitionException) {
1710
1711 LOG.debug("update " + state + " the timestamp.");
1712 state.updateTimestampToNow();
1713 if (maxWaitTime < 0) {
1714 maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
1715 + this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
1716 DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
1717 }
1718 try {
1719 long now = EnvironmentEdgeManager.currentTimeMillis();
1720 if (now < maxWaitTime) {
1721 LOG.debug("Region is already in transition; "
1722 + "waiting up to " + (maxWaitTime - now) + "ms", t);
1723 Thread.sleep(100);
1724 i--;
1725 }
1726 } catch (InterruptedException ie) {
1727 LOG.warn("Failed to unassign "
1728 + region.getRegionNameAsString() + " since interrupted", ie);
1729 Thread.currentThread().interrupt();
1730 if (!tomActivated) {
1731 regionStates.updateRegionState(region, State.FAILED_CLOSE);
1732 }
1733 return;
1734 }
1735 } else {
1736 LOG.info("Server " + server + " returned " + t + " for "
1737 + region.getRegionNameAsString() + ", try=" + i
1738 + " of " + this.maximumAttempts, t);
1739
1740 }
1741 }
1742 }
1743
1744 if (!tomActivated && state != null) {
1745 regionStates.updateRegionState(region, State.FAILED_CLOSE);
1746 }
1747 }
1748
  /**
   * Forces the region state to OFFLINE so it can be assigned, unless the
   * region is already opening/closing and <code>forceNewPlan</code> is false,
   * in which case null is returned and the assign is skipped.
   */
1752 private RegionState forceRegionStateToOffline(
1753 final HRegionInfo region, final boolean forceNewPlan) {
1754 RegionState state = regionStates.getRegionState(region);
1755 if (state == null) {
1756 LOG.warn("Assigning a region not in region states: " + region);
1757 state = regionStates.createRegionState(region);
1758 }
1759
1760 ServerName sn = state.getServerName();
1761 if (forceNewPlan && LOG.isDebugEnabled()) {
1762 LOG.debug("Force region state offline " + state);
1763 }
1764
1765 switch (state.getState()) {
1766 case OPEN:
1767 case OPENING:
1768 case PENDING_OPEN:
1769 case CLOSING:
1770 case PENDING_CLOSE:
1771 if (!forceNewPlan) {
1772 LOG.debug("Skip assigning " +
1773 region + ", it is already " + state);
1774 return null;
1775 }
1776 case FAILED_CLOSE:
1777 case FAILED_OPEN:
1778 unassign(region, state, -1, null, false, null);
1779 state = regionStates.getRegionState(region);
1780 if (state.isFailedClose()) {
1781
1782
1783 LOG.info("Skip assigning " +
1784 region + ", we couldn't close it: " + state);
1785 return null;
1786 }
1787 case OFFLINE:
1788
1789
1790
1791
1792
1793 if (regionStates.isServerDeadAndNotProcessed(sn)
1794 && wasRegionOnDeadServerByMeta(region, sn)) {
1795 LOG.info("Skip assigning " + region.getRegionNameAsString()
1796 + ", it is on a dead but not processed yet server");
1797 return null;
1798 }
1799 case CLOSED:
1800 break;
1801 default:
1802 LOG.error("Trying to assign region " + region
1803 + ", which is " + state);
1804 return null;
1805 }
1806 return state;
1807 }
1808
1809 private boolean wasRegionOnDeadServerByMeta(
1810 final HRegionInfo region, final ServerName sn) {
1811 try {
1812 if (region.isMetaRegion()) {
1813 ServerName server = catalogTracker.getMetaLocation();
1814 return regionStates.isServerDeadAndNotProcessed(server);
1815 }
1816 while (!server.isStopped()) {
1817 try {
1818 catalogTracker.waitForMeta();
1819 Pair<HRegionInfo, ServerName> r =
1820 MetaReader.getRegion(catalogTracker, region.getRegionName());
1821 ServerName server = r == null ? null : r.getSecond();
1822 return regionStates.isServerDeadAndNotProcessed(server);
1823 } catch (IOException ioe) {
1824 LOG.info("Received exception accessing hbase:meta during force assign "
1825 + region.getRegionNameAsString() + ", retrying", ioe);
1826 }
1827 }
1828 } catch (InterruptedException e) {
1829 Thread.currentThread().interrupt();
1830 LOG.info("Interrupted accessing hbase:meta", e);
1831 }
1832
1833 return regionStates.isServerDeadAndNotProcessed(sn);
1834 }
1835
  /**
   * Caller must hold lock on the passed <code>state</code> object.
   * @param state region state to assign from
   * @param setOfflineInZK whether to force the ZK node to OFFLINE first
   * @param forceNewPlan whether a new region plan should be computed
   */
1842 private void assign(RegionState state,
1843 final boolean setOfflineInZK, final boolean forceNewPlan) {
1844 long startTime = EnvironmentEdgeManager.currentTimeMillis();
1845 try {
1846 RegionState currentState = state;
1847 int versionOfOfflineNode = -1;
1848 RegionPlan plan = null;
1849 long maxWaitTime = -1;
1850 HRegionInfo region = state.getRegion();
1851 RegionOpeningState regionOpenState;
1852 for (int i = 1; i <= maximumAttempts; i++) {
1853 if (server.isStopped() || server.isAborted()) {
1854 LOG.info("Skip assigning " + region.getRegionNameAsString()
1855 + ", the server is stopped/aborted");
1856 return;
1857 }
1858 if (plan == null) {
1859 try {
1860 plan = getRegionPlan(region, forceNewPlan);
1861 } catch (HBaseIOException e) {
1862 LOG.warn("Failed to get region plan", e);
1863 }
1864 }
1865 if (plan == null) {
1866 LOG.warn("Unable to determine a plan to assign " + region);
1867 if (tomActivated){
1868 this.timeoutMonitor.setAllRegionServersOffline(true);
1869 } else {
1870 if (region.isMetaRegion()) {
1871 try {
1872 Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment);
1873 if (i == maximumAttempts) i = 1;
1874 continue;
1875 } catch (InterruptedException e) {
1876 LOG.error("Got exception while waiting for hbase:meta assignment");
1877 Thread.currentThread().interrupt();
1878 }
1879 }
1880 regionStates.updateRegionState(region, State.FAILED_OPEN);
1881 }
1882 return;
1883 }
1884 if (setOfflineInZK && versionOfOfflineNode == -1) {
1885
1886
1887 versionOfOfflineNode = setOfflineInZooKeeper(currentState, plan.getDestination());
1888 if (versionOfOfflineNode != -1) {
1889 if (isDisabledorDisablingRegionInRIT(region)) {
1890 return;
1891 }
1892
1893
1894
1895
1896
1897
1898 TableName tableName = region.getTable();
1899 if (!zkTable.isEnablingTable(tableName) && !zkTable.isEnabledTable(tableName)) {
1900 LOG.debug("Setting table " + tableName + " to ENABLED state.");
1901 setEnabledTable(tableName);
1902 }
1903 }
1904 }
1905 if (setOfflineInZK && versionOfOfflineNode == -1) {
1906 LOG.info("Unable to set offline in ZooKeeper to assign " + region);
1907
1908
1909
1910
1911 if (!server.isAborted()) {
1912 continue;
1913 }
1914 }
1915 LOG.info("Assigning " + region.getRegionNameAsString() +
1916 " to " + plan.getDestination().toString());
1917
1918 currentState = regionStates.updateRegionState(region,
1919 State.PENDING_OPEN, plan.getDestination());
1920
1921 boolean needNewPlan;
1922 final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() +
1923 " to " + plan.getDestination();
1924 try {
1925 List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
1926 if (this.shouldAssignRegionsWithFavoredNodes) {
1927 favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
1928 }
1929 regionOpenState = serverManager.sendRegionOpen(
1930 plan.getDestination(), region, versionOfOfflineNode, favoredNodes);
1931
1932 if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
1933
1934 needNewPlan = true;
1935 LOG.warn(assignMsg + ", regionserver says 'FAILED_OPENING', " +
1936 " trying to assign elsewhere instead; " +
1937 "try=" + i + " of " + this.maximumAttempts);
1938 } else {
1939
1940 if (regionOpenState == RegionOpeningState.ALREADY_OPENED) {
1941 processAlreadyOpenedRegion(region, plan.getDestination());
1942 }
1943 return;
1944 }
1945
1946 } catch (Throwable t) {
1947 if (t instanceof RemoteException) {
1948 t = ((RemoteException) t).unwrapRemoteException();
1949 }
1950
1951
1952
1953
1954 boolean hold = (t instanceof RegionAlreadyInTransitionException ||
1955 t instanceof ServerNotRunningYetException);
1956
1957
1958
1959
1960
1961
1962 boolean retry = !hold && (t instanceof java.net.SocketTimeoutException
1963 && this.serverManager.isServerOnline(plan.getDestination()));
1964
1965
1966 if (hold) {
1967 LOG.warn(assignMsg + ", waiting a little before trying on the same region server " +
1968 "try=" + i + " of " + this.maximumAttempts, t);
1969
1970 if (maxWaitTime < 0) {
1971 if (t instanceof RegionAlreadyInTransitionException) {
1972 maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
1973 + this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
1974 DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
1975 } else {
1976 maxWaitTime = this.server.getConfiguration().
1977 getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1978 }
1979 }
1980 try {
1981 needNewPlan = false;
1982 long now = EnvironmentEdgeManager.currentTimeMillis();
1983 if (now < maxWaitTime) {
1984 LOG.debug("Server is not yet up or region is already in transition; "
1985 + "waiting up to " + (maxWaitTime - now) + "ms", t);
1986 Thread.sleep(100);
1987 i--;
1988 } else if (!(t instanceof RegionAlreadyInTransitionException)) {
1989 LOG.debug("Server is not up for a while; try a new one", t);
1990 needNewPlan = true;
1991 }
1992 } catch (InterruptedException ie) {
1993 LOG.warn("Failed to assign "
1994 + region.getRegionNameAsString() + " since interrupted", ie);
1995 Thread.currentThread().interrupt();
1996 if (!tomActivated) {
1997 regionStates.updateRegionState(region, State.FAILED_OPEN);
1998 }
1999 return;
2000 }
2001 } else if (retry) {
2002 needNewPlan = false;
2003 LOG.warn(assignMsg + ", trying to assign to the same region server " +
2004 "try=" + i + " of " + this.maximumAttempts, t);
2005 } else {
2006 needNewPlan = true;
2007 LOG.warn(assignMsg + ", trying to assign elsewhere instead;" +
2008 " try=" + i + " of " + this.maximumAttempts, t);
2009 }
2010 }
2011
2012 if (i == this.maximumAttempts) {
2013
2014
2015 continue;
2016 }
2017
2018
2019
2020
2021 if (needNewPlan) {
2022
2023
2024
2025
2026 RegionPlan newPlan = null;
2027 try {
2028 newPlan = getRegionPlan(region, true);
2029 } catch (HBaseIOException e) {
2030 LOG.warn("Failed to get region plan", e);
2031 }
2032 if (newPlan == null) {
2033 if (tomActivated) {
2034 this.timeoutMonitor.setAllRegionServersOffline(true);
2035 } else {
2036 regionStates.updateRegionState(region, State.FAILED_OPEN);
2037 }
2038 LOG.warn("Unable to find a viable location to assign region " +
2039 region.getRegionNameAsString());
2040 return;
2041 }
2042
2043 if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
2044
2045
2046
2047 currentState = regionStates.updateRegionState(region, State.OFFLINE);
2048 versionOfOfflineNode = -1;
2049 plan = newPlan;
2050 }
2051 }
2052 }
2053
2054 if (!tomActivated) {
2055 regionStates.updateRegionState(region, State.FAILED_OPEN);
2056 }
2057 } finally {
2058 metricsAssignmentManager.updateAssignmentTime(EnvironmentEdgeManager.currentTimeMillis() - startTime);
2059 }
2060 }
2061
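  /**
   * The target server reported the region as ALREADY_OPENED; delete the
   * offline znode and mark the region online on that server.
   */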
2062 private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
2063
2064
2065
2066 LOG.debug("ALREADY_OPENED " + region.getRegionNameAsString()
2067 + " to " + sn);
2068 String encodedName = region.getEncodedName();
2069 deleteNodeInStates(encodedName, "offline", sn, EventType.M_ZK_REGION_OFFLINE);
2070 regionStates.regionOnline(region, sn);
2071 }
2072
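/**
 * @return true if the region's table is disabled or disabling, in which case
 * the region is offlined instead of being assigned.
 */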
2073 private boolean isDisabledorDisablingRegionInRIT(final HRegionInfo region) {
2074 TableName tableName = region.getTable();
2075 boolean disabled = this.zkTable.isDisabledTable(tableName);
2076 if (disabled || this.zkTable.isDisablingTable(tableName)) {
2077 LOG.info("Table " + tableName + (disabled ? " disabled;" : " disabling;") +
2078 " skipping assign of " + region.getRegionNameAsString());
2079 offlineDisabledRegion(region);
2080 return true;
2081 }
2082 return false;
2083 }
2084
2085
2086
2087
2088
2089
2090
2091
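/**
 * Sets the region OFFLINE in memory and creates/forces the unassigned znode
 * into the OFFLINE state.
 * @return the version of the offline znode, or -1 on failure (the master is
 * aborted if the region is in an unexpected state or ZooKeeper fails).
 */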
2092 private int setOfflineInZooKeeper(final RegionState state, final ServerName destination) {
2093 if (!state.isClosed() && !state.isOffline()) {
2094 String msg = "Unexpected state: " + state + "; cannot transition it to OFFLINE.";
2095 this.server.abort(msg, new IllegalStateException(msg));
2096 return -1;
2097 }
2098 regionStates.updateRegionState(state.getRegion(), State.OFFLINE);
2099 int versionOfOfflineNode;
2100 try {
2101
2102 versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(watcher,
2103 state.getRegion(), destination);
2104 if (versionOfOfflineNode == -1) {
2105 LOG.warn("Attempted to create/force node into OFFLINE state before "
2106 + "completing assignment but failed to do so for " + state);
2107 return -1;
2108 }
2109 } catch (KeeperException e) {
2110 server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
2111 return -1;
2112 }
2113 return versionOfOfflineNode;
2114 }
2115
2116
2117
2118
2119
2120
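/**
 * @return a plan for the given region: a new one if forced or none exists,
 * otherwise the cached plan; null if no destination server is available.
 */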
2121 private RegionPlan getRegionPlan(final HRegionInfo region,
2122 final boolean forceNewPlan) throws HBaseIOException {
2123 return getRegionPlan(region, null, forceNewPlan);
2124 }
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
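/**
 * Returns the existing plan for the region if it still points to an available
 * destination; otherwise (or when forced) creates a new plan using the
 * balancer's random assignment, excluding <code>serverToExclude</code>.
 * @return the plan, or null if there is no destination server available.
 */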
2135 private RegionPlan getRegionPlan(final HRegionInfo region,
2136 final ServerName serverToExclude, final boolean forceNewPlan) throws HBaseIOException {
2137
2138 final String encodedName = region.getEncodedName();
2139 final List<ServerName> destServers =
2140 serverManager.createDestinationServersList(serverToExclude);
2141
2142 if (destServers.isEmpty()){
2143 LOG.warn("Can't move " + encodedName +
2144 ", there is no destination server available.");
2145 return null;
2146 }
2147
2148 RegionPlan randomPlan = null;
2149 boolean newPlan = false;
2150 RegionPlan existingPlan;
2151
2152 synchronized (this.regionPlans) {
2153 existingPlan = this.regionPlans.get(encodedName);
2154
2155 if (existingPlan != null && existingPlan.getDestination() != null) {
2156 LOG.debug("Found an existing plan for " + region.getRegionNameAsString()
2157 + " destination server is " + existingPlan.getDestination() +
2158 " accepted as a dest server = " + destServers.contains(existingPlan.getDestination()));
2159 }
2160
2161 if (forceNewPlan
2162 || existingPlan == null
2163 || existingPlan.getDestination() == null
2164 || !destServers.contains(existingPlan.getDestination())) {
2165 newPlan = true;
2166 randomPlan = new RegionPlan(region, null,
2167 balancer.randomAssignment(region, destServers));
2168 if (!region.isMetaTable() && shouldAssignRegionsWithFavoredNodes) {
2169 List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
2170 regions.add(region);
2171 try {
2172 processFavoredNodes(regions);
2173 } catch (IOException ie) {
2174 LOG.warn("Ignoring exception in processFavoredNodes " + ie);
2175 }
2176 }
2177 this.regionPlans.put(encodedName, randomPlan);
2178 }
2179 }
2180
2181 if (newPlan) {
2182 if (randomPlan.getDestination() == null) {
2183 LOG.warn("Can't find a destination for " + encodedName);
2184 return null;
2185 }
2186 LOG.debug("No previous transition plan found (or ignoring " +
2187 "an existing plan) for " + region.getRegionNameAsString() +
2188 "; generated random plan=" + randomPlan + "; " +
2189 serverManager.countOfRegionServers() +
2190 " (online=" + serverManager.getOnlineServers().size() +
2191 ", available=" + destServers.size() + ") available servers" +
2192 ", forceNewPlan=" + forceNewPlan);
2193 return randomPlan;
2194 }
2195 LOG.debug("Using pre-existing plan for " +
2196 region.getRegionNameAsString() + "; plan=" + existingPlan);
2197 return existingPlan;
2198 }
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
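/**
 * Unassigns the specified region without forcing; equivalent to
 * {@code unassign(region, false)}.
 */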
2213 public void unassign(HRegionInfo region) {
2214 unassign(region, false);
2215 }
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
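/**
 * Unassigns (closes) the specified region: creates the CLOSING znode, moves
 * the region to PENDING_CLOSE and delegates to the internal unassign to send
 * the close to the hosting server. If the region is being split or merged,
 * or is already in transition, the call is a no-op unless <code>force</code>
 * is set.
 *
 * Illustrative usage only (hypothetical "am" and "hri" references):
 * <pre>
 *   am.unassign(hri, true, null);  // force a CLOSE even if already closing
 * </pre>
 */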
2232 public void unassign(HRegionInfo region, boolean force, ServerName dest) {
2233
2234 LOG.debug("Starting unassign of " + region.getRegionNameAsString()
2235 + " (offlining), current state: " + regionStates.getRegionState(region));
2236
2237 String encodedName = region.getEncodedName();
2238
2239 int versionOfClosingNode = -1;
2240
2241
2242 ReentrantLock lock = locker.acquireLock(encodedName);
2243 RegionState state = regionStates.getRegionTransitionState(encodedName);
2244 boolean reassign = true;
2245 try {
2246 if (state == null) {
2247
2248
2249 state = regionStates.getRegionState(encodedName);
2250 if (state != null && state.isUnassignable()) {
2251 LOG.info("Attempting to unassign " + state + ", ignored");
2252
2253 return;
2254 }
2255
2256 try {
2257 if (state == null || state.getServerName() == null) {
2258
2259
2260 LOG.warn("Attempting to unassign a region not in RegionStates"
2261 + region.getRegionNameAsString() + ", offlined");
2262 regionOffline(region);
2263 return;
2264 }
2265 versionOfClosingNode = ZKAssign.createNodeClosing(
2266 watcher, region, state.getServerName());
2267 if (versionOfClosingNode == -1) {
2268 LOG.info("Attempting to unassign " +
2269 region.getRegionNameAsString() + " but ZK closing node "
2270 + "can't be created.");
2271 reassign = false;
2272 return;
2273 }
2274 } catch (KeeperException e) {
2275 if (e instanceof NodeExistsException) {
2276
2277
2278
2279
2280 NodeExistsException nee = (NodeExistsException)e;
2281 String path = nee.getPath();
2282 try {
2283 if (isSplitOrSplittingOrMergedOrMerging(path)) {
2284 LOG.debug(path + " is SPLIT or SPLITTING or MERGED or MERGING; " +
2285 "skipping unassign because the region no longer exists -- it has been split or merged");
2286 reassign = false;
2287 return;
2288 }
2289 } catch (KeeperException.NoNodeException ke) {
2290 LOG.warn("Failed getData on SPLITTING/SPLIT at " + path +
2291 "; presuming split and that the region to unassign, " +
2292 encodedName + ", no longer exists -- confirm", ke);
2293 return;
2294 } catch (KeeperException ke) {
2295 LOG.error("Unexpected zk state", ke);
2296 } catch (DeserializationException de) {
2297 LOG.error("Failed parse", de);
2298 }
2299 }
2300
2301 server.abort("Unexpected ZK exception creating node CLOSING", e);
2302 reassign = false;
2303 return;
2304 }
2305 state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
2306 } else if (state.isFailedOpen()) {
2307
2308 regionOffline(region);
2309 return;
2310 } else if (force && state.isPendingCloseOrClosing()) {
2311 LOG.debug("Attempting to unassign " + region.getRegionNameAsString() +
2312 " which is already " + state.getState() +
2313 " but forcing to send a CLOSE RPC again ");
2314 if (state.isFailedClose()) {
2315 state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
2316 }
2317 state.updateTimestampToNow();
2318 } else {
2319 LOG.debug("Attempting to unassign " +
2320 region.getRegionNameAsString() + " but it is " +
2321 "already in transition (" + state.getState() + ", force=" + force + ")");
2322 return;
2323 }
2324
2325 unassign(region, state, versionOfClosingNode, dest, true, null);
2326 } finally {
2327 lock.unlock();
2328
2329
2330 if (reassign && regionStates.isRegionOffline(region)) {
2331 assign(region, true);
2332 }
2333 }
2334 }
2335
2336 public void unassign(HRegionInfo region, boolean force){
2337 unassign(region, force, null);
2338 }
2339
2340
2341
2342
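/**
 * Deletes the CLOSING or CLOSED znode for the given region, if one exists.
 */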
2343 public void deleteClosingOrClosedNode(HRegionInfo region, ServerName sn) {
2344 String encodedName = region.getEncodedName();
2345 deleteNodeInStates(encodedName, "closing", sn, EventType.M_ZK_REGION_CLOSING,
2346 EventType.RS_ZK_REGION_CLOSED);
2347 }
2348
2349
2350
2351
2352
2353
2354
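/**
 * @return true if the znode at <code>path</code> holds a split- or
 * merge-related transition event (REQUEST_SPLIT/SPLITTING/SPLIT or
 * REQUEST_MERGE/MERGING/MERGED), false otherwise.
 */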
2355 private boolean isSplitOrSplittingOrMergedOrMerging(final String path)
2356 throws KeeperException, DeserializationException {
2357 boolean result = false;
2358
2359
2360 byte [] data = ZKAssign.getData(watcher, path);
2361 if (data == null) {
2362 LOG.info("Node " + path + " is gone");
2363 return false;
2364 }
2365 RegionTransition rt = RegionTransition.parseFrom(data);
2366 switch (rt.getEventType()) {
2367 case RS_ZK_REQUEST_REGION_SPLIT:
2368 case RS_ZK_REGION_SPLIT:
2369 case RS_ZK_REGION_SPLITTING:
2370 case RS_ZK_REQUEST_REGION_MERGE:
2371 case RS_ZK_REGION_MERGED:
2372 case RS_ZK_REGION_MERGING:
2373 result = true;
2374 break;
2375 default:
2376 LOG.info("Node " + path + " is in " + rt.getEventType());
2377 break;
2378 }
2379 return result;
2380 }
2381
2382
2383
2384
2385
2386
2387 public int getNumRegionsOpened() {
2388 return numRegionsOpened.get();
2389 }
2390
2391
2392
2393
2394
2395
2396
2397
2398
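/**
 * Blocks until the region is online.
 * @return true once the region is online; false if the region ends up in
 * FAILED_OPEN or the server is stopped while waiting.
 *
 * Illustrative usage only (hypothetical "am" and "hri" references):
 * <pre>
 *   am.assign(hri, true);
 *   boolean assigned = am.waitForAssignment(hri);  // may throw InterruptedException
 * </pre>
 */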
2399 public boolean waitForAssignment(HRegionInfo regionInfo)
2400 throws InterruptedException {
2401 while (!regionStates.isRegionOnline(regionInfo)) {
2402 if (regionStates.isRegionInState(regionInfo, State.FAILED_OPEN)
2403 || this.server.isStopped()) {
2404 return false;
2405 }
2406
2407
2408
2409
2410 regionStates.waitForUpdate(100);
2411 }
2412 return true;
2413 }
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
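/**
 * Assigns hbase:meta: clears the current meta location in ZooKeeper and then
 * force-assigns the meta region.
 */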
2425 public void assignMeta() throws KeeperException {
2426 MetaRegionTracker.deleteMetaLocation(this.watcher);
2427 assign(HRegionInfo.FIRST_META_REGIONINFO, true);
2428 }
2429
2430
2431
2432
2433
2434
2435
2436
2437
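/**
 * Assigns the given regions, retaining their previous assignments where the
 * balancer allows it.
 * @param regions map of region to the server it was last assigned to
 */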
2438 public void assign(Map<HRegionInfo, ServerName> regions)
2439 throws IOException, InterruptedException {
2440 if (regions == null || regions.isEmpty()) {
2441 return;
2442 }
2443 List<ServerName> servers = serverManager.createDestinationServersList();
2444 if (servers == null || servers.isEmpty()) {
2445 throw new IOException("Found no destination server to assign region(s)");
2446 }
2447
2448
2449 Map<ServerName, List<HRegionInfo>> bulkPlan =
2450 balancer.retainAssignment(regions, servers);
2451
2452 assign(regions.size(), servers.size(),
2453 "retainAssignment=true", bulkPlan);
2454 }
2455
2456
2457
2458
2459
2460
2461
2462
2463
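/**
 * Assigns the given regions round-robin across all available servers.
 */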
2464 public void assign(List<HRegionInfo> regions)
2465 throws IOException, InterruptedException {
2466 if (regions == null || regions.isEmpty()) {
2467 return;
2468 }
2469
2470 List<ServerName> servers = serverManager.createDestinationServersList();
2471 if (servers == null || servers.isEmpty()) {
2472 throw new IOException("Found no destination server to assign region(s)");
2473 }
2474
2475
2476 Map<ServerName, List<HRegionInfo>> bulkPlan
2477 = balancer.roundRobinAssignment(regions, servers);
2478 processFavoredNodes(regions);
2479
2480 assign(regions.size(), servers.size(),
2481 "round-robin=true", bulkPlan);
2482 }
2483
2484 private void assign(int regions, int totalServers,
2485 String message, Map<ServerName, List<HRegionInfo>> bulkPlan)
2486 throws InterruptedException, IOException {
2487
2488 int servers = bulkPlan.size();
2489 if (servers == 1 || (regions < bulkAssignThresholdRegions
2490 && servers < bulkAssignThresholdServers)) {
2491
2492
2493
2494 if (LOG.isTraceEnabled()) {
2495 LOG.trace("Not using bulk assignment since we are assigning only " + regions +
2496 " region(s) to " + servers + " server(s)");
2497 }
2498 for (Map.Entry<ServerName, List<HRegionInfo>> plan: bulkPlan.entrySet()) {
2499 if (!assign(plan.getKey(), plan.getValue())) {
2500 for (HRegionInfo region: plan.getValue()) {
2501 if (!regionStates.isRegionOnline(region)) {
2502 invokeAssign(region);
2503 }
2504 }
2505 }
2506 }
2507 } else {
2508 LOG.info("Bulk assigning " + regions + " region(s) across "
2509 + totalServers + " server(s), " + message);
2510
2511
2512 BulkAssigner ba = new GeneralBulkAssigner(
2513 this.server, bulkPlan, this, bulkAssignWaitTillAllAssigned);
2514 ba.bulkAssign();
2515 LOG.info("Bulk assigning done");
2516 }
2517 }
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
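/**
 * Assigns all user regions found in hbase:meta, either retaining previous
 * locations or round-robin depending on "hbase.master.startup.retainassign";
 * runs after wiping the assignment znodes, e.g. on a clean cluster startup.
 */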
2529 private void assignAllUserRegions()
2530 throws IOException, InterruptedException, KeeperException {
2531
2532 ZKAssign.deleteAllNodes(watcher);
2533 ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
2534 this.watcher.assignmentZNode);
2535 failoverCleanupDone();
2536
2537
2538
2539
2540 Set<TableName> disabledOrDisablingOrEnabling = ZKTable.getDisabledOrDisablingTables(watcher);
2541 disabledOrDisablingOrEnabling.addAll(ZKTable.getEnablingTables(watcher));
2542
2543 Map<HRegionInfo, ServerName> allRegions;
2544 SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment =
2545 new SnapshotOfRegionAssignmentFromMeta(catalogTracker, disabledOrDisablingOrEnabling, true);
2546 snapshotOfRegionAssignment.initialize();
2547 allRegions = snapshotOfRegionAssignment.getRegionToRegionServerMap();
2548 if (allRegions == null || allRegions.isEmpty()) return;
2549
2550
2551 boolean retainAssignment = server.getConfiguration().
2552 getBoolean("hbase.master.startup.retainassign", true);
2553
2554 if (retainAssignment) {
2555 assign(allRegions);
2556 } else {
2557 List<HRegionInfo> regions = new ArrayList<HRegionInfo>(allRegions.keySet());
2558 assign(regions);
2559 }
2560
2561 for (HRegionInfo hri : allRegions.keySet()) {
2562 TableName tableName = hri.getTable();
2563 if (!zkTable.isEnabledTable(tableName)) {
2564 setEnabledTable(tableName);
2565 }
2566 }
2567 }
2568
2569
2570
2571
2572
2573
2574
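/**
 * Waits until there are no regions in transition, the timeout expires, or
 * the server is stopped.
 * @return true if no regions are left in transition
 */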
2575 boolean waitUntilNoRegionsInTransition(final long timeout)
2576 throws InterruptedException {
2577
2578
2579
2580
2581
2582
2583 final long endTime = System.currentTimeMillis() + timeout;
2584
2585 while (!this.server.isStopped() && regionStates.isRegionsInTransition()
2586 && endTime > System.currentTimeMillis()) {
2587 regionStates.waitForUpdate(100);
2588 }
2589
2590 return !regionStates.isRegionsInTransition();
2591 }
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
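/**
 * Rebuilds the in-memory region states from a full scan of hbase:meta.
 * @return map of servers that are not online to the regions hbase:meta says
 * they were hosting
 */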
2602 Map<ServerName, List<HRegionInfo>> rebuildUserRegions() throws IOException, KeeperException {
2603 Set<TableName> enablingTables = ZKTable.getEnablingTables(watcher);
2604 Set<TableName> disabledOrEnablingTables = ZKTable.getDisabledTables(watcher);
2605 disabledOrEnablingTables.addAll(enablingTables);
2606 Set<TableName> disabledOrDisablingOrEnabling = ZKTable.getDisablingTables(watcher);
2607 disabledOrDisablingOrEnabling.addAll(disabledOrEnablingTables);
2608
2609
2610 List<Result> results = MetaReader.fullScan(this.catalogTracker);
2611
2612 Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
2613
2614 Map<ServerName, List<HRegionInfo>> offlineServers =
2615 new TreeMap<ServerName, List<HRegionInfo>>();
2616
2617 for (Result result : results) {
2618 Pair<HRegionInfo, ServerName> region = HRegionInfo.getHRegionInfoAndServerName(result);
2619 if (region == null) continue;
2620 HRegionInfo regionInfo = region.getFirst();
2621 ServerName regionLocation = region.getSecond();
2622 if (regionInfo == null) continue;
2623 regionStates.createRegionState(regionInfo);
2624 if (regionStates.isRegionInState(regionInfo, State.SPLIT)) {
2625
2626
2627 LOG.debug("Region " + regionInfo.getRegionNameAsString()
2628 + " split is completed. Hence need not add to regions list");
2629 continue;
2630 }
2631 TableName tableName = regionInfo.getTable();
2632 if (regionLocation == null) {
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643 if (!enablingTables.contains(tableName)) {
2644 LOG.warn("Region " + regionInfo.getEncodedName() +
2645 " has null regionLocation." + " But its table " + tableName +
2646 " isn't in ENABLING state.");
2647 }
2648 } else if (!onlineServers.contains(regionLocation)) {
2649
2650 List<HRegionInfo> offlineRegions = offlineServers.get(regionLocation);
2651 if (offlineRegions == null) {
2652 offlineRegions = new ArrayList<HRegionInfo>(1);
2653 offlineServers.put(regionLocation, offlineRegions);
2654 }
2655 offlineRegions.add(regionInfo);
2656
2657
2658 if (!disabledOrDisablingOrEnabling.contains(tableName)
2659 && !getZKTable().isEnabledTable(tableName)) {
2660 setEnabledTable(tableName);
2661 }
2662 } else {
2663
2664
2665 if (!disabledOrEnablingTables.contains(tableName)) {
2666 regionStates.updateRegionState(regionInfo, State.OPEN, regionLocation);
2667 regionStates.regionOnline(regionInfo, regionLocation);
2668 }
2669
2670
2671 if (!disabledOrDisablingOrEnabling.contains(tableName)
2672 && !getZKTable().isEnabledTable(tableName)) {
2673 setEnabledTable(tableName);
2674 }
2675 }
2676 }
2677 return offlineServers;
2678 }
2679
2680
2681
2682
2683
2684
2685
2686
2687
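/**
 * Recovers tables left in DISABLING state (e.g. after a master restart) by
 * re-running the disable handler so they reach DISABLED.
 */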
2688 private void recoverTableInDisablingState()
2689 throws KeeperException, TableNotFoundException, IOException {
2690 Set<TableName> disablingTables = ZKTable.getDisablingTables(watcher);
2691 if (disablingTables.size() != 0) {
2692 for (TableName tableName : disablingTables) {
2693
2694 LOG.info("The table " + tableName
2695 + " is in DISABLING state. Hence recovering by moving the table"
2696 + " to DISABLED state.");
2697 new DisableTableHandler(this.server, tableName, catalogTracker,
2698 this, tableLockManager, true).prepare().process();
2699 }
2700 }
2701 }
2702
2703
2704
2705
2706
2707
2708
2709
2710
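/**
 * Recovers tables left in ENABLING state by re-running the enable handler so
 * they reach ENABLED; tables no longer found in hbase:meta are skipped.
 */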
2711 private void recoverTableInEnablingState()
2712 throws KeeperException, TableNotFoundException, IOException {
2713 Set<TableName> enablingTables = ZKTable.getEnablingTables(watcher);
2714 if (enablingTables.size() != 0) {
2715 for (TableName tableName : enablingTables) {
2716
2717 LOG.info("The table " + tableName
2718 + " is in ENABLING state. Hence recovering by moving the table"
2719 + " to ENABLED state.");
2720
2721
2722 EnableTableHandler eth = new EnableTableHandler(this.server, tableName,
2723 catalogTracker, this, tableLockManager, true);
2724 try {
2725 eth.prepare();
2726 } catch (TableNotFoundException e) {
2727 LOG.warn("Table " + tableName + " not found in hbase:meta to recover.");
2728 continue;
2729 }
2730 eth.process();
2731 }
2732 }
2733 }
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
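/**
 * Expires any dead servers that carried regions, replays the regions in
 * transition found under the assignment znode, and then marks failover
 * cleanup as done.
 */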
2750 private void processDeadServersAndRecoverLostRegions(
2751 Map<ServerName, List<HRegionInfo>> deadServers)
2752 throws IOException, KeeperException {
2753 if (deadServers != null) {
2754 for (Map.Entry<ServerName, List<HRegionInfo>> server: deadServers.entrySet()) {
2755 ServerName serverName = server.getKey();
2756
2757 regionStates.setLastRegionServerOfRegions(serverName, server.getValue());
2758 if (!serverManager.isServerDead(serverName)) {
2759 serverManager.expireServer(serverName);
2760 }
2761 }
2762 }
2763 List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(
2764 this.watcher, this.watcher.assignmentZNode);
2765 if (!nodes.isEmpty()) {
2766 for (String encodedRegionName : nodes) {
2767 processRegionInTransition(encodedRegionName, null);
2768 }
2769 }
2770
2771
2772
2773
2774
2775 failoverCleanupDone();
2776 }
2777
2778
2779
2780
2781
2782
2783
2784
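/**
 * Recomputes the regions-in-transition metrics: total count, count over the
 * configured threshold, and the age of the oldest region in transition.
 */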
2785 public void updateRegionsInTransitionMetrics() {
2786 long currentTime = System.currentTimeMillis();
2787 int totalRITs = 0;
2788 int totalRITsOverThreshold = 0;
2789 long oldestRITTime = 0;
2790 int ritThreshold = this.server.getConfiguration().
2791 getInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 60000);
2792 for (RegionState state: regionStates.getRegionsInTransition().values()) {
2793 totalRITs++;
2794 long ritTime = currentTime - state.getStamp();
2795 if (ritTime > ritThreshold) {
2796 totalRITsOverThreshold++;
2797 }
2798 if (oldestRITTime < ritTime) {
2799 oldestRITTime = ritTime;
2800 }
2801 }
2802 if (this.metricsAssignmentManager != null) {
2803 this.metricsAssignmentManager.updateRITOldestAge(oldestRITTime);
2804 this.metricsAssignmentManager.updateRITCount(totalRITs);
2805 this.metricsAssignmentManager.updateRITCountOverThreshold(totalRITsOverThreshold);
2806 }
2807 }
2808
2809
2810
2811
2812 void clearRegionPlan(final HRegionInfo region) {
2813 synchronized (this.regionPlans) {
2814 this.regionPlans.remove(region.getEncodedName());
2815 }
2816 }
2817
2818
2819
2820
2821
2822
2823 public void waitOnRegionToClearRegionsInTransition(final HRegionInfo hri)
2824 throws IOException, InterruptedException {
2825 waitOnRegionToClearRegionsInTransition(hri, -1L);
2826 }
2827
2828
2829
2830
2831
2832
2833
2834
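/**
 * Waits for the given region to leave the regions-in-transition set.
 * @param timeOut maximum wait in milliseconds; non-positive means wait forever
 * @return true if the region cleared transition; false on timeout or if the
 * server is stopped
 */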
2835 public boolean waitOnRegionToClearRegionsInTransition(final HRegionInfo hri, long timeOut)
2836 throws InterruptedException {
2837 if (!regionStates.isRegionInTransition(hri)) return true;
2838 long end = (timeOut <= 0) ? Long.MAX_VALUE : EnvironmentEdgeManager.currentTimeMillis()
2839 + timeOut;
2840
2841
2842 LOG.info("Waiting for " + hri.getEncodedName() +
2843 " to leave regions-in-transition, timeOut=" + timeOut + " ms.");
2844 while (!this.server.isStopped() && regionStates.isRegionInTransition(hri)) {
2845 regionStates.waitForUpdate(100);
2846 if (EnvironmentEdgeManager.currentTimeMillis() > end) {
2847 LOG.info("Timed out on waiting for " + hri.getEncodedName() + " to be assigned.");
2848 return false;
2849 }
2850 }
2851 if (this.server.isStopped()) {
2852 LOG.info("Giving up wait on regions in transition because stoppable.isStopped is set");
2853 return false;
2854 }
2855 return true;
2856 }
2857
2858
2859
2860
2861
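/**
 * Chore that periodically updates the transition timers for regions hosted on
 * servers queued in serversInUpdatingTimer; only used when the timeout
 * monitor is activated.
 */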
2862 public class TimerUpdater extends Chore {
2863
2864 public TimerUpdater(final int period, final Stoppable stopper) {
2865 super("AssignmentTimerUpdater", period, stopper);
2866 }
2867
2868 @Override
2869 protected void chore() {
2870 Preconditions.checkState(tomActivated);
2871 ServerName serverToUpdateTimer = null;
2872 while (!serversInUpdatingTimer.isEmpty() && !stopper.isStopped()) {
2873 if (serverToUpdateTimer == null) {
2874 serverToUpdateTimer = serversInUpdatingTimer.first();
2875 } else {
2876 serverToUpdateTimer = serversInUpdatingTimer
2877 .higher(serverToUpdateTimer);
2878 }
2879 if (serverToUpdateTimer == null) {
2880 break;
2881 }
2882 updateTimers(serverToUpdateTimer);
2883 serversInUpdatingTimer.remove(serverToUpdateTimer);
2884 }
2885 }
2886 }
2887
2888
2889
2890
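/**
 * Chore that checks for regions stuck in transition longer than the
 * configured timeout and acts on them.
 */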
2891 public class TimeoutMonitor extends Chore {
2892 private boolean allRegionServersOffline = false;
2893 private ServerManager serverManager;
2894 private final int timeout;
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905 public TimeoutMonitor(final int period, final Stoppable stopper,
2906 ServerManager serverManager,
2907 final int timeout) {
2908 super("AssignmentTimeoutMonitor", period, stopper);
2909 this.timeout = timeout;
2910 this.serverManager = serverManager;
2911 }
2912
2913 private synchronized void setAllRegionServersOffline(
2914 boolean allRegionServersOffline) {
2915 this.allRegionServersOffline = allRegionServersOffline;
2916 }
2917
2918 @Override
2919 protected void chore() {
2920 Preconditions.checkState(tomActivated);
2921 boolean noRSAvailable = this.serverManager.createDestinationServersList().isEmpty();
2922
2923
2924 long now = System.currentTimeMillis();
2925
2926
2927 for (String regionName : regionStates.getRegionsInTransition().keySet()) {
2928 RegionState regionState = regionStates.getRegionTransitionState(regionName);
2929 if (regionState == null) continue;
2930
2931 if (regionState.getStamp() + timeout <= now) {
2932
2933 actOnTimeOut(regionState);
2934 } else if (this.allRegionServersOffline && !noRSAvailable) {
2935 RegionPlan existingPlan = regionPlans.get(regionName);
2936 if (existingPlan == null
2937 || !this.serverManager.isServerOnline(existingPlan
2938 .getDestination())) {
2939
2940
2941 actOnTimeOut(regionState);
2942 }
2943 }
2944 }
2945 setAllRegionServersOffline(noRSAvailable);
2946 }
2947
2948 private void actOnTimeOut(RegionState regionState) {
2949 HRegionInfo regionInfo = regionState.getRegion();
2950 LOG.info("Regions in transition timed out: " + regionState);
2951
2952 switch (regionState.getState()) {
2953 case CLOSED:
2954 LOG.info("Region " + regionInfo.getEncodedName()
2955 + " has been CLOSED for too long, waiting on queued "
2956 + "ClosedRegionHandler to run or server shutdown");
2957
2958 regionState.updateTimestampToNow();
2959 break;
2960 case OFFLINE:
2961 LOG.info("Region has been OFFLINE for too long, " + "reassigning "
2962 + regionInfo.getRegionNameAsString() + " to a random server");
2963 invokeAssign(regionInfo);
2964 break;
2965 case PENDING_OPEN:
2966 LOG.info("Region has been PENDING_OPEN for too "
2967 + "long, reassigning region=" + regionInfo.getRegionNameAsString());
2968 invokeAssign(regionInfo);
2969 break;
2970 case OPENING:
2971 processOpeningState(regionInfo);
2972 break;
2973 case OPEN:
2974 LOG.error("Region has been OPEN for too long, " +
2975 "we don't know where region was opened so can't do anything");
2976 regionState.updateTimestampToNow();
2977 break;
2978
2979 case PENDING_CLOSE:
2980 LOG.info("Region has been PENDING_CLOSE for too "
2981 + "long, running forced unassign again on region="
2982 + regionInfo.getRegionNameAsString());
2983 invokeUnassign(regionInfo);
2984 break;
2985 case CLOSING:
2986 LOG.info("Region has been CLOSING for too " +
2987 "long, this should eventually complete or the server will " +
2988 "expire, send RPC again");
2989 invokeUnassign(regionInfo);
2990 break;
2991
2992 case SPLIT:
2993 case SPLITTING:
2994 case FAILED_OPEN:
2995 case FAILED_CLOSE:
2996 case MERGING:
2997 break;
2998
2999 default:
3000 throw new IllegalStateException("Received event is not valid.");
3001 }
3002 }
3003 }
3004
3005 private void processOpeningState(HRegionInfo regionInfo) {
3006 LOG.info("Region has been OPENING for too long, reassigning region="
3007 + regionInfo.getRegionNameAsString());
3008
3009 try {
3010 String node = ZKAssign.getNodeName(watcher, regionInfo.getEncodedName());
3011 Stat stat = new Stat();
3012 byte [] data = ZKAssign.getDataNoWatch(watcher, node, stat);
3013 if (data == null) {
3014 LOG.warn("Data is null, node " + node + " no longer exists");
3015 return;
3016 }
3017 RegionTransition rt = RegionTransition.parseFrom(data);
3018 EventType et = rt.getEventType();
3019 if (et == EventType.RS_ZK_REGION_OPENED) {
3020 LOG.debug("Region has transitioned to OPENED, allowing "
3021 + "watched event handlers to process");
3022 return;
3023 } else if (et != EventType.RS_ZK_REGION_OPENING && et != EventType.RS_ZK_REGION_FAILED_OPEN ) {
3024 LOG.warn("While timing out a region, found ZK node in unexpected state: " + et);
3025 return;
3026 }
3027 invokeAssign(regionInfo);
3028 } catch (KeeperException ke) {
3029 LOG.error("Unexpected ZK exception timing out CLOSING region", ke);
3030 } catch (DeserializationException e) {
3031 LOG.error("Unexpected exception parsing CLOSING region", e);
3032 }
3033 }
3034
3035 void invokeAssign(HRegionInfo regionInfo) {
3036 threadPoolExecutorService.submit(new AssignCallable(this, regionInfo));
3037 }
3038
3039 private void invokeUnassign(HRegionInfo regionInfo) {
3040 threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
3041 }
3042
3043 public boolean isCarryingMeta(ServerName serverName) {
3044 return isCarryingRegion(serverName, HRegionInfo.FIRST_META_REGIONINFO);
3045 }
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
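/**
 * Checks whether the given server is carrying the given region, consulting
 * the unassigned znode first and falling back to the in-memory region states.
 * @return true if the server appears to host the region
 */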
3057 private boolean isCarryingRegion(ServerName serverName, HRegionInfo hri) {
3058 RegionTransition rt = null;
3059 try {
3060 byte [] data = ZKAssign.getData(watcher, hri.getEncodedName());
3061
3062 rt = data == null? null: RegionTransition.parseFrom(data);
3063 } catch (KeeperException e) {
3064 server.abort("Exception reading unassigned node for region=" + hri.getEncodedName(), e);
3065 } catch (DeserializationException e) {
3066 server.abort("Exception parsing unassigned node for region=" + hri.getEncodedName(), e);
3067 }
3068
3069 ServerName addressFromZK = rt != null? rt.getServerName(): null;
3070 if (addressFromZK != null) {
3071
3072 boolean matchZK = addressFromZK.equals(serverName);
3073 LOG.debug("Checking region=" + hri.getRegionNameAsString() + ", zk server=" + addressFromZK +
3074 " current=" + serverName + ", matches=" + matchZK);
3075 return matchZK;
3076 }
3077
3078 ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
3079 boolean matchAM = (addressFromAM != null &&
3080 addressFromAM.equals(serverName));
3081 LOG.debug("based on AM, current region=" + hri.getRegionNameAsString() +
3082 " is on server=" + (addressFromAM != null ? addressFromAM : "null") +
3083 " server being checked: " + serverName);
3084
3085 return matchAM;
3086 }
3087
3088
3089
3090
3091
3092
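/**
 * Processes a server shutdown: drops region plans targeting the dead server,
 * cleans up the znodes of its regions and marks them offline.
 * @return the regions from the dead server that should be considered for
 * reassignment
 */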
3093 public List<HRegionInfo> processServerShutdown(final ServerName sn) {
3094
3095 synchronized (this.regionPlans) {
3096 for (Iterator <Map.Entry<String, RegionPlan>> i =
3097 this.regionPlans.entrySet().iterator(); i.hasNext();) {
3098 Map.Entry<String, RegionPlan> e = i.next();
3099 ServerName otherSn = e.getValue().getDestination();
3100
3101 if (otherSn != null && otherSn.equals(sn)) {
3102
3103 i.remove();
3104 }
3105 }
3106 }
3107 List<HRegionInfo> regions = regionStates.serverOffline(watcher, sn);
3108 for (Iterator<HRegionInfo> it = regions.iterator(); it.hasNext(); ) {
3109 HRegionInfo hri = it.next();
3110 String encodedName = hri.getEncodedName();
3111
3112
3113 Lock lock = locker.acquireLock(encodedName);
3114 try {
3115 RegionState regionState =
3116 regionStates.getRegionTransitionState(encodedName);
3117 if (regionState == null
3118 || (regionState.getServerName() != null && !regionState.isOnServer(sn))
3119 || !(regionState.isFailedClose() || regionState.isOffline()
3120 || regionState.isPendingOpenOrOpening())) {
3121 LOG.info("Skip " + regionState + " since it is not opening/failed_close"
3122 + " on the dead server any more: " + sn);
3123 it.remove();
3124 } else {
3125 try {
3126
3127 ZKAssign.deleteNodeFailSilent(watcher, hri);
3128 } catch (KeeperException ke) {
3129 server.abort("Unexpected ZK exception deleting node " + hri, ke);
3130 }
3131 if (zkTable.isDisablingOrDisabledTable(hri.getTable())) {
3132 regionStates.regionOffline(hri);
3133 it.remove();
3134 continue;
3135 }
3136
3137 regionStates.updateRegionState(hri, State.OFFLINE);
3138 }
3139 } finally {
3140 lock.unlock();
3141 }
3142 }
3143 return regions;
3144 }
3145
3146
3147
3148
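/**
 * Moves the region per the given plan: records the plan and unassigns the
 * region so it gets reassigned on the plan's destination; ignored for
 * disabled/disabling tables or regions that are not online.
 */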
3149 public void balance(final RegionPlan plan) {
3150 HRegionInfo hri = plan.getRegionInfo();
3151 TableName tableName = hri.getTable();
3152 if (zkTable.isDisablingOrDisabledTable(tableName)) {
3153 LOG.info("Ignored moving region of disabling/disabled table "
3154 + tableName);
3155 return;
3156 }
3157
3158
3159 String encodedName = hri.getEncodedName();
3160 ReentrantLock lock = locker.acquireLock(encodedName);
3161 try {
3162 if (!regionStates.isRegionOnline(hri)) {
3163 RegionState state = regionStates.getRegionState(encodedName);
3164 LOG.info("Ignored moving region not assigned: " + hri + ", "
3165 + (state == null ? "not in region states" : state));
3166 return;
3167 }
3168 synchronized (this.regionPlans) {
3169 this.regionPlans.put(plan.getRegionName(), plan);
3170 }
3171 unassign(hri, false, plan.getDestination());
3172 } finally {
3173 lock.unlock();
3174 }
3175 }
3176
3177 public void stop() {
3178 shutdown();
3179 if (tomActivated){
3180 this.timeoutMonitor.interrupt();
3181 this.timerUpdater.interrupt();
3182 }
3183 }
3184
3185
3186
3187
3188 public void shutdown() {
3189
3190 synchronized (zkEventWorkerWaitingList){
3191 zkEventWorkerWaitingList.clear();
3192 }
3193 threadPoolExecutorService.shutdownNow();
3194 zkEventWorkers.shutdownNow();
3195 }
3196
3197 protected void setEnabledTable(TableName tableName) {
3198 try {
3199 this.zkTable.setEnabledTable(tableName);
3200 } catch (KeeperException e) {
3201
3202 String errorMsg = "Unable to ensure that the table " + tableName
3203 + " will be enabled because of a ZooKeeper issue";
3204 LOG.error(errorMsg);
3205 this.server.abort(errorMsg, e);
3206 }
3207 }
3208
3209
3210
3211
3212
3213
3214
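/**
 * Asynchronously sets the region OFFLINE in memory and creates the offline
 * znode, invoking the callback when ZooKeeper responds.
 * @return true if the async create was issued, false otherwise
 */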
3215 private boolean asyncSetOfflineInZooKeeper(final RegionState state,
3216 final AsyncCallback.StringCallback cb, final ServerName destination) {
3217 if (!state.isClosed() && !state.isOffline()) {
3218 this.server.abort("Unexpected state trying to OFFLINE; " + state,
3219 new IllegalStateException());
3220 return false;
3221 }
3222 regionStates.updateRegionState(state.getRegion(), State.OFFLINE);
3223 try {
3224 ZKAssign.asyncCreateNodeOffline(watcher, state.getRegion(),
3225 destination, cb, state);
3226 } catch (KeeperException e) {
3227 if (e instanceof NodeExistsException) {
3228 LOG.warn("Node for " + state.getRegion() + " already exists");
3229 } else {
3230 server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
3231 }
3232 return false;
3233 }
3234 return true;
3235 }
3236
3237 private boolean deleteNodeInStates(String encodedName,
3238 String desc, ServerName sn, EventType... types) {
3239 try {
3240 for (EventType et: types) {
3241 if (ZKAssign.deleteNode(watcher, encodedName, et, sn)) {
3242 return true;
3243 }
3244 }
3245 LOG.info("Failed to delete the " + desc + " node for "
3246 + encodedName + ". The node type may not match");
3247 } catch (NoNodeException e) {
3248 if (LOG.isDebugEnabled()) {
3249 LOG.debug("The " + desc + " node for " + encodedName + " already deleted");
3250 }
3251 } catch (KeeperException ke) {
3252 server.abort("Unexpected ZK exception deleting " + desc
3253 + " node for the region " + encodedName, ke);
3254 }
3255 return false;
3256 }
3257
3258 private void deleteMergingNode(String encodedName, ServerName sn) {
3259 deleteNodeInStates(encodedName, "merging", sn, EventType.RS_ZK_REGION_MERGING,
3260 EventType.RS_ZK_REQUEST_REGION_MERGE, EventType.RS_ZK_REGION_MERGED);
3261 }
3262
3263 private void deleteSplittingNode(String encodedName, ServerName sn) {
3264 deleteNodeInStates(encodedName, "splitting", sn, EventType.RS_ZK_REGION_SPLITTING,
3265 EventType.RS_ZK_REQUEST_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT);
3266 }
3267
3268
3269
3270
3271
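/**
 * Handles a merge-related znode transition (request-merge, merging, merged)
 * reported by a region server, updating region states accordingly.
 * @return true if the event was handled, false if it was dropped
 */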
3272 private boolean handleRegionMerging(final RegionTransition rt, final String encodedName,
3273 final String prettyPrintedRegionName, final ServerName sn) {
3274 if (!serverManager.isServerOnline(sn)) {
3275 LOG.warn("Dropped merging! ServerName=" + sn + " unknown.");
3276 return false;
3277 }
3278 byte [] payloadOfMerging = rt.getPayload();
3279 List<HRegionInfo> mergingRegions;
3280 try {
3281 mergingRegions = HRegionInfo.parseDelimitedFrom(
3282 payloadOfMerging, 0, payloadOfMerging.length);
3283 } catch (IOException e) {
3284 LOG.error("Dropped merging! Failed reading " + rt.getEventType()
3285 + " payload for " + prettyPrintedRegionName);
3286 return false;
3287 }
3288 assert mergingRegions.size() == 3;
3289 HRegionInfo p = mergingRegions.get(0);
3290 HRegionInfo hri_a = mergingRegions.get(1);
3291 HRegionInfo hri_b = mergingRegions.get(2);
3292
3293 RegionState rs_p = regionStates.getRegionState(p);
3294 RegionState rs_a = regionStates.getRegionState(hri_a);
3295 RegionState rs_b = regionStates.getRegionState(hri_b);
3296
3297 if (!((rs_a == null || rs_a.isOpenOrMergingOnServer(sn))
3298 && (rs_b == null || rs_b.isOpenOrMergingOnServer(sn))
3299 && (rs_p == null || rs_p.isOpenOrMergingNewOnServer(sn)))) {
3300 LOG.warn("Dropped merging! Not in state good for MERGING; rs_p="
3301 + rs_p + ", rs_a=" + rs_a + ", rs_b=" + rs_b);
3302 return false;
3303 }
3304
3305 EventType et = rt.getEventType();
3306 if (et == EventType.RS_ZK_REQUEST_REGION_MERGE) {
3307 try {
3308 if (RegionMergeTransaction.transitionMergingNode(watcher, p,
3309 hri_a, hri_b, sn, -1, EventType.RS_ZK_REQUEST_REGION_MERGE,
3310 EventType.RS_ZK_REGION_MERGING) == -1) {
3311 byte[] data = ZKAssign.getData(watcher, encodedName);
3312 EventType currentType = null;
3313 if (data != null) {
3314 RegionTransition newRt = RegionTransition.parseFrom(data);
3315 currentType = newRt.getEventType();
3316 }
3317 if (currentType == null || (currentType != EventType.RS_ZK_REGION_MERGED
3318 && currentType != EventType.RS_ZK_REGION_MERGING)) {
3319 LOG.warn("Failed to transition pending_merge node "
3320 + encodedName + " to merging, it's now " + currentType);
3321 return false;
3322 }
3323 }
3324 } catch (Exception e) {
3325 LOG.warn("Failed to transition pending_merge node "
3326 + encodedName + " to merging", e);
3327 return false;
3328 }
3329 }
3330
3331 synchronized (regionStates) {
3332 regionStates.updateRegionState(hri_a, State.MERGING);
3333 regionStates.updateRegionState(hri_b, State.MERGING);
3334 regionStates.updateRegionState(p, State.MERGING_NEW, sn);
3335
3336 if (et != EventType.RS_ZK_REGION_MERGED) {
3337 regionStates.regionOffline(p, State.MERGING_NEW);
3338 this.mergingRegions.put(encodedName,
3339 new PairOfSameType<HRegionInfo>(hri_a, hri_b));
3340 } else {
3341 this.mergingRegions.remove(encodedName);
3342 regionOffline(hri_a, State.MERGED);
3343 regionOffline(hri_b, State.MERGED);
3344 regionOnline(p, sn);
3345 }
3346 }
3347
3348 if (et == EventType.RS_ZK_REGION_MERGED) {
3349 LOG.debug("Handling MERGED event for " + encodedName + "; deleting node");
3350
3351 try {
3352 boolean successful = false;
3353 while (!successful) {
3354
3355
3356 successful = ZKAssign.deleteNode(watcher, encodedName,
3357 EventType.RS_ZK_REGION_MERGED, sn);
3358 }
3359 } catch (KeeperException e) {
3360 if (e instanceof NoNodeException) {
3361 String znodePath = ZKUtil.joinZNode(watcher.assignmentZNode, encodedName);
3362 LOG.debug("The znode " + znodePath + " does not exist; it may already have been deleted.");
3363 } else {
3364 server.abort("Error deleting MERGED node " + encodedName, e);
3365 }
3366 }
3367 LOG.info("Handled MERGED event; merged=" + p.getRegionNameAsString()
3368 + ", region_a=" + hri_a.getRegionNameAsString() + ", region_b="
3369 + hri_b.getRegionNameAsString() + ", on " + sn);
3370
3371
3372 if (zkTable.isDisablingOrDisabledTable(p.getTable())) {
3373 unassign(p);
3374 }
3375 }
3376 return true;
3377 }
3378
3379
3380
3381
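/**
 * Handles a split-related znode transition (request-split, splitting, split)
 * reported by a region server, updating region states accordingly.
 * @return true if the event was handled, false if it was dropped
 */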
3382 private boolean handleRegionSplitting(final RegionTransition rt, final String encodedName,
3383 final String prettyPrintedRegionName, final ServerName sn) {
3384 if (!serverManager.isServerOnline(sn)) {
3385 LOG.warn("Dropped splitting! ServerName=" + sn + " unknown.");
3386 return false;
3387 }
3388 byte [] payloadOfSplitting = rt.getPayload();
3389 List<HRegionInfo> splittingRegions;
3390 try {
3391 splittingRegions = HRegionInfo.parseDelimitedFrom(
3392 payloadOfSplitting, 0, payloadOfSplitting.length);
3393 } catch (IOException e) {
3394 LOG.error("Dropped splitting! Failed reading " + rt.getEventType()
3395 + " payload for " + prettyPrintedRegionName);
3396 return false;
3397 }
3398 assert splittingRegions.size() == 2;
3399 HRegionInfo hri_a = splittingRegions.get(0);
3400 HRegionInfo hri_b = splittingRegions.get(1);
3401
3402 RegionState rs_p = regionStates.getRegionState(encodedName);
3403 RegionState rs_a = regionStates.getRegionState(hri_a);
3404 RegionState rs_b = regionStates.getRegionState(hri_b);
3405
3406 if (!((rs_p == null || rs_p.isOpenOrSplittingOnServer(sn))
3407 && (rs_a == null || rs_a.isOpenOrSplittingNewOnServer(sn))
3408 && (rs_b == null || rs_b.isOpenOrSplittingNewOnServer(sn)))) {
3409 LOG.warn("Dropped splitting! Not in state good for SPLITTING; rs_p="
3410 + rs_p + ", rs_a=" + rs_a + ", rs_b=" + rs_b);
3411 return false;
3412 }
3413
3414 if (rs_p == null) {
3415
3416 rs_p = regionStates.updateRegionState(rt, State.OPEN);
3417 if (rs_p == null) {
3418 LOG.warn("Received splitting for region " + prettyPrintedRegionName
3419 + " from server " + sn + " but it doesn't exist anymore,"
3420 + " probably already processed its split");
3421 return false;
3422 }
3423 regionStates.regionOnline(rs_p.getRegion(), sn);
3424 }
3425
3426 HRegionInfo p = rs_p.getRegion();
3427 EventType et = rt.getEventType();
3428 if (et == EventType.RS_ZK_REQUEST_REGION_SPLIT) {
3429 try {
3430 if (SplitTransaction.transitionSplittingNode(watcher, p,
3431 hri_a, hri_b, sn, -1, EventType.RS_ZK_REQUEST_REGION_SPLIT,
3432 EventType.RS_ZK_REGION_SPLITTING) == -1) {
3433 byte[] data = ZKAssign.getData(watcher, encodedName);
3434 EventType currentType = null;
3435 if (data != null) {
3436 RegionTransition newRt = RegionTransition.parseFrom(data);
3437 currentType = newRt.getEventType();
3438 }
3439 if (currentType == null || (currentType != EventType.RS_ZK_REGION_SPLIT
3440 && currentType != EventType.RS_ZK_REGION_SPLITTING)) {
3441 LOG.warn("Failed to transition pending_split node "
3442 + encodedName + " to splitting, it's now " + currentType);
3443 return false;
3444 }
3445 }
3446 } catch (Exception e) {
3447 LOG.warn("Failed to transition pending_split node "
3448 + encodedName + " to splitting", e);
3449 return false;
3450 }
3451 }
3452
3453 synchronized (regionStates) {
3454 regionStates.updateRegionState(hri_a, State.SPLITTING_NEW, sn);
3455 regionStates.updateRegionState(hri_b, State.SPLITTING_NEW, sn);
3456 regionStates.regionOffline(hri_a, State.SPLITTING_NEW);
3457 regionStates.regionOffline(hri_b, State.SPLITTING_NEW);
3458 regionStates.updateRegionState(rt, State.SPLITTING);
3459
3460
3461
3462 if (TEST_SKIP_SPLIT_HANDLING) {
3463 LOG.warn("Skipping split message, TEST_SKIP_SPLIT_HANDLING is set");
3464 return true;
3465 }
3466
3467 if (et == EventType.RS_ZK_REGION_SPLIT) {
3468 regionOffline(p, State.SPLIT);
3469 regionOnline(hri_a, sn);
3470 regionOnline(hri_b, sn);
3471 }
3472 }
3473
3474 if (et == EventType.RS_ZK_REGION_SPLIT) {
3475 LOG.debug("Handling SPLIT event for " + encodedName + "; deleting node");
3476
3477 try {
3478 boolean successful = false;
3479 while (!successful) {
3480
3481
3482 successful = ZKAssign.deleteNode(watcher, encodedName,
3483 EventType.RS_ZK_REGION_SPLIT, sn);
3484 }
3485 } catch (KeeperException e) {
3486 if (e instanceof NoNodeException) {
3487 String znodePath = ZKUtil.joinZNode(watcher.assignmentZNode, encodedName);
3488 LOG.debug("The znode " + znodePath + " does not exist; it may already have been deleted.");
3489 } else {
3490 server.abort("Error deleting SPLIT node " + encodedName, e);
3491 }
3492 }
3493 LOG.info("Handled SPLIT event; parent=" + p.getRegionNameAsString()
3494 + ", daughter a=" + hri_a.getRegionNameAsString() + ", daughter b="
3495 + hri_b.getRegionNameAsString() + ", on " + sn);
3496
3497
3498 if (zkTable.isDisablingOrDisabledTable(p.getTable())) {
3499 unassign(hri_a);
3500 unassign(hri_b);
3501 }
3502 }
3503 return true;
3504 }
3505
3506
3507
3508
3509
3510
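/**
 * Marks the region offline in the given state and clears any closed-region
 * bookkeeping and cached region plan for it.
 */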
3511 private void regionOffline(final HRegionInfo regionInfo, final State state) {
3512 regionStates.regionOffline(regionInfo, state);
3513 removeClosedRegion(regionInfo);
3514
3515 clearRegionPlan(regionInfo);
3516 }
3517
3518
3519
3520
3521 public LoadBalancer getBalancer() {
3522 return this.balancer;
3523 }
3524 }