1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import java.io.DataInput;
23 import java.io.DataOutput;
24 import java.io.IOException;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Collections;
28 import java.util.Date;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.Iterator;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.NavigableMap;
35 import java.util.Set;
36 import java.util.SortedMap;
37 import java.util.TreeMap;
38 import java.util.TreeSet;
39 import java.util.concurrent.ConcurrentSkipListMap;
40 import java.util.concurrent.ConcurrentSkipListSet;
41 import java.util.concurrent.Executors;
42 import java.util.concurrent.atomic.AtomicInteger;
43 import java.util.concurrent.atomic.AtomicLong;
44
45 import org.apache.commons.logging.Log;
46 import org.apache.commons.logging.LogFactory;
47 import org.apache.hadoop.conf.Configuration;
48 import org.apache.hadoop.hbase.Chore;
49 import org.apache.hadoop.hbase.HConstants;
50 import org.apache.hadoop.hbase.HRegionInfo;
51 import org.apache.hadoop.hbase.HServerLoad;
52 import org.apache.hadoop.hbase.NotServingRegionException;
53 import org.apache.hadoop.hbase.Server;
54 import org.apache.hadoop.hbase.ServerName;
55 import org.apache.hadoop.hbase.Stoppable;
56 import org.apache.hadoop.hbase.TableNotFoundException;
57 import org.apache.hadoop.hbase.catalog.CatalogTracker;
58 import org.apache.hadoop.hbase.catalog.MetaReader;
59 import org.apache.hadoop.hbase.catalog.RootLocationEditor;
60 import org.apache.hadoop.hbase.client.Result;
61 import org.apache.hadoop.hbase.executor.EventHandler;
62 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
63 import org.apache.hadoop.hbase.executor.ExecutorService;
64 import org.apache.hadoop.hbase.executor.RegionTransitionData;
65 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
66 import org.apache.hadoop.hbase.master.AssignmentManager.RegionState.State;
67 import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
68 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
69 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
70 import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
71 import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
72 import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
73 import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException;
74 import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
75 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
76 import org.apache.hadoop.hbase.util.Bytes;
77 import org.apache.hadoop.hbase.util.Pair;
78 import org.apache.hadoop.hbase.util.Threads;
79 import org.apache.hadoop.hbase.util.Writables;
80 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
81 import org.apache.hadoop.hbase.zookeeper.ZKTable;
82 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
83 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
84 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
85 import org.apache.hadoop.ipc.RemoteException;
86 import org.apache.zookeeper.AsyncCallback;
87 import org.apache.zookeeper.KeeperException;
88 import org.apache.zookeeper.KeeperException.NoNodeException;
89 import org.apache.zookeeper.KeeperException.NodeExistsException;
90 import org.apache.zookeeper.data.Stat;
91
92
93
94
95
96
97
98
99 public class AssignmentManager extends ZooKeeperListener {
100
101 private static final Log LOG = LogFactory.getLog(AssignmentManager.class);
102
103 protected Server master;
104
105 private ServerManager serverManager;
106
107 private CatalogTracker catalogTracker;
108
109 private TimeoutMonitor timeoutMonitor;
110
111 private TimerUpdater timerUpdater;
112
113 private LoadBalancer balancer;
114
115
116
117
118
119 private final Map <String, HRegionInfo> regionsToReopen;
120
121
122
123
124 private final int maximumAssignmentAttempts;
125
126
127
128
129
130 final ConcurrentSkipListMap<String, RegionState> regionsInTransition =
131 new ConcurrentSkipListMap<String, RegionState>();
132
133
134
135
136
137 final NavigableMap<String, RegionPlan> regionPlans =
138 new TreeMap<String, RegionPlan>();
139
140 private final ZKTable zkTable;
141
142
143 Set<String> disablingTables = new HashSet<String>(1);
144
145
146
147 Map<String, List<HRegionInfo>> enablingTables = new HashMap<String, List<HRegionInfo>>(1);
148
149
150
151
152
153
154
155 private final NavigableMap<ServerName, Set<HRegionInfo>> servers =
156 new TreeMap<ServerName, Set<HRegionInfo>>();
157
158
159
160
161
162 private final ConcurrentSkipListSet<ServerName> serversInUpdatingTimer =
163 new ConcurrentSkipListSet<ServerName>();
164
165
166
167
168
169
170
171
172 private final SortedMap<HRegionInfo, ServerName> regions =
173 new TreeMap<HRegionInfo, ServerName>();
174
175 private final ExecutorService executorService;
176
177
178 private java.util.concurrent.ExecutorService threadPoolExecutorService;
179
180 private List<EventType> ignoreStatesRSOffline = Arrays.asList(new EventType[]{
181 EventType.RS_ZK_REGION_FAILED_OPEN, EventType.RS_ZK_REGION_CLOSED });
182
183
184
185
186
187 private volatile boolean failover = false;
188
189
190
191 private Map<String, HRegionInfo> failoverProcessedRegions =
192 new HashMap<String, HRegionInfo>();
193
194
195
196
197
198
199
200
201
202
203
204 public AssignmentManager(Server master, ServerManager serverManager,
205 CatalogTracker catalogTracker, final LoadBalancer balancer,
206 final ExecutorService service) throws KeeperException, IOException {
207 super(master.getZooKeeper());
208 this.master = master;
209 this.serverManager = serverManager;
210 this.catalogTracker = catalogTracker;
211 this.executorService = service;
212 this.regionsToReopen = Collections.synchronizedMap
213 (new HashMap<String, HRegionInfo> ());
214 Configuration conf = master.getConfiguration();
215 this.timeoutMonitor = new TimeoutMonitor(
216 conf.getInt("hbase.master.assignment.timeoutmonitor.period", 10000),
217 master, serverManager,
218 conf.getInt("hbase.master.assignment.timeoutmonitor.timeout", 1800000));
219 this.timerUpdater = new TimerUpdater(conf.getInt(
220 "hbase.master.assignment.timerupdater.period", 10000), master);
221 Threads.setDaemonThreadRunning(timerUpdater.getThread(),
222 master.getServerName() + ".timerUpdater");
223 this.zkTable = new ZKTable(this.master.getZooKeeper());
224 this.maximumAssignmentAttempts =
225 this.master.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10);
226 this.balancer = balancer;
227 this.threadPoolExecutorService = Executors.newCachedThreadPool();
228 }
229
230 void startTimeOutMonitor() {
231 Threads.setDaemonThreadRunning(timeoutMonitor.getThread(), master.getServerName()
232 + ".timeoutMonitor");
233 }
234
235
236
237
238
239
240
241 double getAverageLoad() {
242 int totalLoad = 0;
243 int numServers = 0;
244
245
246 synchronized (this.regions) {
247 for (Map.Entry<ServerName, Set<HRegionInfo>> e: servers.entrySet()) {
248 numServers++;
249 totalLoad += e.getValue().size();
250 }
251 }
252 return (double)totalLoad / (double)numServers;
253 }
254
255
256
257
258 public ZKTable getZKTable() {
259
260
261 return this.zkTable;
262 }
263
264
265
266
267
268
269
270 public ServerName getRegionServerOfRegion(HRegionInfo hri) {
271 synchronized (this.regions ) {
272 return regions.get(hri);
273 }
274 }
275
276
277
278
279
280
281 public boolean isRegionAssigned(HRegionInfo hri) {
282 synchronized (this.regions ) {
283 return regions.containsKey(hri);
284 }
285 }
286
287
288
289
290
291
292
293 public List<HRegionInfo> getEnablingTableRegions(String tableName){
294 return this.enablingTables.get(tableName);
295 }
296
297
298
299
300
301
302 public void addPlan(String encodedName, RegionPlan plan) {
303 synchronized (regionPlans) {
304 regionPlans.put(encodedName, plan);
305 }
306 }
307
308
309
310
311 public void addPlans(Map<String, RegionPlan> plans) {
312 synchronized (regionPlans) {
313 regionPlans.putAll(plans);
314 }
315 }
316
317
318
319
320
321
322
323
324 public void setRegionsToReopen(List <HRegionInfo> regions) {
325 for(HRegionInfo hri : regions) {
326 regionsToReopen.put(hri.getEncodedName(), hri);
327 }
328 }
329
330
331
332
333
334
335
336
337 public Pair<Integer, Integer> getReopenStatus(byte[] tableName)
338 throws IOException {
339 List <HRegionInfo> hris =
340 MetaReader.getTableRegions(this.master.getCatalogTracker(), tableName);
341 Integer pending = 0;
342 for(HRegionInfo hri : hris) {
343 String name = hri.getEncodedName();
344 if (regionsToReopen.containsKey(name) || regionsInTransition.containsKey(name)) {
345 pending++;
346 }
347 }
348 return new Pair<Integer, Integer>(pending, hris.size());
349 }
350
351
352
353
354
355
356 void cleanoutUnassigned() throws IOException, KeeperException {
357
358 ZKAssign.deleteAllNodes(watcher);
359 ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
360 this.watcher.assignmentZNode);
361 }
362
363
364
365
366
367
368
369
370 void joinCluster() throws IOException,
371 KeeperException, InterruptedException {
372
373
374
375
376
377
378
379
380
381
382 Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers = rebuildUserRegions();
383
384 processDeadServersAndRegionsInTransition(deadServers);
385
386
387
388 boolean isWatcherCreated = recoverTableInDisablingState(this.disablingTables);
389 recoverTableInEnablingState(this.enablingTables.keySet(), isWatcherCreated);
390 this.enablingTables.clear();
391 this.disablingTables.clear();
392 }
393
394
395
396
397
398
399
400
401 void processDeadServersAndRegionsInTransition()
402 throws KeeperException, IOException, InterruptedException {
403
404 processDeadServersAndRegionsInTransition(null);
405 }
406
407
408
409
410
411
412
413
414
415
416
417 void processDeadServersAndRegionsInTransition(
418 final Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers)
419 throws KeeperException, IOException, InterruptedException {
420 List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(watcher,
421 watcher.assignmentZNode);
422
423 if (nodes == null) {
424 String errorMessage = "Failed to get the children from ZK";
425 master.abort(errorMessage, new IOException(errorMessage));
426 return;
427 }
428
429
430 synchronized (this.regions) {
431 for (Map.Entry<HRegionInfo, ServerName> e : this.regions.entrySet()) {
432 if (!e.getKey().isMetaTable() && e.getValue() != null) {
433 LOG.debug("Found " + e + " out on cluster");
434 this.failover = true;
435 break;
436 }
437 if (nodes.contains(e.getKey().getEncodedName())) {
438 LOG.debug("Found " + e.getKey().getRegionNameAsString() + " in RITs");
439
440 this.failover = true;
441 break;
442 }
443 }
444 }
445
446
447
448 synchronized (regionsInTransition) {
449 nodes.removeAll(regionsInTransition.keySet());
450 }
451
452
453
454
455 if (!this.serverManager.getDeadServers().isEmpty()) {
456 this.failover = true;
457 }
458
459
460 if (this.failover) {
461 LOG.info("Found regions out on cluster or in RIT; failover");
462
463
464 processDeadServersAndRecoverLostRegions(deadServers, nodes);
465 this.failover = false;
466 failoverProcessedRegions.clear();
467 } else {
468
469 LOG.info("Clean cluster startup. Assigning userregions");
470 cleanoutUnassigned();
471 assignAllUserRegions();
472 }
473 }
474
475
476
477
478
479
480
481
482
483
484
485
486 boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
487 throws InterruptedException, KeeperException, IOException {
488 boolean intransistion =
489 processRegionInTransition(hri.getEncodedName(), hri, null);
490 if (!intransistion) return intransistion;
491 LOG.debug("Waiting on " + HRegionInfo.prettyPrint(hri.getEncodedName()));
492 synchronized(this.regionsInTransition) {
493 while (!this.master.isStopped() &&
494 this.regionsInTransition.containsKey(hri.getEncodedName())) {
495
496 this.regionsInTransition.wait(100);
497 }
498 }
499 return intransistion;
500 }
501
502
503
504
505
506
507
508
509
510
511
512 boolean processRegionInTransition(final String encodedRegionName,
513 final HRegionInfo regionInfo,
514 final Map<ServerName,List<Pair<HRegionInfo,Result>>> deadServers)
515 throws KeeperException, IOException {
516 Stat stat = new Stat();
517 RegionTransitionData data = ZKAssign.getDataAndWatch(watcher,
518 encodedRegionName, stat);
519 if (data == null) return false;
520 HRegionInfo hri = regionInfo;
521 if (hri == null) {
522 if ((hri = getHRegionInfo(data)) == null) return false;
523 }
524 processRegionsInTransition(data, hri, deadServers, stat.getVersion());
525 return true;
526 }
527
528 void processRegionsInTransition(final RegionTransitionData data,
529 final HRegionInfo regionInfo,
530 final Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers,
531 int expectedVersion)
532 throws KeeperException {
533 String encodedRegionName = regionInfo.getEncodedName();
534 LOG.info("Processing region " + regionInfo.getRegionNameAsString() +
535 " in state " + data.getEventType());
536 List<HRegionInfo> hris = this.enablingTables.get(regionInfo.getTableNameAsString());
537 if (hris != null && !hris.isEmpty()) {
538 hris.remove(regionInfo);
539 }
540 synchronized (regionsInTransition) {
541 RegionState regionState = regionsInTransition.get(encodedRegionName);
542 if (regionState != null ||
543 failoverProcessedRegions.containsKey(encodedRegionName)) {
544
545 return;
546 }
547 switch (data.getEventType()) {
548 case M_ZK_REGION_CLOSING:
549
550
551 if (isOnDeadServer(regionInfo, deadServers) &&
552 (data.getOrigin() == null || !serverManager.isServerOnline(data.getOrigin()))) {
553
554
555 forceOffline(regionInfo, data);
556 } else {
557
558
559 regionsInTransition.put(encodedRegionName, new RegionState(
560 regionInfo, RegionState.State.CLOSING,
561 data.getStamp(), data.getOrigin()));
562 }
563 failoverProcessedRegions.put(encodedRegionName, regionInfo);
564 break;
565
566 case RS_ZK_REGION_CLOSED:
567 case RS_ZK_REGION_FAILED_OPEN:
568
569 addToRITandCallClose(regionInfo, RegionState.State.CLOSED, data);
570 failoverProcessedRegions.put(encodedRegionName, regionInfo);
571 break;
572
573 case M_ZK_REGION_OFFLINE:
574
575
576 if (isOnDeadServer(regionInfo, deadServers) &&
577 (data.getOrigin() == null ||
578 !serverManager.isServerOnline(data.getOrigin()))) {
579
580 addToRITandCallClose(regionInfo, RegionState.State.OFFLINE, data);
581 } else if (data.getOrigin() != null &&
582 !serverManager.isServerOnline(data.getOrigin())) {
583
584
585 addToRITandCallClose(regionInfo, RegionState.State.OFFLINE, data);
586 } else {
587 regionsInTransition.put(encodedRegionName, new RegionState(
588 regionInfo, RegionState.State.PENDING_OPEN, data.getStamp(), data
589 .getOrigin()));
590 }
591 failoverProcessedRegions.put(encodedRegionName, regionInfo);
592 break;
593
594 case RS_ZK_REGION_OPENING:
595
596
597
598
599
600 if (regionInfo.isMetaTable()) {
601 regionsInTransition.put(encodedRegionName, new RegionState(
602 regionInfo, RegionState.State.OPENING, data.getStamp(), data
603 .getOrigin()));
604
605
606
607
608
609 processOpeningState(regionInfo);
610 break;
611 }
612 regionsInTransition.put(encodedRegionName, new RegionState(regionInfo,
613 RegionState.State.OPENING, data.getStamp(), data.getOrigin()));
614 failoverProcessedRegions.put(encodedRegionName, regionInfo);
615 break;
616
617 case RS_ZK_REGION_OPENED:
618
619 regionsInTransition.put(encodedRegionName, new RegionState(
620 regionInfo, RegionState.State.OPEN,
621 data.getStamp(), data.getOrigin()));
622 ServerName sn = data.getOrigin() == null? null: data.getOrigin();
623
624
625
626 if (sn == null) {
627 LOG.warn("Region in transition " + regionInfo.getEncodedName() +
628 " references a null server; letting RIT timeout so will be " +
629 "assigned elsewhere");
630 } else if (!serverManager.isServerOnline(sn)
631 && (isOnDeadServer(regionInfo, deadServers)
632 || regionInfo.isMetaRegion() || regionInfo.isRootRegion())) {
633 forceOffline(regionInfo, data);
634 } else {
635 new OpenedRegionHandler(master, this, regionInfo, sn, expectedVersion)
636 .process();
637 }
638 failoverProcessedRegions.put(encodedRegionName, regionInfo);
639 break;
640 }
641 }
642 }
643
644
645
646
647
648
649
650
651 private void forceOffline(final HRegionInfo hri,
652 final RegionTransitionData oldData)
653 throws KeeperException {
654
655
656 LOG.debug("RIT " + hri.getEncodedName() + " in state=" +
657 oldData.getEventType() + " was on deadserver; forcing offline");
658 ZKAssign.createOrForceNodeOffline(this.watcher, hri,
659 this.master.getServerName());
660 addToRITandCallClose(hri, RegionState.State.OFFLINE, oldData);
661 }
662
663
664
665
666
667
668
669
670 private void addToRITandCallClose(final HRegionInfo hri,
671 final RegionState.State state, final RegionTransitionData oldData) {
672 this.regionsInTransition.put(hri.getEncodedName(),
673 new RegionState(hri, state, oldData.getStamp(), oldData.getOrigin()));
674 new ClosedRegionHandler(this.master, this, hri).process();
675 }
676
677
678
679
680
681 public void removeClosedRegion(HRegionInfo hri) {
682 if (!regionsToReopen.isEmpty()) {
683 if (regionsToReopen.remove(hri.getEncodedName()) != null) {
684 LOG.debug("Removed region from reopening regions because it was closed");
685 }
686 }
687 }
688
689
690
691
692
693
694
695 private boolean isOnDeadServer(final HRegionInfo regionInfo,
696 final Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers) {
697 if (deadServers == null) return false;
698 for (Map.Entry<ServerName, List<Pair<HRegionInfo, Result>>> deadServer:
699 deadServers.entrySet()) {
700 for (Pair<HRegionInfo, Result> e: deadServer.getValue()) {
701 if (e.getFirst().equals(regionInfo)) return true;
702 }
703 }
704 return false;
705 }
706
707
708
709
710
711
712
713
714
715
716
717 private void handleRegion(final RegionTransitionData data, int expectedVersion) {
718 synchronized(regionsInTransition) {
719 HRegionInfo hri = null;
720 if (data == null || data.getOrigin() == null) {
721 LOG.warn("Unexpected NULL input " + data);
722 return;
723 }
724 ServerName sn = data.getOrigin();
725
726 if (sn.equals(HConstants.HBCK_CODE_SERVERNAME)) {
727 handleHBCK(data);
728 return;
729 }
730 String encodedName = HRegionInfo.encodeRegionName(data.getRegionName());
731 String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
732
733 if (!serverManager.isServerOnline(sn) &&
734 !this.master.getServerName().equals(sn)
735 && !ignoreStatesRSOffline.contains(data.getEventType())) {
736 LOG.warn("Attempted to handle region transition for server but " +
737 "server is not online: " + prettyPrintedRegionName);
738 return;
739 }
740
741 boolean lateEvent = data.getStamp() <
742 (System.currentTimeMillis() - 15000);
743 LOG.debug("Handling transition=" + data.getEventType() +
744 ", server=" + data.getOrigin() + ", region=" +
745 (prettyPrintedRegionName == null? "null": prettyPrintedRegionName) +
746 (lateEvent? ", which is more than 15 seconds late" : ""));
747 RegionState regionState = regionsInTransition.get(encodedName);
748 switch (data.getEventType()) {
749 case M_ZK_REGION_OFFLINE:
750
751 break;
752
753 case RS_ZK_REGION_SPLITTING:
754 if (!isInStateForSplitting(regionState)) break;
755 addSplittingToRIT(sn, encodedName);
756 break;
757
758 case RS_ZK_REGION_SPLIT:
759
760 if (!isInStateForSplitting(regionState)) break;
761
762 if (regionState == null) {
763 regionState = addSplittingToRIT(sn, encodedName);
764 String message = "Received SPLIT for region " + prettyPrintedRegionName +
765 " from server " + sn;
766
767 if (regionState == null) {
768 LOG.warn(message + " but it doesn't exist anymore," +
769 " probably already processed its split");
770 break;
771 }
772 LOG.info(message +
773 " but region was not first in SPLITTING state; continuing");
774 }
775
776 byte [] payload = data.getPayload();
777 List<HRegionInfo> daughters = null;
778 try {
779 daughters = Writables.getHRegionInfos(payload, 0, payload.length);
780 } catch (IOException e) {
781 LOG.error("Dropped split! Failed reading split payload for " +
782 prettyPrintedRegionName);
783 break;
784 }
785 assert daughters.size() == 2;
786
787 if (!this.serverManager.isServerOnline(sn)) {
788 LOG.error("Dropped split! ServerName=" + sn + " unknown.");
789 break;
790 }
791
792 this.executorService.submit(new SplitRegionHandler(master, this,
793 regionState.getRegion(), sn, daughters));
794 break;
795
796 case M_ZK_REGION_CLOSING:
797 hri = checkIfInFailover(regionState, encodedName, data);
798 if (hri != null) {
799 regionState = new RegionState(hri, RegionState.State.CLOSING, data
800 .getStamp(), data.getOrigin());
801 regionsInTransition.put(encodedName, regionState);
802 failoverProcessedRegions.put(encodedName, hri);
803 break;
804 }
805
806
807 if (regionState == null ||
808 (!regionState.isPendingClose() && !regionState.isClosing())) {
809 LOG.warn("Received CLOSING for region " + prettyPrintedRegionName +
810 " from server " + data.getOrigin() + " but region was in " +
811 " the state " + regionState + " and not " +
812 "in expected PENDING_CLOSE or CLOSING states");
813 return;
814 }
815
816 regionState.update(RegionState.State.CLOSING,
817 data.getStamp(), data.getOrigin());
818 break;
819
820 case RS_ZK_REGION_CLOSED:
821 hri = checkIfInFailover(regionState, encodedName, data);
822 if (hri != null) {
823 regionState = new RegionState(hri, RegionState.State.CLOSED, data
824 .getStamp(), data.getOrigin());
825 regionsInTransition.put(encodedName, regionState);
826 removeClosedRegion(regionState.getRegion());
827 new ClosedRegionHandler(master, this, regionState.getRegion())
828 .process();
829 failoverProcessedRegions.put(encodedName, hri);
830 break;
831 }
832
833 if (regionState == null ||
834 (!regionState.isPendingClose() && !regionState.isClosing())) {
835 LOG.warn("Received CLOSED for region " + prettyPrintedRegionName +
836 " from server " + data.getOrigin() + " but region was in " +
837 " the state " + regionState + " and not " +
838 "in expected PENDING_CLOSE or CLOSING states");
839 return;
840 }
841
842
843
844 regionState.update(RegionState.State.CLOSED,
845 data.getStamp(), data.getOrigin());
846 removeClosedRegion(regionState.getRegion());
847 this.executorService.submit(new ClosedRegionHandler(master,
848 this, regionState.getRegion()));
849 break;
850
851 case RS_ZK_REGION_FAILED_OPEN:
852 hri = checkIfInFailover(regionState, encodedName, data);
853 if (hri != null) {
854 regionState = new RegionState(hri, RegionState.State.CLOSED, data
855 .getStamp(), data.getOrigin());
856 regionsInTransition.put(encodedName, regionState);
857 new ClosedRegionHandler(master, this, regionState.getRegion())
858 .process();
859 failoverProcessedRegions.put(encodedName, hri);
860 break;
861 }
862 if (regionState == null ||
863 (!regionState.isOffline() && !regionState.isPendingOpen() && !regionState.isOpening())) {
864 LOG.warn("Received FAILED_OPEN for region " + prettyPrintedRegionName +
865 " from server " + data.getOrigin() + " but region was in " +
866 " the state " + regionState + " and not in OFFLINE, PENDING_OPEN or OPENING");
867 return;
868 }
869
870 regionState.update(RegionState.State.CLOSED,
871 data.getStamp(), data.getOrigin());
872
873
874 getRegionPlan(regionState, sn, true);
875 this.executorService.submit(new ClosedRegionHandler(master,
876 this, regionState.getRegion()));
877 break;
878
879 case RS_ZK_REGION_OPENING:
880 hri = checkIfInFailover(regionState, encodedName, data);
881 if (hri != null) {
882 regionState = new RegionState(hri, RegionState.State.OPENING, data
883 .getStamp(), data.getOrigin());
884 regionsInTransition.put(encodedName, regionState);
885 failoverProcessedRegions.put(encodedName, hri);
886 break;
887 }
888 if (regionState == null ||
889 (!regionState.isOffline() && !regionState.isPendingOpen() &&
890 !regionState.isOpening())) {
891 LOG.warn("Received OPENING for region " + prettyPrintedRegionName + " from server " +
892 sn + " but region was in " + " the state " + regionState + " and not " +
893 "in expected OFFLINE, PENDING_OPEN or OPENING states");
894 return;
895 }
896
897 regionState.update(RegionState.State.OPENING,
898 data.getStamp(), data.getOrigin());
899 break;
900
901 case RS_ZK_REGION_OPENED:
902 hri = checkIfInFailover(regionState, encodedName, data);
903 if (hri != null) {
904 regionState = new RegionState(hri, RegionState.State.OPEN, data
905 .getStamp(), data.getOrigin());
906 regionsInTransition.put(encodedName, regionState);
907 new OpenedRegionHandler(master, this, regionState.getRegion(), data
908 .getOrigin(), expectedVersion).process();
909 failoverProcessedRegions.put(encodedName, hri);
910 break;
911 }
912
913 if (regionState == null ||
914 (!regionState.isOffline() && !regionState.isPendingOpen() && !regionState.isOpening())) {
915 LOG.warn("Received OPENED for region " +
916 prettyPrintedRegionName +
917 " from server " + data.getOrigin() + " but region was in " +
918 " the state " + regionState + " and not " +
919 "in expected OFFLINE, PENDING_OPEN or OPENING states");
920 return;
921 }
922
923 regionState.update(RegionState.State.OPEN,
924 data.getStamp(), data.getOrigin());
925 this.executorService.submit(
926 new OpenedRegionHandler(master, this, regionState.getRegion(),
927 data.getOrigin(), expectedVersion));
928 break;
929 }
930 }
931 }
932
933
934
935
936
937
938
939
940
941 private HRegionInfo checkIfInFailover(RegionState regionState,
942 String encodedName, RegionTransitionData data) {
943 if (regionState == null && this.failover &&
944 (failoverProcessedRegions.containsKey(encodedName) == false ||
945 failoverProcessedRegions.get(encodedName) == null)) {
946 HRegionInfo hri = this.failoverProcessedRegions.get(encodedName);
947 if (hri == null) hri = getHRegionInfo(data);
948 return hri;
949 }
950 return null;
951 }
952
953
954
955
956
957
958 private HRegionInfo getHRegionInfo(RegionTransitionData data) {
959 Pair<HRegionInfo, ServerName> p = null;
960 try {
961 p = MetaReader.getRegion(catalogTracker, data.getRegionName());
962 if (p == null) return null;
963 return p.getFirst();
964 } catch (IOException e) {
965 master.abort("Aborting because error occoured while reading "
966 + data.getRegionName() + " from .META.", e);
967 return null;
968 }
969 }
970
971
972
973
974
975
976 private boolean isInStateForSplitting(final RegionState rs) {
977 if (rs == null) return true;
978 if (rs.isSplitting()) return true;
979 if (convertPendingCloseToSplitting(rs)) return true;
980 LOG.warn("Dropped region split! Not in state good for SPLITTING; rs=" + rs);
981 return false;
982 }
983
984
985
986
987
988
989
990
991
992
993 private boolean convertPendingCloseToSplitting(final RegionState rs) {
994 if (!rs.isPendingClose()) return false;
995 LOG.debug("Converting PENDING_CLOSE to SPLITING; rs=" + rs);
996 rs.update(RegionState.State.SPLITTING);
997
998
999 clearRegionPlan(rs.getRegion());
1000 return true;
1001 }
1002
1003
1004
1005
1006
1007
1008
1009 private RegionState addSplittingToRIT(final ServerName serverName,
1010 final String encodedName) {
1011 RegionState regionState = null;
1012 synchronized (this.regions) {
1013 regionState = findHRegionInfoThenAddToRIT(serverName, encodedName);
1014 if (regionState != null) {
1015 regionState.update(RegionState.State.SPLITTING,
1016 System.currentTimeMillis(), serverName);
1017 }
1018 }
1019 return regionState;
1020 }
1021
1022
1023
1024
1025
1026
1027
1028 private RegionState findHRegionInfoThenAddToRIT(final ServerName serverName,
1029 final String encodedName) {
1030 HRegionInfo hri = findHRegionInfo(serverName, encodedName);
1031 if (hri == null) {
1032 LOG.warn("Region " + encodedName + " not found on server " + serverName +
1033 "; failed processing");
1034 return null;
1035 }
1036
1037 return addToRegionsInTransition(hri);
1038 }
1039
1040
1041
1042
1043
1044
1045
1046 private HRegionInfo findHRegionInfo(final ServerName sn,
1047 final String encodedName) {
1048 if (!this.serverManager.isServerOnline(sn)) return null;
1049 Set<HRegionInfo> hris = this.servers.get(sn);
1050 HRegionInfo foundHri = null;
1051 for (HRegionInfo hri: hris) {
1052 if (hri.getEncodedName().equals(encodedName)) {
1053 foundHri = hri;
1054 break;
1055 }
1056 }
1057 return foundHri;
1058 }
1059
1060
1061
1062
1063
1064
1065
1066 private void handleHBCK(RegionTransitionData data) {
1067 String encodedName = HRegionInfo.encodeRegionName(data.getRegionName());
1068 LOG.info("Handling HBCK triggered transition=" + data.getEventType() +
1069 ", server=" + data.getOrigin() + ", region=" +
1070 HRegionInfo.prettyPrint(encodedName));
1071 RegionState regionState = regionsInTransition.get(encodedName);
1072 switch (data.getEventType()) {
1073 case M_ZK_REGION_OFFLINE:
1074 HRegionInfo regionInfo = null;
1075 if (regionState != null) {
1076 regionInfo = regionState.getRegion();
1077 } else {
1078 try {
1079 byte[] name = data.getRegionName();
1080 Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(catalogTracker, name);
1081 regionInfo = p.getFirst();
1082 } catch (IOException e) {
1083 LOG.info("Exception reading META doing HBCK repair operation", e);
1084 return;
1085 }
1086 }
1087 LOG.info("HBCK repair is triggering assignment of region=" +
1088 regionInfo.getRegionNameAsString());
1089
1090 assign(regionInfo, false);
1091 break;
1092
1093 default:
1094 LOG.warn("Received unexpected region state from HBCK (" +
1095 data.getEventType() + ")");
1096 break;
1097 }
1098 }
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114 @Override
1115 public void nodeCreated(String path) {
1116 if(path.startsWith(watcher.assignmentZNode)) {
1117 try {
1118 Stat stat = new Stat();
1119 RegionTransitionData data = ZKAssign.getDataAndWatch(watcher, path, stat);
1120 if (data == null) {
1121 return;
1122 }
1123 handleRegion(data, stat.getVersion());
1124 } catch (KeeperException e) {
1125 master.abort("Unexpected ZK exception reading unassigned node data", e);
1126 }
1127 }
1128 }
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142 @Override
1143 public void nodeDataChanged(String path) {
1144 if(path.startsWith(watcher.assignmentZNode)) {
1145 try {
1146 Stat stat = new Stat();
1147 RegionTransitionData data = ZKAssign.getDataAndWatch(watcher, path, stat);
1148 if (data == null) {
1149 return;
1150 }
1151 handleRegion(data, stat.getVersion());
1152 } catch (KeeperException e) {
1153 master.abort("Unexpected ZK exception reading unassigned node data", e);
1154 }
1155 }
1156 }
1157
1158 @Override
1159 public void nodeDeleted(final String path) {
1160 if (path.startsWith(this.watcher.assignmentZNode)) {
1161 String regionName = ZKAssign.getRegionName(this.master.getZooKeeper(), path);
1162 RegionState rs = this.regionsInTransition.get(regionName);
1163 if (rs != null) {
1164 HRegionInfo regionInfo = rs.getRegion();
1165 if (rs.isSplit()) {
1166 LOG.debug("Ephemeral node deleted, regionserver crashed?, offlining the region"
1167 + rs.getRegion() + " clearing from RIT;");
1168 regionOffline(rs.getRegion());
1169 } else if (rs.isSplitting()) {
1170 LOG.debug("Ephemeral node deleted. Found in SPLITTING state. " + "Removing from RIT "
1171 + rs.getRegion());
1172 synchronized(this.regionsInTransition) {
1173 this.regionsInTransition.remove(regionName);
1174 }
1175 } else {
1176 LOG.debug("The znode of region " + regionInfo.getRegionNameAsString()
1177 + " has been deleted.");
1178 if (rs.isOpened()) {
1179 makeRegionOnline(rs, regionInfo);
1180 }
1181 }
1182 }
1183 }
1184 }
1185
1186 private void makeRegionOnline(RegionState rs, HRegionInfo regionInfo) {
1187 regionOnline(regionInfo, rs.serverName);
1188 LOG.info("The master has opened the region "
1189 + regionInfo.getRegionNameAsString() + " that was online on "
1190 + rs.serverName);
1191 if (this.getZKTable().isDisablingOrDisabledTable(
1192 regionInfo.getTableNameAsString())) {
1193 LOG.debug("Opened region "
1194 + regionInfo.getRegionNameAsString() + " but "
1195 + "this table is disabled, triggering close of region");
1196 unassign(regionInfo);
1197 }
1198 }
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212 @Override
1213 public void nodeChildrenChanged(String path) {
1214 if(path.equals(watcher.assignmentZNode)) {
1215 try {
1216 List<String> children = ZKUtil.listChildrenAndWatchForNewChildren(watcher,
1217 watcher.assignmentZNode);
1218 if (children != null) {
1219 Stat stat = new Stat();
1220 for (String child : children) {
1221 stat.setVersion(0);
1222 RegionTransitionData data = ZKAssign.getDataAndWatch(watcher,
1223 ZKUtil.joinZNode(watcher.assignmentZNode, child), stat);
1224
1225 if (stat.getVersion() > 0 && data.getEventType() == EventType.RS_ZK_REGION_SPLITTING) {
1226 handleRegion(data, stat.getVersion());
1227 }
1228 }
1229 }
1230 } catch(KeeperException e) {
1231 master.abort("Unexpected ZK exception reading unassigned children", e);
1232 }
1233 }
1234 }
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244 void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1245 synchronized (this.regionsInTransition) {
1246 RegionState rs =
1247 this.regionsInTransition.remove(regionInfo.getEncodedName());
1248 if (rs != null) {
1249 this.regionsInTransition.notifyAll();
1250 }
1251 }
1252 synchronized (this.regions) {
1253
1254 ServerName oldSn = this.regions.get(regionInfo);
1255 if (oldSn != null) LOG.warn("Overwriting " + regionInfo.getEncodedName() +
1256 " on " + oldSn + " with " + sn);
1257
1258 if (isServerOnline(sn)) {
1259 this.regions.put(regionInfo, sn);
1260 addToServers(sn, regionInfo);
1261 this.regions.notifyAll();
1262 } else {
1263 LOG.info("The server is not in online servers, ServerName=" +
1264 sn.getServerName() + ", region=" + regionInfo.getEncodedName());
1265 }
1266 }
1267
1268 clearRegionPlan(regionInfo);
1269
1270 addToServersInUpdatingTimer(sn);
1271 }
1272
1273
1274
1275
1276
1277
1278 private void addToServersInUpdatingTimer(final ServerName sn) {
1279 this.serversInUpdatingTimer.add(sn);
1280 }
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295 private void updateTimers(final ServerName sn) {
1296
1297
1298
1299
1300 Map<String, RegionPlan> copy = new HashMap<String, RegionPlan>();
1301 synchronized(this.regionPlans) {
1302 copy.putAll(this.regionPlans);
1303 }
1304 for (Map.Entry<String, RegionPlan> e: copy.entrySet()) {
1305 if (e.getValue() == null || e.getValue().getDestination() == null) continue;
1306 if (!e.getValue().getDestination().equals(sn)) continue;
1307 RegionState rs = null;
1308 synchronized (this.regionsInTransition) {
1309 rs = this.regionsInTransition.get(e.getKey());
1310 }
1311 if (rs == null) continue;
1312 rs.updateTimestampToNow();
1313 }
1314 }
1315
1316
1317
1318
1319
1320
1321
1322
1323 public void regionOffline(final HRegionInfo regionInfo) {
1324
1325 clearRegionPlan(regionInfo);
1326 setOffline(regionInfo);
1327
1328 synchronized(this.regionsInTransition) {
1329 if (this.regionsInTransition.remove(regionInfo.getEncodedName()) != null) {
1330 this.regionsInTransition.notifyAll();
1331 }
1332 }
1333 }
1334
1335
1336
1337
1338
1339
1340
1341
1342 public void setOffline(HRegionInfo regionInfo) {
1343 synchronized (this.regions) {
1344 ServerName sn = this.regions.remove(regionInfo);
1345 if (sn == null) return;
1346 Set<HRegionInfo> serverRegions = this.servers.get(sn);
1347 if (!serverRegions.remove(regionInfo)) {
1348 LOG.warn("No " + regionInfo + " on " + sn);
1349 }
1350 }
1351 }
1352
1353 public void offlineDisabledRegion(HRegionInfo regionInfo) {
1354
1355 LOG.debug("Table being disabled so deleting ZK node and removing from " +
1356 "regions in transition, skipping assignment of region " +
1357 regionInfo.getRegionNameAsString());
1358 try {
1359 if (!ZKAssign.deleteClosedNode(watcher, regionInfo.getEncodedName())) {
1360
1361 ZKAssign.deleteOfflineNode(watcher, regionInfo.getEncodedName());
1362 }
1363 } catch (KeeperException.NoNodeException nne) {
1364 LOG.debug("Tried to delete closed node for " + regionInfo + " but it " +
1365 "does not exist so just offlining");
1366 } catch (KeeperException e) {
1367 this.master.abort("Error deleting CLOSED node in ZK", e);
1368 }
1369 regionOffline(regionInfo);
1370 }
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392 public void assign(HRegionInfo region, boolean setOfflineInZK) {
1393 assign(region, setOfflineInZK, false);
1394 }
1395
1396 public void assign(HRegionInfo region, boolean setOfflineInZK,
1397 boolean forceNewPlan) {
1398 assign(region, setOfflineInZK, forceNewPlan, false);
1399 }
1400
1401
1402
1403
1404
1405
1406
1407
1408 public void assign(HRegionInfo region, boolean setOfflineInZK,
1409 boolean forceNewPlan, boolean hijack) {
1410
1411
1412 if (!hijack && isDisabledorDisablingRegionInRIT(region)) {
1413 return;
1414 }
1415 if (this.serverManager.isClusterShutdown()) {
1416 LOG.info("Cluster shutdown is set; skipping assign of " +
1417 region.getRegionNameAsString());
1418 return;
1419 }
1420 if (isAssigningSplitParentRegion(region)) {
1421 return;
1422 }
1423 RegionState state = addToRegionsInTransition(region,
1424 hijack);
1425 synchronized (state) {
1426 assign(region, state, setOfflineInZK, forceNewPlan, hijack);
1427 }
1428 }
1429
1430
1431
1432
1433
1434
1435 void assign(final ServerName destination,
1436 final List<HRegionInfo> regions) {
1437 if (regions.size() == 0) {
1438 return;
1439 }
1440 LOG.debug("Bulk assigning " + regions.size() + " region(s) to " +
1441 destination.toString());
1442
1443 List<RegionState> states = new ArrayList<RegionState>(regions.size());
1444 synchronized (this.regionsInTransition) {
1445 for (HRegionInfo region: regions) {
1446 states.add(forceRegionStateToOffline(region));
1447 }
1448 }
1449
1450
1451 Map<String, RegionPlan> plans=new HashMap<String, RegionPlan>();
1452 for (HRegionInfo region : regions) {
1453 plans.put(region.getEncodedName(), new RegionPlan(region, null,
1454 destination));
1455 }
1456 this.addPlans(plans);
1457
1458
1459
1460 AtomicInteger counter = new AtomicInteger(0);
1461 CreateUnassignedAsyncCallback cb =
1462 new CreateUnassignedAsyncCallback(this.watcher, destination, counter);
1463 for (RegionState state: states) {
1464 if (!asyncSetOfflineInZooKeeper(state, cb, state)) {
1465 return;
1466 }
1467 }
1468
1469 int total = regions.size();
1470 for (int oldCounter = 0; true;) {
1471 int count = counter.get();
1472 if (oldCounter != count) {
1473 LOG.info(destination.toString() + " outstanding calls=" + count +
1474 " of total=" + total);
1475 oldCounter = count;
1476 }
1477 if (count == total) break;
1478 Threads.sleep(1);
1479 }
1480
1481 if (cb.hasErrors()) {
1482
1483
1484 LOG.error("Error creating nodes for some of the regions we are trying to bulk assign");
1485 return;
1486 }
1487
1488
1489 try {
1490
1491
1492 long maxWaitTime = System.currentTimeMillis() +
1493 this.master.getConfiguration().
1494 getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1495 while (!this.master.isStopped()) {
1496 try {
1497 this.serverManager.sendRegionOpen(destination, regions);
1498 break;
1499 } catch (RemoteException e) {
1500 IOException decodedException = e.unwrapRemoteException();
1501 if (decodedException instanceof RegionServerStoppedException) {
1502 LOG.warn("The region server was shut down, ", decodedException);
1503
1504 return;
1505 } else if (decodedException instanceof ServerNotRunningYetException) {
1506
1507
1508 long now = System.currentTimeMillis();
1509 if (now > maxWaitTime) throw e;
1510 LOG.debug("Server is not yet up; waiting up to " +
1511 (maxWaitTime - now) + "ms", e);
1512 Thread.sleep(1000);
1513 }
1514
1515 throw decodedException;
1516 }
1517 }
1518 } catch (IOException e) {
1519
1520 LOG.info("Unable to communicate with the region server in order" +
1521 " to assign regions", e);
1522 } catch (InterruptedException e) {
1523 throw new RuntimeException(e);
1524 }
1525 LOG.debug("Bulk assigning done for " + destination.toString());
1526 }
1527
1528
1529
1530
1531 static class CreateUnassignedAsyncCallback implements AsyncCallback.StringCallback {
1532 private final Log LOG = LogFactory.getLog(CreateUnassignedAsyncCallback.class);
1533 private final ZooKeeperWatcher zkw;
1534 private final ServerName destination;
1535 private final AtomicInteger counter;
1536 private final AtomicInteger errorCount = new AtomicInteger(0);
1537
1538 CreateUnassignedAsyncCallback(final ZooKeeperWatcher zkw,
1539 final ServerName destination, final AtomicInteger counter) {
1540 this.zkw = zkw;
1541 this.destination = destination;
1542 this.counter = counter;
1543 }
1544
1545 boolean hasErrors() {
1546 return this.errorCount.get() > 0;
1547 }
1548
1549 @Override
1550 public void processResult(int rc, String path, Object ctx, String name) {
1551 if (rc == KeeperException.Code.NODEEXISTS.intValue()) {
1552 LOG.warn("Node for " + path + " already exists");
1553 reportCompletion(false);
1554 return;
1555 }
1556 if (rc != 0) {
1557
1558 LOG.warn("rc != 0 for " + path + " -- some error, may be retryable connection loss -- "
1559 + "FIX see http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A2");
1560 this.zkw.abort("Some error, may be connection loss writing unassigned at " + path +
1561 ", rc=" + rc, null);
1562 return;
1563 }
1564 LOG.debug("rs=" + (RegionState)ctx + ", server=" + this.destination.toString());
1565
1566
1567 this.zkw.getRecoverableZooKeeper().getZooKeeper().exists(path, this.zkw,
1568 new ExistsUnassignedAsyncCallback(this, destination), ctx);
1569 }
1570
1571 void reportCompletion(boolean success) {
1572 if (!success) {
1573 this.errorCount.incrementAndGet();
1574 }
1575 this.counter.incrementAndGet();
1576 }
1577 }
1578
1579
1580
1581
1582
1583 static class ExistsUnassignedAsyncCallback implements AsyncCallback.StatCallback {
1584 private final Log LOG = LogFactory.getLog(ExistsUnassignedAsyncCallback.class);
1585 private ServerName destination;
1586 private CreateUnassignedAsyncCallback parent;
1587
1588 ExistsUnassignedAsyncCallback(
1589 CreateUnassignedAsyncCallback parent, ServerName destination) {
1590 this.parent = parent;
1591 this.destination = destination;
1592 }
1593
1594 @Override
1595 public void processResult(int rc, String path, Object ctx, Stat stat) {
1596 if (rc != 0) {
1597
1598 LOG.warn("rc != 0 for " + path + " -- some error, may be connection loss -- " +
1599 "FIX see http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A2");
1600 parent.reportCompletion(false);
1601 return;
1602 }
1603 RegionState state = (RegionState)ctx;
1604 LOG.debug("rs=" + state);
1605
1606
1607
1608
1609
1610 state.update(RegionState.State.PENDING_OPEN, System.currentTimeMillis(), destination);
1611 parent.reportCompletion(true);
1612 }
1613 }
1614
1615
1616
1617
1618
1619 private RegionState addToRegionsInTransition(final HRegionInfo region) {
1620 return addToRegionsInTransition(region, false);
1621 }
1622
1623
1624
1625
1626
1627 private RegionState addToRegionsInTransition(final HRegionInfo region,
1628 boolean hijack) {
1629 synchronized (regionsInTransition) {
1630 return forceRegionStateToOffline(region, hijack);
1631 }
1632 }
1633
1634
1635
1636
1637
1638
1639 private RegionState forceRegionStateToOffline(final HRegionInfo region) {
1640 return forceRegionStateToOffline(region, false);
1641 }
1642
1643
1644
1645
1646
1647
1648
1649
1650 private RegionState forceRegionStateToOffline(final HRegionInfo region,
1651 boolean hijack) {
1652 String encodedName = region.getEncodedName();
1653 RegionState state = this.regionsInTransition.get(encodedName);
1654 if (state == null) {
1655 state = new RegionState(region, RegionState.State.OFFLINE);
1656 this.regionsInTransition.put(encodedName, state);
1657 } else {
1658
1659
1660
1661
1662
1663
1664 if (!hijack) {
1665 LOG.debug("Forcing OFFLINE; was=" + state);
1666 state.update(RegionState.State.OFFLINE);
1667 }
1668 }
1669 return state;
1670 }
1671
1672
1673
1674
1675
1676
1677
1678
/**
 * Core single-region assignment loop; makes up to
 * {@code maximumAssignmentAttempts} attempts.
 * <p>
 * Each attempt optionally sets the region's znode OFFLINE, obtains a region
 * plan, and sends an open RPC to the plan's destination. Failures are
 * classified in the {@code catch} block to decide between retrying on the
 * same server, retrying elsewhere, or giving up.
 *
 * @param region the region to assign
 * @param state the region's in-transition state (the visible public caller
 *          holds its monitor while calling this method)
 * @param setOfflineInZK whether to create/force the OFFLINE znode before
 *          each attempt; when true and that step fails, the method returns
 * @param forceNewPlan whether to ask for a fresh plan; ignored on an attempt
 *          that follows a RegionAlreadyInTransitionException or
 *          ServerNotRunningYetException
 * @param hijack passed through to {@code setOfflineInZooKeeper}
 */
private void assign(final HRegionInfo region, final RegionState state,
    final boolean setOfflineInZK, final boolean forceNewPlan,
    boolean hijack) {
  // Flags describing why the previous attempt failed; they steer plan
  // selection on the next attempt.
  boolean regionAlreadyInTransitionException = false;
  boolean serverNotRunningYet = false;
  // Deadline for waiting on a starting region server; -1 until first needed.
  long maxRegionServerStartupWaitTime = -1;
  for (int i = 0; i < this.maximumAssignmentAttempts; i++) {
    int versionOfOfflineNode = -1;
    if (setOfflineInZK) {
      versionOfOfflineNode = setOfflineInZooKeeper(state, hijack,
          regionAlreadyInTransitionException);
      if(versionOfOfflineNode != -1){
        // Re-check the table state now that the znode is OFFLINE; the check
        // itself offlines the region when the table is disabled/disabling.
        if (isDisabledorDisablingRegionInRIT(region)) {
          return;
        }
        // The table is neither ENABLING nor ENABLED: mark it ENABLED.
        String tableName = region.getTableNameAsString();
        if (!zkTable.isEnablingTable(tableName) && !zkTable.isEnabledTable(tableName)) {
          LOG.debug("Setting table " + tableName + " to ENABLED state.");
          setEnabledTable(region);
        }
      }
    }

    // Could not put the znode into OFFLINE state: give up.
    if (setOfflineInZK && versionOfOfflineNode == -1) {
      return;
    }

    if (this.master.isStopped()) {
      LOG.debug("Server stopped; skipping assign of " + state);
      return;
    }
    // Only force a new plan when the last failure was not one of the
    // "retry on the same server" cases.
    RegionPlan plan = getRegionPlan(state, !regionAlreadyInTransitionException
        && !serverNotRunningYet && forceNewPlan);
    if (plan == null) {
      LOG.debug("Unable to determine a plan to assign " + state);
      this.timeoutMonitor.setAllRegionServersOffline(true);
      return;
    }
    try {
      LOG.debug("Assigning region " + state.getRegion().getRegionNameAsString() +
        " to " + plan.getDestination().toString());
      // Remember the stamp so a concurrent state change made while the RPC
      // is in flight can be detected below.
      long currentOfflineTimeStamp = state.getStamp();
      RegionOpeningState regionOpenState = serverManager.sendRegionOpen(plan.getDestination(),
          state.getRegion(), versionOfOfflineNode);
      if (regionOpenState == RegionOpeningState.OPENED) {
        // Still OFFLINE but with a different stamp: the state was updated
        // while the RPC was in flight, so leave it alone.
        if (state.isOffline() && currentOfflineTimeStamp != state.getStamp()) {
          return;
        }
        if (state.isOffline() && !state.isOpening()) {
          state.update(RegionState.State.PENDING_OPEN,
              System.currentTimeMillis(), plan.getDestination());
        }
        if (state.isOpening()) return;
        if (state.isOpened()) return;
      } else if (regionOpenState == RegionOpeningState.ALREADY_OPENED) {
        // The server reports the region as already open: remove the OFFLINE
        // znode and record the region as online on the plan's destination.
        LOG.debug("ALREADY_OPENED region " + state.getRegion().getRegionNameAsString() +
            " to " + plan.getDestination().toString());
        String encodedRegionName = state.getRegion()
            .getEncodedName();
        try {
          ZKAssign.deleteOfflineNode(master.getZooKeeper(), encodedRegionName);
        } catch (KeeperException.NoNodeException e) {
          if(LOG.isDebugEnabled()){
            LOG.debug("The unassigned node "+encodedRegionName+" doesnot exist.");
          }
        } catch (KeeperException e) {
          master.abort(
              "Error deleting OFFLINED node in ZK for transition ZK node ("
                  + encodedRegionName + ")", e);
        }
        synchronized (this.regionsInTransition) {
          this.regionsInTransition.remove(plan.getRegionInfo()
              .getEncodedName());
        }
        synchronized (this.regions) {
          this.regions.put(plan.getRegionInfo(), plan.getDestination());
          addToServers(plan.getDestination(), plan.getRegionInfo());
        }
      }
      // RPC went through; no more attempts.
      break;
    } catch (Throwable t) {
      if (t instanceof RemoteException) {
        t = ((RemoteException) t).unwrapRemoteException();
      }
      // Reset the flags; they are set again below according to this failure.
      regionAlreadyInTransitionException = false;
      serverNotRunningYet = false;
      if (t instanceof RegionAlreadyInTransitionException) {
        regionAlreadyInTransitionException = true;
        if (LOG.isDebugEnabled()) {
          LOG.debug("Failed assignment in: " + plan.getDestination() + " due to "
              + t.getMessage());
        }
      } else if (t instanceof ServerNotRunningYetException) {
        // The startup deadline is computed once, on the first such failure.
        if (maxRegionServerStartupWaitTime < 0) {
          maxRegionServerStartupWaitTime = System.currentTimeMillis()
              + this.master.getConfiguration().getLong("hbase.regionserver.rpc.startup.waittime",
                  60000);
        }
        try {
          long now = System.currentTimeMillis();
          if (now < maxRegionServerStartupWaitTime) {
            LOG.debug("Server is not yet up; waiting up to "
                + (maxRegionServerStartupWaitTime - now) + "ms", t);
            serverNotRunningYet = true;
            Thread.sleep(100);
            // Undo the loop increment so waiting for the server to start
            // does not use up an assignment attempt.
            i--;
          } else {
            LOG.debug("Server is not up for a while; try a new one", t);
          }
        } catch (InterruptedException ie) {
          LOG.warn("Failed to assign " + state.getRegion().getRegionNameAsString()
              + " since interrupted", ie);
          Thread.currentThread().interrupt();
          return;
        }
      } else if (t instanceof java.net.SocketTimeoutException
          && this.serverManager.isServerOnline(plan.getDestination())) {
        // The RPC timed out but the destination is still online: stop here
        // rather than retrying, since the open may have gone through.
        LOG.warn("Call openRegion() to " + plan.getDestination()
            + " has timed out when trying to assign "
            + region.getRegionNameAsString()
            + ", but the region might already be opened on "
            + plan.getDestination() + ".", t);
        return;
      }
      LOG.warn("Failed assignment of "
          + state.getRegion().getRegionNameAsString()
          + " to "
          + plan.getDestination()
          + ", trying to assign "
          + (regionAlreadyInTransitionException || serverNotRunningYet
              ? "to the same region server because of "
              + "RegionAlreadyInTransitionException/ServerNotRunningYetException;"
              : "elsewhere instead; ")
          + "retry=" + i, t);
      // Put the state back to OFFLINE before the next attempt.
      state.update(RegionState.State.OFFLINE);
      // For a failure that is not retried on the same server, force a new
      // plan now. newPlan is only null-checked here; the next iteration
      // obtains its plan again through getRegionPlan at the top of the loop.
      RegionPlan newPlan = plan;
      if (!regionAlreadyInTransitionException && !serverNotRunningYet) {
        newPlan = getRegionPlan(state, true);
      }
      if (newPlan == null) {
        this.timeoutMonitor.setAllRegionServersOffline(true);
        LOG.warn("Unable to find a viable location to assign region " +
          state.getRegion().getRegionNameAsString());
        return;
      }
    }
  }
}
1851
1852 private static boolean isAssigningSplitParentRegion(final HRegionInfo region) {
1853 if (region.isSplitParent()) {
1854 LOG.info("Skipping assign of " + region.getRegionNameAsString()
1855 + ", already split, or still splitting");
1856 return true;
1857 }
1858 return false;
1859 }
1860
1861 private boolean isDisabledorDisablingRegionInRIT(final HRegionInfo region) {
1862 String tableName = region.getTableNameAsString();
1863 boolean disabled = this.zkTable.isDisabledTable(tableName);
1864 if (disabled || this.zkTable.isDisablingTable(tableName)) {
1865 LOG.info("Table " + tableName + (disabled ? " disabled;" : " disabling;") +
1866 " skipping assign of " + region.getRegionNameAsString());
1867 offlineDisabledRegion(region);
1868 return true;
1869 }
1870 return false;
1871 }
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884 int setOfflineInZooKeeper(final RegionState state, boolean hijack,
1885 boolean regionAlreadyInTransitionException) {
1886
1887
1888 if (!hijack && !state.isClosed() && !state.isOffline()) {
1889 if (!regionAlreadyInTransitionException ) {
1890 LOG.warn("Unexpected state : " + state + " .. Cannot transit it to OFFLINE.");
1891 return -1;
1892 }
1893 LOG.debug("Unexpected state : " + state
1894 + " but retrying to assign because RegionAlreadyInTransitionException.");
1895 }
1896 boolean allowZNodeCreation = false;
1897
1898
1899
1900
1901
1902
1903
1904 if (hijack &&
1905 (state.getState().equals(RegionState.State.PENDING_OPEN) ||
1906 state.getState().equals(RegionState.State.OPENING))) {
1907 state.update(RegionState.State.PENDING_OPEN);
1908 allowZNodeCreation = false;
1909 } else {
1910 state.update(RegionState.State.OFFLINE);
1911 allowZNodeCreation = true;
1912 }
1913 int versionOfOfflineNode = -1;
1914 try {
1915
1916 versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(master.getZooKeeper(),
1917 state.getRegion(), this.master.getServerName(),
1918 hijack, allowZNodeCreation);
1919 if (versionOfOfflineNode == -1) {
1920 LOG.warn("Attempted to create/force node into OFFLINE state before "
1921 + "completing assignment but failed to do so for " + state);
1922 return -1;
1923 }
1924 } catch (KeeperException e) {
1925 master.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
1926 return -1;
1927 }
1928 return versionOfOfflineNode;
1929 }
1930
1931
1932
1933
1934
1935
1936
1937 boolean asyncSetOfflineInZooKeeper(final RegionState state,
1938 final AsyncCallback.StringCallback cb, final Object ctx) {
1939 if (!state.isClosed() && !state.isOffline()) {
1940 new RuntimeException("Unexpected state trying to OFFLINE; " + state);
1941 this.master.abort("Unexpected state trying to OFFLINE; " + state,
1942 new IllegalStateException());
1943 return false;
1944 }
1945 state.update(RegionState.State.OFFLINE);
1946 try {
1947 ZKAssign.asyncCreateNodeOffline(master.getZooKeeper(), state.getRegion(),
1948 this.master.getServerName(), cb, ctx);
1949 } catch (KeeperException e) {
1950
1951 if (e instanceof NodeExistsException) {
1952 LOG.warn("Node for " + state.getRegion() + " already exists");
1953 } else {
1954 master.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
1955 }
1956 return false;
1957 }
1958 return true;
1959 }
1960
1961
1962
1963
1964
1965
1966 RegionPlan getRegionPlan(final RegionState state,
1967 final boolean forceNewPlan) {
1968 return getRegionPlan(state, null, forceNewPlan);
1969 }
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980 RegionPlan getRegionPlan(final RegionState state,
1981 final ServerName serverToExclude, final boolean forceNewPlan) {
1982
1983 final String encodedName = state.getRegion().getEncodedName();
1984 final List<ServerName> servers = this.serverManager.getOnlineServersList();
1985 final List<ServerName> drainingServers = this.serverManager.getDrainingServersList();
1986
1987
1988 if (serverToExclude != null) servers.remove(serverToExclude);
1989
1990
1991
1992 if (!drainingServers.isEmpty()) {
1993 for (final ServerName server: drainingServers) {
1994 LOG.debug("Removing draining server: " + server +
1995 " from eligible server pool.");
1996 servers.remove(server);
1997 }
1998 }
1999
2000
2001 removeDeadNotExpiredServers(servers);
2002
2003
2004
2005 if (servers.isEmpty()) return null;
2006
2007 RegionPlan randomPlan = null;
2008 boolean newPlan = false;
2009 RegionPlan existingPlan = null;
2010
2011 synchronized (this.regionPlans) {
2012 existingPlan = this.regionPlans.get(encodedName);
2013
2014 if (existingPlan != null && existingPlan.getDestination() != null) {
2015 LOG.debug("Found an existing plan for " +
2016 state.getRegion().getRegionNameAsString() +
2017 " destination server is " + existingPlan.getDestination().toString());
2018 }
2019
2020 if (forceNewPlan
2021 || existingPlan == null
2022 || existingPlan.getDestination() == null
2023 || drainingServers.contains(existingPlan.getDestination())) {
2024 newPlan = true;
2025 randomPlan = new RegionPlan(state.getRegion(), null, balancer
2026 .randomAssignment(servers));
2027 this.regionPlans.put(encodedName, randomPlan);
2028 }
2029 }
2030
2031 if (newPlan) {
2032 LOG.debug("No previous transition plan was found (or we are ignoring " +
2033 "an existing plan) for " + state.getRegion().getRegionNameAsString() +
2034 " so generated a random one; " + randomPlan + "; " +
2035 serverManager.countOfRegionServers() +
2036 " (online=" + serverManager.getOnlineServers().size() +
2037 ", available=" + servers.size() + ") available servers");
2038 return randomPlan;
2039 }
2040 LOG.debug("Using pre-existing plan for region " +
2041 state.getRegion().getRegionNameAsString() + "; plan=" + existingPlan);
2042 return existingPlan;
2043 }
2044
2045
2046
2047
2048
2049
2050 public void removeDeadNotExpiredServers(List<ServerName> servers) {
2051 Set<ServerName> deadNotExpiredServers = this.serverManager
2052 .getDeadNotExpiredServers();
2053 if (!deadNotExpiredServers.isEmpty()) {
2054 for (ServerName server : deadNotExpiredServers) {
2055 LOG.debug("Removing dead but not expired server: " + server
2056 + " from eligible server pool.");
2057 servers.remove(server);
2058 }
2059 }
2060 }
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070 public void unassign(List<HRegionInfo> regions) {
2071 int waitTime = this.master.getConfiguration().getInt(
2072 "hbase.bulk.waitbetween.reopen", 0);
2073 for (HRegionInfo region : regions) {
2074 if (isRegionInTransition(region) != null)
2075 continue;
2076 unassign(region, false);
2077 while (isRegionInTransition(region) != null) {
2078 try {
2079 Thread.sleep(10);
2080 } catch (InterruptedException e) {
2081
2082 }
2083 }
2084 if (waitTime > 0)
2085 try {
2086 Thread.sleep(waitTime);
2087 } catch (InterruptedException e) {
2088
2089 }
2090 }
2091 }
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106 public void unassign(HRegionInfo region) {
2107 unassign(region, false);
2108 }
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
/**
 * Unassigns a region: creates the CLOSING znode, tracks the region as
 * PENDING_CLOSE in regions-in-transition, and sends a close RPC to the
 * server currently recorded as hosting it.
 * <p>
 * Nothing is done if the region is not recorded in {@code regions}. If the
 * region is already in transition, the call is a no-op unless {@code force}
 * is true and the state is PENDING_CLOSE or CLOSING, in which case the close
 * RPC is sent again. No retry is attempted here when the RPC fails; the
 * failure is only logged (with extra cleanup for disabling/disabled tables).
 *
 * @param region the region to unassign
 * @param force whether to resend the close RPC for a region that is already
 *          PENDING_CLOSE or CLOSING
 */
public void unassign(HRegionInfo region, boolean force) {
  LOG.debug("Starting unassignment of region " +
    region.getRegionNameAsString() + " (offlining)");

  synchronized (this.regions) {
    // Nothing to unassign if the region is not recorded as online.
    if (!regions.containsKey(region)) {
      LOG.debug("Attempted to unassign region " +
        region.getRegionNameAsString() + " but it is not " +
        "currently assigned anywhere");
      return;
    }
  }
  String encodedName = region.getEncodedName();
  RegionState state;
  int versionOfClosingNode = -1;
  synchronized (regionsInTransition) {
    state = regionsInTransition.get(encodedName);
    if (state == null) {
      // Not in transition yet: create the CLOSING znode first.
      try {
        versionOfClosingNode = ZKAssign.createNodeClosing(
          master.getZooKeeper(), region, master.getServerName());
        if (versionOfClosingNode == -1) {
          LOG.debug("Attempting to unassign region " +
              region.getRegionNameAsString() + " but ZK closing node "
              + "can't be created.");
          return;
        }
      } catch (KeeperException e) {
        if (e instanceof NodeExistsException) {
          // A znode already exists for this region. If it is a SPLIT or
          // SPLITTING node (or has vanished meanwhile), skip the unassign.
          NodeExistsException nee = (NodeExistsException)e;
          String path = nee.getPath();
          try {
            if (isSplitOrSplitting(path)) {
              LOG.debug(path + " is SPLIT or SPLITTING; " +
                "skipping unassign because region no longer exists -- its split");
              return;
            }
          } catch (KeeperException.NoNodeException ke) {
            LOG.warn("Failed getData on SPLITTING/SPLIT at " + path +
              "; presuming split and that the region to unassign, " +
              encodedName + ", no longer exists -- confirm", ke);
            return;
          } catch (KeeperException ke) {
            LOG.error("Unexpected zk state", ke);
            // NOTE(review): this only reassigns the local catch variable and
            // has no effect afterwards; the abort below reports e.
            ke = e;
          }
        }
        // Any other ZK failure, or an existing node that is not a split
        // node, aborts the master.
        master.abort("Unexpected ZK exception creating node CLOSING", e);
        return;
      }
      state = new RegionState(region, RegionState.State.PENDING_CLOSE);
      regionsInTransition.put(encodedName, state);
    } else if (force && (state.isPendingClose() || state.isClosing())) {
      LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() +
        " which is already " + state.getState() +
        " but forcing to send a CLOSE RPC again ");
      // Re-apply the same state; the log message further below describes
      // the same call as updating the timestamp.
      state.update(state.getState());
    } else {
      LOG.debug("Attempting to unassign region " +
        region.getRegionNameAsString() + " but it is " +
        "already in transition (" + state.getState() + ", force=" + force + ")");
      return;
    }
  }

  ServerName server = null;
  synchronized (this.regions) {
    server = regions.get(region);
  }

  if (server == null) {
    // The region is no longer recorded on any server. Drop a
    // PENDING_CLOSE/CLOSING entry from regions-in-transition and delete the
    // znode.
    synchronized (regionsInTransition) {
      state = regionsInTransition.get(encodedName);
      if (state != null) {
        State presentState = state.getState();
        if (presentState == State.PENDING_CLOSE
            || presentState == State.CLOSING) {
          this.regionsInTransition.remove(encodedName);
        }
      }
    }
    deleteClosingOrClosedNode(region);
    return;
  }
  try {
    if (serverManager.sendRegionClose(server, state.getRegion(),
      versionOfClosingNode)) {
      LOG.debug("Sent CLOSE to " + server + " for region " +
        region.getRegionNameAsString());
      return;
    }
    // The RPC completed but returned false; only logged.
    LOG.warn("Server " + server + " region CLOSE RPC returned false for " +
      region.getRegionNameAsString());
  } catch (NotServingRegionException nsre) {
    LOG.info("Server " + server + " returned " + nsre + " for " +
      region.getRegionNameAsString());
  } catch (Throwable t) {
    if (t instanceof RemoteException) {
      t = ((RemoteException)t).unwrapRemoteException();
      if (t instanceof NotServingRegionException) {
        if (checkIfRegionBelongsToDisabling(region)
            || checkIfRegionBelongsToDisabled(region)) {
          // The server is not serving the region and its table is
          // disabling/disabled: remove all in-memory and ZK traces of it.
          LOG.info("While trying to recover the table "
              + region.getTableNameAsString()
              + " to DISABLED state the region " + region
              + " was offlined but the table was in DISABLING state");
          synchronized (this.regionsInTransition) {
            this.regionsInTransition.remove(region.getEncodedName());
          }
          synchronized (this.regions) {
            ServerName sn = this.regions.remove(region);
            if (sn != null) {
              Set<HRegionInfo> serverRegions = this.servers.get(sn);
              if (serverRegions == null || !serverRegions.remove(region)) {
                LOG.warn("No " + region + " on " + sn);
              }
            }
          }
          deleteClosingOrClosedNode(region);
        }
      }
      // Only reached for exceptions that arrived wrapped in a
      // RemoteException.
      if (t instanceof RegionAlreadyInTransitionException) {
        LOG.debug("update " + state + " the timestamp.");
        state.update(state.getState());
      }
    }
    LOG.info("Server " + server + " returned " + t + " for " +
      region.getEncodedName());
  }
}
2276
2277
2278
2279
2280
2281 public void deleteClosingOrClosedNode(HRegionInfo region) {
2282 try {
2283 if (!ZKAssign.deleteNode(master.getZooKeeper(), region.getEncodedName(),
2284 EventHandler.EventType.M_ZK_REGION_CLOSING)) {
2285 boolean deleteNode = ZKAssign.deleteNode(master.getZooKeeper(), region
2286 .getEncodedName(), EventHandler.EventType.RS_ZK_REGION_CLOSED);
2287
2288
2289 if (!deleteNode) {
2290 LOG.error("The deletion of the CLOSED node for the region "
2291 + region.getEncodedName() + " returned " + deleteNode);
2292 }
2293 }
2294 } catch (NoNodeException e) {
2295 LOG.debug("CLOSING/CLOSED node for the region " + region.getEncodedName()
2296 + " already deleted");
2297 } catch (KeeperException ke) {
2298 master.abort(
2299 "Unexpected ZK exception deleting node CLOSING/CLOSED for the region "
2300 + region.getEncodedName(), ke);
2301 return;
2302 }
2303 }
2304
2305
2306
2307
2308
2309
2310 private boolean isSplitOrSplitting(final String path) throws KeeperException {
2311 boolean result = false;
2312
2313
2314 RegionTransitionData data = ZKAssign.getData(master.getZooKeeper(), path);
2315 EventType evt = data.getEventType();
2316 switch (evt) {
2317 case RS_ZK_REGION_SPLIT:
2318 case RS_ZK_REGION_SPLITTING:
2319 result = true;
2320 break;
2321 default:
2322 break;
2323 }
2324 return result;
2325 }
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335 public void waitForAssignment(HRegionInfo regionInfo)
2336 throws InterruptedException {
2337 synchronized(regions) {
2338 while (!this.master.isStopped() && !regions.containsKey(regionInfo)) {
2339
2340
2341
2342 regions.wait(100);
2343 }
2344 }
2345 }
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357 public void assignRoot() throws KeeperException {
2358 RootLocationEditor.deleteRootLocation(this.master.getZooKeeper());
2359 assign(HRegionInfo.ROOT_REGIONINFO, true);
2360 }
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370 public void assignMeta() {
2371
2372 assign(HRegionInfo.FIRST_META_REGIONINFO, true);
2373 }
2374
2375
2376
2377
2378
2379
2380
2381
2382 public void assignUserRegionsToOnlineServers(List<HRegionInfo> regions)
2383 throws IOException,
2384 InterruptedException {
2385 List<ServerName> servers = this.serverManager.getOnlineServersList();
2386 removeDeadNotExpiredServers(servers);
2387 assignUserRegions(regions, servers);
2388 }
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398 public void assignUserRegions(List<HRegionInfo> regions, List<ServerName> servers)
2399 throws IOException, InterruptedException {
2400 if (regions == null)
2401 return;
2402 Map<ServerName, List<HRegionInfo>> bulkPlan = null;
2403
2404 bulkPlan = balancer.roundRobinAssignment(regions, servers);
2405 LOG.info("Bulk assigning " + regions.size() + " region(s) round-robin across " +
2406 servers.size() + " server(s)");
2407
2408 BulkAssigner ba = new StartupBulkAssigner(this.master, bulkPlan, this);
2409 ba.bulkAssign();
2410 LOG.info("Bulk assigning done");
2411 }
2412
2413 private void setEnabledTable(HRegionInfo hri) {
2414 String tableName = hri.getTableNameAsString();
2415 boolean isTableEnabled = this.zkTable.isEnabledTable(tableName);
2416 if (!isTableEnabled) {
2417 setEnabledTable(tableName);
2418 }
2419 }
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430 public void assignAllUserRegions() throws IOException, InterruptedException {
2431
2432
2433
2434 Set<String> disablingDisabledAndEnablingTables = new HashSet<String>(this.disablingTables);
2435 disablingDisabledAndEnablingTables.addAll(this.zkTable.getDisabledTables());
2436 disablingDisabledAndEnablingTables.addAll(this.enablingTables.keySet());
2437
2438 Map<HRegionInfo, ServerName> allRegions = MetaReader.fullScan(catalogTracker,
2439 disablingDisabledAndEnablingTables, true);
2440 if (allRegions == null || allRegions.isEmpty()) return;
2441
2442
2443 List<ServerName> servers = serverManager.getOnlineServersList();
2444
2445
2446 removeDeadNotExpiredServers(servers);
2447
2448
2449 if(servers.isEmpty()) return;
2450
2451
2452 boolean retainAssignment = master.getConfiguration().
2453 getBoolean("hbase.master.startup.retainassign", true);
2454
2455 Map<ServerName, List<HRegionInfo>> bulkPlan = null;
2456 if (retainAssignment) {
2457
2458 bulkPlan = balancer.retainAssignment(allRegions, servers);
2459 } else {
2460
2461 assignUserRegions(new ArrayList<HRegionInfo>(allRegions.keySet()), servers);
2462 for (HRegionInfo hri : allRegions.keySet()) {
2463 setEnabledTable(hri);
2464 }
2465 return;
2466 }
2467 LOG.info("Bulk assigning " + allRegions.size() + " region(s) across " +
2468 servers.size() + " server(s), retainAssignment=" + retainAssignment);
2469
2470
2471 BulkAssigner ba = new StartupBulkAssigner(this.master, bulkPlan, this);
2472 ba.bulkAssign();
2473 for (HRegionInfo hri : allRegions.keySet()) {
2474 setEnabledTable(hri);
2475 }
2476 LOG.info("Bulk assigning done");
2477 }
2478
2479
2480
2481
2482
2483
2484
2485 static class StartupBulkAssigner extends BulkAssigner {
2486 final Map<ServerName, List<HRegionInfo>> bulkPlan;
2487 final AssignmentManager assignmentManager;
2488
2489 StartupBulkAssigner(final Server server,
2490 final Map<ServerName, List<HRegionInfo>> bulkPlan,
2491 final AssignmentManager am) {
2492 super(server);
2493 this.bulkPlan = bulkPlan;
2494 this.assignmentManager = am;
2495 }
2496
2497 @Override
2498 public boolean bulkAssign(boolean sync) throws InterruptedException,
2499 IOException {
2500
2501 this.assignmentManager.timeoutMonitor.bulkAssign(true);
2502 try {
2503 return super.bulkAssign(sync);
2504 } finally {
2505
2506 this.assignmentManager.timeoutMonitor.bulkAssign(false);
2507 }
2508 }
2509
2510 @Override
2511 protected String getThreadNamePrefix() {
2512 return this.server.getServerName() + "-StartupBulkAssigner";
2513 }
2514
2515 @Override
2516 protected void populatePool(java.util.concurrent.ExecutorService pool) {
2517 for (Map.Entry<ServerName, List<HRegionInfo>> e: this.bulkPlan.entrySet()) {
2518 pool.execute(new SingleServerBulkAssigner(e.getKey(), e.getValue(),
2519 this.assignmentManager));
2520 }
2521 }
2522
2523 protected boolean waitUntilDone(final long timeout)
2524 throws InterruptedException {
2525 Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
2526 for (List<HRegionInfo> regionList : bulkPlan.values()) {
2527 regionSet.addAll(regionList);
2528 }
2529 return this.assignmentManager.waitUntilNoRegionsInTransition(timeout, regionSet);
2530 }
2531
2532 @Override
2533 protected long getTimeoutOnRIT() {
2534
2535
2536 long perRegionOpenTimeGuesstimate =
2537 this.server.getConfiguration().getLong("hbase.bulk.assignment.perregion.open.time", 1000);
2538 int regionsPerServer =
2539 this.bulkPlan.entrySet().iterator().next().getValue().size();
2540 long timeout = perRegionOpenTimeGuesstimate * regionsPerServer;
2541 LOG.debug("Timeout-on-RIT=" + timeout);
2542 return timeout;
2543 }
2544 }
2545
2546
2547
2548
2549 static class SingleServerBulkAssigner implements Runnable {
2550 private final ServerName regionserver;
2551 private final List<HRegionInfo> regions;
2552 private final AssignmentManager assignmentManager;
2553
2554 SingleServerBulkAssigner(final ServerName regionserver,
2555 final List<HRegionInfo> regions, final AssignmentManager am) {
2556 for (Iterator<HRegionInfo> it = regions.iterator(); it.hasNext(); ) {
2557 if (isAssigningSplitParentRegion(it.next())) {
2558 it.remove();
2559 }
2560 }
2561 this.regionserver = regionserver;
2562 this.regions = regions;
2563 this.assignmentManager = am;
2564 }
2565 @Override
2566 public void run() {
2567 this.assignmentManager.assign(this.regionserver, this.regions);
2568 }
2569 }
2570
2571
2572
2573
2574
2575
2576
2577 boolean waitUntilNoRegionsInTransition(final long timeout)
2578 throws InterruptedException {
2579
2580
2581
2582
2583
2584
2585 long startTime = System.currentTimeMillis();
2586 long remaining = timeout;
2587 synchronized (regionsInTransition) {
2588 while (regionsInTransition.size() > 0 && !this.master.isStopped()
2589 && remaining > 0) {
2590 regionsInTransition.wait(remaining);
2591 remaining = timeout - (System.currentTimeMillis() - startTime);
2592 }
2593 }
2594 return regionsInTransition.isEmpty();
2595 }
2596
2597
2598
2599
2600
2601
2602
2603
2604 boolean waitUntilNoRegionsInTransition(final long timeout, Set<HRegionInfo> regions)
2605 throws InterruptedException {
2606
2607 long startTime = System.currentTimeMillis();
2608 long remaining = timeout;
2609 boolean stillInTransition = true;
2610 synchronized (regionsInTransition) {
2611 while (regionsInTransition.size() > 0 && !this.master.isStopped() &&
2612 remaining > 0 && stillInTransition) {
2613 int count = 0;
2614 for (RegionState rs : regionsInTransition.values()) {
2615 if (regions.contains(rs.getRegion())) {
2616 count++;
2617 break;
2618 }
2619 }
2620 if (count == 0) {
2621 stillInTransition = false;
2622 break;
2623 }
2624 regionsInTransition.wait(remaining);
2625 remaining = timeout - (System.currentTimeMillis() - startTime);
2626 }
2627 }
2628 return stillInTransition;
2629 }
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
/**
 * Rebuilds in-memory assignment state from a full scan of META.
 * <p>
 * For every catalog row that parses to a region:
 * <ul>
 * <li>no recorded location: the region is tracked under its enabling table,
 *     or a warning is logged if the table is not enabling;</li>
 * <li>recorded location is not an online server: the region and its catalog
 *     row are collected under that server in the returned map;</li>
 * <li>recorded location is an online server: the region is added to
 *     {@code regions} and {@code servers} unless its table is enabling or
 *     disabled.</li>
 * </ul>
 * Along the way, tables found in a disabling or enabling state are recorded
 * through {@code addTheTablesInPartialState}, and tables that are neither
 * disabled nor in such a partial state are marked enabled.
 *
 * @return map from each not-online server found in META to the regions (with
 *         their catalog rows) recorded as located on it
 * @throws IOException propagated from the META scan or row parsing
 * @throws KeeperException propagated from ZooKeeper reads
 */
Map<ServerName, List<Pair<HRegionInfo, Result>>> rebuildUserRegions() throws IOException,
    KeeperException {
  // Region assignment as recorded in META.
  List<Result> results = MetaReader.fullScan(this.catalogTracker);
  // Servers the server manager currently reports as online.
  Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
  // Regions whose recorded server is not online, grouped by that server.
  Map<ServerName, List<Pair<HRegionInfo,Result>>> offlineServers =
    new TreeMap<ServerName, List<Pair<HRegionInfo, Result>>>();
  // Walk every catalog row.
  for (Result result : results) {
    boolean disabled = false;
    boolean disablingOrEnabling = false;
    Pair<HRegionInfo, ServerName> region = MetaReader.parseCatalogResult(result);
    if (region == null) continue;
    HRegionInfo regionInfo = region.getFirst();
    ServerName regionLocation = region.getSecond();
    if (regionInfo == null) continue;
    String tableName = regionInfo.getTableNameAsString();
    if (regionLocation == null) {
      // No location recorded for this region. Read the enabling state before
      // recording partial table state; only regions of an enabling table are
      // tracked here, anything else is just logged.
      boolean enabling = checkIfRegionsBelongsToEnabling(regionInfo);
      addTheTablesInPartialState(regionInfo);
      if (enabling) {
        addToEnablingTableRegions(regionInfo);
      } else {
        LOG.warn("Region " + regionInfo.getEncodedName() + " has null regionLocation."
            + " But its table " + tableName + " isn't in ENABLING state.");
      }
    } else if (!onlineServers.contains(regionLocation)) {
      // Recorded server is not online: collect the region under that server.
      List<Pair<HRegionInfo, Result>> offlineRegions =
        offlineServers.get(regionLocation);
      if (offlineRegions == null) {
        offlineRegions = new ArrayList<Pair<HRegionInfo,Result>>(1);
        offlineServers.put(regionLocation, offlineRegions);
      }
      offlineRegions.add(new Pair<HRegionInfo,Result>(regionInfo, result));
      disabled = checkIfRegionBelongsToDisabled(regionInfo);
      disablingOrEnabling = addTheTablesInPartialState(regionInfo);
      // Mark the table enabled when it is neither disabled nor in a
      // disabling/enabling state.
      enableTableIfNotDisabledOrDisablingOrEnabling(disabled,
          disablingOrEnabling, tableName);
    } else {
      // Recorded server is online.
      if (regionInfo.isOffline() && regionInfo.isSplit()) {
        // Offline split region: look for data under its unassigned znode.
        String node = ZKAssign.getNodeName(this.watcher, regionInfo
            .getEncodedName());
        Stat stat = new Stat();
        byte[] data = ZKUtil.getDataNoWatch(this.watcher, node, stat);
        // No znode data: treat the split as finished and skip the region.
        if (data == null) {
          LOG.debug("Region "+ regionInfo.getRegionNameAsString() + " split is completed. "
              + "Hence need not add to regions list");
          continue;
        }
      }
      // Only regions of tables that are neither enabling nor disabled are
      // added to the in-memory assignment maps.
      boolean enabling = checkIfRegionsBelongsToEnabling(regionInfo);
      disabled = checkIfRegionBelongsToDisabled(regionInfo);
      if (!enabling && !disabled) {
        synchronized (this.regions) {
          regions.put(regionInfo, regionLocation);
          addToServers(regionLocation, regionInfo);
        }
      }
      disablingOrEnabling = addTheTablesInPartialState(regionInfo);
      if (enabling) {
        addToEnablingTableRegions(regionInfo);
      }
      // Mark the table enabled when it is neither disabled nor in a
      // disabling/enabling state.
      enableTableIfNotDisabledOrDisablingOrEnabling(disabled,
          disablingOrEnabling, tableName);
    }
  }
  return offlineServers;
}
2729
2730 private void addToEnablingTableRegions(HRegionInfo regionInfo) {
2731 String tableName = regionInfo.getTableNameAsString();
2732 List<HRegionInfo> hris = this.enablingTables.get(tableName);
2733 if (!hris.contains(regionInfo)) {
2734 if (LOG.isDebugEnabled()) {
2735 LOG.debug("Adding region" + regionInfo.getRegionNameAsString()
2736 + " to enabling table " + tableName + ".");
2737 }
2738 hris.add(regionInfo);
2739 }
2740 }
2741
2742 private void enableTableIfNotDisabledOrDisablingOrEnabling(boolean disabled,
2743 boolean disablingOrEnabling, String tableName) {
2744 if (!disabled && !disablingOrEnabling
2745 && !getZKTable().isEnabledTable(tableName)) {
2746 setEnabledTable(tableName);
2747 }
2748 }
2749
2750 private Boolean addTheTablesInPartialState(HRegionInfo regionInfo) {
2751 String tableName = regionInfo.getTableNameAsString();
2752 if (checkIfRegionBelongsToDisabling(regionInfo)) {
2753 this.disablingTables.add(tableName);
2754 return true;
2755 } else if (checkIfRegionsBelongsToEnabling(regionInfo)) {
2756 if (!this.enablingTables.containsKey(tableName)) {
2757 this.enablingTables.put(tableName, new ArrayList<HRegionInfo>());
2758 }
2759 return true;
2760 }
2761 return false;
2762 }
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774 private boolean recoverTableInDisablingState(Set<String> disablingTables)
2775 throws KeeperException, TableNotFoundException, IOException {
2776 boolean isWatcherCreated = false;
2777 if (disablingTables.size() != 0) {
2778
2779 ZKUtil.listChildrenAndWatchForNewChildren(watcher,
2780 watcher.assignmentZNode);
2781 isWatcherCreated = true;
2782 for (String tableName : disablingTables) {
2783
2784 LOG.info("The table " + tableName
2785 + " is in DISABLING state. Hence recovering by moving the table"
2786 + " to DISABLED state.");
2787 new DisableTableHandler(this.master, tableName.getBytes(),
2788 catalogTracker, this, true).process();
2789 }
2790 }
2791 return isWatcherCreated;
2792 }
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804 private void recoverTableInEnablingState(Set<String> enablingTables,
2805 boolean isWatcherCreated) throws KeeperException, TableNotFoundException,
2806 IOException {
2807 if (enablingTables.size() != 0) {
2808 if (false == isWatcherCreated) {
2809 ZKUtil.listChildrenAndWatchForNewChildren(watcher,
2810 watcher.assignmentZNode);
2811 }
2812 for (String tableName : enablingTables) {
2813
2814 LOG.info("The table " + tableName
2815 + " is in ENABLING state. Hence recovering by moving the table"
2816 + " to ENABLED state.");
2817
2818
2819 EnableTableHandler eth = null;
2820 try {
2821 eth =
2822 new EnableTableHandler(this.master, tableName.getBytes(), catalogTracker, this, true);
2823 } catch (TableNotFoundException e) {
2824 LOG.warn("Table " + tableName + " not found in .META. to recover.");
2825 continue;
2826 }
2827 if (eth != null) eth.process();
2828 }
2829 }
2830 }
2831
2832 private boolean checkIfRegionsBelongsToEnabling(HRegionInfo regionInfo) {
2833 String tableName = regionInfo.getTableNameAsString();
2834 return getZKTable().isEnablingTable(tableName);
2835 }
2836
2837 private boolean checkIfRegionBelongsToDisabled(HRegionInfo regionInfo) {
2838 String tableName = regionInfo.getTableNameAsString();
2839 return getZKTable().isDisabledTable(tableName);
2840 }
2841
2842 private boolean checkIfRegionBelongsToDisabling(HRegionInfo regionInfo) {
2843 String tableName = regionInfo.getTableNameAsString();
2844 return getZKTable().isDisablingTable(tableName);
2845 }
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863 private void processDeadServersAndRecoverLostRegions(
2864 Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers,
2865 List<String> nodes) throws IOException, KeeperException {
2866 if (null != deadServers) {
2867 Set<ServerName> actualDeadServers = this.serverManager.getDeadServers();
2868 for (Map.Entry<ServerName, List<Pair<HRegionInfo, Result>>> deadServer :
2869 deadServers.entrySet()) {
2870
2871
2872 if (actualDeadServers.contains(deadServer.getKey())) {
2873 for (Pair<HRegionInfo, Result> deadRegion : deadServer.getValue()) {
2874 HRegionInfo hri = deadRegion.getFirst();
2875
2876
2877
2878
2879 deleteNodeAndOfflineRegion(hri);
2880 nodes.remove(deadRegion.getFirst().getEncodedName());
2881 }
2882 continue;
2883 }
2884 List<Pair<HRegionInfo, Result>> regions = deadServer.getValue();
2885 for (Pair<HRegionInfo, Result> region : regions) {
2886 HRegionInfo regionInfo = region.getFirst();
2887 Result result = region.getSecond();
2888
2889
2890 try {
2891 RegionTransitionData data = ZKAssign.getData(watcher,
2892 regionInfo.getEncodedName());
2893
2894
2895
2896
2897
2898 if (data != null && data.getOrigin() != null &&
2899 serverManager.isServerOnline(data.getOrigin())) {
2900 LOG.info("The region " + regionInfo.getEncodedName()
2901 + "is being handled on " + data.getOrigin());
2902 continue;
2903 }
2904
2905 boolean assign = ServerShutdownHandler.processDeadRegion(
2906 regionInfo, result, this, this.catalogTracker);
2907 if (assign) {
2908 ZKAssign.createOrForceNodeOffline(watcher, regionInfo,
2909 master.getServerName());
2910 if (!nodes.contains(regionInfo.getEncodedName())) {
2911 nodes.add(regionInfo.getEncodedName());
2912 }
2913 }
2914 } catch (KeeperException.NoNodeException nne) {
2915
2916 }
2917 }
2918 }
2919 }
2920
2921 if (!nodes.isEmpty()) {
2922 for (String encodedRegionName : nodes) {
2923 processRegionInTransition(encodedRegionName, null, deadServers);
2924 }
2925 }
2926 }
2927
2928
2929
2930
2931
2932 public void deleteNodeAndOfflineRegion(HRegionInfo hri) {
2933 if (zkTable.isDisablingOrDisabledTable(hri.getTableNameAsString())) {
2934 try {
2935
2936 ZKAssign.deleteNodeFailSilent(this.master.getZooKeeper(), hri);
2937 } catch (KeeperException ke) {
2938 this.master.abort("Unexpected ZK exception deleting unassigned node " + hri, ke);
2939 }
2940 regionOffline(hri);
2941 }
2942 }
2943
2944
2945
2946
2947
2948
2949 private void addToServers(final ServerName sn, final HRegionInfo hri) {
2950 Set<HRegionInfo> hris = servers.get(sn);
2951 if (hris == null) {
2952 hris = new ConcurrentSkipListSet<HRegionInfo>();
2953 servers.put(sn, hris);
2954 }
2955 if (!hris.contains(hri)) hris.add(hri);
2956 }
2957
2958
2959
2960
2961 public NavigableMap<String, RegionState> getRegionsInTransition() {
2962 synchronized (this.regionsInTransition) {
2963 return new TreeMap<String, RegionState>(this.regionsInTransition);
2964 }
2965 }
2966
2967
2968
2969
2970 public boolean isRegionsInTransition() {
2971 synchronized (this.regionsInTransition) {
2972 return !this.regionsInTransition.isEmpty();
2973 }
2974 }
2975
2976
2977
2978
2979
2980
2981 public RegionState isRegionInTransition(final HRegionInfo hri) {
2982 synchronized (this.regionsInTransition) {
2983 return this.regionsInTransition.get(hri.getEncodedName());
2984 }
2985 }
2986
2987
2988
2989
2990
2991
2992
2993
2994 public void clearRegionFromTransition(HRegionInfo hri) {
2995 synchronized (this.regionsInTransition) {
2996 this.regionsInTransition.remove(hri.getEncodedName());
2997 }
2998 synchronized (this.regions) {
2999 this.regions.remove(hri);
3000 for (Set<HRegionInfo> regions : this.servers.values()) {
3001 regions.remove(hri);
3002 }
3003 }
3004 clearRegionPlan(hri);
3005 }
3006
3007
3008
3009
3010 void clearRegionPlan(final HRegionInfo region) {
3011 synchronized (this.regionPlans) {
3012 this.regionPlans.remove(region.getEncodedName());
3013 }
3014 }
3015
3016
3017
3018
3019
3020
3021 public void waitOnRegionToClearRegionsInTransition(final HRegionInfo hri)
3022 throws IOException {
3023 if (isRegionInTransition(hri) == null) return;
3024 RegionState rs = null;
3025
3026
3027 while(!this.master.isStopped() && (rs = isRegionInTransition(hri)) != null) {
3028 Threads.sleep(1000);
3029 LOG.info("Waiting on " + rs + " to clear regions-in-transition");
3030 }
3031 if (this.master.isStopped()) {
3032 LOG.info("Giving up wait on regions in " +
3033 "transition because stoppable.isStopped is set");
3034 }
3035 }
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048 public List<HRegionInfo> getRegionsOfTable(byte[] tableName) {
3049 List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
3050
3051
3052 HRegionInfo boundary =
3053 new HRegionInfo(tableName, null, null, false, 0L);
3054 synchronized (this.regions) {
3055 for (HRegionInfo regionInfo: this.regions.tailMap(boundary).keySet()) {
3056 if(Bytes.equals(regionInfo.getTableName(), tableName)) {
3057 tableRegions.add(regionInfo);
3058 } else {
3059 break;
3060 }
3061 }
3062 }
3063 return tableRegions;
3064 }
3065
3066
3067
3068
3069
3070 public class TimerUpdater extends Chore {
3071
3072 public TimerUpdater(final int period, final Stoppable stopper) {
3073 super("AssignmentTimerUpdater", period, stopper);
3074 }
3075
3076 @Override
3077 protected void chore() {
3078 ServerName serverToUpdateTimer = null;
3079 while (!serversInUpdatingTimer.isEmpty() && !stopper.isStopped()) {
3080 if (serverToUpdateTimer == null) {
3081 serverToUpdateTimer = serversInUpdatingTimer.first();
3082 } else {
3083 serverToUpdateTimer = serversInUpdatingTimer
3084 .higher(serverToUpdateTimer);
3085 }
3086 if (serverToUpdateTimer == null) {
3087 break;
3088 }
3089 updateTimers(serverToUpdateTimer);
3090 serversInUpdatingTimer.remove(serverToUpdateTimer);
3091 }
3092 }
3093 }
3094
3095
3096
3097
3098 public class TimeoutMonitor extends Chore {
3099 private final int timeout;
3100 private boolean bulkAssign = false;
3101 private boolean allRegionServersOffline = false;
3102 private ServerManager serverManager;
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113 public TimeoutMonitor(final int period, final Stoppable stopper,
3114 ServerManager serverManager,
3115 final int timeout) {
3116 super("AssignmentTimeoutMonitor", period, stopper);
3117 this.timeout = timeout;
3118 this.serverManager = serverManager;
3119 }
3120
3121
3122
3123
3124
3125
3126 public boolean bulkAssign(final boolean bulkAssign) {
3127 boolean result = this.bulkAssign;
3128 this.bulkAssign = bulkAssign;
3129 return result;
3130 }
3131
3132 private synchronized void setAllRegionServersOffline(
3133 boolean allRegionServersOffline) {
3134 this.allRegionServersOffline = allRegionServersOffline;
3135 }
3136
3137 @Override
3138 protected void chore() {
3139
3140 if (this.bulkAssign) return;
3141 boolean allRSsOffline = this.serverManager.getOnlineServersList().
3142 isEmpty();
3143
3144 synchronized (regionsInTransition) {
3145
3146 long now = System.currentTimeMillis();
3147 for (RegionState regionState : regionsInTransition.values()) {
3148 if (regionState.getStamp() + timeout <= now) {
3149
3150 actOnTimeOut(regionState);
3151 } else if (this.allRegionServersOffline && !allRSsOffline) {
3152 RegionPlan existingPlan = regionPlans.get(regionState.getRegion().getEncodedName());
3153 if (existingPlan == null
3154 || !this.serverManager.isServerOnline(existingPlan.getDestination())) {
3155
3156
3157 actOnTimeOut(regionState);
3158 }
3159 }
3160 }
3161 }
3162 setAllRegionServersOffline(allRSsOffline);
3163 }
3164
3165 private void actOnTimeOut(RegionState regionState) {
3166 HRegionInfo regionInfo = regionState.getRegion();
3167 LOG.info("Regions in transition timed out: " + regionState);
3168
3169 switch (regionState.getState()) {
3170 case CLOSED:
3171 LOG.info("Region " + regionInfo.getEncodedName()
3172 + " has been CLOSED for too long, waiting on queued "
3173 + "ClosedRegionHandler to run or server shutdown");
3174
3175 regionState.updateTimestampToNow();
3176 break;
3177 case OFFLINE:
3178 LOG.info("Region has been OFFLINE for too long, " + "reassigning "
3179 + regionInfo.getRegionNameAsString() + " to a random server");
3180 invokeAssign(regionInfo);
3181 break;
3182 case PENDING_OPEN:
3183 LOG.info("Region has been PENDING_OPEN for too "
3184 + "long, reassigning region=" + regionInfo.getRegionNameAsString());
3185 invokeAssign(regionInfo);
3186 break;
3187 case OPENING:
3188 processOpeningState(regionInfo);
3189 break;
3190 case OPEN:
3191 LOG.error("Region has been OPEN for too long, " +
3192 "we don't know where region was opened so can't do anything");
3193 synchronized (regionState) {
3194 regionState.updateTimestampToNow();
3195 }
3196 break;
3197
3198 case PENDING_CLOSE:
3199 LOG.info("Region has been PENDING_CLOSE for too "
3200 + "long, running forced unassign again on region="
3201 + regionInfo.getRegionNameAsString());
3202 invokeUnassign(regionInfo);
3203 break;
3204 case CLOSING:
3205 LOG.info("Region has been CLOSING for too " +
3206 "long, this should eventually complete or the server will " +
3207 "expire, send RPC again");
3208 invokeUnassign(regionInfo);
3209 break;
3210 }
3211 }
3212 }
3213
3214 private void processOpeningState(HRegionInfo regionInfo) {
3215 LOG.info("Region has been OPENING for too " + "long, reassigning region="
3216 + regionInfo.getRegionNameAsString());
3217
3218 try {
3219 String node = ZKAssign.getNodeName(watcher, regionInfo.getEncodedName());
3220 Stat stat = new Stat();
3221 RegionTransitionData dataInZNode = ZKAssign.getDataNoWatch(watcher, node,
3222 stat);
3223 if (dataInZNode == null) {
3224 LOG.warn("Data is null, node " + node + " no longer exists");
3225 return;
3226 }
3227 if (dataInZNode.getEventType() == EventType.RS_ZK_REGION_OPENED) {
3228 LOG.debug("Region has transitioned to OPENED, allowing "
3229 + "watched event handlers to process");
3230 return;
3231 } else if (dataInZNode.getEventType() != EventType.RS_ZK_REGION_OPENING &&
3232 dataInZNode.getEventType() != EventType.RS_ZK_REGION_FAILED_OPEN ) {
3233 LOG.warn("While timing out a region in state OPENING, "
3234 + "found ZK node in unexpected state: "
3235 + dataInZNode.getEventType());
3236 return;
3237 }
3238 invokeAssign(regionInfo);
3239 } catch (KeeperException ke) {
3240 LOG.error("Unexpected ZK exception timing out CLOSING region", ke);
3241 return;
3242 }
3243 return;
3244 }
3245
3246 private void invokeAssign(HRegionInfo regionInfo) {
3247 threadPoolExecutorService.submit(new AssignCallable(this, regionInfo));
3248 }
3249
3250 private void invokeUnassign(HRegionInfo regionInfo) {
3251 threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
3252 }
3253
3254 public boolean isCarryingRoot(ServerName serverName) {
3255 return isCarryingRegion(serverName, HRegionInfo.ROOT_REGIONINFO);
3256 }
3257
3258 public boolean isCarryingMeta(ServerName serverName) {
3259 return isCarryingRegion(serverName, HRegionInfo.FIRST_META_REGIONINFO);
3260 }
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271 public boolean isCarryingRegion(ServerName serverName, HRegionInfo hri) {
3272 RegionTransitionData data = null;
3273 try {
3274 data = ZKAssign.getData(master.getZooKeeper(), hri.getEncodedName());
3275 } catch (KeeperException e) {
3276 master.abort("Unexpected ZK exception reading unassigned node for region="
3277 + hri.getEncodedName(), e);
3278 }
3279
3280 ServerName addressFromZK = (data != null && data.getOrigin() != null) ?
3281 data.getOrigin() : null;
3282 if (addressFromZK != null) {
3283
3284 boolean matchZK = (addressFromZK != null &&
3285 addressFromZK.equals(serverName));
3286 LOG.debug("based on ZK, current region=" + hri.getRegionNameAsString() +
3287 " is on server=" + addressFromZK +
3288 " server being checked=: " + serverName);
3289 return matchZK;
3290 }
3291
3292 ServerName addressFromAM = getRegionServerOfRegion(hri);
3293 boolean matchAM = (addressFromAM != null &&
3294 addressFromAM.equals(serverName));
3295 LOG.debug("based on AM, current region=" + hri.getRegionNameAsString() +
3296 " is on server=" + (addressFromAM != null ? addressFromAM : "null") +
3297 " server being checked: " + serverName);
3298
3299 return matchAM;
3300 }
3301
3302
3303
3304
3305
3306
3307
3308 public Pair<Set<HRegionInfo>, List<RegionState>> processServerShutdown(final ServerName sn) {
3309
3310 synchronized (this.regionPlans) {
3311 for (Iterator <Map.Entry<String, RegionPlan>> i =
3312 this.regionPlans.entrySet().iterator(); i.hasNext();) {
3313 Map.Entry<String, RegionPlan> e = i.next();
3314 ServerName otherSn = e.getValue().getDestination();
3315
3316 if (otherSn != null && otherSn.equals(sn)) {
3317
3318 i.remove();
3319 }
3320 }
3321 }
3322
3323
3324
3325 Set<HRegionInfo> deadRegions = new TreeSet<HRegionInfo>();
3326 synchronized (this.regions) {
3327 Set<HRegionInfo> assignedRegions = this.servers.remove(sn);
3328 if (assignedRegions != null && !assignedRegions.isEmpty()) {
3329 deadRegions.addAll(assignedRegions);
3330 for (HRegionInfo region : deadRegions) {
3331 this.regions.remove(region);
3332 }
3333 }
3334 }
3335
3336
3337
3338 Set<HRegionInfo> ritsGoingToServer = new ConcurrentSkipListSet<HRegionInfo>();
3339 List<RegionState> ritsOnServer = new ArrayList<RegionState>();
3340 synchronized (regionsInTransition) {
3341 for (RegionState state : this.regionsInTransition.values()) {
3342
3343
3344
3345
3346 if ((state.getServerName() != null) && state.getServerName().equals(sn)) {
3347 ritsGoingToServer.add(state.getRegion());
3348 }
3349 if (deadRegions.contains(state.getRegion())) {
3350 ritsOnServer.add(state);
3351 }
3352 }
3353 }
3354 return new Pair<Set<HRegionInfo>, List<RegionState>>(ritsGoingToServer, ritsOnServer);
3355 }
3356
3357
3358
3359
3360
3361
3362
3363
3364 public void handleSplitReport(final ServerName sn, final HRegionInfo parent,
3365 final HRegionInfo a, final HRegionInfo b) {
3366 regionOffline(parent);
3367 regionOnline(a, sn);
3368 regionOnline(b, sn);
3369
3370
3371
3372
3373
3374 if (this.zkTable.isDisablingOrDisabledTable(
3375 parent.getTableNameAsString())) {
3376 unassign(a);
3377 unassign(b);
3378 }
3379 }
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389 Map<String, Map<ServerName, List<HRegionInfo>>> getAssignmentsByTable() {
3390 Map<String, Map<ServerName, List<HRegionInfo>>> result = null;
3391 synchronized (this.regions) {
3392 result = new HashMap<String, Map<ServerName,List<HRegionInfo>>>();
3393 if (!this.master.getConfiguration().
3394 getBoolean("hbase.master.loadbalance.bytable", true)) {
3395 result.put("ensemble", getAssignments());
3396 } else {
3397 for (Map.Entry<ServerName, Set<HRegionInfo>> e: this.servers.entrySet()) {
3398 for (HRegionInfo hri : e.getValue()) {
3399 if (hri.isMetaRegion() || hri.isRootRegion()) continue;
3400 String tablename = hri.getTableNameAsString();
3401 Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
3402 if (svrToRegions == null) {
3403 svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(this.servers.size());
3404 result.put(tablename, svrToRegions);
3405 }
3406 List<HRegionInfo> regions = null;
3407 if (!svrToRegions.containsKey(e.getKey())) {
3408 regions = new ArrayList<HRegionInfo>();
3409 svrToRegions.put(e.getKey(), regions);
3410 } else {
3411 regions = svrToRegions.get(e.getKey());
3412 }
3413 regions.add(hri);
3414 }
3415 }
3416 }
3417 }
3418 Map<ServerName, HServerLoad> onlineSvrs = this.serverManager.getOnlineServers();
3419
3420 for (Map<ServerName,List<HRegionInfo>> map : result.values()) {
3421 for (Map.Entry<ServerName, HServerLoad> svrEntry: onlineSvrs.entrySet()) {
3422 if (!map.containsKey(svrEntry.getKey())) {
3423 map.put(svrEntry.getKey(), new ArrayList<HRegionInfo>());
3424 }
3425 }
3426 }
3427 return result;
3428 }
3429
3430
3431
3432
3433
3434
3435 Map<ServerName, List<HRegionInfo>> getAssignments() {
3436
3437
3438
3439
3440 Map<ServerName, List<HRegionInfo>> result = null;
3441 synchronized (this.regions) {
3442 result = new HashMap<ServerName, List<HRegionInfo>>(this.servers.size());
3443 for (Map.Entry<ServerName, Set<HRegionInfo>> e: this.servers.entrySet()) {
3444 result.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
3445 }
3446 }
3447 return result;
3448 }
3449
3450
3451
3452
3453
3454
3455 Pair<HRegionInfo, ServerName> getAssignment(final byte [] encodedRegionName) {
3456 String name = Bytes.toString(encodedRegionName);
3457 synchronized(this.regions) {
3458 for (Map.Entry<HRegionInfo, ServerName> e: this.regions.entrySet()) {
3459 if (e.getKey().getEncodedName().equals(name)) {
3460 return new Pair<HRegionInfo, ServerName>(e.getKey(), e.getValue());
3461 }
3462 }
3463 }
3464 return null;
3465 }
3466
3467
3468
3469
3470 void balance(final RegionPlan plan) {
3471 synchronized (this.regionPlans) {
3472 this.regionPlans.put(plan.getRegionName(), plan);
3473 }
3474 unassign(plan.getRegionInfo());
3475 }
3476
3477
3478
3479
3480 void unassignCatalogRegions() {
3481 synchronized (this.regions) {
3482 for (Map.Entry<ServerName, Set<HRegionInfo>> e: this.servers.entrySet()) {
3483 Set<HRegionInfo> regions = e.getValue();
3484 if (regions == null || regions.isEmpty()) continue;
3485 for (HRegionInfo hri: regions) {
3486 if (hri.isMetaRegion()) {
3487 unassign(hri);
3488 }
3489 }
3490 }
3491 }
3492 }
3493
3494
3495
3496
3497 public static class RegionState implements org.apache.hadoop.io.Writable {
3498 private HRegionInfo region;
3499
3500 public enum State {
3501 OFFLINE,
3502 PENDING_OPEN,
3503 OPENING,
3504 OPEN,
3505 PENDING_CLOSE,
3506 CLOSING,
3507 CLOSED,
3508 SPLITTING,
3509 SPLIT
3510 }
3511
3512 private State state;
3513
3514 private final AtomicLong stamp;
3515 private ServerName serverName;
3516
3517 public RegionState() {
3518 this.stamp = new AtomicLong(System.currentTimeMillis());
3519 }
3520
3521 RegionState(HRegionInfo region, State state) {
3522 this(region, state, System.currentTimeMillis(), null);
3523 }
3524
3525 RegionState(HRegionInfo region, State state, long stamp, ServerName serverName) {
3526 this.region = region;
3527 this.state = state;
3528 this.stamp = new AtomicLong(stamp);
3529 this.serverName = serverName;
3530 }
3531
3532 public void update(State state, long stamp, ServerName serverName) {
3533 this.state = state;
3534 updateTimestamp(stamp);
3535 this.serverName = serverName;
3536 }
3537
3538 public void update(State state) {
3539 this.state = state;
3540 updateTimestampToNow();
3541 this.serverName = null;
3542 }
3543
3544 public void updateTimestamp(long stamp) {
3545 this.stamp.set(stamp);
3546 }
3547
3548 public void updateTimestampToNow() {
3549 this.stamp.set(System.currentTimeMillis());
3550 }
3551
3552 public State getState() {
3553 return state;
3554 }
3555
3556 public long getStamp() {
3557 return stamp.get();
3558 }
3559
3560 public HRegionInfo getRegion() {
3561 return region;
3562 }
3563
3564 public ServerName getServerName() {
3565 return serverName;
3566 }
3567
3568 public boolean isClosing() {
3569 return state == State.CLOSING;
3570 }
3571
3572 public boolean isClosed() {
3573 return state == State.CLOSED;
3574 }
3575
3576 public boolean isPendingClose() {
3577 return state == State.PENDING_CLOSE;
3578 }
3579
3580 public boolean isOpening() {
3581 return state == State.OPENING;
3582 }
3583
3584 public boolean isOpened() {
3585 return state == State.OPEN;
3586 }
3587
3588 public boolean isPendingOpen() {
3589 return state == State.PENDING_OPEN;
3590 }
3591
3592 public boolean isOffline() {
3593 return state == State.OFFLINE;
3594 }
3595
3596 public boolean isSplitting() {
3597 return state == State.SPLITTING;
3598 }
3599
3600 public boolean isSplit() {
3601 return state == State.SPLIT;
3602 }
3603
3604 @Override
3605 public String toString() {
3606 return region.getRegionNameAsString()
3607 + " state=" + state
3608 + ", ts=" + stamp
3609 + ", server=" + serverName;
3610 }
3611
3612
3613
3614
3615 public String toDescriptiveString() {
3616 long lstamp = stamp.get();
3617 long relTime = System.currentTimeMillis() - lstamp;
3618
3619 return region.getRegionNameAsString()
3620 + " state=" + state
3621 + ", ts=" + new Date(lstamp) + " (" + (relTime/1000) + "s ago)"
3622 + ", server=" + serverName;
3623 }
3624
3625 @Override
3626 public void readFields(DataInput in) throws IOException {
3627 region = new HRegionInfo();
3628 region.readFields(in);
3629 state = State.valueOf(in.readUTF());
3630 stamp.set(in.readLong());
3631 }
3632
3633 @Override
3634 public void write(DataOutput out) throws IOException {
3635 region.write(out);
3636 out.writeUTF(state.name());
3637 out.writeLong(stamp.get());
3638 }
3639 }
3640
3641 public void stop() {
3642 this.timeoutMonitor.interrupt();
3643 this.timerUpdater.interrupt();
3644 }
3645
3646
3647
3648
3649
3650
3651 public boolean isServerOnline(ServerName serverName) {
3652 return this.serverManager.isServerOnline(serverName);
3653 }
3654
3655
3656
3657 public void shutdown() {
3658 if (null != threadPoolExecutorService) {
3659 this.threadPoolExecutorService.shutdown();
3660 }
3661 }
3662
3663 protected void setEnabledTable(String tableName) {
3664 try {
3665 this.zkTable.setEnabledTable(tableName);
3666 } catch (KeeperException e) {
3667
3668 String errorMsg = "Unable to ensure that the table " + tableName
3669 + " will be" + " enabled because of a ZooKeeper issue";
3670 LOG.error(errorMsg);
3671 this.master.abort(errorMsg, e);
3672 }
3673 }
3674
3675 }