1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.HashSet;
25 import java.util.Iterator;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.TreeMap;
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.hadoop.classification.InterfaceAudience;
34 import org.apache.hadoop.conf.Configuration;
35 import org.apache.hadoop.hbase.HConstants;
36 import org.apache.hadoop.hbase.HRegionInfo;
37 import org.apache.hadoop.hbase.RegionTransition;
38 import org.apache.hadoop.hbase.Server;
39 import org.apache.hadoop.hbase.ServerLoad;
40 import org.apache.hadoop.hbase.ServerName;
41 import org.apache.hadoop.hbase.TableName;
42 import org.apache.hadoop.hbase.catalog.MetaReader;
43 import org.apache.hadoop.hbase.master.RegionState.State;
44 import org.apache.hadoop.hbase.util.Bytes;
45 import org.apache.hadoop.hbase.util.Pair;
46 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
47 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
48 import org.apache.zookeeper.KeeperException;
49
50 import com.google.common.base.Preconditions;
51
52
53
54
55
56
57
58 @InterfaceAudience.Private
59 public class RegionStates {
/** Logger shared by all instances of this class. */
private static final Log LOG = LogFactory.getLog(RegionStates.class);

/**
 * Regions currently considered in transition, keyed by encoded region name.
 * Entries are added whenever a region state is updated and removed again
 * once the region is onlined, offlined or deleted.
 */
final HashMap<String, RegionState> regionsInTransition;

/**
 * Latest known state of every tracked region, keyed by encoded region name.
 */
private final HashMap<String, RegionState> regionStates;

/**
 * For each server, the set of regions currently recorded as hosted on it.
 */
private final Map<ServerName, Set<HRegionInfo>> serverHoldings;

/**
 * Region to hosting-server map for regions recorded as online.
 * Kept sorted so {@code getRegionsOfTable} can scan a table's regions
 * with a single {@code tailMap} call.
 */
private final TreeMap<HRegionInfo, ServerName> regionAssignments;

/**
 * Encoded region name to the server the region was last opened on.
 * An entry is cleared when the region moves to CLOSED/SPLIT/MERGED on that
 * same server, when the server's log split is reported via
 * {@code logSplit}, or explicitly via {@code clearLastAssignment}.
 */
private final HashMap<String, ServerName> lastAssignments;

/**
 * Host-and-port to start code of a server instance that was reported
 * online by the server manager but turned out to be unreachable.
 */
private final HashMap<String, Long> deadServers;

/**
 * Servers whose log split has been reported, mapped to the time (ms since
 * epoch) of that report. Entries older than the configured
 * {@link #LOG_SPLIT_TIME} are pruned lazily inside {@code logSplit}.
 */
private final HashMap<ServerName, Long> processedServers;
/** Time (ms since epoch) of the last pruning pass over {@link #processedServers}. */
private long lastProcessedServerCleanTime;

/** Store that is told about every region state change. */
private final RegionStateStore regionStateStore;
/** Used to ask whether a server is online / reachable. */
private final ServerManager serverManager;
/** Owning server; supplies configuration, stop flag, abort and catalog access. */
private final Server server;

/** Configuration key: how long (ms) a processed server is remembered. */
static final String LOG_SPLIT_TIME = "hbase.master.maximum.logsplit.keeptime";
/** Default for {@link #LOG_SPLIT_TIME}: 7,200,000 ms (two hours). */
static final long DEFAULT_LOG_SPLIT_TIME = 7200000L;
121
/**
 * Creates a tracker with no known regions.
 *
 * @param master owning server (configuration, stop flag, abort, catalog access)
 * @param serverManager used to check server liveness
 * @param regionStateStore receives every region state change
 */
RegionStates(final Server master,
    final ServerManager serverManager, final RegionStateStore regionStateStore) {
  // Collaborators first ...
  this.server = master;
  this.serverManager = serverManager;
  this.regionStateStore = regionStateStore;

  // ... then the bookkeeping maps, all initially empty.
  this.regionStates = new HashMap<String, RegionState>();
  this.regionsInTransition = new HashMap<String, RegionState>();
  this.regionAssignments = new TreeMap<HRegionInfo, ServerName>();
  this.serverHoldings = new HashMap<ServerName, Set<HRegionInfo>>();
  this.lastAssignments = new HashMap<String, ServerName>();
  this.deadServers = new HashMap<String, Long>();
  this.processedServers = new HashMap<ServerName, Long>();
}
135
136
137
138
139 public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
140 return Collections.unmodifiableMap(regionAssignments);
141 }
142
143 public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
144 return regionAssignments.get(hri);
145 }
146
147
148
149
150 @SuppressWarnings("unchecked")
151 public synchronized Map<String, RegionState> getRegionsInTransition() {
152 return (Map<String, RegionState>)regionsInTransition.clone();
153 }
154
155
156
157
158 public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
159 return regionsInTransition.containsKey(hri.getEncodedName());
160 }
161
162
163
164
165 public synchronized boolean isRegionInTransition(final String encodedName) {
166 return regionsInTransition.containsKey(encodedName);
167 }
168
169
170
171
172 public synchronized boolean isRegionsInTransition() {
173 return !regionsInTransition.isEmpty();
174 }
175
176
177
178
179 public synchronized boolean isRegionOnline(final HRegionInfo hri) {
180 return !isRegionInTransition(hri) && regionAssignments.containsKey(hri);
181 }
182
183
184
185
186
187 public synchronized boolean isRegionOffline(final HRegionInfo hri) {
188 return getRegionState(hri) == null || (!isRegionInTransition(hri)
189 && isRegionInState(hri, State.OFFLINE, State.CLOSED));
190 }
191
192
193
194
195 public boolean isRegionInState(
196 final HRegionInfo hri, final State... states) {
197 return isRegionInState(hri.getEncodedName(), states);
198 }
199
200
201
202
203 public boolean isRegionInState(
204 final String encodedName, final State... states) {
205 RegionState regionState = getRegionState(encodedName);
206 return isOneOfStates(regionState, states);
207 }
208
209
210
211
212 public synchronized void waitForUpdate(
213 final long timeout) throws InterruptedException {
214 this.wait(timeout);
215 }
216
217
218
219
220 public RegionState getRegionTransitionState(final HRegionInfo hri) {
221 return getRegionTransitionState(hri.getEncodedName());
222 }
223
224
225
226
227 public synchronized RegionState
228 getRegionTransitionState(final String encodedName) {
229 return regionsInTransition.get(encodedName);
230 }
231
232
233
234
235
236
237 public void createRegionStates(
238 final List<HRegionInfo> hris) {
239 for (HRegionInfo hri: hris) {
240 createRegionState(hri);
241 }
242 }
243
244
245
246
247
248
249
250 public RegionState createRegionState(final HRegionInfo hri) {
251 return createRegionState(hri, null, null);
252 }
253
254
255
256
257
258
259 public synchronized RegionState createRegionState(
260 final HRegionInfo hri, State newState, ServerName serverName) {
261 if (newState == null || (newState == State.OPEN && serverName == null)) {
262 newState = State.OFFLINE;
263 }
264 if (hri.isOffline() && hri.isSplit()) {
265 newState = State.SPLIT;
266 serverName = null;
267 }
268 String encodedName = hri.getEncodedName();
269 RegionState regionState = regionStates.get(encodedName);
270 if (regionState != null) {
271 LOG.warn("Tried to create a state for a region already in RegionStates, "
272 + "used existing: " + regionState + ", ignored new: " + newState);
273 } else {
274 regionState = new RegionState(hri, newState, serverName);
275 regionStates.put(encodedName, regionState);
276 if (newState == State.OPEN) {
277 regionAssignments.put(hri, serverName);
278 lastAssignments.put(encodedName, serverName);
279 Set<HRegionInfo> regions = serverHoldings.get(serverName);
280 if (regions == null) {
281 regions = new HashSet<HRegionInfo>();
282 serverHoldings.put(serverName, regions);
283 }
284 regions.add(hri);
285 } else if (!regionState.isUnassignable()) {
286 regionsInTransition.put(encodedName, regionState);
287 }
288 }
289 return regionState;
290 }
291
292
293
294
295 public RegionState updateRegionState(
296 final HRegionInfo hri, final State state) {
297 RegionState regionState = getRegionState(hri.getEncodedName());
298 return updateRegionState(hri, state,
299 regionState == null ? null : regionState.getServerName());
300 }
301
302
303
304
305
306
307
308 public RegionState updateRegionState(
309 final RegionTransition transition, final State state) {
310 byte [] regionName = transition.getRegionName();
311 HRegionInfo regionInfo = getRegionInfo(regionName);
312 if (regionInfo == null) {
313 String prettyRegionName = HRegionInfo.prettyPrint(
314 HRegionInfo.encodeRegionName(regionName));
315 LOG.warn("Failed to find region " + prettyRegionName
316 + " in updating its state to " + state
317 + " based on region transition " + transition);
318 return null;
319 }
320 return updateRegionState(regionInfo, state,
321 transition.getServerName());
322 }
323
324
325
326
327 public RegionState updateRegionState(
328 final HRegionInfo hri, final State state, final ServerName serverName) {
329 return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM);
330 }
331
332 public void regionOnline(
333 final HRegionInfo hri, final ServerName serverName) {
334 regionOnline(hri, serverName, HConstants.NO_SEQNUM);
335 }
336
337
338
339
340
341
342 public void regionOnline(final HRegionInfo hri,
343 final ServerName serverName, long openSeqNum) {
344 if (!serverManager.isServerOnline(serverName)) {
345
346
347
348
349 LOG.warn("Ignored, " + hri.getEncodedName()
350 + " was opened on a dead server: " + serverName);
351 return;
352 }
353 updateRegionState(hri, State.OPEN, serverName, openSeqNum);
354
355 synchronized (this) {
356 regionsInTransition.remove(hri.getEncodedName());
357 ServerName oldServerName = regionAssignments.put(hri, serverName);
358 if (!serverName.equals(oldServerName)) {
359 LOG.info("Onlined " + hri.getShortNameToLog() + " on " + serverName);
360 Set<HRegionInfo> regions = serverHoldings.get(serverName);
361 if (regions == null) {
362 regions = new HashSet<HRegionInfo>();
363 serverHoldings.put(serverName, regions);
364 }
365 regions.add(hri);
366 if (oldServerName != null) {
367 LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
368 Set<HRegionInfo> oldRegions = serverHoldings.get(oldServerName);
369 oldRegions.remove(hri);
370 if (oldRegions.isEmpty()) {
371 serverHoldings.remove(oldServerName);
372 }
373 }
374 }
375 }
376 }
377
378
379
380
381
382 public synchronized void logSplit(final ServerName serverName) {
383 for (Iterator<Map.Entry<String, ServerName>> it
384 = lastAssignments.entrySet().iterator(); it.hasNext();) {
385 Map.Entry<String, ServerName> e = it.next();
386 if (e.getValue().equals(serverName)) {
387 it.remove();
388 }
389 }
390 long now = System.currentTimeMillis();
391 if (LOG.isDebugEnabled()) {
392 LOG.debug("Adding to processed servers " + serverName);
393 }
394 processedServers.put(serverName, Long.valueOf(now));
395 Configuration conf = server.getConfiguration();
396 long obsoleteTime = conf.getLong(LOG_SPLIT_TIME, DEFAULT_LOG_SPLIT_TIME);
397
398 if (now > lastProcessedServerCleanTime + obsoleteTime) {
399 lastProcessedServerCleanTime = now;
400 long cutoff = now - obsoleteTime;
401 for (Iterator<Map.Entry<ServerName, Long>> it
402 = processedServers.entrySet().iterator(); it.hasNext();) {
403 Map.Entry<ServerName, Long> e = it.next();
404 if (e.getValue().longValue() < cutoff) {
405 if (LOG.isDebugEnabled()) {
406 LOG.debug("Removed from processed servers " + e.getKey());
407 }
408 it.remove();
409 }
410 }
411 }
412 }
413
414
415
416
417 public void logSplit(final HRegionInfo region) {
418 clearLastAssignment(region);
419 }
420
421 public synchronized void clearLastAssignment(final HRegionInfo region) {
422 lastAssignments.remove(region.getEncodedName());
423 }
424
425
426
427
428 public void regionOffline(final HRegionInfo hri) {
429 regionOffline(hri, null);
430 }
431
432
433
434
435
436
437 public void regionOffline(
438 final HRegionInfo hri, final State expectedState) {
439 Preconditions.checkArgument(expectedState == null
440 || RegionState.isUnassignable(expectedState),
441 "Offlined region should not be " + expectedState);
442 if (isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
443
444 deleteRegion(hri);
445 return;
446 }
447 State newState =
448 expectedState == null ? State.OFFLINE : expectedState;
449 updateRegionState(hri, newState);
450
451 synchronized (this) {
452 regionsInTransition.remove(hri.getEncodedName());
453 ServerName oldServerName = regionAssignments.remove(hri);
454 if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
455 LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
456 Set<HRegionInfo> oldRegions = serverHoldings.get(oldServerName);
457 oldRegions.remove(hri);
458 if (oldRegions.isEmpty()) {
459 serverHoldings.remove(oldServerName);
460 }
461 }
462 }
463 }
464
465
466
467
468 public synchronized List<HRegionInfo> serverOffline(
469 final ZooKeeperWatcher watcher, final ServerName sn) {
470
471 List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
472 Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
473 if (assignedRegions == null) {
474 assignedRegions = new HashSet<HRegionInfo>();
475 }
476
477
478 Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
479 for (HRegionInfo region : assignedRegions) {
480
481 if (isRegionOnline(region)) {
482 regionsToOffline.add(region);
483 } else {
484 if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
485 LOG.debug("Offline splitting/merging region " + getRegionState(region));
486 try {
487
488 ZKAssign.deleteNodeFailSilent(watcher, region);
489 regionsToOffline.add(region);
490 } catch (KeeperException ke) {
491 server.abort("Unexpected ZK exception deleting node " + region, ke);
492 }
493 }
494 }
495 }
496
497 for (HRegionInfo hri : regionsToOffline) {
498 regionOffline(hri);
499 }
500
501 for (RegionState state : regionsInTransition.values()) {
502 HRegionInfo hri = state.getRegion();
503 if (assignedRegions.contains(hri)) {
504
505
506
507 LOG.info("Transitioning " + state + " will be handled by SSH for " + sn);
508 } else if (sn.equals(state.getServerName())) {
509
510
511
512
513
514
515
516 if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
517 LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
518 rits.add(hri);
519 } else {
520 LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
521 }
522 }
523 }
524
525 this.notifyAll();
526 return rits;
527 }
528
529
530
531
532
533
534
535
536
537
538
539 public synchronized List<HRegionInfo> getRegionsOfTable(TableName tableName) {
540 List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
541
542
543 HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
544 for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
545 if(!hri.getTable().equals(tableName)) break;
546 tableRegions.add(hri);
547 }
548 return tableRegions;
549 }
550
551
552
553
554
555
556
557
558 public synchronized void waitOnRegionToClearRegionsInTransition(
559 final HRegionInfo hri) throws InterruptedException {
560 if (!isRegionInTransition(hri)) return;
561
562 while(!server.isStopped() && isRegionInTransition(hri)) {
563 RegionState rs = getRegionState(hri);
564 LOG.info("Waiting on " + rs + " to clear regions-in-transition");
565 waitForUpdate(100);
566 }
567
568 if (server.isStopped()) {
569 LOG.info("Giving up wait on region in " +
570 "transition because stoppable.isStopped is set");
571 }
572 }
573
574
575
576
577
578 public void tableDeleted(final TableName tableName) {
579 Set<HRegionInfo> regionsToDelete = new HashSet<HRegionInfo>();
580 synchronized (this) {
581 for (RegionState state: regionStates.values()) {
582 HRegionInfo region = state.getRegion();
583 if (region.getTable().equals(tableName)) {
584 regionsToDelete.add(region);
585 }
586 }
587 }
588 for (HRegionInfo region: regionsToDelete) {
589 deleteRegion(region);
590 }
591 }
592
593
594
595
596
597
598
599
600
601
602
603 synchronized boolean wasRegionOnDeadServer(final String encodedName) {
604 ServerName server = lastAssignments.get(encodedName);
605 return isServerDeadAndNotProcessed(server);
606 }
607
608 synchronized boolean isServerDeadAndNotProcessed(ServerName server) {
609 if (server == null) return false;
610 if (serverManager.isServerOnline(server)) {
611 String hostAndPort = server.getHostAndPort();
612 long startCode = server.getStartcode();
613 Long deadCode = deadServers.get(hostAndPort);
614 if (deadCode == null || startCode > deadCode.longValue()) {
615 if (serverManager.isServerReachable(server)) {
616 return false;
617 }
618
619 deadServers.put(hostAndPort, Long.valueOf(startCode));
620 }
621
622
623
624
625
626
627
628 LOG.warn("Couldn't reach online server " + server);
629 }
630
631 return !processedServers.containsKey(server);
632 }
633
634
635
636
637
638 synchronized ServerName getLastRegionServerOfRegion(final String encodedName) {
639 return lastAssignments.get(encodedName);
640 }
641
642 synchronized void setLastRegionServerOfRegions(
643 final ServerName serverName, final List<HRegionInfo> regionInfos) {
644 for (HRegionInfo hri: regionInfos) {
645 setLastRegionServerOfRegion(serverName, hri.getEncodedName());
646 }
647 }
648
649 synchronized void setLastRegionServerOfRegion(
650 final ServerName serverName, final String encodedName) {
651 lastAssignments.put(encodedName, serverName);
652 }
653
654 synchronized void closeAllUserRegions(Set<TableName> excludedTables) {
655 Set<HRegionInfo> toBeClosed = new HashSet<HRegionInfo>(regionStates.size());
656 for(RegionState state: regionStates.values()) {
657 HRegionInfo hri = state.getRegion();
658 TableName tableName = hri.getTable();
659 if (!hri.isSplit() && !hri.isMetaRegion()
660 && !excludedTables.contains(tableName)) {
661 toBeClosed.add(hri);
662 }
663 }
664 for (HRegionInfo hri: toBeClosed) {
665 updateRegionState(hri, State.CLOSED);
666 }
667 }
668
669
670
671
672
673
674
675 protected synchronized double getAverageLoad() {
676 int numServers = 0, totalLoad = 0;
677 for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
678 Set<HRegionInfo> regions = e.getValue();
679 ServerName serverName = e.getKey();
680 int regionCount = regions.size();
681 if (regionCount > 0 || serverManager.isServerOnline(serverName)) {
682 totalLoad += regionCount;
683 numServers++;
684 }
685 }
686 return numServers == 0 ? 0.0 :
687 (double)totalLoad / (double)numServers;
688 }
689
690
691
692
693
694
695
696
697
698 protected Map<TableName, Map<ServerName, List<HRegionInfo>>>
699 getAssignmentsByTable() {
700 Map<TableName, Map<ServerName, List<HRegionInfo>>> result =
701 new HashMap<TableName, Map<ServerName,List<HRegionInfo>>>();
702 synchronized (this) {
703 if (!server.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false)) {
704 Map<ServerName, List<HRegionInfo>> svrToRegions =
705 new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
706 for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
707 svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
708 }
709 result.put(TableName.valueOf("ensemble"), svrToRegions);
710 } else {
711 for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
712 for (HRegionInfo hri: e.getValue()) {
713 if (hri.isMetaRegion()) continue;
714 TableName tablename = hri.getTable();
715 Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
716 if (svrToRegions == null) {
717 svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
718 result.put(tablename, svrToRegions);
719 }
720 List<HRegionInfo> regions = svrToRegions.get(e.getKey());
721 if (regions == null) {
722 regions = new ArrayList<HRegionInfo>();
723 svrToRegions.put(e.getKey(), regions);
724 }
725 regions.add(hri);
726 }
727 }
728 }
729 }
730
731 Map<ServerName, ServerLoad>
732 onlineSvrs = serverManager.getOnlineServers();
733
734 for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
735 for (ServerName svr: onlineSvrs.keySet()) {
736 if (!map.containsKey(svr)) {
737 map.put(svr, new ArrayList<HRegionInfo>());
738 }
739 }
740 }
741 return result;
742 }
743
744 protected RegionState getRegionState(final HRegionInfo hri) {
745 return getRegionState(hri.getEncodedName());
746 }
747
748 protected synchronized RegionState getRegionState(final String encodedName) {
749 return regionStates.get(encodedName);
750 }
751
752
753
754
755
756
757 protected HRegionInfo getRegionInfo(final byte [] regionName) {
758 String encodedName = HRegionInfo.encodeRegionName(regionName);
759 RegionState regionState = getRegionState(encodedName);
760 if (regionState != null) {
761 return regionState.getRegion();
762 }
763
764 try {
765 Pair<HRegionInfo, ServerName> p =
766 MetaReader.getRegion(server.getCatalogTracker(), regionName);
767 HRegionInfo hri = p == null ? null : p.getFirst();
768 if (hri != null) {
769 createRegionState(hri);
770 }
771 return hri;
772 } catch (IOException e) {
773 server.abort("Aborting because error occoured while reading "
774 + Bytes.toStringBinary(regionName) + " from hbase:meta", e);
775 return null;
776 }
777 }
778
779 static boolean isOneOfStates(RegionState regionState, State... states) {
780 State s = regionState != null ? regionState.getState() : null;
781 for (State state: states) {
782 if (s == state) return true;
783 }
784 return false;
785 }
786
787
788
789
790 private RegionState updateRegionState(final HRegionInfo hri,
791 final State state, final ServerName serverName, long openSeqNum) {
792 if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) {
793 LOG.warn("Failed to open/close " + hri.getShortNameToLog()
794 + " on " + serverName + ", set to " + state);
795 }
796
797 String encodedName = hri.getEncodedName();
798 RegionState regionState = new RegionState(
799 hri, state, System.currentTimeMillis(), serverName);
800 RegionState oldState = getRegionState(encodedName);
801 if (!regionState.equals(oldState)) {
802 LOG.info("Transition " + oldState + " to " + regionState);
803
804 regionStateStore.updateRegionState(openSeqNum, regionState, oldState);
805 }
806
807 synchronized (this) {
808 regionsInTransition.put(encodedName, regionState);
809 regionStates.put(encodedName, regionState);
810
811
812
813 if ((state == State.CLOSED || state == State.MERGED
814 || state == State.SPLIT) && lastAssignments.containsKey(encodedName)) {
815 ServerName last = lastAssignments.get(encodedName);
816 if (last.equals(serverName)) {
817 lastAssignments.remove(encodedName);
818 } else {
819 LOG.warn(encodedName + " moved to " + state + " on "
820 + serverName + ", expected " + last);
821 }
822 }
823
824
825 if (serverName != null && state == State.OPEN) {
826 ServerName last = lastAssignments.get(encodedName);
827 if (!serverName.equals(last)) {
828 lastAssignments.put(encodedName, serverName);
829 if (last != null && isServerDeadAndNotProcessed(last)) {
830 LOG.warn(encodedName + " moved to " + serverName
831 + ", while it's previous host " + last
832 + " is dead but not processed yet");
833 }
834 }
835 }
836
837
838 this.notifyAll();
839 }
840 return regionState;
841 }
842
843
844
845
846 private synchronized void deleteRegion(final HRegionInfo hri) {
847 String encodedName = hri.getEncodedName();
848 regionsInTransition.remove(encodedName);
849 regionStates.remove(encodedName);
850 lastAssignments.remove(encodedName);
851 ServerName sn = regionAssignments.remove(hri);
852 if (sn != null) {
853 Set<HRegionInfo> regions = serverHoldings.get(sn);
854 regions.remove(hri);
855 }
856 }
857 }