View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.Collections;
23  import java.util.HashMap;
24  import java.util.HashSet;
25  import java.util.Iterator;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Set;
29  import java.util.TreeMap;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.RegionTransition;
38  import org.apache.hadoop.hbase.Server;
39  import org.apache.hadoop.hbase.ServerLoad;
40  import org.apache.hadoop.hbase.ServerName;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.catalog.MetaReader;
43  import org.apache.hadoop.hbase.master.RegionState.State;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.Pair;
46  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
47  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
48  import org.apache.zookeeper.KeeperException;
49  
50  import com.google.common.base.Preconditions;
51  
52  /**
53   * Region state accountant. It holds the states of all regions in memory.
54   * In the normal scenario, it should match the meta table and the true region states.
55   *
56   * This map is used by AssignmentManager to track region states.
57   */
58  @InterfaceAudience.Private
59  public class RegionStates {
60    private static final Log LOG = LogFactory.getLog(RegionStates.class);
61  
62    /**
63     * Regions currently in transition.
64     */
65    final HashMap<String, RegionState> regionsInTransition;
66  
67    /**
68     * Region encoded name to state map.
69     * All the regions should be in this map.
70     */
71    private final HashMap<String, RegionState> regionStates;
72  
73    /**
74     * Server to regions assignment map.
75     * Contains the set of regions currently assigned to a given server.
76     */
77    private final Map<ServerName, Set<HRegionInfo>> serverHoldings;
78  
79    /**
80     * Region to server assignment map.
81     * Contains the server a given region is currently assigned to.
82     */
83    private final TreeMap<HRegionInfo, ServerName> regionAssignments;
84  
85    /**
86     * Encoded region name to server assignment map for re-assignment
87     * purpose. Contains the server a given region is last known assigned
88     * to, which has not completed log splitting, so not assignable.
89     * If a region is currently assigned, this server info in this
90     * map should be the same as that in regionAssignments.
91     * However the info in regionAssignments is cleared when the region
92     * is offline while the info in lastAssignments is cleared when
93     * the region is closed or the server is dead and processed.
94     */
95    private final HashMap<String, ServerName> lastAssignments;
96  
97    /**
98     * Map a host port pair string to the latest start code
99     * of a region server which is known to be dead. It is dead
100    * to us, but server manager may not know it yet.
101    */
102   private final HashMap<String, Long> deadServers;
103 
104   /**
105    * Maps a dead server to the time when its log split was done.
106    * Since log splitting is not ordered, we have to remember
107    * all processed instances. The map is cleaned up based
108    * on a configured time. By default, we assume a dead
109    * server should be done with log splitting in two hours.
110    */
111   private final HashMap<ServerName, Long> processedServers;
112   private long lastProcessedServerCleanTime;
113 
114   private final RegionStateStore regionStateStore;
115   private final ServerManager serverManager;
116   private final Server server;
117 
118   // The maximum time to keep a log split info in region states map
119   static final String LOG_SPLIT_TIME = "hbase.master.maximum.logsplit.keeptime";
120   static final long DEFAULT_LOG_SPLIT_TIME = 7200000L; // 2 hours
121 
/**
 * Builds a tracker whose internal maps all start out empty.
 *
 * @param master the server kept in the {@code server} field
 * @param serverManager the server manager kept in the {@code serverManager} field
 * @param regionStateStore the store kept in the {@code regionStateStore} field
 */
RegionStates(final Server master,
    final ServerManager serverManager, final RegionStateStore regionStateStore) {
  // Collaborators first, then the (initially empty) bookkeeping maps.
  this.server = master;
  this.serverManager = serverManager;
  this.regionStateStore = regionStateStore;

  this.regionStates = new HashMap<String, RegionState>();
  this.regionsInTransition = new HashMap<String, RegionState>();
  this.regionAssignments = new TreeMap<HRegionInfo, ServerName>();
  this.serverHoldings = new HashMap<ServerName, Set<HRegionInfo>>();
  this.lastAssignments = new HashMap<String, ServerName>();
  this.deadServers = new HashMap<String, Long>();
  this.processedServers = new HashMap<ServerName, Long>();
}
135 
136   /**
137    * @return an unmodifiable the region assignment map
138    */
139   public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
140     return Collections.unmodifiableMap(regionAssignments);
141   }
142 
143   public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
144     return regionAssignments.get(hri);
145   }
146 
147   /**
148    * Get regions in transition and their states
149    */
150   @SuppressWarnings("unchecked")
151   public synchronized Map<String, RegionState> getRegionsInTransition() {
152     return (Map<String, RegionState>)regionsInTransition.clone();
153   }
154 
155   /**
156    * @return True if specified region in transition.
157    */
158   public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
159     return regionsInTransition.containsKey(hri.getEncodedName());
160   }
161 
162   /**
163    * @return True if specified region in transition.
164    */
165   public synchronized boolean isRegionInTransition(final String encodedName) {
166     return regionsInTransition.containsKey(encodedName);
167   }
168 
169   /**
170    * @return True if any region in transition.
171    */
172   public synchronized boolean isRegionsInTransition() {
173     return !regionsInTransition.isEmpty();
174   }
175 
176   /**
177    * @return True if specified region assigned, and not in transition.
178    */
179   public synchronized boolean isRegionOnline(final HRegionInfo hri) {
180     return !isRegionInTransition(hri) && regionAssignments.containsKey(hri);
181   }
182 
183   /**
184    * @return True if specified region offline/closed, but not in transition.
185    * If the region is not in the map, it is offline to us too.
186    */
187   public synchronized boolean isRegionOffline(final HRegionInfo hri) {
188     return getRegionState(hri) == null || (!isRegionInTransition(hri)
189       && isRegionInState(hri, State.OFFLINE, State.CLOSED));
190   }
191 
192   /**
193    * @return True if specified region is in one of the specified states.
194    */
195   public boolean isRegionInState(
196       final HRegionInfo hri, final State... states) {
197     return isRegionInState(hri.getEncodedName(), states);
198   }
199 
200   /**
201    * @return True if specified region is in one of the specified states.
202    */
203   public boolean isRegionInState(
204       final String encodedName, final State... states) {
205     RegionState regionState = getRegionState(encodedName);
206     return isOneOfStates(regionState, states);
207   }
208 
209   /**
210    * Wait for the state map to be updated by assignment manager.
211    */
212   public synchronized void waitForUpdate(
213       final long timeout) throws InterruptedException {
214     this.wait(timeout);
215   }
216 
217   /**
218    * Get region transition state
219    */
220   public RegionState getRegionTransitionState(final HRegionInfo hri) {
221     return getRegionTransitionState(hri.getEncodedName());
222   }
223 
224   /**
225    * Get region transition state
226    */
227   public synchronized RegionState
228       getRegionTransitionState(final String encodedName) {
229     return regionsInTransition.get(encodedName);
230   }
231 
232   /**
233    * Add a list of regions to RegionStates. If a region is split
234    * and offline, its state will be SPLIT. Otherwise, its state will
235    * be OFFLINE. Region already in RegionStates will be skipped.
236    */
237   public void createRegionStates(
238       final List<HRegionInfo> hris) {
239     for (HRegionInfo hri: hris) {
240       createRegionState(hri);
241     }
242   }
243 
244   /**
245    * Add a region to RegionStates. If the region is split
246    * and offline, its state will be SPLIT. Otherwise, its state will
247    * be OFFLINE. If it is already in RegionStates, this call has
248    * no effect, and the original state is returned.
249    */
250   public RegionState createRegionState(final HRegionInfo hri) {
251     return createRegionState(hri, null, null);
252   }
253 
254   /**
255    * Add a region to RegionStates with the specified state.
256    * If the region is already in RegionStates, this call has
257    * no effect, and the original state is returned.
258    */
259   public synchronized RegionState createRegionState(
260       final HRegionInfo hri, State newState, ServerName serverName) {
261     if (newState == null || (newState == State.OPEN && serverName == null)) {
262       newState =  State.OFFLINE;
263     }
264     if (hri.isOffline() && hri.isSplit()) {
265       newState = State.SPLIT;
266       serverName = null;
267     }
268     String encodedName = hri.getEncodedName();
269     RegionState regionState = regionStates.get(encodedName);
270     if (regionState != null) {
271       LOG.warn("Tried to create a state for a region already in RegionStates, "
272         + "used existing: " + regionState + ", ignored new: " + newState);
273     } else {
274       regionState = new RegionState(hri, newState, serverName);
275       regionStates.put(encodedName, regionState);
276       if (newState == State.OPEN) {
277         regionAssignments.put(hri, serverName);
278         lastAssignments.put(encodedName, serverName);
279         Set<HRegionInfo> regions = serverHoldings.get(serverName);
280         if (regions == null) {
281           regions = new HashSet<HRegionInfo>();
282           serverHoldings.put(serverName, regions);
283         }
284         regions.add(hri);
285       } else if (!regionState.isUnassignable()) {
286         regionsInTransition.put(encodedName, regionState);
287       }
288     }
289     return regionState;
290   }
291 
292   /**
293    * Update a region state. It will be put in transition if not already there.
294    */
295   public RegionState updateRegionState(
296       final HRegionInfo hri, final State state) {
297     RegionState regionState = getRegionState(hri.getEncodedName());
298     return updateRegionState(hri, state,
299       regionState == null ? null : regionState.getServerName());
300   }
301 
302   /**
303    * Update a region state. It will be put in transition if not already there.
304    *
305    * If we can't find the region info based on the region name in
306    * the transition, log a warning and return null.
307    */
308   public RegionState updateRegionState(
309       final RegionTransition transition, final State state) {
310     byte [] regionName = transition.getRegionName();
311     HRegionInfo regionInfo = getRegionInfo(regionName);
312     if (regionInfo == null) {
313       String prettyRegionName = HRegionInfo.prettyPrint(
314         HRegionInfo.encodeRegionName(regionName));
315       LOG.warn("Failed to find region " + prettyRegionName
316         + " in updating its state to " + state
317         + " based on region transition " + transition);
318       return null;
319     }
320     return updateRegionState(regionInfo, state,
321       transition.getServerName());
322   }
323   
324   /**
325    * Transition a region state to OPEN from OPENING/PENDING_OPEN
326    */
327   public synchronized RegionState transitionOpenFromPendingOpenOrOpeningOnServer(
328       final RegionTransition transition, final RegionState fromState, final ServerName sn) {
329     if(fromState.isPendingOpenOrOpeningOnServer(sn)){
330       return updateRegionState(transition, State.OPEN);
331     }
332     return null;
333   }
334 
335   /**
336    * Update a region state. It will be put in transition if not already there.
337    */
338   public RegionState updateRegionState(
339       final HRegionInfo hri, final State state, final ServerName serverName) {
340     return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM);
341   }
342 
343   public void regionOnline(
344       final HRegionInfo hri, final ServerName serverName) {
345     regionOnline(hri, serverName, HConstants.NO_SEQNUM);
346   }
347 
348   /**
349    * A region is online, won't be in transition any more.
350    * We can't confirm it is really online on specified region server
351    * because it hasn't been put in region server's online region list yet.
352    */
353   public void regionOnline(final HRegionInfo hri,
354       final ServerName serverName, long openSeqNum) {
355     if (!serverManager.isServerOnline(serverName)) {
356       // This is possible if the region server dies before master gets a
357       // chance to handle ZK event in time. At this time, if the dead server
358       // is already processed by SSH, we should ignore this event.
359       // If not processed yet, ignore and let SSH deal with it.
360       LOG.warn("Ignored, " + hri.getEncodedName()
361         + " was opened on a dead server: " + serverName);
362       return;
363     }
364     updateRegionState(hri, State.OPEN, serverName, openSeqNum);
365 
366     synchronized (this) {
367       regionsInTransition.remove(hri.getEncodedName());
368       ServerName oldServerName = regionAssignments.put(hri, serverName);
369       if (!serverName.equals(oldServerName)) {
370         LOG.info("Onlined " + hri.getShortNameToLog() + " on " + serverName);
371         Set<HRegionInfo> regions = serverHoldings.get(serverName);
372         if (regions == null) {
373           regions = new HashSet<HRegionInfo>();
374           serverHoldings.put(serverName, regions);
375         }
376         regions.add(hri);
377         if (oldServerName != null) {
378           LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
379           Set<HRegionInfo> oldRegions = serverHoldings.get(oldServerName);
380           oldRegions.remove(hri);
381           if (oldRegions.isEmpty()) {
382             serverHoldings.remove(oldServerName);
383           }
384         }
385       }
386     }
387   }
388 
389   /**
390    * A dead server's hlogs have been split so that all the regions
391    * used to be open on it can be safely assigned now. Mark them assignable.
392    */
393   public synchronized void logSplit(final ServerName serverName) {
394     for (Iterator<Map.Entry<String, ServerName>> it
395         = lastAssignments.entrySet().iterator(); it.hasNext();) {
396       Map.Entry<String, ServerName> e = it.next();
397       if (e.getValue().equals(serverName)) {
398         it.remove();
399       }
400     }
401     long now = System.currentTimeMillis();
402     if (LOG.isDebugEnabled()) {
403       LOG.debug("Adding to processed servers " + serverName);
404     }
405     processedServers.put(serverName, Long.valueOf(now));
406     Configuration conf = server.getConfiguration();
407     long obsoleteTime = conf.getLong(LOG_SPLIT_TIME, DEFAULT_LOG_SPLIT_TIME);
408     // Doesn't have to be very accurate about the clean up time
409     if (now > lastProcessedServerCleanTime + obsoleteTime) {
410       lastProcessedServerCleanTime = now;
411       long cutoff = now - obsoleteTime;
412       for (Iterator<Map.Entry<ServerName, Long>> it
413           = processedServers.entrySet().iterator(); it.hasNext();) {
414         Map.Entry<ServerName, Long> e = it.next();
415         if (e.getValue().longValue() < cutoff) {
416           if (LOG.isDebugEnabled()) {
417             LOG.debug("Removed from processed servers " + e.getKey());
418           }
419           it.remove();
420         }
421       }
422     }
423   }
424 
425   /**
426    * Log split is done for a given region, so it is assignable now.
427    */
428   public void logSplit(final HRegionInfo region) {
429     clearLastAssignment(region);
430   }
431 
432   public synchronized void clearLastAssignment(final HRegionInfo region) {
433     lastAssignments.remove(region.getEncodedName());
434   }
435 
436   /**
437    * A region is offline, won't be in transition any more.
438    */
439   public void regionOffline(final HRegionInfo hri) {
440     regionOffline(hri, null);
441   }
442 
443   /**
444    * A region is offline, won't be in transition any more. Its state
445    * should be the specified expected state, which can only be
446    * Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
447    */
448   public void regionOffline(
449       final HRegionInfo hri, final State expectedState) {
450     Preconditions.checkArgument(expectedState == null
451       || RegionState.isUnassignable(expectedState),
452         "Offlined region should not be " + expectedState);
453     if (isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
454       // Remove it from all region maps
455       deleteRegion(hri);
456       return;
457     }
458     State newState =
459       expectedState == null ? State.OFFLINE : expectedState;
460     updateRegionState(hri, newState);
461 
462     synchronized (this) {
463       regionsInTransition.remove(hri.getEncodedName());
464       ServerName oldServerName = regionAssignments.remove(hri);
465       if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
466         LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
467         Set<HRegionInfo> oldRegions = serverHoldings.get(oldServerName);
468         oldRegions.remove(hri);
469         if (oldRegions.isEmpty()) {
470           serverHoldings.remove(oldServerName);
471         }
472       }
473     }
474   }
475 
476   /**
477    * A server is offline, all regions on it are dead.
478    */
479   public synchronized List<HRegionInfo> serverOffline(
480       final ZooKeeperWatcher watcher, final ServerName sn) {
481     // Offline all regions on this server not already in transition.
482     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
483     Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
484     if (assignedRegions == null) {
485       assignedRegions = new HashSet<HRegionInfo>();
486     }
487 
488     // Offline regions outside the loop to avoid ConcurrentModificationException
489     Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
490     for (HRegionInfo region : assignedRegions) {
491       // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
492       if (isRegionOnline(region)) {
493         regionsToOffline.add(region);
494       } else {
495         if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
496           LOG.debug("Offline splitting/merging region " + getRegionState(region));
497           try {
498             // Delete the ZNode if exists
499             ZKAssign.deleteNodeFailSilent(watcher, region);
500             regionsToOffline.add(region);
501           } catch (KeeperException ke) {
502             server.abort("Unexpected ZK exception deleting node " + region, ke);
503           }
504         }
505       }
506     }
507 
508     for (HRegionInfo hri : regionsToOffline) {
509       regionOffline(hri);
510     }
511 
512     for (RegionState state : regionsInTransition.values()) {
513       HRegionInfo hri = state.getRegion();
514       if (assignedRegions.contains(hri)) {
515         // Region is open on this region server, but in transition.
516         // This region must be moving away from this server, or splitting/merging.
517         // SSH will handle it, either skip assigning, or re-assign.
518         LOG.info("Transitioning " + state + " will be handled by SSH for " + sn);
519       } else if (sn.equals(state.getServerName())) {
520         // Region is in transition on this region server, and this
521         // region is not open on this server. So the region must be
522         // moving to this server from another one (i.e. opening or
523         // pending open on this server, was open on another one.
524         // Offline state is also kind of pending open if the region is in
525         // transition. The region could be in failed_close state too if we have
526         // tried several times to open it while this region server is not reachable)
527         if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
528           LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
529           rits.add(hri);
530         } else {
531           LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
532         }
533       }
534     }
535 
536     this.notifyAll();
537     return rits;
538   }
539 
540   /**
541    * Gets the online regions of the specified table.
542    * This method looks at the in-memory state.  It does not go to <code>hbase:meta</code>.
543    * Only returns <em>online</em> regions.  If a region on this table has been
544    * closed during a disable, etc., it will be included in the returned list.
545    * So, the returned list may not necessarily be ALL regions in this table, its
546    * all the ONLINE regions in the table.
547    * @param tableName
548    * @return Online regions from <code>tableName</code>
549    */
550   public synchronized List<HRegionInfo> getRegionsOfTable(TableName tableName) {
551     List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
552     // boundary needs to have table's name but regionID 0 so that it is sorted
553     // before all table's regions.
554     HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
555     for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
556       if(!hri.getTable().equals(tableName)) break;
557       tableRegions.add(hri);
558     }
559     return tableRegions;
560   }
561 
562 
563   /**
564    * Wait on region to clear regions-in-transition.
565    * <p>
566    * If the region isn't in transition, returns immediately.  Otherwise, method
567    * blocks until the region is out of transition.
568    */
569   public synchronized void waitOnRegionToClearRegionsInTransition(
570       final HRegionInfo hri) throws InterruptedException {
571     if (!isRegionInTransition(hri)) return;
572 
573     while(!server.isStopped() && isRegionInTransition(hri)) {
574       RegionState rs = getRegionState(hri);
575       LOG.info("Waiting on " + rs + " to clear regions-in-transition");
576       waitForUpdate(100);
577     }
578 
579     if (server.isStopped()) {
580       LOG.info("Giving up wait on region in " +
581         "transition because stoppable.isStopped is set");
582     }
583   }
584 
585   /**
586    * A table is deleted. Remove its regions from all internal maps.
587    * We loop through all regions assuming we don't delete tables too much.
588    */
589   public void tableDeleted(final TableName tableName) {
590     Set<HRegionInfo> regionsToDelete = new HashSet<HRegionInfo>();
591     synchronized (this) {
592       for (RegionState state: regionStates.values()) {
593         HRegionInfo region = state.getRegion();
594         if (region.getTable().equals(tableName)) {
595           regionsToDelete.add(region);
596         }
597       }
598     }
599     for (HRegionInfo region: regionsToDelete) {
600       deleteRegion(region);
601     }
602   }
603 
604   /**
605    * Checking if a region was assigned to a server which is not online now.
606    * If so, we should hold re-assign this region till SSH has split its hlogs.
607    * Once logs are split, the last assignment of this region will be reset,
608    * which means a null last assignment server is ok for re-assigning.
609    *
610    * A region server could be dead but we don't know it yet. We may
611    * think it's online falsely. Therefore if a server is online, we still
612    * need to confirm it reachable and having the expected start code.
613    */
614   synchronized boolean wasRegionOnDeadServer(final String encodedName) {
615     ServerName server = lastAssignments.get(encodedName);
616     return isServerDeadAndNotProcessed(server);
617   }
618 
619   synchronized boolean isServerDeadAndNotProcessed(ServerName server) {
620     if (server == null) return false;
621     if (serverManager.isServerOnline(server)) {
622       String hostAndPort = server.getHostAndPort();
623       long startCode = server.getStartcode();
624       Long deadCode = deadServers.get(hostAndPort);
625       if (deadCode == null || startCode > deadCode.longValue()) {
626         if (serverManager.isServerReachable(server)) {
627           return false;
628         }
629         // The size of deadServers won't grow unbounded.
630         deadServers.put(hostAndPort, Long.valueOf(startCode));
631       }
632       // Watch out! If the server is not dead, the region could
633       // remain unassigned. That's why ServerManager#isServerReachable
634       // should use some retry.
635       //
636       // We cache this info since it is very unlikely for that
637       // instance to come back up later on. We don't want to expire
638       // the server since we prefer to let it die naturally.
639       LOG.warn("Couldn't reach online server " + server);
640     }
641     // Now, we know it's dead. Check if it's processed
642     return !processedServers.containsKey(server);
643   }
644 
645  /**
646    * Get the last region server a region was on for purpose of re-assignment,
647    * i.e. should the re-assignment be held back till log split is done?
648    */
649   synchronized ServerName getLastRegionServerOfRegion(final String encodedName) {
650     return lastAssignments.get(encodedName);
651   }
652 
653   synchronized void setLastRegionServerOfRegions(
654       final ServerName serverName, final List<HRegionInfo> regionInfos) {
655     for (HRegionInfo hri: regionInfos) {
656       setLastRegionServerOfRegion(serverName, hri.getEncodedName());
657     }
658   }
659 
660   synchronized void setLastRegionServerOfRegion(
661       final ServerName serverName, final String encodedName) {
662     lastAssignments.put(encodedName, serverName);
663   }
664 
665   synchronized void closeAllUserRegions(Set<TableName> excludedTables) {
666     Set<HRegionInfo> toBeClosed = new HashSet<HRegionInfo>(regionStates.size());
667     for(RegionState state: regionStates.values()) {
668       HRegionInfo hri = state.getRegion();
669       TableName tableName = hri.getTable();
670       if (!hri.isSplit() && !hri.isMetaRegion()
671           && !excludedTables.contains(tableName)) {
672         toBeClosed.add(hri);
673       }
674     }
675     for (HRegionInfo hri: toBeClosed) {
676       updateRegionState(hri, State.CLOSED);
677     }
678   }
679 
680   /**
681    * Compute the average load across all region servers.
682    * Currently, this uses a very naive computation - just uses the number of
683    * regions being served, ignoring stats about number of requests.
684    * @return the average load
685    */
686   protected synchronized double getAverageLoad() {
687     int numServers = 0, totalLoad = 0;
688     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
689       Set<HRegionInfo> regions = e.getValue();
690       ServerName serverName = e.getKey();
691       int regionCount = regions.size();
692       if (serverManager.isServerOnline(serverName)) {
693         totalLoad += regionCount;
694         numServers++;
695       }
696     }
697     return numServers == 0 ? 0.0 :
698       (double)totalLoad / (double)numServers;
699   }
700 
701   /**
702    * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
703    * Can't let out original since it can change and at least the load balancer
704    * wants to iterate this exported list.  We need to synchronize on regions
705    * since all access to this.servers is under a lock on this.regions.
706    *
707    * @return A clone of current assignments by table.
708    */
709   protected Map<TableName, Map<ServerName, List<HRegionInfo>>>
710       getAssignmentsByTable() {
711     Map<TableName, Map<ServerName, List<HRegionInfo>>> result =
712       new HashMap<TableName, Map<ServerName,List<HRegionInfo>>>();
713     synchronized (this) {
714       if (!server.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false)) {
715         Map<ServerName, List<HRegionInfo>> svrToRegions =
716           new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
717         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
718           svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
719         }
720         result.put(TableName.valueOf("ensemble"), svrToRegions);
721       } else {
722         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
723           for (HRegionInfo hri: e.getValue()) {
724             if (hri.isMetaRegion()) continue;
725             TableName tablename = hri.getTable();
726             Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
727             if (svrToRegions == null) {
728               svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
729               result.put(tablename, svrToRegions);
730             }
731             List<HRegionInfo> regions = svrToRegions.get(e.getKey());
732             if (regions == null) {
733               regions = new ArrayList<HRegionInfo>();
734               svrToRegions.put(e.getKey(), regions);
735             }
736             regions.add(hri);
737           }
738         }
739       }
740     }
741 
742     Map<ServerName, ServerLoad>
743       onlineSvrs = serverManager.getOnlineServers();
744     // Take care of servers w/o assignments.
745     for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
746       for (ServerName svr: onlineSvrs.keySet()) {
747         if (!map.containsKey(svr)) {
748           map.put(svr, new ArrayList<HRegionInfo>());
749         }
750       }
751     }
752     return result;
753   }
754 
755   protected RegionState getRegionState(final HRegionInfo hri) {
756     return getRegionState(hri.getEncodedName());
757   }
758 
759   protected synchronized RegionState getRegionState(final String encodedName) {
760     return regionStates.get(encodedName);
761   }
762 
763   /**
764    * Get the HRegionInfo from cache, if not there, from the hbase:meta table
765    * @param  regionName
766    * @return HRegionInfo for the region
767    */
768   protected HRegionInfo getRegionInfo(final byte [] regionName) {
769     String encodedName = HRegionInfo.encodeRegionName(regionName);
770     RegionState regionState = getRegionState(encodedName);
771     if (regionState != null) {
772       return regionState.getRegion();
773     }
774 
775     try {
776       Pair<HRegionInfo, ServerName> p =
777         MetaReader.getRegion(server.getCatalogTracker(), regionName);
778       HRegionInfo hri = p == null ? null : p.getFirst();
779       if (hri != null) {
780         createRegionState(hri);
781       }
782       return hri;
783     } catch (IOException e) {
784       server.abort("Aborting because error occoured while reading "
785         + Bytes.toStringBinary(regionName) + " from hbase:meta", e);
786       return null;
787     }
788   }
789 
790   static boolean isOneOfStates(RegionState regionState, State... states) {
791     State s = regionState != null ? regionState.getState() : null;
792     for (State state: states) {
793       if (s == state) return true;
794     }
795     return false;
796   }
797 
798   /**
799    * Update a region state. It will be put in transition if not already there.
800    */
801   private RegionState updateRegionState(final HRegionInfo hri,
802       final State state, final ServerName serverName, long openSeqNum) {
803     if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) {
804       LOG.warn("Failed to open/close " + hri.getShortNameToLog()
805         + " on " + serverName + ", set to " + state);
806     }
807 
808     String encodedName = hri.getEncodedName();
809     RegionState regionState = new RegionState(
810       hri, state, System.currentTimeMillis(), serverName);
811     RegionState oldState = getRegionState(encodedName);
812     if (!regionState.equals(oldState)) {
813       LOG.info("Transition " + oldState + " to " + regionState);
814       // Persist region state before updating in-memory info, if needed
815       regionStateStore.updateRegionState(openSeqNum, regionState, oldState);
816     }
817 
818     synchronized (this) {
819       regionsInTransition.put(encodedName, regionState);
820       regionStates.put(encodedName, regionState);
821 
822       // For these states, region should be properly closed.
823       // There should be no log splitting issue.
824       if ((state == State.CLOSED || state == State.MERGED
825           || state == State.SPLIT) && lastAssignments.containsKey(encodedName)) {
826         ServerName last = lastAssignments.get(encodedName);
827         if (last.equals(serverName)) {
828           lastAssignments.remove(encodedName);
829         } else {
830           LOG.warn(encodedName + " moved to " + state + " on "
831             + serverName + ", expected " + last);
832         }
833       }
834 
835       // Once a region is opened, record its last assignment right away.
836       if (serverName != null && state == State.OPEN) {
837         ServerName last = lastAssignments.get(encodedName);
838         if (!serverName.equals(last)) {
839           lastAssignments.put(encodedName, serverName);
840           if (last != null && isServerDeadAndNotProcessed(last)) {
841             LOG.warn(encodedName + " moved to " + serverName
842               + ", while it's previous host " + last
843               + " is dead but not processed yet");
844           }
845         }
846       }
847 
848       // notify the change
849       this.notifyAll();
850     }
851     return regionState;
852   }
853 
854   /**
855    * Remove a region from all state maps.
856    */
857   private synchronized void deleteRegion(final HRegionInfo hri) {
858     String encodedName = hri.getEncodedName();
859     regionsInTransition.remove(encodedName);
860     regionStates.remove(encodedName);
861     lastAssignments.remove(encodedName);
862     ServerName sn = regionAssignments.remove(hri);
863     if (sn != null) {
864       Set<HRegionInfo> regions = serverHoldings.get(sn);
865       regions.remove(hri);
866     }
867   }
868 }