1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.Collections;
23  import java.util.HashMap;
24  import java.util.HashSet;
25  import java.util.Iterator;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Set;
29  import java.util.TreeMap;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.RegionTransition;
38  import org.apache.hadoop.hbase.Server;
39  import org.apache.hadoop.hbase.ServerLoad;
40  import org.apache.hadoop.hbase.ServerName;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.catalog.MetaReader;
43  import org.apache.hadoop.hbase.master.RegionState.State;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.FSUtils;
46  import org.apache.hadoop.hbase.util.Pair;
47  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
48  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
49  import org.apache.zookeeper.KeeperException;
50  
51  import com.google.common.base.Preconditions;
52  
53  /**
54   * Region state accountant. It holds the states of all regions in memory.
55   * In the normal case, it should match the meta table and the actual region states.
56   *
57   * This class is used by the AssignmentManager to track region states.
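 *
 * <p>A minimal usage sketch (hypothetical; in practice the instance is
 * obtained from the AssignmentManager rather than constructed directly):
 * <pre>
 *   RegionStates regionStates = ...; // e.g. assignmentManager.getRegionStates()
 *   HRegionInfo hri = ...;           // region of interest
 *   if (regionStates.isRegionOnline(hri)) {
 *     ServerName sn = regionStates.getRegionServerOfRegion(hri);
 *     // act on the hosting server name
 *   }
 * </pre>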
58   */
59  @InterfaceAudience.Private
60  public class RegionStates {
61    private static final Log LOG = LogFactory.getLog(RegionStates.class);
62  
63    /**
64     * Regions currently in transition.
65     */
66    final HashMap<String, RegionState> regionsInTransition;
67  
68    /**
69     * Region encoded name to state map.
70     * All the regions should be in this map.
71     */
72    private final HashMap<String, RegionState> regionStates;
73  
74    /**
75     * Server to regions assignment map.
76     * Contains the set of regions currently assigned to a given server.
77     */
78    private final Map<ServerName, Set<HRegionInfo>> serverHoldings;
79  
80    /**
81     * Region to server assignment map.
82     * Contains the server a given region is currently assigned to.
83     */
84    private final TreeMap<HRegionInfo, ServerName> regionAssignments;
85  
86    /**
87     * Encoded region name to server assignment map for re-assignment
88    * purposes. Contains the server a given region was last known assigned
89    * to; that server may not have completed log splitting, in which case
90    * the region is not assignable yet. If a region is currently assigned,
91    * the server info in this map should be the same as that in
92    * regionAssignments. However, the info in regionAssignments is cleared
93    * when the region is offlined, while the info in lastAssignments is cleared when
94     * the region is closed or the server is dead and processed.
95     */
96    private final HashMap<String, ServerName> lastAssignments;
97  
98    /**
99     * Map a host port pair string to the latest start code
100    * of a region server which is known to be dead. It is dead
101    * to us, but server manager may not know it yet.
102    */
103   private final HashMap<String, Long> deadServers;
104 
105   /**
106    * Map a dead server to the time when its log splitting is done.
107    * Since log splitting is not ordered, we have to remember
108    * all processed instances. The map is cleaned up based
109    * on a configured time. By default, we assume a dead
110    * server should be done with log splitting in two hours.
111    */
112   private final HashMap<ServerName, Long> processedServers;
113   private long lastProcessedServerCleanTime;
114 
115   private final RegionStateStore regionStateStore;
116   private final ServerManager serverManager;
117   private final Server server;
118 
119   // The maximum time to keep a log split info in region states map
120   static final String LOG_SPLIT_TIME = "hbase.master.maximum.logsplit.keeptime";
121   static final long DEFAULT_LOG_SPLIT_TIME = 7200000L; // 2 hours
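  // For example, the retention could be shortened to one hour by setting
  // hbase.master.maximum.logsplit.keeptime to 3600000 in hbase-site.xml
  // (an operator-side override sketch, not something done by this class).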
122 
123   RegionStates(final Server master,
124       final ServerManager serverManager, final RegionStateStore regionStateStore) {
125     regionStates = new HashMap<String, RegionState>();
126     regionsInTransition = new HashMap<String, RegionState>();
127     serverHoldings = new HashMap<ServerName, Set<HRegionInfo>>();
128     regionAssignments = new TreeMap<HRegionInfo, ServerName>();
129     lastAssignments = new HashMap<String, ServerName>();
130     processedServers = new HashMap<ServerName, Long>();
131     deadServers = new HashMap<String, Long>();
132     this.regionStateStore = regionStateStore;
133     this.serverManager = serverManager;
134     this.server = master;
135   }
136 
137   /**
138    * @return an unmodifiable view of the region assignment map
139    */
140   public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
141     return Collections.unmodifiableMap(regionAssignments);
142   }
143 
144   public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
145     return regionAssignments.get(hri);
146   }
147 
148   /**
149    * Get regions in transition and their states
150    */
151   @SuppressWarnings("unchecked")
152   public synchronized Map<String, RegionState> getRegionsInTransition() {
153     return (Map<String, RegionState>)regionsInTransition.clone();
154   }
155 
156   /**
157    * @return True if the specified region is in transition.
158    */
159   public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
160     return regionsInTransition.containsKey(hri.getEncodedName());
161   }
162 
163   /**
164    * @return True if specified region in transition.
165    */
166   public synchronized boolean isRegionInTransition(final String encodedName) {
167     return regionsInTransition.containsKey(encodedName);
168   }
169 
170   /**
171    * @return True if any region is in transition.
172    */
173   public synchronized boolean isRegionsInTransition() {
174     return !regionsInTransition.isEmpty();
175   }
176 
177   /**
178    * @return True if the specified region is assigned and not in transition.
179    */
180   public synchronized boolean isRegionOnline(final HRegionInfo hri) {
181     return !isRegionInTransition(hri) && regionAssignments.containsKey(hri);
182   }
183 
184   /**
185    * @return True if the specified region is offline/closed and not in transition.
186    * If the region is not in the map, it is considered offline as well.
187    */
188   public synchronized boolean isRegionOffline(final HRegionInfo hri) {
189     return getRegionState(hri) == null || (!isRegionInTransition(hri)
190       && isRegionInState(hri, State.OFFLINE, State.CLOSED));
191   }
192 
193   /**
194    * @return True if specified region is in one of the specified states.
195    */
196   public boolean isRegionInState(
197       final HRegionInfo hri, final State... states) {
198     return isRegionInState(hri.getEncodedName(), states);
199   }
200 
201   /**
202    * @return True if specified region is in one of the specified states.
203    */
204   public boolean isRegionInState(
205       final String encodedName, final State... states) {
206     RegionState regionState = getRegionState(encodedName);
207     return isOneOfStates(regionState, states);
208   }
209 
210   /**
211    * Wait for the state map to be updated by assignment manager.
212    */
213   public synchronized void waitForUpdate(
214       final long timeout) throws InterruptedException {
215     this.wait(timeout);
216   }
217 
218   /**
219    * Get region transition state
220    */
221   public RegionState getRegionTransitionState(final HRegionInfo hri) {
222     return getRegionTransitionState(hri.getEncodedName());
223   }
224 
225   /**
226    * Get region transition state
227    */
228   public synchronized RegionState
229       getRegionTransitionState(final String encodedName) {
230     return regionsInTransition.get(encodedName);
231   }
232 
233   /**
234    * Add a list of regions to RegionStates. If a region is split
235    * and offline, its state will be SPLIT. Otherwise, its state will
236    * be OFFLINE. Regions already in RegionStates will be skipped.
237    */
238   public void createRegionStates(
239       final List<HRegionInfo> hris) {
240     for (HRegionInfo hri: hris) {
241       createRegionState(hri);
242     }
243   }
244 
245   /**
246    * Add a region to RegionStates. If the region is split
247    * and offline, its state will be SPLIT. Otherwise, its state will
248    * be OFFLINE. If it is already in RegionStates, this call has
249    * no effect, and the original state is returned.
250    */
251   public RegionState createRegionState(final HRegionInfo hri) {
252     return createRegionState(hri, null, null);
253   }
254 
255   /**
256    * Add a region to RegionStates with the specified state.
257    * If the region is already in RegionStates, this call has
258    * no effect, and the original state is returned.
259    */
260   public synchronized RegionState createRegionState(
261       final HRegionInfo hri, State newState, ServerName serverName) {
262     if (newState == null || (newState == State.OPEN && serverName == null)) {
263       newState =  State.OFFLINE;
264     }
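    // An offline split parent is recorded as SPLIT and carries no server assignment.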
265     if (hri.isOffline() && hri.isSplit()) {
266       newState = State.SPLIT;
267       serverName = null;
268     }
269     String encodedName = hri.getEncodedName();
270     RegionState regionState = regionStates.get(encodedName);
271     if (regionState != null) {
272       LOG.warn("Tried to create a state for a region already in RegionStates, "
273         + "used existing: " + regionState + ", ignored new: " + newState);
274     } else {
275       regionState = new RegionState(hri, newState, serverName);
276       regionStates.put(encodedName, regionState);
277       if (newState == State.OPEN) {
278         regionAssignments.put(hri, serverName);
279         lastAssignments.put(encodedName, serverName);
280         Set<HRegionInfo> regions = serverHoldings.get(serverName);
281         if (regions == null) {
282           regions = new HashSet<HRegionInfo>();
283           serverHoldings.put(serverName, regions);
284         }
285         regions.add(hri);
286       } else if (!regionState.isUnassignable()) {
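        // Any other non-unassignable state is transient, so track the region as in transition.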
287         regionsInTransition.put(encodedName, regionState);
288       }
289     }
290     return regionState;
291   }
292 
293   /**
294    * Update a region state. It will be put in transition if not already there.
295    */
296   public RegionState updateRegionState(
297       final HRegionInfo hri, final State state) {
298     RegionState regionState = getRegionState(hri.getEncodedName());
299     return updateRegionState(hri, state,
300       regionState == null ? null : regionState.getServerName());
301   }
302 
303   /**
304    * Update a region state. It will be put in transition if not already there.
305    *
306    * If we can't find the region info based on the region name in
307    * the transition, log a warning and return null.
308    */
309   public RegionState updateRegionState(
310       final RegionTransition transition, final State state) {
311     byte [] regionName = transition.getRegionName();
312     HRegionInfo regionInfo = getRegionInfo(regionName);
313     if (regionInfo == null) {
314       String prettyRegionName = HRegionInfo.prettyPrint(
315         HRegionInfo.encodeRegionName(regionName));
316       LOG.warn("Failed to find region " + prettyRegionName
317         + " in updating its state to " + state
318         + " based on region transition " + transition);
319       return null;
320     }
321     return updateRegionState(regionInfo, state,
322       transition.getServerName());
323   }
324   
325   /**
326    * Transition a region state to OPEN from OPENING/PENDING_OPEN
327    */
328   public synchronized RegionState transitionOpenFromPendingOpenOrOpeningOnServer(
329       final RegionTransition transition, final RegionState fromState, final ServerName sn) {
330     if (fromState.isPendingOpenOrOpeningOnServer(sn)) {
331       return updateRegionState(transition, State.OPEN);
332     }
333     return null;
334   }
335 
336   /**
337    * Update a region state. It will be put in transition if not already there.
338    */
339   public RegionState updateRegionState(
340       final HRegionInfo hri, final State state, final ServerName serverName) {
341     return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM);
342   }
343 
344   public void regionOnline(
345       final HRegionInfo hri, final ServerName serverName) {
346     regionOnline(hri, serverName, HConstants.NO_SEQNUM);
347   }
348 
349   /**
350    * A region is online; it won't be in transition any more.
351    * We can't confirm it is really online on the specified region server
352    * because it hasn't been put in the region server's online region list yet.
353    */
354   public void regionOnline(final HRegionInfo hri,
355       final ServerName serverName, long openSeqNum) {
356     if (!serverManager.isServerOnline(serverName)) {
357       // This is possible if the region server dies before master gets a
358       // chance to handle ZK event in time. At this time, if the dead server
359       // is already processed by SSH, we should ignore this event.
360       // If not processed yet, ignore and let SSH deal with it.
361       LOG.warn("Ignored, " + hri.getEncodedName()
362         + " was opened on a dead server: " + serverName);
363       return;
364     }
365     updateRegionState(hri, State.OPEN, serverName, openSeqNum);
366 
367     synchronized (this) {
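      // The region is no longer in transition; record the new assignment and
      // move it from the old server's holdings (if any) to the new server's.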
368       regionsInTransition.remove(hri.getEncodedName());
369       ServerName oldServerName = regionAssignments.put(hri, serverName);
370       if (!serverName.equals(oldServerName)) {
371         LOG.info("Onlined " + hri.getShortNameToLog() + " on " + serverName);
372         Set<HRegionInfo> regions = serverHoldings.get(serverName);
373         if (regions == null) {
374           regions = new HashSet<HRegionInfo>();
375           serverHoldings.put(serverName, regions);
376         }
377         regions.add(hri);
378         if (oldServerName != null) {
379           LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
380           Set<HRegionInfo> oldRegions = serverHoldings.get(oldServerName);
381           oldRegions.remove(hri);
382           if (oldRegions.isEmpty()) {
383             serverHoldings.remove(oldServerName);
384           }
385         }
386       }
387     }
388   }
389 
390   /**
391    * A dead server's hlogs have been split so that all the regions that
392    * used to be open on it can be safely assigned now. Mark them assignable.
393    */
394   public synchronized void logSplit(final ServerName serverName) {
395     for (Iterator<Map.Entry<String, ServerName>> it
396         = lastAssignments.entrySet().iterator(); it.hasNext();) {
397       Map.Entry<String, ServerName> e = it.next();
398       if (e.getValue().equals(serverName)) {
399         it.remove();
400       }
401     }
402     long now = System.currentTimeMillis();
403     if (LOG.isDebugEnabled()) {
404       LOG.debug("Adding to processed servers " + serverName);
405     }
406     processedServers.put(serverName, Long.valueOf(now));
407     Configuration conf = server.getConfiguration();
408     long obsoleteTime = conf.getLong(LOG_SPLIT_TIME, DEFAULT_LOG_SPLIT_TIME);
409     // Doesn't have to be very accurate about the clean up time
410     if (now > lastProcessedServerCleanTime + obsoleteTime) {
411       lastProcessedServerCleanTime = now;
412       long cutoff = now - obsoleteTime;
413       for (Iterator<Map.Entry<ServerName, Long>> it
414           = processedServers.entrySet().iterator(); it.hasNext();) {
415         Map.Entry<ServerName, Long> e = it.next();
416         if (e.getValue().longValue() < cutoff) {
417           if (LOG.isDebugEnabled()) {
418             LOG.debug("Removed from processed servers " + e.getKey());
419           }
420           it.remove();
421         }
422       }
423     }
424   }
425 
426   /**
427    * Log split is done for a given region, so it is assignable now.
428    */
429   public void logSplit(final HRegionInfo region) {
430     clearLastAssignment(region);
431   }
432 
433   public synchronized void clearLastAssignment(final HRegionInfo region) {
434     lastAssignments.remove(region.getEncodedName());
435   }
436 
437   /**
438    * A region is offline; it won't be in transition any more.
439    */
440   public void regionOffline(final HRegionInfo hri) {
441     regionOffline(hri, null);
442   }
443 
444   /**
445    * A region is offline; it won't be in transition any more. Its state
446    * should be the specified expected state, which can only be
447    * Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
448    */
449   public void regionOffline(
450       final HRegionInfo hri, final State expectedState) {
451     Preconditions.checkArgument(expectedState == null
452       || RegionState.isUnassignable(expectedState),
453         "Offlined region should not be " + expectedState);
454     if (isRegionInState(hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
455       // Remove it from all region maps
456       deleteRegion(hri);
457       return;
458     }
459     State newState =
460       expectedState == null ? State.OFFLINE : expectedState;
461     updateRegionState(hri, newState);
462 
463     synchronized (this) {
464       regionsInTransition.remove(hri.getEncodedName());
465       ServerName oldServerName = regionAssignments.remove(hri);
466       if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
467         LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
468         Set<HRegionInfo> oldRegions = serverHoldings.get(oldServerName);
469         oldRegions.remove(hri);
470         if (oldRegions.isEmpty()) {
471           serverHoldings.remove(oldServerName);
472         }
473       }
474     }
475   }
476 
477   /**
478    * A server is offline, all regions on it are dead.
479    */
480   public synchronized List<HRegionInfo> serverOffline(
481       final ZooKeeperWatcher watcher, final ServerName sn) {
482     // Offline all regions on this server not already in transition.
483     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
484     Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
485     if (assignedRegions == null) {
486       assignedRegions = new HashSet<HRegionInfo>();
487     }
488 
489     // Offline regions outside the loop to avoid ConcurrentModificationException
490     Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
491     for (HRegionInfo region : assignedRegions) {
492       // Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
493       if (isRegionOnline(region)) {
494         regionsToOffline.add(region);
495       } else {
496         if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
497           LOG.debug("Offline splitting/merging region " + getRegionState(region));
498           try {
499             // Delete the ZNode if it exists
500             ZKAssign.deleteNodeFailSilent(watcher, region);
501             regionsToOffline.add(region);
502           } catch (KeeperException ke) {
503             server.abort("Unexpected ZK exception deleting node " + region, ke);
504           }
505         }
506       }
507     }
508 
509     for (RegionState state : regionsInTransition.values()) {
510       HRegionInfo hri = state.getRegion();
511       if (assignedRegions.contains(hri)) {
512         // Region is open on this region server, but in transition.
513         // This region must be moving away from this server, or splitting/merging.
514         // SSH will handle it, either skip assigning, or re-assign.
515         LOG.info("Transitioning " + state + " will be handled by SSH for " + sn);
516       } else if (sn.equals(state.getServerName())) {
517         // The region is in transition on this region server but is not
518         // open on it, so the region must be moving to this server from
519         // another one (i.e. opening or pending open on this server after
520         // being open on another one). Offline state is also a kind of
521         // pending open if the region is in transition. The region could be
522         // in failed_close state too, if we have tried several times to
523         // open it while this region server is not reachable.
524         if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
525           LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
526           rits.add(hri);
527         } else if (state.isSplittingNew()) {
528           try {
529             if (MetaReader.getRegion(server.getCatalogTracker(), state.getRegion().getRegionName()) == null) {
530               regionsToOffline.add(state.getRegion());
531               FSUtils.deleteRegionDir(server.getConfiguration(), state.getRegion());
532             }
533           } catch (IOException e) {
534             LOG.warn("Got exception while deleting " + state.getRegion()
535                 + " directories from file system.", e);
536           }
537         } else {
538           LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
539         }
540       }
541     }
542 
543     for (HRegionInfo hri : regionsToOffline) {
544       regionOffline(hri);
545     }
546 
547     this.notifyAll();
548     return rits;
549   }
550 
551   /**
552    * Gets the online regions of the specified table.
553    * This method looks at the in-memory state.  It does not go to <code>hbase:meta</code>.
554    * Only returns <em>online</em> regions.  If a region on this table has been
555    * closed during a disable, etc., it will not be included in the returned list.
556    * So, the returned list may not necessarily be ALL regions in this table; it is
557    * only the ONLINE regions in the table.
558    * @param tableName the table to get the online regions of
559    * @return Online regions from <code>tableName</code>
560    */
561   public synchronized List<HRegionInfo> getRegionsOfTable(TableName tableName) {
562     List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
563     // boundary needs to have table's name but regionID 0 so that it is sorted
564     // before all table's regions.
565     HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
566     for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
567       if(!hri.getTable().equals(tableName)) break;
568       tableRegions.add(hri);
569     }
570     return tableRegions;
571   }
572 
573 
574   /**
575    * Wait on region to clear regions-in-transition.
576    * <p>
577    * If the region isn't in transition, returns immediately.  Otherwise, method
578    * blocks until the region is out of transition.
579    */
580   public synchronized void waitOnRegionToClearRegionsInTransition(
581       final HRegionInfo hri) throws InterruptedException {
582     if (!isRegionInTransition(hri)) return;
583 
584     while(!server.isStopped() && isRegionInTransition(hri)) {
585       RegionState rs = getRegionState(hri);
586       LOG.info("Waiting on " + rs + " to clear regions-in-transition");
587       waitForUpdate(100);
588     }
589 
590     if (server.isStopped()) {
591       LOG.info("Giving up wait on region in " +
592         "transition because stoppable.isStopped is set");
593     }
594   }
595 
596   /**
597    * A table is deleted. Remove its regions from all internal maps.
598    * We loop through all regions, assuming tables are not deleted too often.
599    */
600   public void tableDeleted(final TableName tableName) {
601     Set<HRegionInfo> regionsToDelete = new HashSet<HRegionInfo>();
602     synchronized (this) {
603       for (RegionState state: regionStates.values()) {
604         HRegionInfo region = state.getRegion();
605         if (region.getTable().equals(tableName)) {
606           regionsToDelete.add(region);
607         }
608       }
609     }
610     for (HRegionInfo region: regionsToDelete) {
611       deleteRegion(region);
612     }
613   }
614 
615   /**
616    * Check if a region was assigned to a server which is not online now.
617    * If so, we should hold off re-assigning this region till SSH has split its hlogs.
618    * Once logs are split, the last assignment of this region will be reset,
619    * which means a null last assignment server is ok for re-assigning.
620    *
621    * A region server could be dead but we don't know it yet. We may
622    * falsely think it's online. Therefore, if a server is online, we still
623    * need to confirm it is reachable and has the expected start code.
624    */
625   synchronized boolean wasRegionOnDeadServer(final String encodedName) {
626     ServerName server = lastAssignments.get(encodedName);
627     return isServerDeadAndNotProcessed(server);
628   }
629 
630   synchronized boolean isServerDeadAndNotProcessed(ServerName server) {
631     if (server == null) return false;
632     if (serverManager.isServerOnline(server)) {
633       String hostAndPort = server.getHostAndPort();
634       long startCode = server.getStartcode();
635       Long deadCode = deadServers.get(hostAndPort);
636       if (deadCode == null || startCode > deadCode.longValue()) {
637         if (serverManager.isServerReachable(server)) {
638           return false;
639         }
640         // The size of deadServers won't grow unbounded.
641         deadServers.put(hostAndPort, Long.valueOf(startCode));
642       }
643       // Watch out! If the server is not dead, the region could
644       // remain unassigned. That's why ServerManager#isServerReachable
645       // should use some retry.
646       //
647       // We cache this info since it is very unlikely for that
648       // instance to come back up later on. We don't want to expire
649       // the server since we prefer to let it die naturally.
650       LOG.warn("Couldn't reach online server " + server);
651     }
652     // Now, we know it's dead. Check if it's processed
653     return !processedServers.containsKey(server);
654   }
655 
656   /**
657    * Get the last region server a region was on, for the purpose of re-assignment,
658    * i.e. should the re-assignment be held back till log split is done?
659    */
660   synchronized ServerName getLastRegionServerOfRegion(final String encodedName) {
661     return lastAssignments.get(encodedName);
662   }
663 
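  /**
   * Record the given server as the last known assignment of each of the
   * specified regions, for later re-assignment decisions.
   */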
664   synchronized void setLastRegionServerOfRegions(
665       final ServerName serverName, final List<HRegionInfo> regionInfos) {
666     for (HRegionInfo hri: regionInfos) {
667       setLastRegionServerOfRegion(serverName, hri.getEncodedName());
668     }
669   }
670 
671   synchronized void setLastRegionServerOfRegion(
672       final ServerName serverName, final String encodedName) {
673     lastAssignments.put(encodedName, serverName);
674   }
675 
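  /**
   * Mark all user regions CLOSED, skipping split parents, meta regions,
   * and regions of the excluded tables.
   */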
676   synchronized void closeAllUserRegions(Set<TableName> excludedTables) {
677     Set<HRegionInfo> toBeClosed = new HashSet<HRegionInfo>(regionStates.size());
678     for(RegionState state: regionStates.values()) {
679       HRegionInfo hri = state.getRegion();
680       TableName tableName = hri.getTable();
681       if (!hri.isSplit() && !hri.isMetaRegion()
682           && !excludedTables.contains(tableName)) {
683         toBeClosed.add(hri);
684       }
685     }
686     for (HRegionInfo hri: toBeClosed) {
687       updateRegionState(hri, State.CLOSED);
688     }
689   }
690 
691   /**
692    * Compute the average load across all region servers.
693    * Currently, this uses a very naive computation - just uses the number of
694    * regions being served, ignoring stats about number of requests.
695    * @return the average load
696    */
697   protected synchronized double getAverageLoad() {
698     int numServers = 0, totalLoad = 0;
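    // Only servers that are currently online are counted; regions held by
    // servers that are not online do not contribute to the average.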
699     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
700       Set<HRegionInfo> regions = e.getValue();
701       ServerName serverName = e.getKey();
702       int regionCount = regions.size();
703       if (serverManager.isServerOnline(serverName)) {
704         totalLoad += regionCount;
705         numServers++;
706       }
707     }
708     return numServers == 0 ? 0.0 :
709       (double)totalLoad / (double)numServers;
710   }
711 
712   /**
713    * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
714    * Can't let the original out since it can change, and at least the load balancer
715    * wants to iterate this exported list.  We synchronize on this instance since
716    * all access to the internal maps is guarded by it.
717    *
718    * @return A clone of current assignments by table.
719    */
720   protected Map<TableName, Map<ServerName, List<HRegionInfo>>>
721       getAssignmentsByTable() {
722     Map<TableName, Map<ServerName, List<HRegionInfo>>> result =
723       new HashMap<TableName, Map<ServerName,List<HRegionInfo>>>();
724     synchronized (this) {
725       if (!server.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false)) {
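        // By-table balancing is off: group all assignments under a single
        // pseudo table named "ensemble".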
726         Map<ServerName, List<HRegionInfo>> svrToRegions =
727           new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
728         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
729           svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
730         }
731         result.put(TableName.valueOf("ensemble"), svrToRegions);
732       } else {
733         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
734           for (HRegionInfo hri: e.getValue()) {
735             if (hri.isMetaRegion()) continue;
736             TableName tablename = hri.getTable();
737             Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
738             if (svrToRegions == null) {
739               svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
740               result.put(tablename, svrToRegions);
741             }
742             List<HRegionInfo> regions = svrToRegions.get(e.getKey());
743             if (regions == null) {
744               regions = new ArrayList<HRegionInfo>();
745               svrToRegions.put(e.getKey(), regions);
746             }
747             regions.add(hri);
748           }
749         }
750       }
751     }
752 
753     Map<ServerName, ServerLoad>
754       onlineSvrs = serverManager.getOnlineServers();
755     // Take care of servers w/o assignments, and remove servers in draining mode
756     List<ServerName> drainingServers = this.serverManager.getDrainingServersList();
757     for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
758       for (ServerName svr: onlineSvrs.keySet()) {
759         if (!map.containsKey(svr)) {
760           map.put(svr, new ArrayList<HRegionInfo>());
761         }
762       }
763       map.keySet().removeAll(drainingServers);
764     }
765     return result;
766   }
767 
768   protected RegionState getRegionState(final HRegionInfo hri) {
769     return getRegionState(hri.getEncodedName());
770   }
771 
772   protected synchronized RegionState getRegionState(final String encodedName) {
773     return regionStates.get(encodedName);
774   }
775 
776   /**
777    * Get the HRegionInfo from the cache; if not there, from the hbase:meta table.
778    * @param  regionName the region name to look up
779    * @return HRegionInfo for the region
780    */
781   protected HRegionInfo getRegionInfo(final byte [] regionName) {
782     String encodedName = HRegionInfo.encodeRegionName(regionName);
783     RegionState regionState = getRegionState(encodedName);
784     if (regionState != null) {
785       return regionState.getRegion();
786     }
787 
788     try {
789       Pair<HRegionInfo, ServerName> p =
790         MetaReader.getRegion(server.getCatalogTracker(), regionName);
791       HRegionInfo hri = p == null ? null : p.getFirst();
792       if (hri != null) {
793         createRegionState(hri);
794       }
795       return hri;
796     } catch (IOException e) {
797       server.abort("Aborting because an error occurred while reading "
798         + Bytes.toStringBinary(regionName) + " from hbase:meta", e);
799       return null;
800     }
801   }
802 
803   static boolean isOneOfStates(RegionState regionState, State... states) {
804     State s = regionState != null ? regionState.getState() : null;
805     for (State state: states) {
806       if (s == state) return true;
807     }
808     return false;
809   }
810 
811   /**
812    * Update a region state. It will be put in transition if not already there.
813    */
814   private RegionState updateRegionState(final HRegionInfo hri,
815       final State state, final ServerName serverName, long openSeqNum) {
816     if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) {
817       LOG.warn("Failed to open/close " + hri.getShortNameToLog()
818         + " on " + serverName + ", set to " + state);
819     }
820 
821     String encodedName = hri.getEncodedName();
822     RegionState regionState = new RegionState(
823       hri, state, System.currentTimeMillis(), serverName);
824     RegionState oldState = getRegionState(encodedName);
825     if (!regionState.equals(oldState)) {
826       LOG.info("Transition " + oldState + " to " + regionState);
827       // Persist region state before updating in-memory info, if needed
828       regionStateStore.updateRegionState(openSeqNum, regionState, oldState);
829     }
830 
831     synchronized (this) {
832       regionsInTransition.put(encodedName, regionState);
833       regionStates.put(encodedName, regionState);
834 
835       // For these states, region should be properly closed.
836       // There should be no log splitting issue.
837       if ((state == State.CLOSED || state == State.MERGED
838           || state == State.SPLIT) && lastAssignments.containsKey(encodedName)) {
839         ServerName last = lastAssignments.get(encodedName);
840         if (last.equals(serverName)) {
841           lastAssignments.remove(encodedName);
842         } else {
843           LOG.warn(encodedName + " moved to " + state + " on "
844             + serverName + ", expected " + last);
845         }
846       }
847 
848       // Once a region is opened, record its last assignment right away.
849       if (serverName != null && state == State.OPEN) {
850         ServerName last = lastAssignments.get(encodedName);
851         if (!serverName.equals(last)) {
852           lastAssignments.put(encodedName, serverName);
853           if (last != null && isServerDeadAndNotProcessed(last)) {
854             LOG.warn(encodedName + " moved to " + serverName
855             + ", while its previous host " + last
856               + " is dead but not processed yet");
857           }
858         }
859       }
860 
861       // notify the change
862       this.notifyAll();
863     }
864     return regionState;
865   }
866 
867   /**
868    * Remove a region from all state maps.
869    */
870   private synchronized void deleteRegion(final HRegionInfo hri) {
871     String encodedName = hri.getEncodedName();
872     regionsInTransition.remove(encodedName);
873     regionStates.remove(encodedName);
874     lastAssignments.remove(encodedName);
875     ServerName sn = regionAssignments.remove(hri);
876     if (sn != null) {
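      // Also drop the region from its hosting server's set of holdings.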
877       Set<HRegionInfo> regions = serverHoldings.get(sn);
878       regions.remove(hri);
879     }
880   }
881 }