1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Arrays;
24  import java.util.Collections;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.Iterator;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.Map.Entry;
31  import java.util.NavigableMap;
32  import java.util.Set;
33  import java.util.TreeMap;
34  import java.util.concurrent.ConcurrentHashMap;
35  import java.util.concurrent.ConcurrentSkipListSet;
36  import java.util.concurrent.CopyOnWriteArrayList;
37  import java.util.concurrent.ThreadFactory;
38  import java.util.concurrent.TimeUnit;
39  import java.util.concurrent.atomic.AtomicBoolean;
40  import java.util.concurrent.atomic.AtomicInteger;
41  import java.util.concurrent.locks.Lock;
42  import java.util.concurrent.locks.ReentrantLock;
43  
44  import org.apache.commons.logging.Log;
45  import org.apache.commons.logging.LogFactory;
46  import org.apache.hadoop.hbase.classification.InterfaceAudience;
47  import org.apache.hadoop.conf.Configuration;
48  import org.apache.hadoop.fs.FileSystem;
49  import org.apache.hadoop.fs.Path;
50  import org.apache.hadoop.hbase.Chore;
51  import org.apache.hadoop.hbase.HBaseIOException;
52  import org.apache.hadoop.hbase.HConstants;
53  import org.apache.hadoop.hbase.HRegionInfo;
54  import org.apache.hadoop.hbase.NotServingRegionException;
55  import org.apache.hadoop.hbase.RegionTransition;
56  import org.apache.hadoop.hbase.Server;
57  import org.apache.hadoop.hbase.ServerName;
58  import org.apache.hadoop.hbase.Stoppable;
59  import org.apache.hadoop.hbase.TableName;
60  import org.apache.hadoop.hbase.TableNotFoundException;
61  import org.apache.hadoop.hbase.catalog.CatalogTracker;
62  import org.apache.hadoop.hbase.catalog.MetaReader;
63  import org.apache.hadoop.hbase.client.Result;
64  import org.apache.hadoop.hbase.exceptions.DeserializationException;
65  import org.apache.hadoop.hbase.executor.EventHandler;
66  import org.apache.hadoop.hbase.executor.EventType;
67  import org.apache.hadoop.hbase.executor.ExecutorService;
68  import org.apache.hadoop.hbase.ipc.RpcClient;
69  import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException;
70  import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
71  import org.apache.hadoop.hbase.master.RegionState.State;
72  import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
73  import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
74  import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
75  import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
76  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
77  import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
78  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
79  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
80  import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException;
81  import org.apache.hadoop.hbase.regionserver.RegionMergeTransaction;
82  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
83  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
84  import org.apache.hadoop.hbase.regionserver.SplitTransaction;
85  import org.apache.hadoop.hbase.regionserver.wal.HLog;
86  import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
87  import org.apache.hadoop.hbase.util.ConfigUtil;
88  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
89  import org.apache.hadoop.hbase.util.FSUtils;
90  import org.apache.hadoop.hbase.util.KeyLocker;
91  import org.apache.hadoop.hbase.util.Pair;
92  import org.apache.hadoop.hbase.util.PairOfSameType;
93  import org.apache.hadoop.hbase.util.Threads;
94  import org.apache.hadoop.hbase.util.Triple;
95  import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
96  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
97  import org.apache.hadoop.hbase.zookeeper.ZKTable;
98  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
99  import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
100 import org.apache.hadoop.ipc.RemoteException;
101 import org.apache.zookeeper.AsyncCallback;
102 import org.apache.zookeeper.KeeperException;
103 import org.apache.zookeeper.KeeperException.NoNodeException;
104 import org.apache.zookeeper.KeeperException.NodeExistsException;
105 import org.apache.zookeeper.data.Stat;
106 
107 import com.google.common.annotations.VisibleForTesting;
108 import com.google.common.base.Preconditions;
109 import com.google.common.collect.LinkedHashMultimap;
110 
111 /**
112  * Manages and performs region assignment.
113  * <p>
114  * Monitors ZooKeeper for events related to regions in transition.
115  * <p>
116  * Handles existing regions in transition during master failover.
117  */
118 @InterfaceAudience.Private
119 public class AssignmentManager extends ZooKeeperListener {
120   private static final Log LOG = LogFactory.getLog(AssignmentManager.class);
121 
122   public static final ServerName HBCK_CODE_SERVERNAME = ServerName.valueOf(HConstants.HBCK_CODE_NAME,
123       -1, -1L);
124 
125   public static final String ASSIGNMENT_TIMEOUT = "hbase.master.assignment.timeoutmonitor.timeout";
126   public static final int DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT = 600000;
127   public static final String ASSIGNMENT_TIMEOUT_MANAGEMENT = "hbase.assignment.timeout.management";
128   public static final boolean DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT = false;
129 
130   public static final String ALREADY_IN_TRANSITION_WAITTIME
131     = "hbase.assignment.already.intransition.waittime";
132   public static final int DEFAULT_ALREADY_IN_TRANSITION_WAITTIME = 60000; // 1 minute
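  // Illustrative sketch only (not part of the original source): these assignment timeout
  // settings are read from the server Configuration in the constructor below, so a test
  // or tuning exercise could override them roughly like this (values are arbitrary):
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setBoolean(ASSIGNMENT_TIMEOUT_MANAGEMENT, true);
  //   conf.setInt(ASSIGNMENT_TIMEOUT, 300000);             // 5 minutes instead of the 10 minute default
  //   conf.setInt(ALREADY_IN_TRANSITION_WAITTIME, 30000);  // 30 seconds instead of 1 minute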
133 
134   protected final Server server;
135 
136   private ServerManager serverManager;
137 
138   private boolean shouldAssignRegionsWithFavoredNodes;
139 
140   private CatalogTracker catalogTracker;
141 
142   protected final TimeoutMonitor timeoutMonitor;
143 
144   private final TimerUpdater timerUpdater;
145 
146   private LoadBalancer balancer;
147 
148   private final MetricsAssignmentManager metricsAssignmentManager;
149 
150   private final TableLockManager tableLockManager;
151 
152   private AtomicInteger numRegionsOpened = new AtomicInteger(0);
153 
154   final private KeyLocker<String> locker = new KeyLocker<String>();
155 
156   /**
157    * Map of regions to reopen after the schema of a table is changed. Key -
158    * encoded region name, value - HRegionInfo
159    */
160   private final Map <String, HRegionInfo> regionsToReopen;
161 
162   /*
163    * Maximum times we recurse an assignment/unassignment.
164    * See below in {@link #assign()} and {@link #unassign()}.
165    */
166   private final int maximumAttempts;
167 
168   /**
169    * Map from the encoded name of the region to be created to the pair of regions being merged.
170    */
171   private final Map<String, PairOfSameType<HRegionInfo>> mergingRegions
172     = new HashMap<String, PairOfSameType<HRegionInfo>>();
173 
174   private final Map<HRegionInfo, PairOfSameType<HRegionInfo>> splitRegions
175   = new HashMap<HRegionInfo, PairOfSameType<HRegionInfo>>();
176 
177   /**
178    * The time to sleep before retrying an hbase:meta assignment that failed
179    * because no region plan was available or the region plan was bad.
180    */
181   private final long sleepTimeBeforeRetryingMetaAssignment;
182 
183   /** Plans for region movement. Key is the encoded version of a region name*/
184   // TODO: When do plans get cleaned out?  Ever? In server open and in server
185   // shutdown processing -- St.Ack
186   // All access to this Map must be synchronized.
187   final NavigableMap<String, RegionPlan> regionPlans =
188     new TreeMap<String, RegionPlan>();
189 
190   private final ZKTable zkTable;
191 
192   /**
193    * Contains the servers that need timer updates; these servers will be
194    * handled by {@link TimerUpdater}
195    */
196   private final ConcurrentSkipListSet<ServerName> serversInUpdatingTimer;
197 
198   private final ExecutorService executorService;
199 
200   // For unit tests, keep track of calls to ClosedRegionHandler
201   private Map<HRegionInfo, AtomicBoolean> closedRegionHandlerCalled = null;
202 
203   // For unit tests, keep track of calls to OpenedRegionHandler
204   private Map<HRegionInfo, AtomicBoolean> openedRegionHandlerCalled = null;
205 
206   //Thread pool executor service for timeout monitor
207   private java.util.concurrent.ExecutorService threadPoolExecutorService;
208 
209   // A bunch of ZK events workers. Each is a single thread executor service
210   private final java.util.concurrent.ExecutorService zkEventWorkers;
211 
212   private List<EventType> ignoreStatesRSOffline = Arrays.asList(
213       EventType.RS_ZK_REGION_FAILED_OPEN, EventType.RS_ZK_REGION_CLOSED);
214 
215   private final RegionStates regionStates;
216 
217   // The threshold to use bulk assigning. Bulk assignment is used
218   // only if assigning at least this many regions to at least this
219   // many servers. If assigning fewer regions to fewer servers,
220   // bulk assigning may not be as efficient.
221   private final int bulkAssignThresholdRegions;
222   private final int bulkAssignThresholdServers;
223 
224   // Should bulk assignment wait till all regions are assigned,
225   // or it is timed out?  This is useful to measure bulk assignment
226   // performance, but not needed in most use cases.
227   private final boolean bulkAssignWaitTillAllAssigned;
228 
229   /**
230    * Indicator that AssignmentManager has recovered the region states so
231    * that ServerShutdownHandler can be fully enabled and re-assign regions
232    * of dead servers, so that when re-assignment happens, AssignmentManager
233    * has proper region states.
234    *
235    * Protected to ease testing.
236    */
237   protected final AtomicBoolean failoverCleanupDone = new AtomicBoolean(false);
238 
239   /** Is the TimeOutManagement activated **/
240   private final boolean tomActivated;
241 
242   /**
243    * A map to track how many times in a row a region has failed to open,
244    * so that we don't try to open a region forever if the failure is
245    * unrecoverable.  We don't put this information in region states
246    * because we don't expect this to happen frequently; we don't
247    * want to copy this information over during each state transition either.
248    */
249   private final ConcurrentHashMap<String, AtomicInteger>
250     failedOpenTracker = new ConcurrentHashMap<String, AtomicInteger>();
251 
252   // A flag to indicate if we are using ZK for region assignment
253   private final boolean useZKForAssignment;
254 
255   // In case not using ZK for region assignment, region states
256   // are persisted in meta with a state store
257   private final RegionStateStore regionStateStore;
258 
259   /**
260    * For testing only!  Set to true to skip handling of split.
261    */
262   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="MS_SHOULD_BE_FINAL")
263   public static boolean TEST_SKIP_SPLIT_HANDLING = false;
264 
265   /** Listeners that are called on assignment events. */
266   private List<AssignmentListener> listeners = new CopyOnWriteArrayList<AssignmentListener>();
267 
268   /**
269    * Constructs a new assignment manager.
270    *
271    * @param server
272    * @param serverManager
273    * @param catalogTracker
274    * @param service
275    * @throws KeeperException
276    * @throws IOException
277    */
278   public AssignmentManager(Server server, ServerManager serverManager,
279       CatalogTracker catalogTracker, final LoadBalancer balancer,
280       final ExecutorService service, MetricsMaster metricsMaster,
281       final TableLockManager tableLockManager) throws KeeperException, IOException {
282     super(server.getZooKeeper());
283     this.server = server;
284     this.serverManager = serverManager;
285     this.catalogTracker = catalogTracker;
286     this.executorService = service;
287     this.regionStateStore = new RegionStateStore(server);
288     this.regionsToReopen = Collections.synchronizedMap
289                            (new HashMap<String, HRegionInfo> ());
290     Configuration conf = server.getConfiguration();
291     // Only read favored nodes if using the favored nodes load balancer.
292     this.shouldAssignRegionsWithFavoredNodes = conf.getClass(
293            HConstants.HBASE_MASTER_LOADBALANCER_CLASS, Object.class).equals(
294            FavoredNodeLoadBalancer.class);
295     this.tomActivated = conf.getBoolean(
296       ASSIGNMENT_TIMEOUT_MANAGEMENT, DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT);
297     if (tomActivated){
298       this.serversInUpdatingTimer =  new ConcurrentSkipListSet<ServerName>();
299       this.timeoutMonitor = new TimeoutMonitor(
300         conf.getInt("hbase.master.assignment.timeoutmonitor.period", 30000),
301         server, serverManager,
302         conf.getInt(ASSIGNMENT_TIMEOUT, DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT));
303       this.timerUpdater = new TimerUpdater(conf.getInt(
304         "hbase.master.assignment.timerupdater.period", 10000), server);
305       Threads.setDaemonThreadRunning(timerUpdater.getThread(),
306         server.getServerName() + ".timerUpdater");
307     } else {
308       this.serversInUpdatingTimer =  null;
309       this.timeoutMonitor = null;
310       this.timerUpdater = null;
311     }
312     this.zkTable = new ZKTable(this.watcher);
313     // This is the max attempts, not retries, so it should be at least 1.
314     this.maximumAttempts = Math.max(1,
315       this.server.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10));
316     this.sleepTimeBeforeRetryingMetaAssignment = this.server.getConfiguration().getLong(
317         "hbase.meta.assignment.retry.sleeptime", 1000l);
318     this.balancer = balancer;
319     int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
320     this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
321       maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("AM."));
322     this.regionStates = new RegionStates(server, serverManager, regionStateStore);
323 
324     this.bulkAssignWaitTillAllAssigned =
325       conf.getBoolean("hbase.bulk.assignment.waittillallassigned", false);
326     this.bulkAssignThresholdRegions = conf.getInt("hbase.bulk.assignment.threshold.regions", 7);
327     this.bulkAssignThresholdServers = conf.getInt("hbase.bulk.assignment.threshold.servers", 3);
328 
329     int workers = conf.getInt("hbase.assignment.zkevent.workers", 20);
330     ThreadFactory threadFactory = Threads.newDaemonThreadFactory("AM.ZK.Worker");
331     zkEventWorkers = Threads.getBoundedCachedThreadPool(workers, 60L,
332             TimeUnit.SECONDS, threadFactory);
333     this.tableLockManager = tableLockManager;
334 
335     this.metricsAssignmentManager = new MetricsAssignmentManager();
336     useZKForAssignment = ConfigUtil.useZKForAssignment(conf);
337   }
338 
339   void startTimeOutMonitor() {
340     if (tomActivated) {
341       Threads.setDaemonThreadRunning(timeoutMonitor.getThread(), server.getServerName()
342           + ".timeoutMonitor");
343     }
344   }
345 
346   /**
347    * Add the listener to the notification list.
348    * @param listener The AssignmentListener to register
349    */
350   public void registerListener(final AssignmentListener listener) {
351     this.listeners.add(listener);
352   }
353 
354   /**
355    * Remove the listener from the notification list.
356    * @param listener The AssignmentListener to unregister
357    */
358   public boolean unregisterListener(final AssignmentListener listener) {
359     return this.listeners.remove(listener);
360   }
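  // Hypothetical usage sketch (not part of the original source): a component interested in
  // assignment events could hook in as below, assuming it provides an AssignmentListener
  // implementation (MyAssignmentListener is an invented name):
  //   AssignmentListener listener = new MyAssignmentListener();
  //   assignmentManager.registerListener(listener);
  //   // ... later, when notifications are no longer needed ...
  //   assignmentManager.unregisterListener(listener);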
361 
362   /**
363    * @return Instance of ZKTable.
364    */
365   public ZKTable getZKTable() {
366     // These are 'expensive' to make, involving a trip to the zk ensemble, so allow
367     // sharing.
368     return this.zkTable;
369   }
370 
371   /**
372    * This SHOULD not be public. It is public now
373    * because of some unit tests.
374    *
375    * TODO: make it package private and keep RegionStates in the master package
376    */
377   public RegionStates getRegionStates() {
378     return regionStates;
379   }
380 
381   /**
382    * Used in some tests to mock up region state in meta
383    */
384   @VisibleForTesting
385   RegionStateStore getRegionStateStore() {
386     return regionStateStore;
387   }
388 
389   public RegionPlan getRegionReopenPlan(HRegionInfo hri) {
390     return new RegionPlan(hri, null, regionStates.getRegionServerOfRegion(hri));
391   }
392 
393   /**
394    * Add a regionPlan for the specified region.
395    * @param encodedName
396    * @param plan
397    */
398   public void addPlan(String encodedName, RegionPlan plan) {
399     synchronized (regionPlans) {
400       regionPlans.put(encodedName, plan);
401     }
402   }
403 
404   /**
405    * Add a map of region plans.
406    */
407   public void addPlans(Map<String, RegionPlan> plans) {
408     synchronized (regionPlans) {
409       regionPlans.putAll(plans);
410     }
411   }
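  // Illustrative sketch only (not part of the original source): a caller that wants to steer
  // a region to a chosen destination server could record a plan before triggering the
  // unassign/assign cycle; hri, currentServer and destinationServer are assumed to exist:
  //   RegionPlan plan = new RegionPlan(hri, currentServer, destinationServer);
  //   assignmentManager.addPlan(hri.getEncodedName(), plan);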
412 
413   /**
414    * Set the list of regions that will be reopened
415    * because of an update in table schema
416    *
417    * @param regions
418    *          list of regions that should be tracked for reopen
419    */
420   public void setRegionsToReopen(List <HRegionInfo> regions) {
421     for(HRegionInfo hri : regions) {
422       regionsToReopen.put(hri.getEncodedName(), hri);
423     }
424   }
425 
426   /**
427    * Used by the client to identify if all regions have the schema updates
428    *
429    * @param tableName
430    * @return Pair indicating the status of the alter command
431    * @throws IOException
432    */
433   public Pair<Integer, Integer> getReopenStatus(TableName tableName)
434       throws IOException {
435     List <HRegionInfo> hris =
436       MetaReader.getTableRegions(this.server.getCatalogTracker(), tableName, true);
437     Integer pending = 0;
438     for (HRegionInfo hri : hris) {
439       String name = hri.getEncodedName();
440       // no lock; concurrent access ok: sequential consistency respected.
441       if (regionsToReopen.containsKey(name)
442           || regionStates.isRegionInTransition(name)) {
443         pending++;
444       }
445     }
446     return new Pair<Integer, Integer>(pending, hris.size());
447   }
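  // Hypothetical polling sketch (not part of the original source) built on getReopenStatus():
  //   Pair<Integer, Integer> status = assignmentManager.getReopenStatus(tableName);
  //   int pending = status.getFirst();   // regions still to be reopened or in transition
  //   int total = status.getSecond();    // total regions of the table
  //   boolean reopenDone = (pending == 0);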
448 
449   /**
450    * Used by ServerShutdownHandler to make sure AssignmentManager has completed
451    * the failover cleanup before re-assigning regions of dead servers. So that
452    * when re-assignment happens, AssignmentManager has proper region states.
453    */
454   public boolean isFailoverCleanupDone() {
455     return failoverCleanupDone.get();
456   }
457 
458   /**
459    * To avoid racing with AM, external entities may need to lock a region,
460    * for example, when SSH checks what regions to skip re-assigning.
461    */
462   public Lock acquireRegionLock(final String encodedName) {
463     return locker.acquireLock(encodedName);
464   }
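  // Illustrative caller pattern (not part of the original source), following the usual
  // Lock discipline so the region lock is always released:
  //   Lock lock = assignmentManager.acquireRegionLock(hri.getEncodedName());
  //   try {
  //     // inspect or update state for this region without racing the AssignmentManager
  //   } finally {
  //     lock.unlock();
  //   }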
465 
466   /**
467    * Now, failover cleanup is completed. Notify server manager to
468    * process queued up dead servers, if any.
469    */
470   void failoverCleanupDone() {
471     failoverCleanupDone.set(true);
472     serverManager.processQueuedDeadServers();
473   }
474 
475   /**
476    * Called on startup.
477    * Figures whether this is a fresh cluster start or we are joining an extant running cluster.
478    * @throws IOException
479    * @throws KeeperException
480    * @throws InterruptedException
481    */
482   void joinCluster() throws IOException,
483       KeeperException, InterruptedException {
484     long startTime = System.currentTimeMillis();
485     // Concurrency note: In the below the accesses on regionsInTransition are
486     // outside of a synchronization block where usually all accesses to RIT are
487     // synchronized.  The presumption is that in this case it is safe since this
488     // method is being played by a single thread on startup.
489 
490     // TODO: Regions that have a null location and are not in regionsInTransitions
491     // need to be handled.
492 
493     // Scan hbase:meta to build list of existing regions, servers, and assignment
494     // Returns servers who have not checked in (assumed dead) and their regions
495     Map<ServerName, List<HRegionInfo>> deadServers = rebuildUserRegions();
496 
497     // This method will assign all user regions if a clean server startup or
498     // it will reconstruct master state and cleanup any leftovers from
499     // previous master process.
500     boolean failover = processDeadServersAndRegionsInTransition(deadServers);
501 
502     if (!useZKForAssignment) {
503       // Not use ZK for assignment any more, remove the ZNode
504       ZKUtil.deleteNodeRecursively(watcher, watcher.assignmentZNode);
505     }
506     recoverTableInDisablingState();
507     recoverTableInEnablingState();
508     LOG.info("Joined the cluster in " + (System.currentTimeMillis()
509       - startTime) + "ms, failover=" + failover);
510   }
511 
512   /**
513    * Processes all regions that are in transition in zookeeper and also
514    * processes the list of dead servers by scanning the META.
515    * Used by a master joining a cluster.  If we figure this is a clean cluster
516    * startup, will assign all user regions.
517    * @param deadServers
518    *          Map of dead servers and their regions. Can be null.
519    * @throws KeeperException
520    * @throws IOException
521    * @throws InterruptedException
522    */
523   boolean processDeadServersAndRegionsInTransition(
524       final Map<ServerName, List<HRegionInfo>> deadServers)
525           throws KeeperException, IOException, InterruptedException {
526     List<String> nodes = ZKUtil.listChildrenNoWatch(watcher,
527       watcher.assignmentZNode);
528 
529     if (nodes == null && useZKForAssignment) {
530       String errorMessage = "Failed to get the children from ZK";
531       server.abort(errorMessage, new IOException(errorMessage));
532       return true; // Doesn't matter in this case
533     }
534 
535     boolean failover = !serverManager.getDeadServers().isEmpty();
536     if (failover) {
537       // This may not be a failover actually, especially if meta is on this master.
538       if (LOG.isDebugEnabled()) {
539         LOG.debug("Found dead servers out on cluster " + serverManager.getDeadServers());
540       }
541     } else {
542       // If any one region except meta is assigned, it's a failover.
543       Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
544       for (Map.Entry<HRegionInfo, ServerName> en : regionStates.getRegionAssignments().entrySet()) {
545         HRegionInfo hri = en.getKey();
546         if (!hri.isMetaTable() && onlineServers.contains(en.getValue())) {
547           LOG.debug("Found " + hri + " out on cluster");
548           failover = true;
549           break;
550         }
551       }
552     }
553 
554     if (!failover && nodes != null) {
555       // If any one region except meta is in transition, it's a failover.
556       for (String encodedName : nodes) {
557         RegionState regionState = regionStates.getRegionState(encodedName);
558         if (regionState != null && !regionState.getRegion().isMetaRegion()) {
559           LOG.debug("Found " + regionState + " in RITs");
560           failover = true;
561           break;
562         }
563       }
564     }
565 
566     if (!failover && !useZKForAssignment) {
567       // If any region except meta is in transition on a live server, it's a failover.
568       Map<String, RegionState> regionsInTransition = regionStates.getRegionsInTransition();
569       if (!regionsInTransition.isEmpty()) {
570         Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
571         for (RegionState regionState : regionsInTransition.values()) {
572           if (!regionState.getRegion().isMetaRegion()
573               && onlineServers.contains(regionState.getServerName())) {
574             LOG.debug("Found " + regionState + " in RITs");
575             failover = true;
576             break;
577           }
578         }
579       }
580     }
581 
582     if (!failover) {
583       // If we get here, we have a full cluster restart. It is a failover only
584       // if some HLogs are not split yet. For meta HLogs, they should have
585       // been split already, if any. We can walk through those queued dead servers;
586       // if they don't have any HLogs, this restart should be considered a clean one.
587       Set<ServerName> queuedDeadServers = serverManager.getRequeuedDeadServers().keySet();
588       if (!queuedDeadServers.isEmpty()) {
589         Configuration conf = server.getConfiguration();
590         Path rootdir = FSUtils.getRootDir(conf);
591         FileSystem fs = rootdir.getFileSystem(conf);
592         for (ServerName serverName : queuedDeadServers) {
593           Path logDir = new Path(rootdir, HLogUtil.getHLogDirectoryName(serverName.toString()));
594           Path splitDir = logDir.suffix(HLog.SPLITTING_EXT);
595           if (fs.exists(logDir) || fs.exists(splitDir)) {
596             LOG.debug("Found queued dead server " + serverName);
597             failover = true;
598             break;
599           }
600         }
601         if (!failover) {
602           // We figured that it's not a failover, so no need to
603           // work on these re-queued dead servers any more.
604           LOG.info("AM figured that it's not a failover and cleaned up " + queuedDeadServers.size()
605               + " queued dead servers");
606           serverManager.removeRequeuedDeadServers();
607         }
608       }
609     }
610 
611     Set<TableName> disabledOrDisablingOrEnabling = null;
612     if (!failover) {
613       disabledOrDisablingOrEnabling = ZKTable.getDisabledOrDisablingTables(watcher);
614       disabledOrDisablingOrEnabling.addAll(ZKTable.getEnablingTables(watcher));
615       // Clean re/start, mark all user regions closed before reassignment
616       // TODO: HBASE-11319
617       regionStates.closeAllUserRegions(disabledOrDisablingOrEnabling);
618     }
619 
620     // Now region states are restored
621     regionStateStore.start();
622 
623     // If we found user regions out on cluster, it's a failover.
624     if (failover) {
625       LOG.info("Found regions out on cluster or in RIT; presuming failover");
626       // Process list of dead servers and regions in RIT.
627       // See HBASE-4580 for more information.
628       processDeadServersAndRecoverLostRegions(deadServers);
629     } 
630     if (!failover && useZKForAssignment) {
631       // Cleanup any existing ZK nodes and start watching
632       ZKAssign.deleteAllNodes(watcher);
633       ZKUtil.listChildrenAndWatchForNewChildren(this.watcher, this.watcher.assignmentZNode);
634     }
635     // Now we can safely claim failover cleanup completed and enable
636     // ServerShutdownHandler for further processing. The nodes (below)
637     // in transition, if any, are for regions not related to those
638     // dead servers at all, and can be done in parallel to SSH.
639     failoverCleanupDone();
640     if (!failover) {
641       // Fresh cluster startup.
642       LOG.info("Clean cluster startup. Assigning user regions");
643       assignAllUserRegions(disabledOrDisablingOrEnabling);
644     }
645     return failover;
646   }
647 
648   /**
649    * If region is up in zk in transition, then do fixup and block and wait until
650    * the region is assigned and out of transition.  Used on startup for
651    * catalog regions.
652    * @param hri Region to look for.
653    * @return True if we processed a region in transition else false if region
654    * was not up in zk in transition.
655    * @throws InterruptedException
656    * @throws KeeperException
657    * @throws IOException
658    */
659   boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
660       throws InterruptedException, KeeperException, IOException {
661     String encodedRegionName = hri.getEncodedName();
662     if (!processRegionInTransition(encodedRegionName, hri)) {
663       return false; // The region is not in transition
664     }
665     LOG.debug("Waiting on " + HRegionInfo.prettyPrint(encodedRegionName));
666     while (!this.server.isStopped() &&
667         this.regionStates.isRegionInTransition(encodedRegionName)) {
668       RegionState state = this.regionStates.getRegionTransitionState(encodedRegionName);
669       if (state == null || !serverManager.isServerOnline(state.getServerName())) {
670         // The region is not in transition, or not in transition on an online
671         // server. Doesn't help to block here any more. Caller needs to
672         // verify the region is actually assigned.
673         break;
674       }
675       this.regionStates.waitForUpdate(100);
676     }
677     return true;
678   }
679 
680   /**
681    * Process failover of new master for region <code>encodedRegionName</code>
682    * up in zookeeper.
683    * @param encodedRegionName Region to process failover for.
684    * @param regionInfo If null we'll go get it from meta table.
685    * @return True if we processed <code>regionInfo</code> as a RIT.
686    * @throws KeeperException
687    * @throws IOException
688    */
689   boolean processRegionInTransition(final String encodedRegionName,
690       final HRegionInfo regionInfo) throws KeeperException, IOException {
691     // We need a lock here to ensure that we will not put the same region twice
692     // It has no reason to be a lock shared with the other operations.
693     // We can do the lock on the region only, instead of a global lock: what we want to ensure
694     // is that we don't have two threads working on the same region.
695     Lock lock = locker.acquireLock(encodedRegionName);
696     try {
697       Stat stat = new Stat();
698       byte [] data = ZKAssign.getDataAndWatch(watcher, encodedRegionName, stat);
699       if (data == null) return false;
700       RegionTransition rt;
701       try {
702         rt = RegionTransition.parseFrom(data);
703       } catch (DeserializationException e) {
704         LOG.warn("Failed parse znode data", e);
705         return false;
706       }
707       HRegionInfo hri = regionInfo;
708       if (hri == null) {
709         // The region info is not passed in. We will try to find the region
710         // from region states map/meta based on the encoded region name. But we
711         // may not be able to find it. This is valid for online merge that
712         // the region may have not been created if the merge is not completed.
713         // Therefore, it is not in meta at master recovery time.
714         hri = regionStates.getRegionInfo(rt.getRegionName());
715         EventType et = rt.getEventType();
716         if (hri == null && et != EventType.RS_ZK_REGION_MERGING
717             && et != EventType.RS_ZK_REQUEST_REGION_MERGE) {
718           LOG.warn("Couldn't find the region in recovering " + rt);
719           return false;
720         }
721       }
722       return processRegionsInTransition(
723         rt, hri, stat.getVersion());
724     } finally {
725       lock.unlock();
726     }
727   }
728 
729   /**
730    * This call is invoked only (1) when the master assigns meta;
731    * (2) during failover mode startup, when processing zk assignment nodes.
732    * The locker is set in the caller. It returns true if the region
733    * is in transition for sure, false otherwise.
734    *
735    * It should be private but it is used by some tests too.
736    */
737   boolean processRegionsInTransition(
738       final RegionTransition rt, final HRegionInfo regionInfo,
739       final int expectedVersion) throws KeeperException {
740     EventType et = rt.getEventType();
741     // Get ServerName.  Cannot be null.
742     final ServerName sn = rt.getServerName();
743     final byte[] regionName = rt.getRegionName();
744     final String encodedName = HRegionInfo.encodeRegionName(regionName);
745     final String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
746     LOG.info("Processing " + prettyPrintedRegionName + " in state: " + et);
747 
748     if (regionStates.isRegionInTransition(encodedName)
749         && (regionInfo.isMetaRegion() || !useZKForAssignment)) {
750       LOG.info("Processed region " + prettyPrintedRegionName + " in state: "
751         + et + ", does nothing since the region is already in transition "
752         + regionStates.getRegionTransitionState(encodedName));
753       // Just return
754       return true;
755     }
756     if (!serverManager.isServerOnline(sn)) {
757       // It was transitioning on a dead server, so it's closed now.
758       // Force to OFFLINE and put it in transition, but not assign it
759       // since log splitting for the dead server is not done yet.
760       LOG.debug("RIT " + encodedName + " in state=" + rt.getEventType() +
761         " was on deadserver; forcing offline");
762       if (regionStates.isRegionOnline(regionInfo)) {
763         // Meta could still show the region is assigned to the previous
764         // server. If that server is online, when we reload the meta, the
765         // region is put back online, so we need to offline it.
766         regionStates.regionOffline(regionInfo);
767         sendRegionClosedNotification(regionInfo);
768       }
769       // Put it back in transition so that SSH can re-assign it
770       regionStates.updateRegionState(regionInfo, State.OFFLINE, sn);
771 
772       if (regionInfo.isMetaRegion()) {
773         // If it's meta region, reset the meta location.
774         // So that master knows the right meta region server.
775         MetaRegionTracker.setMetaLocation(watcher, sn, State.OPEN);
776       } else {
777         // Whether the previous server is online or offline,
778         // we need to reset the last region server of the region.
779         regionStates.setLastRegionServerOfRegion(sn, encodedName);
780         // Make sure we know the server is dead.
781         if (!serverManager.isServerDead(sn)) {
782           serverManager.expireServer(sn);
783         }
784       }
785       return false;
786     }
787     switch (et) {
788       case M_ZK_REGION_CLOSING:
789         // Insert into RIT & resend the query to the region server: may be the previous master
790         // died before sending the query the first time.
791         final RegionState rsClosing = regionStates.updateRegionState(rt, State.CLOSING);
792         this.executorService.submit(
793           new EventHandler(server, EventType.M_MASTER_RECOVERY) {
794             @Override
795             public void process() throws IOException {
796               ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
797               try {
798                 unassign(regionInfo, rsClosing, expectedVersion, null, useZKForAssignment, null);
799                 if (regionStates.isRegionOffline(regionInfo)) {
800                   assign(regionInfo, true);
801                 }
802               } finally {
803                 lock.unlock();
804               }
805             }
806           });
807         break;
808 
809       case RS_ZK_REGION_CLOSED:
810       case RS_ZK_REGION_FAILED_OPEN:
811         // Region is closed, insert into RIT and handle it
812         regionStates.updateRegionState(regionInfo, State.CLOSED, sn);
813         invokeAssign(regionInfo);
814         break;
815 
816       case M_ZK_REGION_OFFLINE:
817         // Insert in RIT and resend to the regionserver
818         regionStates.updateRegionState(rt, State.PENDING_OPEN);
819         final RegionState rsOffline = regionStates.getRegionState(regionInfo);
820         this.executorService.submit(
821           new EventHandler(server, EventType.M_MASTER_RECOVERY) {
822             @Override
823             public void process() throws IOException {
824               ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
825               try {
826                 RegionPlan plan = new RegionPlan(regionInfo, null, sn);
827                 addPlan(encodedName, plan);
828                 assign(rsOffline, false, false);
829               } finally {
830                 lock.unlock();
831               }
832             }
833           });
834         break;
835 
836       case RS_ZK_REGION_OPENING:
837         regionStates.updateRegionState(rt, State.OPENING);
838         break;
839 
840       case RS_ZK_REGION_OPENED:
841         // Region is opened, insert into RIT and handle it
842         // This could be done asynchronously; we would then need to acquire the lock in the
843         //  handler.
844         regionStates.updateRegionState(rt, State.OPEN);
845         new OpenedRegionHandler(server, this, regionInfo, sn, expectedVersion).process();
846         break;
847       case RS_ZK_REQUEST_REGION_SPLIT:
848       case RS_ZK_REGION_SPLITTING:
849       case RS_ZK_REGION_SPLIT:
850         // Splitting region should be online. We could have skipped it during
851         // user region rebuilding since we may have considered the split completed.
852         // Put it in SPLITTING state to avoid complications.
853         regionStates.regionOnline(regionInfo, sn);
854         regionStates.updateRegionState(rt, State.SPLITTING);
855         if (!handleRegionSplitting(
856             rt, encodedName, prettyPrintedRegionName, sn)) {
857           deleteSplittingNode(encodedName, sn);
858         }
859         break;
860       case RS_ZK_REQUEST_REGION_MERGE:
861       case RS_ZK_REGION_MERGING:
862       case RS_ZK_REGION_MERGED:
863         if (!handleRegionMerging(
864             rt, encodedName, prettyPrintedRegionName, sn)) {
865           deleteMergingNode(encodedName, sn);
866         }
867         break;
868       default:
869         throw new IllegalStateException("Received region in state:" + et + " is not valid.");
870     }
871     LOG.info("Processed region " + prettyPrintedRegionName + " in state "
872       + et + ", on " + (serverManager.isServerOnline(sn) ? "" : "dead ")
873       + "server: " + sn);
874     return true;
875   }
876 
877   /**
878    * When a region is closed, it should be removed from the regionsToReopen
879    * @param hri HRegionInfo of the region which was closed
880    */
881   public void removeClosedRegion(HRegionInfo hri) {
882     if (regionsToReopen.remove(hri.getEncodedName()) != null) {
883       LOG.debug("Removed region from reopening regions because it was closed");
884     }
885   }
886 
887   /**
888    * Handles various states an unassigned node can be in.
889    * <p>
890    * Method is called when a state change is suspected for an unassigned node.
891    * <p>
892    * This deals with skipped transitions (we got a CLOSED but didn't see CLOSING
893    * yet).
894    * @param rt
895    * @param expectedVersion
896    */
897   void handleRegion(final RegionTransition rt, int expectedVersion) {
898     if (rt == null) {
899       LOG.warn("Unexpected NULL input for RegionTransition rt");
900       return;
901     }
902     final ServerName sn = rt.getServerName();
903     // Check if this is a special HBCK transition
904     if (sn.equals(HBCK_CODE_SERVERNAME)) {
905       handleHBCK(rt);
906       return;
907     }
908     final long createTime = rt.getCreateTime();
909     final byte[] regionName = rt.getRegionName();
910     String encodedName = HRegionInfo.encodeRegionName(regionName);
911     String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
912     // Verify this is a known server
913     if (!serverManager.isServerOnline(sn)
914       && !ignoreStatesRSOffline.contains(rt.getEventType())) {
915       LOG.warn("Attempted to handle region transition for server but " +
916         "it is not online: " + prettyPrintedRegionName + ", " + rt);
917       return;
918     }
919 
920     RegionState regionState =
921       regionStates.getRegionState(encodedName);
922     long startTime = System.currentTimeMillis();
923     if (LOG.isDebugEnabled()) {
924       boolean lateEvent = createTime < (startTime - 15000);
925       LOG.debug("Handling " + rt.getEventType() +
926         ", server=" + sn + ", region=" +
927         (prettyPrintedRegionName == null ? "null" : prettyPrintedRegionName) +
928         (lateEvent ? ", which is more than 15 seconds late" : "") +
929         ", current_state=" + regionState);
930     }
931     // We don't do anything for this event,
932     // so separate it out, no need to lock/unlock anything
933     if (rt.getEventType() == EventType.M_ZK_REGION_OFFLINE) {
934       return;
935     }
936 
937     // We need a lock on the region as we could update it
938     Lock lock = locker.acquireLock(encodedName);
939     try {
940       RegionState latestState =
941         regionStates.getRegionState(encodedName);
942       if ((regionState == null && latestState != null)
943           || (regionState != null && latestState == null)
944           || (regionState != null && latestState != null
945             && latestState.getState() != regionState.getState())) {
946         LOG.warn("Region state changed from " + regionState + " to "
947           + latestState + ", while acquiring lock");
948       }
949       long waitedTime = System.currentTimeMillis() - startTime;
950       if (waitedTime > 5000) {
951         LOG.warn("Took " + waitedTime + "ms to acquire the lock");
952       }
953       regionState = latestState;
954       switch (rt.getEventType()) {
955       case RS_ZK_REQUEST_REGION_SPLIT:
956       case RS_ZK_REGION_SPLITTING:
957       case RS_ZK_REGION_SPLIT:
958         if (!handleRegionSplitting(
959             rt, encodedName, prettyPrintedRegionName, sn)) {
960           deleteSplittingNode(encodedName, sn);
961         }
962         break;
963 
964       case RS_ZK_REQUEST_REGION_MERGE:
965       case RS_ZK_REGION_MERGING:
966       case RS_ZK_REGION_MERGED:
967         // Merged region is a new region; we can't find it in the region states now.
968         // However, the two merging regions are not new. They should be in state for merging.
969         if (!handleRegionMerging(
970             rt, encodedName, prettyPrintedRegionName, sn)) {
971           deleteMergingNode(encodedName, sn);
972         }
973         break;
974 
975       case M_ZK_REGION_CLOSING:
976         // Should see CLOSING after we have asked it to CLOSE or additional
977         // times after already being in state of CLOSING
978         if (regionState == null
979             || !regionState.isPendingCloseOrClosingOnServer(sn)) {
980           LOG.warn("Received CLOSING for " + prettyPrintedRegionName
981             + " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
982             + regionStates.getRegionState(encodedName));
983           return;
984         }
985         // Transition to CLOSING (or update stamp if already CLOSING)
986         regionStates.updateRegionState(rt, State.CLOSING);
987         break;
988 
989       case RS_ZK_REGION_CLOSED:
990         // Should see CLOSED after CLOSING but possible after PENDING_CLOSE
991         if (regionState == null
992             || !regionState.isPendingCloseOrClosingOnServer(sn)) {
993           LOG.warn("Received CLOSED for " + prettyPrintedRegionName
994             + " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
995             + regionStates.getRegionState(encodedName));
996           return;
997         }
998         // Handle CLOSED by assigning elsewhere or stopping if a disable
999         // If we got here all is good.  Need to update RegionState -- else
1000         // what follows will fail because not in expected state.
1001         new ClosedRegionHandler(server, this, regionState.getRegion()).process();
1002         updateClosedRegionHandlerTracker(regionState.getRegion());
1003         break;
1004 
1005         case RS_ZK_REGION_FAILED_OPEN:
1006           if (regionState == null
1007               || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
1008             LOG.warn("Received FAILED_OPEN for " + prettyPrintedRegionName
1009               + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
1010               + regionStates.getRegionState(encodedName));
1011             return;
1012           }
1013           AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
1014           if (failedOpenCount == null) {
1015             failedOpenCount = new AtomicInteger();
1016             // No need to use putIfAbsent, or extra synchronization since
1017             // this whole handleRegion block is locked on the encoded region
1018             // name, and failedOpenTracker is updated only in this block
1019             failedOpenTracker.put(encodedName, failedOpenCount);
1020           }
1021           if (failedOpenCount.incrementAndGet() >= maximumAttempts) {
1022             regionStates.updateRegionState(rt, State.FAILED_OPEN);
1023             // remove the tracking info to save memory, also reset
1024             // the count for next open initiative
1025             failedOpenTracker.remove(encodedName);
1026           } else {
1027             // Handle this the same as if it were opened and then closed.
1028             regionState = regionStates.updateRegionState(rt, State.CLOSED);
1029             if (regionState != null) {
1030               // When there is more than one region server, a new RS is selected as the
1031               // destination and the same is updated in the regionplan. (HBASE-5546)
1032               try {
1033                 getRegionPlan(regionState.getRegion(), sn, true);
1034                 new ClosedRegionHandler(server, this, regionState.getRegion()).process();
1035               } catch (HBaseIOException e) {
1036                 LOG.warn("Failed to get region plan", e);
1037               }
1038             }
1039           }
1040           break;
1041 
1042         case RS_ZK_REGION_OPENING:
1043           // Should see OPENING after we have asked it to OPEN or additional
1044           // times after already being in state of OPENING
1045           if (regionState == null
1046               || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
1047             LOG.warn("Received OPENING for " + prettyPrintedRegionName
1048               + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
1049               + regionStates.getRegionState(encodedName));
1050             return;
1051           }
1052           // Transition to OPENING (or update stamp if already OPENING)
1053           regionStates.updateRegionState(rt, State.OPENING);
1054           break;
1055 
1056         case RS_ZK_REGION_OPENED:
1057           // Should see OPENED after OPENING but possible after PENDING_OPEN.
1058           if (regionState == null
1059               || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
1060             LOG.warn("Received OPENED for " + prettyPrintedRegionName
1061               + " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
1062               + regionStates.getRegionState(encodedName));
1063 
1064             if (regionState != null) {
1065               // Close it without updating the internal region states,
1066               // so as not to create double assignments in unlucky scenarios
1067               // mentioned in OpenRegionHandler#process
1068               unassign(regionState.getRegion(), null, -1, null, false, sn);
1069             }
1070             return;
1071           }
1072           // Handle OPENED by removing from transition and deleting the zk node
1073           regionState = 
1074               regionStates.transitionOpenFromPendingOpenOrOpeningOnServer(rt,regionState, sn);
1075           if (regionState != null) {
1076             failedOpenTracker.remove(encodedName); // reset the count, if any
1077             new OpenedRegionHandler(
1078               server, this, regionState.getRegion(), sn, expectedVersion).process();
1079             updateOpenedRegionHandlerTracker(regionState.getRegion());
1080           }
1081           break;
1082 
1083         default:
1084           throw new IllegalStateException("Received event is not valid.");
1085       }
1086     } finally {
1087       lock.unlock();
1088     }
1089   }
1090 
1091   //For unit tests only
1092   boolean wasClosedHandlerCalled(HRegionInfo hri) {
1093     AtomicBoolean b = closedRegionHandlerCalled.get(hri);
1094     //compareAndSet to be sure that unit tests don't see stale values. Means,
1095     //we will return true exactly once unless the handler code resets this value
1096     //to true.
1097     return b == null ? false : b.compareAndSet(true, false);
1098   }
1099 
1100   //For unit tests only
1101   boolean wasOpenedHandlerCalled(HRegionInfo hri) {
1102     AtomicBoolean b = openedRegionHandlerCalled.get(hri);
1103     //compareAndSet to be sure that unit tests don't see stale values. Means,
1104     //we will return true exactly once unless the handler code resets this value
1105     //to true.
1106     return b == null ? false : b.compareAndSet(true, false);
1107   }
1108 
1109   //For unit tests only
1110   void initializeHandlerTrackers() {
1111     closedRegionHandlerCalled = new HashMap<HRegionInfo, AtomicBoolean>();
1112     openedRegionHandlerCalled = new HashMap<HRegionInfo, AtomicBoolean>();
1113   }
1114 
1115   void updateClosedRegionHandlerTracker(HRegionInfo hri) {
1116     if (closedRegionHandlerCalled != null) { //only for unit tests this is true
1117       closedRegionHandlerCalled.put(hri, new AtomicBoolean(true));
1118     }
1119   }
1120 
1121   void updateOpenedRegionHandlerTracker(HRegionInfo hri) {
1122     if (openedRegionHandlerCalled != null) { //only for unit tests this is true
1123       openedRegionHandlerCalled.put(hri, new AtomicBoolean(true));
1124     }
1125   }
1126 
1127   // TODO: processFavoredNodes might throw an exception, e.g., if the
1128   // meta could not be contacted/updated. We need to see how seriously to treat
1129   // this problem. Should we fail the current assignment? We should be able
1130   // to recover from this problem eventually (if the meta couldn't be updated
1131   // things should work normally and eventually get fixed up).
1132   void processFavoredNodes(List<HRegionInfo> regions) throws IOException {
1133     if (!shouldAssignRegionsWithFavoredNodes) return;
1134     // The AM gets the favored nodes info for each region and updates the meta
1135     // table with that info
1136     Map<HRegionInfo, List<ServerName>> regionToFavoredNodes =
1137         new HashMap<HRegionInfo, List<ServerName>>();
1138     for (HRegionInfo region : regions) {
1139       regionToFavoredNodes.put(region,
1140           ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region));
1141     }
1142     FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(regionToFavoredNodes, catalogTracker);
1143   }
1144 
1145   /**
1146    * Handle a ZK unassigned node transition triggered by HBCK repair tool.
1147    * <p>
1148    * This is handled in a separate code path because it breaks the normal rules.
1149    * @param rt
1150    */
1151   private void handleHBCK(RegionTransition rt) {
1152     String encodedName = HRegionInfo.encodeRegionName(rt.getRegionName());
1153     LOG.info("Handling HBCK triggered transition=" + rt.getEventType() +
1154       ", server=" + rt.getServerName() + ", region=" +
1155       HRegionInfo.prettyPrint(encodedName));
1156     RegionState regionState = regionStates.getRegionTransitionState(encodedName);
1157     switch (rt.getEventType()) {
1158       case M_ZK_REGION_OFFLINE:
1159         HRegionInfo regionInfo;
1160         if (regionState != null) {
1161           regionInfo = regionState.getRegion();
1162         } else {
1163           try {
1164             byte [] name = rt.getRegionName();
1165             Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(catalogTracker, name);
1166             regionInfo = p.getFirst();
1167           } catch (IOException e) {
1168             LOG.info("Exception reading hbase:meta doing HBCK repair operation", e);
1169             return;
1170           }
1171         }
1172         LOG.info("HBCK repair is triggering assignment of region=" +
1173             regionInfo.getRegionNameAsString());
1174         // trigger assign, node is already in OFFLINE so don't need to update ZK
1175         assign(regionInfo, false);
1176         break;
1177 
1178       default:
1179         LOG.warn("Received unexpected region state from HBCK: " + rt.toString());
1180         break;
1181     }
1182 
1183   }
1184 
1185   // ZooKeeper events
1186 
1187   /**
1188    * New unassigned node has been created.
1189    *
1190    * <p>This happens when an RS begins the OPENING or CLOSING of a region by
1191    * creating an unassigned node.
1192    *
1193    * <p>When this happens we must:
1194    * <ol>
1195    *   <li>Watch the node for further events</li>
1196    *   <li>Read and handle the state in the node</li>
1197    * </ol>
1198    */
1199   @Override
1200   public void nodeCreated(String path) {
1201     handleAssignmentEvent(path);
1202   }
1203 
1204   /**
1205    * Existing unassigned node has had data changed.
1206    *
1207    * <p>This happens when an RS transitions from OFFLINE to OPENING, or between
1208    * OPENING/OPENED and CLOSING/CLOSED.
1209    *
1210    * <p>When this happens we must:
1211    * <ol>
1212    *   <li>Watch the node for further events</li>
1213    *   <li>Read and handle the state in the node</li>
1214    * </ol>
1215    */
1216   @Override
1217   public void nodeDataChanged(String path) {
1218     handleAssignmentEvent(path);
1219   }
1220 
1221 
1222   // We  don't want to have two events on the same region managed simultaneously.
1223   // For this reason, we need to wait if an event on the same region is currently in progress.
1224   // So we track the region names of the events in progress, and we keep a waiting list.
1225   private final Set<String> regionsInProgress = new HashSet<String>();
1226   // In a LinkedHashMultimap, the put order is kept when we retrieve the collection back. We need
1227   //  this as we want the events to be managed in the same order as we received them.
1228   private final LinkedHashMultimap <String, RegionRunnable>
1229       zkEventWorkerWaitingList = LinkedHashMultimap.create();
1230 
1231   /**
1232    * A specific runnable that works only on a region.
1233    */
1234   private interface RegionRunnable extends Runnable{
1235     /**
1236      * @return - the name of the region it works on.
1237      */
1238     String getRegionName();
1239   }
1240 
1241   /**
1242    * Submit a task, ensuring that there is only one task at a time working on a given region.
1243    * Order is respected.
1244    */
1245   protected void zkEventWorkersSubmit(final RegionRunnable regRunnable) {
1246 
1247     synchronized (regionsInProgress) {
1248       // If there is already a task for this region, we add it to the
1249       //  waiting list and return.
1250       if (regionsInProgress.contains(regRunnable.getRegionName())) {
1251         synchronized (zkEventWorkerWaitingList){
1252           zkEventWorkerWaitingList.put(regRunnable.getRegionName(), regRunnable);
1253         }
1254         return;
1255       }
1256 
1257       // No event in progress on this region => we can submit a new task immediately.
1258       regionsInProgress.add(regRunnable.getRegionName());
1259       zkEventWorkers.submit(new Runnable() {
1260         @Override
1261         public void run() {
1262           try {
1263             regRunnable.run();
1264           } finally {
1265             // now that we have finished, let's see if there is an event for the same region in the
1266             //  waiting list. If it's the case, we can now submit it to the pool.
1267             synchronized (regionsInProgress) {
1268               regionsInProgress.remove(regRunnable.getRegionName());
1269               synchronized (zkEventWorkerWaitingList) {
1270                 java.util.Set<RegionRunnable> waiting = zkEventWorkerWaitingList.get(
1271                     regRunnable.getRegionName());
1272                 if (!waiting.isEmpty()) {
1273                   // We want the first object only. The only way to get it is through an iterator.
1274                   RegionRunnable toSubmit = waiting.iterator().next();
1275                   zkEventWorkerWaitingList.remove(toSubmit.getRegionName(), toSubmit);
1276                   zkEventWorkersSubmit(toSubmit);
1277                 }
1278               }
1279             }
1280           }
1281         }
1282       });
1283     }
1284   }
1285 
1286   @Override
1287   public void nodeDeleted(final String path) {
1288     if (path.startsWith(watcher.assignmentZNode)) {
1289       final String regionName = ZKAssign.getRegionName(watcher, path);
1290       zkEventWorkersSubmit(new RegionRunnable() {
1291         @Override
1292         public String getRegionName() {
1293           return regionName;
1294         }
1295 
1296         @Override
1297         public void run() {
1298           Lock lock = locker.acquireLock(regionName);
1299           try {
1300             RegionState rs = regionStates.getRegionTransitionState(regionName);
1301             if (rs == null) {
1302               rs = regionStates.getRegionState(regionName);
1303               if (rs == null || !rs.isMergingNew()) {
1304                 // MergingNew is an offline state
1305                 return;
1306               }
1307             }
1308 
1309             HRegionInfo regionInfo = rs.getRegion();
1310             String regionNameStr = regionInfo.getRegionNameAsString();
1311             LOG.debug("Znode " + regionNameStr + " deleted, state: " + rs);
1312             boolean disabled = getZKTable().isDisablingOrDisabledTable(regionInfo.getTable());
1313             ServerName serverName = rs.getServerName();
1314             if (serverManager.isServerOnline(serverName)) {
1315               if (rs.isOnServer(serverName) && (rs.isOpened() || rs.isSplitting())) {
1316                 synchronized (regionStates) {
1317                   regionOnline(regionInfo, serverName);
1318                   if (rs.isSplitting() && splitRegions.containsKey(regionInfo)) {
1319                     // Check if the daughter regions are still there; if they are present, offline
1320                     // them as it's the case of a rollback.
1321                     HRegionInfo hri_a = splitRegions.get(regionInfo).getFirst();
1322                     HRegionInfo hri_b = splitRegions.get(regionInfo).getSecond();
1323                     if (!regionStates.isRegionInTransition(hri_a.getEncodedName())) {
1324                       LOG.warn("Split daughter region not in transition " + hri_a);
1325                     }
1326                     if (!regionStates.isRegionInTransition(hri_b.getEncodedName())) {
1327                       LOG.warn("Split daughter region not in transition " + hri_b);
1328                     }
1329                     regionOffline(hri_a);
1330                     regionOffline(hri_b);
1331                     splitRegions.remove(regionInfo);
1332                   }
1333                   if (disabled) {
1334                     // if the server is offline, there's no harm in unassigning again
1335                     LOG.info("Opened " + regionNameStr
1336                         + "but this table is disabled, triggering close of region");
1337                     unassign(regionInfo);
1338                   }
1339                 }
1340               } else if (rs.isMergingNew()) {
1341                 synchronized (regionStates) {
1342                   String p = regionInfo.getEncodedName();
1343                   PairOfSameType<HRegionInfo> regions = mergingRegions.get(p);
1344                   if (regions != null) {
1345                     onlineMergingRegion(disabled, regions.getFirst(), serverName);
1346                     onlineMergingRegion(disabled, regions.getSecond(), serverName);
1347                   }
1348                 }
1349               }
1350             }
1351           } finally {
1352             lock.unlock();
1353           }
1354         }
1355 
1356         private void onlineMergingRegion(boolean disabled,
1357             final HRegionInfo hri, final ServerName serverName) {
1358           RegionState regionState = regionStates.getRegionState(hri);
1359           if (regionState != null && regionState.isMerging()
1360               && regionState.isOnServer(serverName)) {
1361             regionOnline(regionState.getRegion(), serverName);
1362             if (disabled) {
1363               unassign(hri);
1364             }
1365           }
1366         }
1367       });
1368     }
1369   }
1370 
1371   /**
1372    * New unassigned node has been created.
1373    *
1374    * <p>This happens when an RS begins the OPENING, SPLITTING or CLOSING of a
1375    * region by creating a znode.
1376    *
1377    * <p>When this happens we must:
1378    * <ol>
1379    *   <li>Watch the node for further children changed events</li>
1380    *   <li>Watch all new children for changed events</li>
1381    * </ol>
1382    */
1383   @Override
1384   public void nodeChildrenChanged(String path) {
1385     if (path.equals(watcher.assignmentZNode)) {
1386       zkEventWorkers.submit(new Runnable() {
1387         @Override
1388         public void run() {
1389           try {
1390             // Just make sure we see the changes for the new znodes
1391             List<String> children =
1392               ZKUtil.listChildrenAndWatchForNewChildren(
1393                 watcher, watcher.assignmentZNode);
1394             if (children != null) {
1395               Stat stat = new Stat();
1396               for (String child : children) {
1397                 // if region is in transition, we already have a watch
1398                 // on it, so no need to watch it again. As far as we know for now,
1399                 // this is needed only to watch splitting nodes.
1400                 if (!regionStates.isRegionInTransition(child)) {
1401                   ZKAssign.getDataAndWatch(watcher, child, stat);
1402                 }
1403               }
1404             }
1405           } catch (KeeperException e) {
1406             server.abort("Unexpected ZK exception reading unassigned children", e);
1407           }
1408         }
1409       });
1410     }
1411   }
1412 
1413   
1414   /**
1415    * Marks the region as online.  Removes it from regions in transition and
1416    * updates the in-memory assignment information.
1417    * <p>
1418    * Used when a region has been successfully opened on a region server.
1419    * @param regionInfo
1420    * @param sn
1421    */
1422   void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1423     regionOnline(regionInfo, sn, HConstants.NO_SEQNUM);
1424   }
1425 
1426   void regionOnline(HRegionInfo regionInfo, ServerName sn, long openSeqNum) {
1427     numRegionsOpened.incrementAndGet();
1428     regionStates.regionOnline(regionInfo, sn, openSeqNum);
1429 
1430     // Remove plan if one.
1431     clearRegionPlan(regionInfo);
1432     // Add the server to serversInUpdatingTimer
1433     addToServersInUpdatingTimer(sn);
1434     balancer.regionOnline(regionInfo, sn);
1435 
1436     // Tell our listeners that a region was opened
1437     sendRegionOpenedNotification(regionInfo, sn);
1438   }
1439 
1440   /**
1441    * Pass the assignment event to a worker for processing.
1442    * Each worker is a single thread executor service.  The reason
1443    * for just one thread is to make sure all events for a given
1444    * region are processed in order.
1445    *
1446    * @param path
1447    */
1448   private void handleAssignmentEvent(final String path) {
1449     if (path.startsWith(watcher.assignmentZNode)) {
1450       final String regionName = ZKAssign.getRegionName(watcher, path);
1451 
1452       zkEventWorkersSubmit(new RegionRunnable() {
1453         @Override
1454         public String getRegionName() {
1455           return regionName;
1456         }
1457 
1458         @Override
1459         public void run() {
1460           try {
1461             Stat stat = new Stat();
1462             byte [] data = ZKAssign.getDataAndWatch(watcher, path, stat);
1463             if (data == null) return;
1464 
1465             RegionTransition rt = RegionTransition.parseFrom(data);
1466             handleRegion(rt, stat.getVersion());
1467           } catch (KeeperException e) {
1468             server.abort("Unexpected ZK exception reading unassigned node data", e);
1469           } catch (DeserializationException e) {
1470             server.abort("Unexpected exception deserializing node data", e);
1471           }
1472         }
1473       });
1474     }
1475   }
1476 
1477   /**
1478    * Add the server to the set serversInUpdatingTimer, then {@link TimerUpdater}
1479    * will update timers for this server in the background.
1480    * @param sn
1481    */
1482   private void addToServersInUpdatingTimer(final ServerName sn) {
1483     if (tomActivated){
1484       this.serversInUpdatingTimer.add(sn);
1485     }
1486   }
1487 
1488   /**
1489    * Touch timers for all regions in transition that have the passed
1490    * <code>sn</code> in common.
1491    * Call this method whenever a server checks in.  Doing so helps the case where
1492    * a new regionserver has joined the cluster and it's been given 1k regions to
1493    * open.  If this method is tickled every time a region reports a successful
1494    * open then the 1k-th region won't be timed out just because it's sitting
1495    * behind the open of 999 other regions.  This method is NOT used
1496    * as part of bulk assign -- there we have a different mechanism for extending
1497    * the regions in transition timer (we turn it off temporarily -- because
1498    * there is no regionplan involved when bulk assigning).
1499    * @param sn
1500    */
1501   private void updateTimers(final ServerName sn) {
1502     Preconditions.checkState(tomActivated);
1503     if (sn == null) return;
1504 
1505     // This loop could be expensive.
1506     // First make a copy of current regionPlan rather than hold sync while
1507     // looping because holding sync can cause deadlock.  Its ok in this loop
1508     // if the Map we're going against is a little stale
1509     List<Map.Entry<String, RegionPlan>> rps;
1510     synchronized(this.regionPlans) {
1511       rps = new ArrayList<Map.Entry<String, RegionPlan>>(regionPlans.entrySet());
1512     }
1513 
1514     for (Map.Entry<String, RegionPlan> e : rps) {
1515       if (e.getValue() != null && e.getKey() != null && sn.equals(e.getValue().getDestination())) {
1516         RegionState regionState = regionStates.getRegionTransitionState(e.getKey());
1517         if (regionState != null) {
1518           regionState.updateTimestampToNow();
1519         }
1520       }
1521     }
1522   }
1523 
1524   /**
1525    * Marks the region as offline.  Removes it from regions in transition and
1526    * removes in-memory assignment information.
1527    * <p>
1528    * Used when a region has been closed and should remain closed.
1529    * @param regionInfo
1530    */
1531   public void regionOffline(final HRegionInfo regionInfo) {
1532     regionOffline(regionInfo, null);
1533   }
1534 
1535   public void offlineDisabledRegion(HRegionInfo regionInfo) {
1536     if (useZKForAssignment) {
1537       // Disabling so should not be reassigned, just delete the CLOSED node
1538       LOG.debug("Table being disabled so deleting ZK node and removing from " +
1539         "regions in transition, skipping assignment of region " +
1540           regionInfo.getRegionNameAsString());
1541       String encodedName = regionInfo.getEncodedName();
1542       deleteNodeInStates(encodedName, "closed", null,
1543         EventType.RS_ZK_REGION_CLOSED, EventType.M_ZK_REGION_OFFLINE);
1544     }
1545     regionOffline(regionInfo);
1546   }
1547 
1548   // Assignment methods
1549 
1550   /**
1551    * Assigns the specified region.
1552    * <p>
1553    * If a RegionPlan is available with a valid destination then it will be used
1554    * to determine what server the region is assigned to.  If no RegionPlan is
1555    * available, the region will be assigned to a random available server.
1556    * <p>
1557    * Updates the RegionState and sends the OPEN RPC.
1558    * <p>
1559    * This will only succeed if the region is in transition and in a CLOSED or
1560    * OFFLINE state or not in transition (in-memory, not zk), and of course, the
1561    * chosen server is up and running (it may have just crashed!).  If the
1562    * in-memory checks pass, the zk node is forced to OFFLINE before assigning.
1563    *
1564    * @param region the region to be assigned
1565    * @param setOfflineInZK whether ZK node should be created/transitioned to an
1566    *                       OFFLINE state before assigning the region
1567    */
1568   public void assign(HRegionInfo region, boolean setOfflineInZK) {
1569     assign(region, setOfflineInZK, false);
1570   }
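       // Usage note (added for clarity): the HBCK repair path above calls
       // assign(regionInfo, false) because the znode is already OFFLINE; callers pass
       // true for setOfflineInZK when the znode should first be created or forced to
       // OFFLINE, as described in the javadoc above.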
1571 
1572   /**
1573    * Use care with forceNewPlan. It could cause double assignment.
1574    */
1575   public void assign(HRegionInfo region,
1576       boolean setOfflineInZK, boolean forceNewPlan) {
1577     if (isDisabledorDisablingRegionInRIT(region)) {
1578       return;
1579     }
1580     if (this.serverManager.isClusterShutdown()) {
1581       LOG.info("Cluster shutdown is set; skipping assign of " +
1582         region.getRegionNameAsString());
1583       return;
1584     }
1585     String encodedName = region.getEncodedName();
1586     Lock lock = locker.acquireLock(encodedName);
1587     try {
1588       RegionState state = forceRegionStateToOffline(region, forceNewPlan);
1589       if (state != null) {
1590         if (regionStates.wasRegionOnDeadServer(encodedName)) {
1591           LOG.info("Skip assigning " + region.getRegionNameAsString()
1592             + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName)
1593             + " is dead but not processed yet");
1594           return;
1595         }
1596         assign(state, setOfflineInZK && useZKForAssignment, forceNewPlan);
1597       }
1598     } finally {
1599       lock.unlock();
1600     }
1601   }
1602 
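       // High-level outline of the bulk assign below (summary added for readability):
       //   1. Acquire a lock per region and force each region state to OFFLINE.
       //   2. If ZK is used for assignment, asynchronously set each znode to OFFLINE via
       //      OfflineCallback and wait until all callbacks have completed.
       //   3. Record RegionPlans, mark the regions PENDING_OPEN, and send a single bulk
       //      OPEN RPC to the destination, retrying on startup or socket-timeout errors.
       //   4. Regions that could not be prepared or that failed to open are reassigned
       //      individually via invokeAssign().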
1603   /**
1604    * Bulk assign regions to <code>destination</code>.
1605    * @param destination
1606    * @param regions Regions to assign.
1607    * @return true if successful
1608    */
1609   boolean assign(final ServerName destination, final List<HRegionInfo> regions) {
1610     long startTime = EnvironmentEdgeManager.currentTimeMillis();
1611     try {
1612       int regionCount = regions.size();
1613       if (regionCount == 0) {
1614         return true;
1615       }
1616       LOG.debug("Assigning " + regionCount + " region(s) to " + destination.toString());
1617       Set<String> encodedNames = new HashSet<String>(regionCount);
1618       for (HRegionInfo region : regions) {
1619         encodedNames.add(region.getEncodedName());
1620       }
1621 
1622       List<HRegionInfo> failedToOpenRegions = new ArrayList<HRegionInfo>();
1623       Map<String, Lock> locks = locker.acquireLocks(encodedNames);
1624       try {
1625         AtomicInteger counter = new AtomicInteger(0);
1626         Map<String, Integer> offlineNodesVersions = new ConcurrentHashMap<String, Integer>();
1627         OfflineCallback cb = new OfflineCallback(
1628           watcher, destination, counter, offlineNodesVersions);
1629         Map<String, RegionPlan> plans = new HashMap<String, RegionPlan>(regions.size());
1630         List<RegionState> states = new ArrayList<RegionState>(regions.size());
1631         for (HRegionInfo region : regions) {
1632           String encodedName = region.getEncodedName();
1633           if (!isDisabledorDisablingRegionInRIT(region)) {
1634             RegionState state = forceRegionStateToOffline(region, false);
1635             boolean onDeadServer = false;
1636             if (state != null) {
1637               if (regionStates.wasRegionOnDeadServer(encodedName)) {
1638                 LOG.info("Skip assigning " + region.getRegionNameAsString()
1639                   + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName)
1640                   + " is dead but not processed yet");
1641                 onDeadServer = true;
1642               } else if (!useZKForAssignment
1643                   || asyncSetOfflineInZooKeeper(state, cb, destination)) {
1644                 RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
1645                 plans.put(encodedName, plan);
1646                 states.add(state);
1647                 continue;
1648               }
1649             }
1650             // Reassign if the region wasn't on a dead server
1651             if (!onDeadServer) {
1652               LOG.info("failed to force region state to offline or "
1653                 + "failed to set it offline in ZK, will reassign later: " + region);
1654               failedToOpenRegions.add(region); // assign individually later
1655             }
1656           }
1657           // Release the lock, this region is excluded from bulk assign because
1658           // we can't update its state, or set its znode to offline.
1659           Lock lock = locks.remove(encodedName);
1660           lock.unlock();
1661         }
1662         if (useZKForAssignment) {
1663           // Wait until all unassigned nodes have been put up and watchers set.
1664           int total = states.size();
1665           for (int oldCounter = 0; !server.isStopped();) {
1666             int count = counter.get();
1667             if (oldCounter != count) {
1668               LOG.info(destination.toString() + " unassigned znodes=" + count + " of total="
1669                   + total);
1670               oldCounter = count;
1671             }
1672             if (count >= total) break;
1673             Threads.sleep(5);
1674           }
1675         }
1676 
1677         if (server.isStopped()) {
1678           return false;
1679         }
1680 
1681         // Add region plans, so we can updateTimers when one region is opened so
1682         // that unnecessary timeout on RIT is reduced.
1683         this.addPlans(plans);
1684 
1685         List<Triple<HRegionInfo, Integer, List<ServerName>>> regionOpenInfos =
1686           new ArrayList<Triple<HRegionInfo, Integer, List<ServerName>>>(states.size());
1687         for (RegionState state: states) {
1688           HRegionInfo region = state.getRegion();
1689           String encodedRegionName = region.getEncodedName();
1690           Integer nodeVersion = offlineNodesVersions.get(encodedRegionName);
1691           if (useZKForAssignment && (nodeVersion == null || nodeVersion == -1)) {
1692             LOG.warn("failed to offline in zookeeper: " + region);
1693             failedToOpenRegions.add(region); // assign individually later
1694             Lock lock = locks.remove(encodedRegionName);
1695             lock.unlock();
1696           } else {
1697             regionStates.updateRegionState(
1698               region, State.PENDING_OPEN, destination);
1699             List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
1700             if (this.shouldAssignRegionsWithFavoredNodes) {
1701               favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
1702             }
1703             regionOpenInfos.add(new Triple<HRegionInfo, Integer,  List<ServerName>>(
1704               region, nodeVersion, favoredNodes));
1705           }
1706         }
1707 
1708         // Move on to open regions.
1709         try {
1710           // Send OPEN RPC. If it fails on an IOE or RemoteException,
1711           // regions will be assigned individually.
1712           long maxWaitTime = System.currentTimeMillis() +
1713             this.server.getConfiguration().
1714               getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1715           for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
1716             try {
1717               // regionOpenInfos is empty if all regions are in failedToOpenRegions list
1718               if (regionOpenInfos.isEmpty()) {
1719                 break;
1720               }
1721               List<RegionOpeningState> regionOpeningStateList = serverManager
1722                 .sendRegionOpen(destination, regionOpenInfos);
1723               if (regionOpeningStateList == null) {
1724                 // Failed getting RPC connection to this server
1725                 return false;
1726               }
1727               for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
1728                 RegionOpeningState openingState = regionOpeningStateList.get(k);
1729                 if (openingState != RegionOpeningState.OPENED) {
1730                   HRegionInfo region = regionOpenInfos.get(k).getFirst();
1731                   if (openingState == RegionOpeningState.ALREADY_OPENED) {
1732                     processAlreadyOpenedRegion(region, destination);
1733                   } else if (openingState == RegionOpeningState.FAILED_OPENING) {
1734                     // Failed opening this region, reassign it later
1735                     failedToOpenRegions.add(region);
1736                   } else {
1737                     LOG.warn("THIS SHOULD NOT HAPPEN: unknown opening state "
1738                       + openingState + " in assigning region " + region);
1739                   }
1740                 }
1741               }
1742               break;
1743             } catch (IOException e) {
1744               if (e instanceof RemoteException) {
1745                 e = ((RemoteException)e).unwrapRemoteException();
1746               }
1747               if (e instanceof RegionServerStoppedException) {
1748                 LOG.warn("The region server was shut down, ", e);
1749                 // No need to retry, the region server is a goner.
1750                 return false;
1751               } else if (e instanceof ServerNotRunningYetException) {
1752                 long now = System.currentTimeMillis();
1753                 if (now < maxWaitTime) {
1754                   LOG.debug("Server is not yet up; waiting up to " +
1755                     (maxWaitTime - now) + "ms", e);
1756                   Thread.sleep(100);
1757                   i--; // reset the try count
1758                   continue;
1759                 }
1760               } else if (e instanceof java.net.SocketTimeoutException
1761                   && this.serverManager.isServerOnline(destination)) {
1762                 // In case socket is timed out and the region server is still online,
1763                 // the openRegion RPC could have been accepted by the server and
1764                 // just the response didn't go through.  So we will retry to
1765                 // open the region on the same server.
1766                 if (LOG.isDebugEnabled()) {
1767                   LOG.debug("Bulk assigner openRegion() to " + destination
1768                     + " has timed out, but the regions might"
1769                     + " already be opened on it.", e);
1770                 }
1771                 // wait and reset the re-try count, server might be just busy.
1772                 Thread.sleep(100);
1773                 i--;
1774                 continue;
1775               }
1776               throw e;
1777             }
1778           }
1779         } catch (IOException e) {
1780           // Can be a socket timeout, EOF, NoRouteToHost, etc
1781           LOG.info("Unable to communicate with " + destination
1782             + " in order to assign regions, ", e);
1783           return false;
1784         } catch (InterruptedException e) {
1785           throw new RuntimeException(e);
1786         }
1787       } finally {
1788         for (Lock lock : locks.values()) {
1789           lock.unlock();
1790         }
1791       }
1792 
1793       if (!failedToOpenRegions.isEmpty()) {
1794         for (HRegionInfo region : failedToOpenRegions) {
1795           if (!regionStates.isRegionOnline(region)) {
1796             invokeAssign(region);
1797           }
1798         }
1799       }
1800       LOG.debug("Bulk assigning done for " + destination);
1801       return true;
1802     } finally {
1803       metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTimeMillis() - startTime);
1804     }
1805   }
1806 
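       // Summary (added for readability): the unassign below retries the CLOSE RPC up to
       // maximumAttempts times. NotServingRegionException, RegionServerStoppedException and
       // ServerNotRunningYetException mean the region is no longer on that server, so the
       // region is offlined and the method returns; FailedServerException and
       // RegionAlreadyInTransitionException trigger a short sleep before the next attempt.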
1807   /**
1808    * Send CLOSE RPC if the server is online, otherwise, offline the region.
1809    *
1810    * The RPC will be sent only to the region server found in the region state
1811    * if it is passed in; otherwise, to the src server specified. If the region
1812    * state is not specified, we don't update the region state at all, instead
1813    * we just send the RPC call. This is useful for some cleanup without
1814    * messing around with the region states (see handleRegion, on the "region
1815    * opened on an unexpected server" scenario, for an example).
1816    */
1817   private void unassign(final HRegionInfo region,
1818       final RegionState state, final int versionOfClosingNode,
1819       final ServerName dest, final boolean transitionInZK,
1820       final ServerName src) {
1821     ServerName server = src;
1822     if (state != null) {
1823       server = state.getServerName();
1824     }
1825     long maxWaitTime = -1;
1826     for (int i = 1; i <= this.maximumAttempts; i++) {
1827       if (this.server.isStopped() || this.server.isAborted()) {
1828         LOG.debug("Server stopped/aborted; skipping unassign of " + region);
1829         return;
1830       }
1831       // ClosedRegionHandler can remove the server from this.regions
1832       if (!serverManager.isServerOnline(server)) {
1833         LOG.debug("Offline " + region.getRegionNameAsString()
1834           + ", no need to unassign since it's on a dead server: " + server);
1835         if (transitionInZK) {
1836           // delete the node. if no node exists need not bother.
1837           deleteClosingOrClosedNode(region, server);
1838         }
1839         if (state != null) {
1840           regionOffline(region);
1841         }
1842         return;
1843       }
1844       try {
1845         // Send CLOSE RPC
1846         if (serverManager.sendRegionClose(server, region,
1847           versionOfClosingNode, dest, transitionInZK)) {
1848           LOG.debug("Sent CLOSE to " + server + " for region " +
1849             region.getRegionNameAsString());
1850           if (useZKForAssignment && !transitionInZK && state != null) {
1851             // Retry to make sure the region is
1852             // closed so as to avoid double assignment.
1853             unassign(region, state, versionOfClosingNode,
1854               dest, transitionInZK, src);
1855           }
1856           return;
1857         }
1858         // This never happens. Currently regionserver close always returns true.
1859         // TODO: this can now happen (0.96) if there is an exception in a coprocessor
1860         LOG.warn("Server " + server + " region CLOSE RPC returned false for " +
1861           region.getRegionNameAsString());
1862       } catch (Throwable t) {
1863         if (t instanceof RemoteException) {
1864           t = ((RemoteException)t).unwrapRemoteException();
1865         }
1866         boolean logRetries = true;
1867         if (t instanceof NotServingRegionException
1868             || t instanceof RegionServerStoppedException
1869             || t instanceof ServerNotRunningYetException) {
1870           LOG.debug("Offline " + region.getRegionNameAsString()
1871             + ", it's not any more on " + server, t);
1872           if (transitionInZK) {
1873             deleteClosingOrClosedNode(region, server);
1874           }
1875           if (state != null) {
1876             regionOffline(region);
1877           }
1878           return;
1879         } else if ((t instanceof FailedServerException) || (state != null &&
1880             t instanceof RegionAlreadyInTransitionException)) {
1881           long sleepTime = 0;
1882           Configuration conf = this.server.getConfiguration();
1883           if(t instanceof FailedServerException) {
1884             sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
1885                   RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
1886           } else {
1887             // RS is already processing this region, only need to update the timestamp
1888             LOG.debug("update " + state + " the timestamp.");
1889             state.updateTimestampToNow();
1890             if (maxWaitTime < 0) {
1891               maxWaitTime =
1892                   EnvironmentEdgeManager.currentTimeMillis()
1893                       + conf.getLong(ALREADY_IN_TRANSITION_WAITTIME,
1894                         DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
1895             }
1896             long now = EnvironmentEdgeManager.currentTimeMillis();
1897             if (now < maxWaitTime) {
1898               LOG.debug("Region is already in transition; "
1899                 + "waiting up to " + (maxWaitTime - now) + "ms", t);
1900               sleepTime = 100;
1901               i--; // reset the try count
1902               logRetries = false;
1903             }
1904           }
1905           try {
1906             if (sleepTime > 0) {
1907               Thread.sleep(sleepTime);
1908             }
1909           } catch (InterruptedException ie) {
1910             LOG.warn("Failed to unassign "
1911               + region.getRegionNameAsString() + " since interrupted", ie);
1912             Thread.currentThread().interrupt();
1913             if (!tomActivated && state != null) {
1914               regionStates.updateRegionState(region, State.FAILED_CLOSE);
1915             }
1916             return;
1917           }
1918         }
1919 
1920         if (logRetries) {
1921           LOG.info("Server " + server + " returned " + t + " for "
1922             + region.getRegionNameAsString() + ", try=" + i
1923             + " of " + this.maximumAttempts, t);
1924           // Presume retry or server will expire.
1925         }
1926       }
1927     }
1928     // Run out of attempts
1929     if (!tomActivated && state != null) {
1930       regionStates.updateRegionState(region, State.FAILED_CLOSE);
1931     }
1932   }
1933 
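       // Note (added for clarity): the switch in forceRegionStateToOffline relies on
       // intentional fall-through. With forceNewPlan, an in-transition region falls
       // through to the FAILED_CLOSE/FAILED_OPEN branch (which unassigns it), then to
       // the OFFLINE dead-server check, and finally to CLOSED, which breaks out and
       // returns the state so the caller can assign the region.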
1934   /**
1935    * Set region to OFFLINE unless it is opening and forceNewPlan is false.
1936    */
1937   private RegionState forceRegionStateToOffline(
1938       final HRegionInfo region, final boolean forceNewPlan) {
1939     RegionState state = regionStates.getRegionState(region);
1940     if (state == null) {
1941       LOG.warn("Assigning a region not in region states: " + region);
1942       state = regionStates.createRegionState(region);
1943     }
1944 
1945     ServerName sn = state.getServerName();
1946     if (forceNewPlan && LOG.isDebugEnabled()) {
1947       LOG.debug("Force region state offline " + state);
1948     }
1949 
1950     switch (state.getState()) {
1951     case OPEN:
1952     case OPENING:
1953     case PENDING_OPEN:
1954     case CLOSING:
1955     case PENDING_CLOSE:
1956       if (!forceNewPlan) {
1957         LOG.debug("Skip assigning " +
1958           region + ", it is already " + state);
1959         return null;
1960       }
1961     case FAILED_CLOSE:
1962     case FAILED_OPEN:
1963       unassign(region, state, -1, null, false, null);
1964       state = regionStates.getRegionState(region);
1965       if (state.isFailedClose()) {
1966         // If we can't close the region, we can't re-assign
1967         // it so as to avoid possible double assignment/data loss.
1968         LOG.info("Skip assigning " +
1969           region + ", we couldn't close it: " + state);
1970         return null;
1971       }
1972     case OFFLINE:
1973       // This region could have been open on this server
1974       // for a while. If the server is dead and not processed
1975       // yet, we can move on only if the meta shows the
1976       // region is not on this server actually, or on a server
1977       // not dead, or dead and processed already.
1978       // In case not using ZK, we don't need this check because
1979       // we have the latest info in memory, and the caller
1980       // will do another round checking any way.
1981       if (useZKForAssignment
1982           && regionStates.isServerDeadAndNotProcessed(sn)
1983           && wasRegionOnDeadServerByMeta(region, sn)) {
1984         if (!regionStates.isRegionInTransition(region)) {
1985           LOG.info("Updating the state to " + State.OFFLINE + " to allow to be reassigned by SSH");
1986           regionStates.updateRegionState(region, State.OFFLINE);
1987         }
1988         LOG.info("Skip assigning " + region.getRegionNameAsString()
1989             + ", it is on a dead but not processed yet server: " + sn);
1990         return null;
1991       }
1992     case CLOSED:
1993       break;
1994     default:
1995       LOG.error("Trying to assign region " + region
1996         + ", which is " + state);
1997       return null;
1998     }
1999     return state;
2000   }
2001 
2002   private boolean wasRegionOnDeadServerByMeta(
2003       final HRegionInfo region, final ServerName sn) {
2004     try {
2005       if (region.isMetaRegion()) {
2006         ServerName server = catalogTracker.getMetaLocation();
2007         return regionStates.isServerDeadAndNotProcessed(server);
2008       }
2009       while (!server.isStopped()) {
2010         try {
2011           catalogTracker.waitForMeta();
2012           Result r = MetaReader.getRegionResult(catalogTracker, region.getRegionName());
2013           if (r == null || r.isEmpty()) return false;
2014           ServerName server = HRegionInfo.getServerName(r);
2015           return regionStates.isServerDeadAndNotProcessed(server);
2016         } catch (IOException ioe) {
2017           LOG.info("Received exception accessing hbase:meta during force assign "
2018             + region.getRegionNameAsString() + ", retrying", ioe);
2019         }
2020       }
2021     } catch (InterruptedException e) {
2022       Thread.currentThread().interrupt();
2023       LOG.info("Interrupted accessing hbase:meta", e);
2024     }
2025     // Call is interrupted or server is stopped.
2026     return regionStates.isServerDeadAndNotProcessed(sn);
2027   }
2028 
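       // Summary (added for readability): each iteration of the retry loop in assign()
       // below picks (or reuses) a RegionPlan, optionally sets the znode OFFLINE, and
       // sends the OPEN RPC. On failure it either waits and retries the same server
       // (the "hold"/"retry" cases, e.g. ServerNotRunningYetException or a socket
       // timeout) or asks for a new plan (needNewPlan) and tries a different server.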
2029   /**
2030    * Caller must hold lock on the passed <code>state</code> object.
2031    * @param state
2032    * @param setOfflineInZK
2033    * @param forceNewPlan
2034    */
2035   private void assign(RegionState state,
2036       final boolean setOfflineInZK, final boolean forceNewPlan) {
2037     long startTime = EnvironmentEdgeManager.currentTimeMillis();
2038     try {
2039       Configuration conf = server.getConfiguration();
2040       RegionState currentState = state;
2041       int versionOfOfflineNode = -1;
2042       RegionPlan plan = null;
2043       long maxWaitTime = -1;
2044       HRegionInfo region = state.getRegion();
2045       RegionOpeningState regionOpenState;
2046       Throwable previousException = null;
2047       for (int i = 1; i <= maximumAttempts; i++) {
2048         if (server.isStopped() || server.isAborted()) {
2049           LOG.info("Skip assigning " + region.getRegionNameAsString()
2050             + ", the server is stopped/aborted");
2051           return;
2052         }
2053 
2054         if (plan == null) { // Get a server for the region at first
2055           try {
2056             plan = getRegionPlan(region, forceNewPlan);
2057           } catch (HBaseIOException e) {
2058             LOG.warn("Failed to get region plan", e);
2059           }
2060         }
2061 
2062         if (plan == null) {
2063           LOG.warn("Unable to determine a plan to assign " + region);
2064           if (tomActivated){
2065             this.timeoutMonitor.setAllRegionServersOffline(true);
2066           } else {
2067             if (region.isMetaRegion()) {
2068               if (i == maximumAttempts) {
2069                 i = 0; // re-set attempt count to 0 for at least 1 retry
2070 
2071                 LOG.warn("Unable to determine a plan to assign a hbase:meta region " + region +
2072                   " after maximumAttempts (" + this.maximumAttempts +
2073                   "). Reset attempts count and continue retrying.");
2074               }
2075               waitForRetryingMetaAssignment();
2076               continue;
2077             }
2078 
2079             regionStates.updateRegionState(region, State.FAILED_OPEN);
2080           }
2081           return;
2082         }
2083         if (setOfflineInZK && versionOfOfflineNode == -1) {
2084           // get the version of the znode after setting it to OFFLINE.
2085           // versionOfOfflineNode will be -1 if the znode was not set to OFFLINE
2086           versionOfOfflineNode = setOfflineInZooKeeper(currentState, plan.getDestination());
2087           if (versionOfOfflineNode != -1) {
2088             if (isDisabledorDisablingRegionInRIT(region)) {
2089               return;
2090             }
2091             // In case of assignment from EnableTableHandler the table state is ENABLING. Anyhow,
2092             // EnableTableHandler will set ENABLED after assigning all the table regions. If we
2093             // try to set it to ENABLED directly then the client API may think the table is enabled.
2094             // When all the regions are added directly into hbase:meta and we call
2095             // assignRegion, then we need to make the table ENABLED. Hence in such a case the table
2096             // will not be in ENABLING or ENABLED state.
2097             TableName tableName = region.getTable();
2098             if (!zkTable.isEnablingTable(tableName) && !zkTable.isEnabledTable(tableName)) {
2099               LOG.debug("Setting table " + tableName + " to ENABLED state.");
2100               setEnabledTable(tableName);
2101             }
2102           }
2103         }
2104         if (setOfflineInZK && versionOfOfflineNode == -1) {
2105           LOG.info("Unable to set offline in ZooKeeper to assign " + region);
2106           // Setting offline in ZK must have failed due to ZK racing or some
2107           // exception which may make the server abort. If it is ZK racing,
2108           // we should retry since we already reset the region state,
2109           // existing (re)assignment will fail anyway.
2110           if (!server.isAborted()) {
2111             continue;
2112           }
2113         }
2114         LOG.info("Assigning " + region.getRegionNameAsString() +
2115             " to " + plan.getDestination().toString());
2116         // Transition RegionState to PENDING_OPEN
2117         currentState = regionStates.updateRegionState(region,
2118           State.PENDING_OPEN, plan.getDestination());
2119 
2120         boolean needNewPlan;
2121         final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() +
2122             " to " + plan.getDestination();
2123         try {
2124           List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
2125           if (this.shouldAssignRegionsWithFavoredNodes) {
2126             favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
2127           }
2128           regionOpenState = serverManager.sendRegionOpen(
2129               plan.getDestination(), region, versionOfOfflineNode, favoredNodes);
2130 
2131           if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
2132             // Failed opening this region, looping again on a new server.
2133             needNewPlan = true;
2134             LOG.warn(assignMsg + ", regionserver says 'FAILED_OPENING', " +
2135                 " trying to assign elsewhere instead; " +
2136                 "try=" + i + " of " + this.maximumAttempts);
2137           } else {
2138             // we're done
2139             if (regionOpenState == RegionOpeningState.ALREADY_OPENED) {
2140               processAlreadyOpenedRegion(region, plan.getDestination());
2141             }
2142             return;
2143           }
2144 
2145         } catch (Throwable t) {
2146           if (t instanceof RemoteException) {
2147             t = ((RemoteException) t).unwrapRemoteException();
2148           }
2149           previousException = t;
2150 
2151           // Should we wait a little before retrying? If the server is starting it's yes.
2152           // If the region is already in transition, it's yes as well: we want to be sure that
2153           //  the region will get opened but we don't want a double assignment.
2154           boolean hold = (t instanceof RegionAlreadyInTransitionException ||
2155               t instanceof ServerNotRunningYetException);
2156 
2157           // In case socket is timed out and the region server is still online,
2158           // the openRegion RPC could have been accepted by the server and
2159           // just the response didn't go through.  So we will retry to
2160           // open the region on the same server to avoid possible
2161           // double assignment.
2162           boolean retry = !hold && (t instanceof java.net.SocketTimeoutException
2163               && this.serverManager.isServerOnline(plan.getDestination()));
2164 
2165           if (hold) {
2166             LOG.warn(assignMsg + ", waiting a little before trying on the same region server " +
2167               "try=" + i + " of " + this.maximumAttempts, t);
2168 
2169             if (maxWaitTime < 0) {
2170               if (t instanceof RegionAlreadyInTransitionException) {
2171                 maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
2172                   + this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
2173                     DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
2174               } else {
2175                 maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
2176                   + this.server.getConfiguration().getLong(
2177                     "hbase.regionserver.rpc.startup.waittime", 60000);
2178               }
2179             }
2180             try {
2181               needNewPlan = false;
2182               long now = EnvironmentEdgeManager.currentTimeMillis();
2183               if (now < maxWaitTime) {
2184                 LOG.debug("Server is not yet up or region is already in transition; "
2185                   + "waiting up to " + (maxWaitTime - now) + "ms", t);
2186                 Thread.sleep(100);
2187                 i--; // reset the try count
2188               } else if (!(t instanceof RegionAlreadyInTransitionException)) {
2189                 LOG.debug("Server is not up for a while; try a new one", t);
2190                 needNewPlan = true;
2191               }
2192             } catch (InterruptedException ie) {
2193               LOG.warn("Failed to assign "
2194                   + region.getRegionNameAsString() + " since interrupted", ie);
2195               Thread.currentThread().interrupt();
2196               if (!tomActivated) {
2197                 regionStates.updateRegionState(region, State.FAILED_OPEN);
2198               }
2199               return;
2200             }
2201           } else if (retry) {
2202             needNewPlan = false;
2203             i--; // we want to retry as many times as needed as long as the RS is not dead.
2204             LOG.warn(assignMsg + ", trying to assign to the same region server due ", t);
2205           } else {
2206             needNewPlan = true;
2207             LOG.warn(assignMsg + ", trying to assign elsewhere instead;" +
2208                 " try=" + i + " of " + this.maximumAttempts, t);
2209           }
2210         }
2211 
2212         if (i == this.maximumAttempts) {
2213           // For meta region, we have to keep retrying until succeeding
2214           if (region.isMetaRegion()) {
2215             i = 0; // re-set attempt count to 0 for at least 1 retry
2216             LOG.warn(assignMsg +
2217                 ", trying to assign a hbase:meta region reached to maximumAttempts (" +
2218                 this.maximumAttempts + ").  Reset attempt counts and continue retrying.");
2219             waitForRetryingMetaAssignment();
2220           }
2221           else {
2222             // Don't reset the region state or get a new plan any more.
2223             // This is the last try.
2224             continue;
2225           }
2226         }
2227 
2228         // If region opened on destination of present plan, reassigning to new
2229         // RS may cause double assignments. In case of RegionAlreadyInTransitionException
2230         // reassigning to same RS.
2231         if (needNewPlan) {
2232           // Force a new plan and reassign. Will return null if no servers.
2233           // The new plan could be the same as the existing plan since we don't
2234           // exclude the server of the original plan, which should not be
2235           // excluded since it could be the only server up now.
2236           RegionPlan newPlan = null;
2237           try {
2238             newPlan = getRegionPlan(region, true);
2239           } catch (HBaseIOException e) {
2240             LOG.warn("Failed to get region plan", e);
2241           }
2242           if (newPlan == null) {
2243             if (tomActivated) {
2244               this.timeoutMonitor.setAllRegionServersOffline(true);
2245             } else {
2246               regionStates.updateRegionState(region, State.FAILED_OPEN);
2247             }
2248             LOG.warn("Unable to find a viable location to assign region " +
2249                 region.getRegionNameAsString());
2250             return;
2251           }
2252 
2253           if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
2254             // Clean out the plan we failed to execute and one that doesn't look like it'll
2255             // succeed anyway; we need a new plan!
2256             // Transition back to OFFLINE
2257             currentState = regionStates.updateRegionState(region, State.OFFLINE);
2258             versionOfOfflineNode = -1;
2259             plan = newPlan;
2260           } else if(plan.getDestination().equals(newPlan.getDestination()) &&
2261               previousException instanceof FailedServerException) {
2262             try {
2263               LOG.info("Trying to re-assign " + region.getRegionNameAsString() +
2264                 " to the same failed server.");
2265               Thread.sleep(1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
2266                 RpcClient.FAILED_SERVER_EXPIRY_DEFAULT));
2267             } catch (InterruptedException ie) {
2268               LOG.warn("Failed to assign "
2269                   + region.getRegionNameAsString() + " since interrupted", ie);
2270               Thread.currentThread().interrupt();
2271               if (!tomActivated) {
2272                 regionStates.updateRegionState(region, State.FAILED_OPEN);
2273               }
2274               return;
2275             }
2276           }
2277         }
2278       }
2279       // Run out of attempts
2280       if (!tomActivated) {
2281         regionStates.updateRegionState(region, State.FAILED_OPEN);
2282       }
2283     } finally {
2284       metricsAssignmentManager.updateAssignmentTime(EnvironmentEdgeManager.currentTimeMillis() - startTime);
2285     }
2286   }
2287 
2288   private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
2289     // Remove region from in-memory transition and unassigned node from ZK
2290     // While trying to enable the table the regions of the table were
2291     // already open.
2292     LOG.debug("ALREADY_OPENED " + region.getRegionNameAsString()
2293       + " to " + sn);
2294     String encodedName = region.getEncodedName();
2295     deleteNodeInStates(encodedName, "offline", sn, EventType.M_ZK_REGION_OFFLINE);
2296     regionStates.regionOnline(region, sn);
2297   }
2298 
2299   private boolean isDisabledorDisablingRegionInRIT(final HRegionInfo region) {
2300     TableName tableName = region.getTable();
2301     boolean disabled = this.zkTable.isDisabledTable(tableName);
2302     if (disabled || this.zkTable.isDisablingTable(tableName)) {
2303       LOG.info("Table " + tableName + (disabled ? " disabled;" : " disabling;") +
2304         " skipping assign of " + region.getRegionNameAsString());
2305       offlineDisabledRegion(region);
2306       return true;
2307     }
2308     return false;
2309   }
2310 
2311   /**
2312    * Wait for some time before retrying meta table region assignment
2313    */
2314   private void waitForRetryingMetaAssignment() {
2315     try {
2316       Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment);
2317     } catch (InterruptedException e) {
2318       LOG.error("Got exception while waiting for hbase:meta assignment");
2319       Thread.currentThread().interrupt();
2320     }
2321   }
2322 
2323   /**
2324    * Set region as OFFLINE up in ZooKeeper.
2325    *
2326    * @param state
2327    * @return the version of the offline node if setting of the OFFLINE node was
2328    *         successful, -1 otherwise.
2329    */
2330   private int setOfflineInZooKeeper(final RegionState state, final ServerName destination) {
2331     if (!state.isClosed() && !state.isOffline()) {
2332       String msg = "Unexpected state : " + state + " .. Cannot transit it to OFFLINE.";
2333       this.server.abort(msg, new IllegalStateException(msg));
2334       return -1;
2335     }
2336     regionStates.updateRegionState(state.getRegion(), State.OFFLINE);
2337     int versionOfOfflineNode;
2338     try {
2339       // get the version after setting the znode to OFFLINE
2340       versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(watcher,
2341         state.getRegion(), destination);
2342       if (versionOfOfflineNode == -1) {
2343         LOG.warn("Attempted to create/force node into OFFLINE state before "
2344             + "completing assignment but failed to do so for " + state);
2345         return -1;
2346       }
2347     } catch (KeeperException e) {
2348       server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
2349       return -1;
2350     }
2351     return versionOfOfflineNode;
2352   }
2353 
2354   /**
2355    * @param region the region to assign
2356    * @return Plan for passed <code>region</code> (If none currently, it creates one or
2357    * if no servers to assign, it returns null).
2358    */
2359   private RegionPlan getRegionPlan(final HRegionInfo region,
2360       final boolean forceNewPlan)  throws HBaseIOException {
2361     return getRegionPlan(region, null, forceNewPlan);
2362   }
2363 
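       // Note (added for clarity): an existing plan is reused only if its destination is
       // still in the list of assignable servers; otherwise, or when forceNewPlan is set,
       // a new plan is generated via balancer.randomAssignment().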
2364   /**
2365    * @param region the region to assign
2366    * @param serverToExclude Server to exclude (we know its bad). Pass null if
2367    * all servers are thought to be assignable.
2368    * @param forceNewPlan If true, then if an existing plan exists, a new plan
2369    * will be generated.
2370    * @return Plan for passed <code>region</code> (If none currently, it creates one or
2371    * if no servers to assign, it returns null).
2372    */
2373   private RegionPlan getRegionPlan(final HRegionInfo region,
2374       final ServerName serverToExclude, final boolean forceNewPlan) throws HBaseIOException {
2375     // Pickup existing plan or make a new one
2376     final String encodedName = region.getEncodedName();
2377     final List<ServerName> destServers =
2378       serverManager.createDestinationServersList(serverToExclude);
2379 
2380     if (destServers.isEmpty()){
2381       LOG.warn("Can't move " + encodedName +
2382         ", there is no destination server available.");
2383       return null;
2384     }
2385 
2386     RegionPlan randomPlan = null;
2387     boolean newPlan = false;
2388     RegionPlan existingPlan;
2389 
2390     synchronized (this.regionPlans) {
2391       existingPlan = this.regionPlans.get(encodedName);
2392 
2393       if (existingPlan != null && existingPlan.getDestination() != null) {
2394         LOG.debug("Found an existing plan for " + region.getRegionNameAsString()
2395           + " destination server is " + existingPlan.getDestination() +
2396             " accepted as a dest server = " + destServers.contains(existingPlan.getDestination()));
2397       }
2398 
2399       if (forceNewPlan
2400           || existingPlan == null
2401           || existingPlan.getDestination() == null
2402           || !destServers.contains(existingPlan.getDestination())) {
2403         newPlan = true;
2404         randomPlan = new RegionPlan(region, null,
2405             balancer.randomAssignment(region, destServers));
2406         if (!region.isMetaTable() && shouldAssignRegionsWithFavoredNodes) {
2407           List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
2408           regions.add(region);
2409           try {
2410             processFavoredNodes(regions);
2411           } catch (IOException ie) {
2412             LOG.warn("Ignoring exception in processFavoredNodes " + ie);
2413           }
2414         }
2415         this.regionPlans.put(encodedName, randomPlan);
2416       }
2417     }
2418 
2419     if (newPlan) {
2420       if (randomPlan.getDestination() == null) {
2421         LOG.warn("Can't find a destination for " + encodedName);
2422         return null;
2423       }
2424       LOG.debug("No previous transition plan found (or ignoring " +
2425         "an existing plan) for " + region.getRegionNameAsString() +
2426         "; generated random plan=" + randomPlan + "; " +
2427         serverManager.countOfRegionServers() +
2428                " (online=" + serverManager.getOnlineServers().size() +
2429                ", available=" + destServers.size() + ") available servers" +
2430                ", forceNewPlan=" + forceNewPlan);
2431         return randomPlan;
2432       }
2433     LOG.debug("Using pre-existing plan for " +
2434       region.getRegionNameAsString() + "; plan=" + existingPlan);
2435     return existingPlan;
2436   }
2437 
2438   /**
2439    * Unassigns the specified region.
2440    * <p>
2441    * Updates the RegionState and sends the CLOSE RPC unless region is being
2442    * split by regionserver; then the unassign fails (silently) because we
2443    * presume the region being unassigned no longer exists (it's been split out
2444    * of existence). TODO: What to do if split fails and is rolled back and
2445    * parent is revivified?
2446    * <p>
2447    * If a RegionPlan is already set, it will remain.
2448    *
2449    * @param region the region to be unassigned
2450    */
2451   public void unassign(HRegionInfo region) {
2452     unassign(region, false);
2453   }
2454 
2455 
2456   /**
2457    * Unassigns the specified region.
2458    * <p>
2459    * Updates the RegionState and sends the CLOSE RPC unless region is being
2460    * split by regionserver; then the unassign fails (silently) because we
2461    * presume the region being unassigned no longer exists (it's been split out
2462    * of existence). TODO: What to do if split fails and is rolled back and
2463    * parent is revivified?
2464    * <p>
2465    * If a RegionPlan is already set, it will remain.
2466    *
2467    * @param region the region to be unassigned
2468    * @param force if region should be closed even if already closing
2469    */
2470   public void unassign(HRegionInfo region, boolean force, ServerName dest) {
2471     // TODO: Method needs refactoring.  Ugly buried returns throughout.  Beware!
2472     LOG.debug("Starting unassign of " + region.getRegionNameAsString()
2473       + " (offlining), current state: " + regionStates.getRegionState(region));
2474 
2475     String encodedName = region.getEncodedName();
2476     // Grab the state of this region and synchronize on it
2477     int versionOfClosingNode = -1;
2478     // We need a lock here as we're going to do a put later and we don't want to
2479     //  create multiple states
2480     ReentrantLock lock = locker.acquireLock(encodedName);
2481     RegionState state = regionStates.getRegionTransitionState(encodedName);
2482     boolean reassign = true;
2483     try {
2484       if (state == null) {
2485         // Region is not in transition.
2486         // We can unassign it only if it's not SPLIT/MERGED.
2487         state = regionStates.getRegionState(encodedName);
2488         if (state != null && state.isUnassignable()) {
2489           LOG.info("Attempting to unassign " + state + ", ignored");
2490           // Offline region will be reassigned below
2491           return;
2492         }
2493         // Create the znode in CLOSING state
2494         try {
2495           if (state == null || state.getServerName() == null) {
2496             // We don't know where the region is, offline it.
2497             // No need to send CLOSE RPC
2498             LOG.warn("Attempting to unassign a region not in RegionStates: "
2499               + region.getRegionNameAsString() + ", offlined");
2500             regionOffline(region);
2501             return;
2502           }
2503           if (useZKForAssignment) {
2504             versionOfClosingNode = ZKAssign.createNodeClosing(
2505               watcher, region, state.getServerName());
2506             if (versionOfClosingNode == -1) {
2507               LOG.info("Attempting to unassign " +
2508                 region.getRegionNameAsString() + " but ZK closing node "
2509                 + "can't be created.");
2510               reassign = false; // not unassigned at all
2511               return;
2512             }
2513           }
2514         } catch (KeeperException e) {
2515           if (e instanceof NodeExistsException) {
2516             // Handle race between master initiated close and regionserver
2517             // orchestrated splitting. See if existing node is in a
2518             // SPLITTING or SPLIT state.  If so, the regionserver started
2519             // an op on node before we could get our CLOSING in.  Deal.
2520             NodeExistsException nee = (NodeExistsException)e;
2521             String path = nee.getPath();
2522             try {
2523               if (isSplitOrSplittingOrMergedOrMerging(path)) {
2524                 LOG.debug(path + " is SPLIT or SPLITTING or MERGED or MERGING; " +
2525                   "skipping unassign because region no longer exists -- its split or merge");
2526                 reassign = false; // no need to reassign for split/merged region
2527                 return;
2528               }
2529             } catch (KeeperException.NoNodeException ke) {
2530               LOG.warn("Failed getData on SPLITTING/SPLIT at " + path +
2531                 "; presuming split and that the region to unassign, " +
2532                 encodedName + ", no longer exists -- confirm", ke);
2533               return;
2534             } catch (KeeperException ke) {
2535               LOG.error("Unexpected zk state", ke);
2536             } catch (DeserializationException de) {
2537               LOG.error("Failed parse", de);
2538             }
2539           }
2540           // If we get here, we don't understand what's going on -- abort.
2541           server.abort("Unexpected ZK exception creating node CLOSING", e);
2542           reassign = false; // heading out already
2543           return;
2544         }
2545         state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
2546       } else if (state.isFailedOpen()) {
2547         // The region is not open yet
2548         regionOffline(region);
2549         return;
2550       } else if (force && state.isPendingCloseOrClosing()) {
2551         LOG.debug("Attempting to unassign " + region.getRegionNameAsString() +
2552           " which is already " + state.getState()  +
2553           " but forcing to send a CLOSE RPC again ");
2554         if (state.isFailedClose()) {
2555           state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
2556         }
2557         state.updateTimestampToNow();
2558       } else {
2559         LOG.debug("Attempting to unassign " +
2560           region.getRegionNameAsString() + " but it is " +
2561           "already in transition (" + state.getState() + ", force=" + force + ")");
2562         return;
2563       }
2564 
2565       unassign(region, state, versionOfClosingNode, dest, useZKForAssignment, null);
2566     } finally {
2567       lock.unlock();
2568 
2569       // Region is expected to be reassigned afterwards
2570       if (reassign && regionStates.isRegionOffline(region)) {
2571         assign(region, true);
2572       }
2573     }
2574   }
2575 
2576   public void unassign(HRegionInfo region, boolean force){
2577      unassign(region, force, null);
2578   }
2579 
2580   /**
2581    * @param region region info of the znode to be deleted.
2582    */
2583   public void deleteClosingOrClosedNode(HRegionInfo region, ServerName sn) {
2584     String encodedName = region.getEncodedName();
2585     deleteNodeInStates(encodedName, "closing", sn, EventType.M_ZK_REGION_CLOSING,
2586       EventType.RS_ZK_REGION_CLOSED);
2587   }
2588 
2589   /**
2590    * @param path
2591    * @return True if znode is in SPLIT or SPLITTING or MERGED or MERGING state.
2592    * @throws KeeperException Can happen if the znode went away in the meantime.
2593    * @throws DeserializationException
2594    */
2595   private boolean isSplitOrSplittingOrMergedOrMerging(final String path)
2596       throws KeeperException, DeserializationException {
2597     boolean result = false;
2598     // This may fail if the SPLIT or SPLITTING or MERGED or MERGING znode gets
2599     // cleaned up before we can get data from it.
2600     byte [] data = ZKAssign.getData(watcher, path);
2601     if (data == null) {
2602       LOG.info("Node " + path + " is gone");
2603       return false;
2604     }
2605     RegionTransition rt = RegionTransition.parseFrom(data);
2606     switch (rt.getEventType()) {
2607     case RS_ZK_REQUEST_REGION_SPLIT:
2608     case RS_ZK_REGION_SPLIT:
2609     case RS_ZK_REGION_SPLITTING:
2610     case RS_ZK_REQUEST_REGION_MERGE:
2611     case RS_ZK_REGION_MERGED:
2612     case RS_ZK_REGION_MERGING:
2613       result = true;
2614       break;
2615     default:
2616       LOG.info("Node " + path + " is in " + rt.getEventType());
2617       break;
2618     }
2619     return result;
2620   }
2621 
2622   /**
2623    * Used by unit tests. Return the number of regions opened so far in the life
2624    * of the master. Increases by one every time the master opens a region
2625    * @return the counter value of the number of regions opened so far
2626    */
2627   public int getNumRegionsOpened() {
2628     return numRegionsOpened.get();
2629   }
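  /*
   * A test-style sketch of how this counter can be observed (illustrative only; {@code am} and
   * {@code hri} are assumed names for the manager under test and a region about to be assigned):
   *
   *   int before = am.getNumRegionsOpened();
   *   am.assign(hri, true);
   *   am.waitForAssignment(hri);
   *   assertTrue(am.getNumRegionsOpened() >= before + 1);
   */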
2630 
2631   /**
2632    * Waits until the specified region has completed assignment.
2633    * <p>
2634    * If the region is already assigned, returns true immediately.  Otherwise, the method
2635    * blocks until the region is assigned; returns false if the region fails to open or the master is stopped.
2636    * @param regionInfo region to wait on assignment for
2637    * @throws InterruptedException
2638    */
2639   public boolean waitForAssignment(HRegionInfo regionInfo)
2640       throws InterruptedException {
2641     while (!regionStates.isRegionOnline(regionInfo)) {
2642       if (regionStates.isRegionInState(regionInfo, State.FAILED_OPEN)
2643           || this.server.isStopped()) {
2644         return false;
2645       }
2646 
2647       // We should receive a notification, but it's
2648       //  better to have a timeout to recheck the condition here:
2649       //  it lowers the impact of a race condition if any
2650       regionStates.waitForUpdate(100);
2651     }
2652     return true;
2653   }
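  /*
   * Typical wait pattern (illustrative only; {@code am} and {@code hri} are assumed names):
   *
   *   am.assign(hri, true);
   *   boolean online = am.waitForAssignment(hri);  // re-checks the region state at least every 100 ms
   *   if (!online) {
   *     // region ended up FAILED_OPEN, or the master was stopped while waiting
   *   }
   */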
2654 
2655   /**
2656    * Assigns the hbase:meta region.
2657    * <p>
2658    * Assumes that hbase:meta is currently closed and is not being actively served by
2659    * any RegionServer.
2660    * <p>
2661    * Forcibly unsets the current meta region location in ZooKeeper and assigns
2662    * hbase:meta to a random RegionServer.
2663    * @throws KeeperException
2664    */
2665   public void assignMeta() throws KeeperException {
2666     MetaRegionTracker.deleteMetaLocation(this.watcher);
2667     assign(HRegionInfo.FIRST_META_REGIONINFO, true);
2668   }
2669 
2670   /**
2671    * Assigns specified regions retaining assignments, if any.
2672    * <p>
2673    * This is a synchronous call and will return once every region has been
2674    * assigned.  If anything fails, an exception is thrown
2675    * @throws InterruptedException
2676    * @throws IOException
2677    */
2678   public void assign(Map<HRegionInfo, ServerName> regions)
2679         throws IOException, InterruptedException {
2680     if (regions == null || regions.isEmpty()) {
2681       return;
2682     }
2683     List<ServerName> servers = serverManager.createDestinationServersList();
2684     if (servers == null || servers.isEmpty()) {
2685       throw new IOException("Found no destination server to assign region(s)");
2686     }
2687 
2688     // Reuse existing assignment info
2689     Map<ServerName, List<HRegionInfo>> bulkPlan =
2690       balancer.retainAssignment(regions, servers);
2691 
2692     assign(regions.size(), servers.size(),
2693       "retainAssignment=true", bulkPlan);
2694   }
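  /*
   * Sketch of a retained-assignment call (illustrative only): the caller supplies a map of
   * regions to their last known servers and the balancer keeps those locations where possible.
   * {@code am} and {@code lastAssignments} are assumed names.
   *
   *   Map<HRegionInfo, ServerName> lastAssignments = ...;  // region -> last hosting server
   *   am.assign(lastAssignments);  // synchronous; throws IOException if no destination servers
   */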
2695 
2696   /**
2697    * Assigns specified regions round robin, if any.
2698    * <p>
2699    * This is a synchronous call and will return once every region has been
2700    * assigned.  If anything fails, an exception is thrown
2701    * @throws InterruptedException
2702    * @throws IOException
2703    */
2704   public void assign(List<HRegionInfo> regions)
2705         throws IOException, InterruptedException {
2706     if (regions == null || regions.isEmpty()) {
2707       return;
2708     }
2709 
2710     List<ServerName> servers = serverManager.createDestinationServersList();
2711     if (servers == null || servers.isEmpty()) {
2712       throw new IOException("Found no destination server to assign region(s)");
2713     }
2714 
2715     // Generate a round-robin bulk assignment plan
2716     Map<ServerName, List<HRegionInfo>> bulkPlan
2717       = balancer.roundRobinAssignment(regions, servers);
2718     processFavoredNodes(regions);
2719 
2720     assign(regions.size(), servers.size(),
2721       "round-robin=true", bulkPlan);
2722   }
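  /*
   * Sketch of a round-robin bulk assign (illustrative only), e.g. for the regions of a table
   * being enabled; {@code am} and {@code regionsToPlace} are assumed names.
   *
   *   List<HRegionInfo> regionsToPlace = ...;  // regions currently without a location
   *   am.assign(regionsToPlace);  // synchronous; spreads the regions across the live servers
   */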
2723 
2724   private void assign(int regions, int totalServers,
2725       String message, Map<ServerName, List<HRegionInfo>> bulkPlan)
2726           throws InterruptedException, IOException {
2727 
2728     int servers = bulkPlan.size();
2729     if (servers == 1 || (regions < bulkAssignThresholdRegions
2730         && servers < bulkAssignThresholdServers)) {
2731 
2732       // Not using bulk assignment.  This can be more efficient in a small
2733       // cluster, especially a mini cluster for testing, so that tests won't time out
2734       if (LOG.isTraceEnabled()) {
2735         LOG.trace("Not using bulk assignment since we are assigning only " + regions +
2736           " region(s) to " + servers + " server(s)");
2737       }
2738       for (Map.Entry<ServerName, List<HRegionInfo>> plan: bulkPlan.entrySet()) {
2739         if (!assign(plan.getKey(), plan.getValue())) {
2740           for (HRegionInfo region: plan.getValue()) {
2741             if (!regionStates.isRegionOnline(region)) {
2742               invokeAssign(region);
2743             }
2744           }
2745         }
2746       }
2747     } else {
2748       LOG.info("Bulk assigning " + regions + " region(s) across "
2749         + totalServers + " server(s), " + message);
2750 
2751       // Use fixed count thread pool assigning.
2752       BulkAssigner ba = new GeneralBulkAssigner(
2753         this.server, bulkPlan, this, bulkAssignWaitTillAllAssigned);
2754       ba.bulkAssign();
2755       LOG.info("Bulk assigning done");
2756     }
2757   }
2758 
2759   /**
2760    * Assigns all user regions, if any exist.  Used during cluster startup.
2761    * <p>
2762    * This is a synchronous call and will return once every region has been
2763    * assigned.  If anything fails, an exception is thrown and the cluster
2764    * should be shutdown.
2765    * @throws InterruptedException
2766    * @throws IOException
2767    * @throws KeeperException
2768    */
2769   private void assignAllUserRegions(Set<TableName> disabledOrDisablingOrEnabling)
2770       throws IOException, InterruptedException, KeeperException {
2771     // Skip assignment for regions of tables in DISABLING state because during a clean cluster
2772     // startup no RS is alive and the regions map has no information about those regions.
2773     // See HBASE-6281.
2774     // Scan hbase:meta for all user regions, skipping any disabled tables
2775     Map<HRegionInfo, ServerName> allRegions;
2776     SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment =
2777        new SnapshotOfRegionAssignmentFromMeta(catalogTracker, disabledOrDisablingOrEnabling, true);
2778     snapshotOfRegionAssignment.initialize();
2779     allRegions = snapshotOfRegionAssignment.getRegionToRegionServerMap();
2780     if (allRegions == null || allRegions.isEmpty()) {
2781       return;
2782     }
2783 
2784     // Determine what type of assignment to do on startup
2785     boolean retainAssignment = server.getConfiguration().
2786       getBoolean("hbase.master.startup.retainassign", true);
2787 
2788     if (retainAssignment) {
2789       assign(allRegions);
2790     } else {
2791       List<HRegionInfo> regions = new ArrayList<HRegionInfo>(allRegions.keySet());
2792       assign(regions);
2793     }
2794 
2795     for (HRegionInfo hri : allRegions.keySet()) {
2796       TableName tableName = hri.getTable();
2797       if (!zkTable.isEnabledTable(tableName)) {
2798         setEnabledTable(tableName);
2799       }
2800     }
2801   }
2802 
2803   /**
2804    * Wait until no regions in transition.
2805    * @param timeout How long to wait.
2806    * @return True if no regions are in transition.
2807    * @throws InterruptedException
2808    */
2809   boolean waitUntilNoRegionsInTransition(final long timeout)
2810       throws InterruptedException {
2811     // Blocks until there are no regions in transition. It is possible
2812     // that there are regions in transition immediately after this
2813     // returns, but it guarantees that if it returns without an
2814     // exception there was a period of time with no regions in
2815     // transition from the point of view of the in-memory state of
2816     // the Master.
2817     final long endTime = System.currentTimeMillis() + timeout;
2818 
2819     while (!this.server.isStopped() && regionStates.isRegionsInTransition()
2820         && endTime > System.currentTimeMillis()) {
2821       regionStates.waitForUpdate(100);
2822     }
2823 
2824     return !regionStates.isRegionsInTransition();
2825   }
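  /*
   * Sketch of a bounded wait for a quiet cluster (illustrative only; callers are in the same
   * package since the method is package-private, and {@code am} is an assumed name):
   *
   *   if (!am.waitUntilNoRegionsInTransition(60 * 1000)) {
   *     // timed out (or the master stopped) with regions still in transition
   *   }
   */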
2826 
2827   /**
2828    * Rebuild the list of user regions and assignment information.
2829    * <p>
2830    * Returns a map of servers that are not found to be online and the regions
2831    * they were hosting.
2832    * @return map of servers not online to their assigned regions, as stored
2833    *         in META
2834    * @throws IOException
2835    */
2836   Map<ServerName, List<HRegionInfo>> rebuildUserRegions() throws IOException, KeeperException {
2837     Set<TableName> enablingTables = ZKTable.getEnablingTables(watcher);
2838     Set<TableName> disabledOrEnablingTables = ZKTable.getDisabledTables(watcher);
2839     disabledOrEnablingTables.addAll(enablingTables);
2840     Set<TableName> disabledOrDisablingOrEnabling = ZKTable.getDisablingTables(watcher);
2841     disabledOrDisablingOrEnabling.addAll(disabledOrEnablingTables);
2842 
2843     // Region assignment from META
2844     List<Result> results = MetaReader.fullScan(this.catalogTracker);
2845     // Get any new but slow-to-check-in region servers that joined the cluster
2846     Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
2847     // Map of offline servers and their regions to be returned
2848     Map<ServerName, List<HRegionInfo>> offlineServers =
2849       new TreeMap<ServerName, List<HRegionInfo>>();
2850     // Iterate regions in META
2851     for (Result result : results) {
2852       HRegionInfo regionInfo = HRegionInfo.getHRegionInfo(result);
2853       if (regionInfo == null) continue;
2854       State state = RegionStateStore.getRegionState(result);
2855       ServerName regionLocation = RegionStateStore.getRegionServer(result);
2856       regionStates.createRegionState(regionInfo, state, regionLocation);
2857       if (!regionStates.isRegionInState(regionInfo, State.OPEN)) {
2858         // Region is not open (either offline or in transition), skip
2859         continue;
2860       }
2861       TableName tableName = regionInfo.getTable();
2862       if (!onlineServers.contains(regionLocation)) {
2863         // Region is located on a server that isn't online
2864         List<HRegionInfo> offlineRegions = offlineServers.get(regionLocation);
2865         if (offlineRegions == null) {
2866           offlineRegions = new ArrayList<HRegionInfo>(1);
2867           offlineServers.put(regionLocation, offlineRegions);
2868         }
2869         if (useZKForAssignment) {
2870           regionStates.regionOffline(regionInfo);
2871         }
2872         offlineRegions.add(regionInfo);
2873       } else if (!disabledOrEnablingTables.contains(tableName)) {
2874         // Region is being served and on an active server
2875         // add only if region not in disabled or enabling table
2876 
2877         regionStates.updateRegionState(regionInfo, State.OPEN, regionLocation);
2878         regionStates.regionOnline(regionInfo, regionLocation);
2879         balancer.regionOnline(regionInfo, regionLocation);
2880       } else if (useZKForAssignment) {
2881         regionStates.regionOffline(regionInfo);
2882       }
2883       // need to enable the table if not disabled or disabling or enabling
2884       // this will be used in rolling restarts
2885       if (!disabledOrDisablingOrEnabling.contains(tableName)
2886           && !getZKTable().isEnabledTable(tableName)) {
2887         setEnabledTable(tableName);
2888       }
2889 
2890     }
2891     return offlineServers;
2892   }
2893 
2894   /**
2895    * Recover the tables that were not fully moved to DISABLED state. These
2896    * tables were in DISABLING state when the master restarted/switched.
2897    *
2898    * @throws KeeperException
2899    * @throws TableNotFoundException
2900    * @throws IOException
2901    */
2902   private void recoverTableInDisablingState()
2903       throws KeeperException, TableNotFoundException, IOException {
2904     Set<TableName> disablingTables = ZKTable.getDisablingTables(watcher);
2905     if (disablingTables.size() != 0) {
2906       for (TableName tableName : disablingTables) {
2907         // Recover by calling DisableTableHandler
2908         LOG.info("The table " + tableName
2909             + " is in DISABLING state.  Hence recovering by moving the table"
2910             + " to DISABLED state.");
2911         new DisableTableHandler(this.server, tableName, catalogTracker,
2912             this, tableLockManager, true).prepare().process();
2913       }
2914     }
2915   }
2916 
2917   /**
2918    * Recover the tables that were not fully moved to ENABLED state. These tables
2919    * were in ENABLING state when the master restarted/switched.
2920    *
2921    * @throws KeeperException
2922    * @throws org.apache.hadoop.hbase.TableNotFoundException
2923    * @throws IOException
2924    */
2925   private void recoverTableInEnablingState()
2926       throws KeeperException, TableNotFoundException, IOException {
2927     Set<TableName> enablingTables = ZKTable.getEnablingTables(watcher);
2928     if (enablingTables.size() != 0) {
2929       for (TableName tableName : enablingTables) {
2930         // Recover by calling EnableTableHandler
2931         LOG.info("The table " + tableName
2932             + " is in ENABLING state.  Hence recovering by moving the table"
2933             + " to ENABLED state.");
2934         // enableTable in sync way during master startup,
2935         // no need to invoke coprocessor
2936         EnableTableHandler eth = new EnableTableHandler(this.server, tableName,
2937           catalogTracker, this, tableLockManager, true);
2938         try {
2939           eth.prepare();
2940         } catch (TableNotFoundException e) {
2941           LOG.warn("Table " + tableName + " not found in hbase:meta to recover.");
2942           continue;
2943         }
2944         eth.process();
2945       }
2946     }
2947   }
2948 
2949   /**
2950    * Processes the list of dead servers from the result of a hbase:meta scan and regions in RIT
2951    * <p>
2952    * This is used for failover to recover the lost regions that belonged to
2953    * RegionServers which failed while there was no active master or regions
2954    * that were in RIT.
2955    * <p>
2956    *
2957    *
2958    * @param deadServers
2959    *          The list of dead servers which failed while there was no active
2960    *          master. Can be null.
2961    * @throws IOException
2962    * @throws KeeperException
2963    */
2964   private void processDeadServersAndRecoverLostRegions(
2965       Map<ServerName, List<HRegionInfo>> deadServers)
2966           throws IOException, KeeperException {
2967     if (deadServers != null) {
2968       for (Map.Entry<ServerName, List<HRegionInfo>> server: deadServers.entrySet()) {
2969         ServerName serverName = server.getKey();
2970         // We need to keep such info even if the server is known dead
2971         regionStates.setLastRegionServerOfRegions(serverName, server.getValue());
2972         if (!serverManager.isServerDead(serverName)) {
2973           serverManager.expireServer(serverName); // Let SSH do region re-assign
2974         }
2975       }
2976     }
2977 
2978     List<String> nodes = useZKForAssignment ?
2979       ZKUtil.listChildrenAndWatchForNewChildren(watcher, watcher.assignmentZNode)
2980       : ZKUtil.listChildrenNoWatch(watcher, watcher.assignmentZNode);
2981     if (nodes != null && !nodes.isEmpty()) {
2982       for (String encodedRegionName : nodes) {
2983         processRegionInTransition(encodedRegionName, null);
2984       }
2985     } else if (!useZKForAssignment) {
2986        processRegionInTransitionZkLess();
2987     }
2988   }
2989   
2990   void processRegionInTransitionZkLess() {
2991     // We need to send the RPC call again for PENDING_OPEN/PENDING_CLOSE regions
2992     // in case the RPC call was not sent out before the master was shut down,
2993     // since we update the state before we send the RPC call. We can't update
2994     // the state after the RPC call; otherwise, we wouldn't know what happened
2995     // to the region if the master died right after the RPC call went out.
2996     Map<String, RegionState> rits = regionStates.getRegionsInTransition();
2997     for (RegionState regionState : rits.values()) {
2998       if (!serverManager.isServerOnline(regionState.getServerName())) {
2999         continue; // SSH will handle it
3000       }
3001       State state = regionState.getState();
3002       LOG.info("Processing " + regionState);
3003       switch (state) {
3004       case CLOSED:
3005         invokeAssign(regionState.getRegion());
3006         break;
3007       case PENDING_OPEN:
3008         retrySendRegionOpen(regionState);
3009         break;
3010       case PENDING_CLOSE:
3011         retrySendRegionClose(regionState);
3012         break;
3013       default:
3014         // No process for other states
3015       }
3016     }
3017   }
3018 
3019   /**
3020    * At master failover, for a pending_open region, make sure the
3021    * sendRegionOpen RPC call is sent to the target regionserver
3022    */
3023   private void retrySendRegionOpen(final RegionState regionState) {
3024     this.executorService.submit(
3025       new EventHandler(server, EventType.M_MASTER_RECOVERY) {
3026         @Override
3027         public void process() throws IOException {
3028           HRegionInfo hri = regionState.getRegion();
3029           ServerName serverName = regionState.getServerName();
3030           ReentrantLock lock = locker.acquireLock(hri.getEncodedName());
3031           try {
3032             while (serverManager.isServerOnline(serverName)
3033                 && !server.isStopped() && !server.isAborted()) {
3034               try {
3035                 List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
3036                 if (shouldAssignRegionsWithFavoredNodes) {
3037                   favoredNodes = ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(hri);
3038                 }
3039                 RegionOpeningState regionOpenState = serverManager.sendRegionOpen(
3040                   serverName, hri, -1, favoredNodes);
3041 
3042                 if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
3043                   // Failed opening this region, this means the target server didn't get
3044                   // the original region open RPC, so re-assign it with a new plan
3045                   LOG.debug("Got failed_opening in retry sendRegionOpen for "
3046                     + regionState + ", re-assign it");
3047                   invokeAssign(hri, true);
3048                 }
3049                 return; // Done.
3050               } catch (Throwable t) {
3051                 if (t instanceof RemoteException) {
3052                   t = ((RemoteException) t).unwrapRemoteException();
3053                 }
3054                 // In case of SocketTimeoutException/FailedServerException, we will retry
3055                 if (t instanceof java.net.SocketTimeoutException
3056                     || t instanceof FailedServerException) {
3057                   Threads.sleep(100);
3058                   continue;
3059                 }
3060                 // For other exceptions, re-assign it
3061                 LOG.debug("Got exception in retry sendRegionOpen for "
3062                   + regionState + ", re-assign it", t);
3063                 invokeAssign(hri);
3064                 return; // Done.
3065               }
3066             }
3067           } finally {
3068             lock.unlock();
3069           }
3070         }
3071       });
3072   }
3073 
3074   /**
3075    * At master failover, for a pending_close region, make sure the
3076    * sendRegionClose RPC call is sent to the target regionserver
3077    */
3078   private void retrySendRegionClose(final RegionState regionState) {
3079     this.executorService.submit(
3080       new EventHandler(server, EventType.M_MASTER_RECOVERY) {
3081         @Override
3082         public void process() throws IOException {
3083           HRegionInfo hri = regionState.getRegion();
3084           ServerName serverName = regionState.getServerName();
3085           ReentrantLock lock = locker.acquireLock(hri.getEncodedName());
3086           try {
3087             while (serverManager.isServerOnline(serverName)
3088                 && !server.isStopped() && !server.isAborted()) {
3089               try {
3090                 if (!serverManager.sendRegionClose(serverName, hri, -1, null, false)) {
3091                   // This means the region is still on the target server
3092                   LOG.debug("Got false in retry sendRegionClose for "
3093                     + regionState + ", re-close it");
3094                   invokeUnAssign(hri);
3095                 }
3096                 return; // Done.
3097               } catch (Throwable t) {
3098                 if (t instanceof RemoteException) {
3099                   t = ((RemoteException) t).unwrapRemoteException();
3100                 }
3101                 // In case of SocketTimeoutException/FailedServerException, we will retry
3102                 if (t instanceof java.net.SocketTimeoutException
3103                     || t instanceof FailedServerException) {
3104                   Threads.sleep(100);
3105                   continue;
3106                 }
3107                 if (!(t instanceof NotServingRegionException
3108                     || t instanceof RegionAlreadyInTransitionException)) {
3109                   // NotServingRegionException/RegionAlreadyInTransitionException
3110                   // means the target server got the original region close request.
3111                   // For other exceptions, re-close it
3112                   LOG.debug("Got exception in retry sendRegionClose for "
3113                     + regionState + ", re-close it", t);
3114                   invokeUnAssign(hri);
3115                 }
3116                 return; // Done.
3117               }
3118             }
3119           } finally {
3120             lock.unlock();
3121           }
3122         }
3123       });
3124   }
3125 
3126   /**
3127    * Set Regions in transitions metrics.
3128    * This takes an iterator on the RegionInTransition map (CLSM), and is not synchronized.
3129    * This iterator is not fail-fast, which may lead to stale reads; but that's better than
3130    * creating a copy of the map for metrics computation, as this method will be invoked
3131    * on a frequent interval.
3132    */
3133   public void updateRegionsInTransitionMetrics() {
3134     long currentTime = System.currentTimeMillis();
3135     int totalRITs = 0;
3136     int totalRITsOverThreshold = 0;
3137     long oldestRITTime = 0;
3138     int ritThreshold = this.server.getConfiguration().
3139       getInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 60000);
3140     for (RegionState state: regionStates.getRegionsInTransition().values()) {
3141       totalRITs++;
3142       long ritTime = currentTime - state.getStamp();
3143       if (ritTime > ritThreshold) { // more than the threshold
3144         totalRITsOverThreshold++;
3145       }
3146       if (oldestRITTime < ritTime) {
3147         oldestRITTime = ritTime;
3148       }
3149     }
3150     if (this.metricsAssignmentManager != null) {
3151       this.metricsAssignmentManager.updateRITOldestAge(oldestRITTime);
3152       this.metricsAssignmentManager.updateRITCount(totalRITs);
3153       this.metricsAssignmentManager.updateRITCountOverThreshold(totalRITsOverThreshold);
3154     }
3155   }
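  /*
   * The "stuck" threshold used above is read from the configuration with a 60000 ms default.
   * A sketch of tuning it (illustrative only; {@code conf} is an assumed name for the
   * Configuration handed to the master, value in milliseconds):
   *
   *   conf.setInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 120000);  // warn after 2 minutes
   */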
3156 
3157   /**
3158    * @param region Region whose plan we are to clear.
3159    */
3160   void clearRegionPlan(final HRegionInfo region) {
3161     synchronized (this.regionPlans) {
3162       this.regionPlans.remove(region.getEncodedName());
3163     }
3164   }
3165 
3166   /**
3167    * Wait on region to clear regions-in-transition.
3168    * @param hri Region to wait on.
3169    * @throws IOException
3170    */
3171   public void waitOnRegionToClearRegionsInTransition(final HRegionInfo hri)
3172       throws IOException, InterruptedException {
3173     waitOnRegionToClearRegionsInTransition(hri, -1L);
3174   }
3175 
3176   /**
3177    * Wait on region to clear regions-in-transition or time out
3178    * @param hri
3179    * @param timeOut Milliseconds to wait for current region to be out of transition state.
3180    * @return True when a region clears regions-in-transition before timeout otherwise false
3181    * @throws InterruptedException
3182    */
3183   public boolean waitOnRegionToClearRegionsInTransition(final HRegionInfo hri, long timeOut)
3184       throws InterruptedException {
3185     if (!regionStates.isRegionInTransition(hri)) return true;
3186     long end = (timeOut <= 0) ? Long.MAX_VALUE : EnvironmentEdgeManager.currentTimeMillis()
3187         + timeOut;
3188     // There is already a timeout monitor on regions in transition so I
3189     // should not have to have one here too?
3190     LOG.info("Waiting for " + hri.getEncodedName() +
3191         " to leave regions-in-transition, timeOut=" + timeOut + " ms.");
3192     while (!this.server.isStopped() && regionStates.isRegionInTransition(hri)) {
3193       regionStates.waitForUpdate(100);
3194       if (EnvironmentEdgeManager.currentTimeMillis() > end) {
3195         LOG.info("Timed out on waiting for " + hri.getEncodedName() + " to be assigned.");
3196         return false;
3197       }
3198     }
3199     if (this.server.isStopped()) {
3200       LOG.info("Giving up wait on regions in transition because stoppable.isStopped is set");
3201       return false;
3202     }
3203     return true;
3204   }
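  /*
   * Sketch of a timed wait on a single region (illustrative only; {@code am} and {@code hri}
   * are assumed names, timeout in milliseconds):
   *
   *   if (!am.waitOnRegionToClearRegionsInTransition(hri, 30 * 1000)) {
   *     // still in transition after 30 seconds (or the master stopped); the caller decides how to react
   *   }
   */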
3205 
3206   /**
3207    * Update timers for all regions in transition going against the server in the
3208    * serversInUpdatingTimer.
3209    */
3210   public class TimerUpdater extends Chore {
3211 
3212     public TimerUpdater(final int period, final Stoppable stopper) {
3213       super("AssignmentTimerUpdater", period, stopper);
3214     }
3215 
3216     @Override
3217     protected void chore() {
3218       Preconditions.checkState(tomActivated);
3219       ServerName serverToUpdateTimer = null;
3220       while (!serversInUpdatingTimer.isEmpty() && !stopper.isStopped()) {
3221         if (serverToUpdateTimer == null) {
3222           serverToUpdateTimer = serversInUpdatingTimer.first();
3223         } else {
3224           serverToUpdateTimer = serversInUpdatingTimer
3225               .higher(serverToUpdateTimer);
3226         }
3227         if (serverToUpdateTimer == null) {
3228           break;
3229         }
3230         updateTimers(serverToUpdateTimer);
3231         serversInUpdatingTimer.remove(serverToUpdateTimer);
3232       }
3233     }
3234   }
3235 
3236   /**
3237    * Monitor to check for time outs on region transition operations
3238    */
3239   public class TimeoutMonitor extends Chore {
3240     private boolean allRegionServersOffline = false;
3241     private ServerManager serverManager;
3242     private final int timeout;
3243 
3244     /**
3245      * Creates a periodic monitor to check for time outs on region transition
3246      * operations.  This will deal with retries if for some reason something
3247      * doesn't happen within the specified timeout.
3248      * @param period
3249      * @param stopper When {@link Stoppable#isStopped()} is true, this thread will
3250      *   clean up and exit cleanly.
3251      * @param timeout
3252      */
3253     public TimeoutMonitor(final int period, final Stoppable stopper,
3254         ServerManager serverManager,
3255         final int timeout) {
3256       super("AssignmentTimeoutMonitor", period, stopper);
3257       this.timeout = timeout;
3258       this.serverManager = serverManager;
3259     }
3260 
3261     private synchronized void setAllRegionServersOffline(
3262       boolean allRegionServersOffline) {
3263       this.allRegionServersOffline = allRegionServersOffline;
3264     }
3265 
3266     @Override
3267     protected void chore() {
3268       Preconditions.checkState(tomActivated);
3269       boolean noRSAvailable = this.serverManager.createDestinationServersList().isEmpty();
3270 
3271       // Iterate all regions in transition checking for time outs
3272       long now = System.currentTimeMillis();
3273       // No lock; concurrent access is ok: we will be working on a copy, and it's valid in
3274       //  Java to take a copy while another thread is adding/removing items
3275       for (String regionName : regionStates.getRegionsInTransition().keySet()) {
3276         RegionState regionState = regionStates.getRegionTransitionState(regionName);
3277         if (regionState == null) continue;
3278 
3279         if (regionState.getStamp() + timeout <= now) {
3280           // decide on action upon timeout
3281           actOnTimeOut(regionState);
3282         } else if (this.allRegionServersOffline && !noRSAvailable) {
3283           RegionPlan existingPlan = regionPlans.get(regionName);
3284           if (existingPlan == null
3285               || !this.serverManager.isServerOnline(existingPlan
3286                   .getDestination())) {
3287             // if some RSs just came back online, we can start the assignment
3288             // right away
3289             actOnTimeOut(regionState);
3290           }
3291         }
3292       }
3293       setAllRegionServersOffline(noRSAvailable);
3294     }
3295 
3296     private void actOnTimeOut(RegionState regionState) {
3297       HRegionInfo regionInfo = regionState.getRegion();
3298       LOG.info("Regions in transition timed out:  " + regionState);
3299       // Expired! Do a retry.
3300       switch (regionState.getState()) {
3301       case CLOSED:
3302         LOG.info("Region " + regionInfo.getEncodedName()
3303             + " has been CLOSED for too long, waiting on queued "
3304             + "ClosedRegionHandler to run or server shutdown");
3305         // Update our timestamp.
3306         regionState.updateTimestampToNow();
3307         break;
3308       case OFFLINE:
3309         LOG.info("Region has been OFFLINE for too long, " + "reassigning "
3310             + regionInfo.getRegionNameAsString() + " to a random server");
3311         invokeAssign(regionInfo);
3312         break;
3313       case PENDING_OPEN:
3314         LOG.info("Region has been PENDING_OPEN for too "
3315             + "long, reassigning region=" + regionInfo.getRegionNameAsString());
3316         invokeAssign(regionInfo);
3317         break;
3318       case OPENING:
3319         processOpeningState(regionInfo);
3320         break;
3321       case OPEN:
3322         LOG.error("Region has been OPEN for too long, " +
3323             "we don't know where region was opened so can't do anything");
3324         regionState.updateTimestampToNow();
3325         break;
3326 
3327       case PENDING_CLOSE:
3328         LOG.info("Region has been PENDING_CLOSE for too "
3329             + "long, running forced unassign again on region="
3330             + regionInfo.getRegionNameAsString());
3331         invokeUnassign(regionInfo);
3332         break;
3333       case CLOSING:
3334         LOG.info("Region has been CLOSING for too " +
3335           "long, this should eventually complete or the server will " +
3336           "expire, send RPC again");
3337         invokeUnassign(regionInfo);
3338         break;
3339 
3340       case SPLIT:
3341       case SPLITTING:
3342       case FAILED_OPEN:
3343       case FAILED_CLOSE:
3344       case MERGING:
3345         break;
3346 
3347       default:
3348         throw new IllegalStateException("Received event is not valid.");
3349       }
3350     }
3351   }
3352 
3353   private void processOpeningState(HRegionInfo regionInfo) {
3354     LOG.info("Region has been OPENING for too long, reassigning region="
3355         + regionInfo.getRegionNameAsString());
3356     // Should have a ZK node in OPENING state
3357     try {
3358       String node = ZKAssign.getNodeName(watcher, regionInfo.getEncodedName());
3359       Stat stat = new Stat();
3360       byte [] data = ZKAssign.getDataNoWatch(watcher, node, stat);
3361       if (data == null) {
3362         LOG.warn("Data is null, node " + node + " no longer exists");
3363         return;
3364       }
3365       RegionTransition rt = RegionTransition.parseFrom(data);
3366       EventType et = rt.getEventType();
3367       if (et == EventType.RS_ZK_REGION_OPENED) {
3368         LOG.debug("Region has transitioned to OPENED, allowing "
3369             + "watched event handlers to process");
3370         return;
3371       } else if (et != EventType.RS_ZK_REGION_OPENING && et != EventType.RS_ZK_REGION_FAILED_OPEN ) {
3372         LOG.warn("While timing out a region, found ZK node in unexpected state: " + et);
3373         return;
3374       }
3375       invokeAssign(regionInfo);
3376     } catch (KeeperException ke) {
3377       LOG.error("Unexpected ZK exception timing out OPENING region", ke);
3378     } catch (DeserializationException e) {
3379       LOG.error("Unexpected exception parsing OPENING region", e);
3380     }
3381   }
3382 
3383   void invokeAssign(HRegionInfo regionInfo) {
3384     invokeAssign(regionInfo, true);
3385   }
3386 
3387   void invokeAssign(HRegionInfo regionInfo, boolean newPlan) {
3388     threadPoolExecutorService.submit(new AssignCallable(this, regionInfo, newPlan));
3389   }
3390 
3391   void invokeUnAssign(HRegionInfo regionInfo) {
3392     threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
3393   }
3394 
3395   private void invokeUnassign(HRegionInfo regionInfo) {
3396     threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
3397   }
3398 
3399   public boolean isCarryingMeta(ServerName serverName) {
3400     return isCarryingRegion(serverName, HRegionInfo.FIRST_META_REGIONINFO);
3401   }
3402 
3403   /**
3404    * Check if the shutdown server carries the specific region.
3405    * We have a bunch of places that store region location, and
3406    * those values aren't always consistent because notification is delayed.
3407    * The location from the zookeeper unassigned node has the most recent data,
3408    * but the node could be deleted after the region is opened by the AM.
3409    * The AM's info could be stale if OpenedRegionHandler
3410    * processing hasn't finished yet when the server shutdown occurs.
3411    * @return whether the serverName currently hosts the region
3412    */
3413   private boolean isCarryingRegion(ServerName serverName, HRegionInfo hri) {
3414     RegionTransition rt = null;
3415     try {
3416       byte [] data = ZKAssign.getData(watcher, hri.getEncodedName());
3417       // This call can legitimately return null
3418       rt = data == null? null: RegionTransition.parseFrom(data);
3419     } catch (KeeperException e) {
3420       server.abort("Exception reading unassigned node for region=" + hri.getEncodedName(), e);
3421     } catch (DeserializationException e) {
3422       server.abort("Exception parsing unassigned node for region=" + hri.getEncodedName(), e);
3423     }
3424 
3425     ServerName addressFromZK = rt != null? rt.getServerName():  null;
3426     if (addressFromZK != null) {
3427       // if we get something from ZK, we will use the data
3428       boolean matchZK = addressFromZK.equals(serverName);
3429       LOG.debug("Checking region=" + hri.getRegionNameAsString() + ", zk server=" + addressFromZK +
3430         " current=" + serverName + ", matches=" + matchZK);
3431       return matchZK;
3432     }
3433 
3434     ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
3435     boolean matchAM = (addressFromAM != null &&
3436       addressFromAM.equals(serverName));
3437     LOG.debug("based on AM, current region=" + hri.getRegionNameAsString() +
3438       " is on server=" + (addressFromAM != null ? addressFromAM : "null") +
3439       " server being checked: " + serverName);
3440 
3441     return matchAM;
3442   }
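  /*
   * Sketch of how the check above is typically consulted during server-shutdown handling
   * (illustrative only; {@code am} and {@code deadServer} are assumed names):
   *
   *   if (am.isCarryingMeta(deadServer)) {
   *     // recover hbase:meta first, e.g. via assignMeta(), before dealing with user regions
   *   }
   *   List<HRegionInfo> rits = am.processServerShutdown(deadServer);  // clears plans targeting it
   */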
3443 
3444   /**
3445    * Process shutdown server removing any assignments.
3446    * @param sn Server that went down.
3447    * @return list of regions in transition on this server
3448    */
3449   public List<HRegionInfo> processServerShutdown(final ServerName sn) {
3450     // Clean out any existing assignment plans for this server
3451     synchronized (this.regionPlans) {
3452       for (Iterator <Map.Entry<String, RegionPlan>> i =
3453           this.regionPlans.entrySet().iterator(); i.hasNext();) {
3454         Map.Entry<String, RegionPlan> e = i.next();
3455         ServerName otherSn = e.getValue().getDestination();
3456         // The name will be null if the region is planned for a random assign.
3457         if (otherSn != null && otherSn.equals(sn)) {
3458           // Use iterator's remove else we'll get CME
3459           i.remove();
3460         }
3461       }
3462     }
3463     List<HRegionInfo> regions = regionStates.serverOffline(watcher, sn);
3464     for (Iterator<HRegionInfo> it = regions.iterator(); it.hasNext(); ) {
3465       HRegionInfo hri = it.next();
3466       String encodedName = hri.getEncodedName();
3467 
3468       // We need a lock on the region as we could update it
3469       Lock lock = locker.acquireLock(encodedName);
3470       try {
3471         RegionState regionState =
3472           regionStates.getRegionTransitionState(encodedName);
3473         if (regionState == null
3474             || (regionState.getServerName() != null && !regionState.isOnServer(sn))
3475             || !(regionState.isFailedClose() || regionState.isOffline()
3476               || regionState.isPendingOpenOrOpening())) {
3477           LOG.info("Skip " + regionState + " since it is not opening/failed_close"
3478             + " on the dead server any more: " + sn);
3479           it.remove();
3480         } else {
3481           try {
3482             // Delete the ZNode if exists
3483             ZKAssign.deleteNodeFailSilent(watcher, hri);
3484           } catch (KeeperException ke) {
3485             server.abort("Unexpected ZK exception deleting node " + hri, ke);
3486           }
3487           if (zkTable.isDisablingOrDisabledTable(hri.getTable())) {
3488             regionStates.regionOffline(hri);
3489             it.remove();
3490             continue;
3491           }
3492           // Mark the region offline and assign it again by SSH
3493           regionStates.updateRegionState(hri, State.OFFLINE);
3494         }
3495       } finally {
3496         lock.unlock();
3497       }
3498     }
3499     return regions;
3500   }
3501 
3502   /**
3503    * @param plan Plan to execute.
3504    */
3505   public void balance(final RegionPlan plan) {
3506     HRegionInfo hri = plan.getRegionInfo();
3507     TableName tableName = hri.getTable();
3508     if (zkTable.isDisablingOrDisabledTable(tableName)) {
3509       LOG.info("Ignored moving region of disabling/disabled table "
3510         + tableName);
3511       return;
3512     }
3513 
3514     // Move the region only if it's assigned
3515     String encodedName = hri.getEncodedName();
3516     ReentrantLock lock = locker.acquireLock(encodedName);
3517     try {
3518       if (!regionStates.isRegionOnline(hri)) {
3519         RegionState state = regionStates.getRegionState(encodedName);
3520         LOG.info("Ignored moving region not assigned: " + hri + ", "
3521           + (state == null ? "not in region states" : state));
3522         return;
3523       }
3524       synchronized (this.regionPlans) {
3525         this.regionPlans.put(plan.getRegionName(), plan);
3526       }
3527       unassign(hri, false, plan.getDestination());
3528     } finally {
3529       lock.unlock();
3530     }
3531   }
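  /*
   * Sketch of a single-region move expressed as a plan (illustrative only; {@code am},
   * {@code hri}, {@code from} and {@code to} are assumed names for the manager, the region,
   * its current server and the target server):
   *
   *   am.balance(new RegionPlan(hri, from, to));  // ignored if the table is disabled/disabling
   *   am.waitOnRegionToClearRegionsInTransition(hri, 30 * 1000);
   */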
3532 
3533   public void stop() {
3534     shutdown(); // Stop executor service, etc
3535     if (tomActivated){
3536       this.timeoutMonitor.interrupt();
3537       this.timerUpdater.interrupt();
3538     }
3539   }
3540 
3541   /**
3542    * Shutdown the threadpool executor service
3543    */
3544   public void shutdown() {
3545     // It's an immediate shutdown, so we're clearing the remaining tasks.
3546     synchronized (zkEventWorkerWaitingList){
3547       zkEventWorkerWaitingList.clear();
3548     }
3549     threadPoolExecutorService.shutdownNow();
3550     zkEventWorkers.shutdownNow();
3551     regionStateStore.stop();
3552   }
3553 
3554   protected void setEnabledTable(TableName tableName) {
3555     try {
3556       this.zkTable.setEnabledTable(tableName);
3557     } catch (KeeperException e) {
3558       // here we can abort as it is the start up flow
3559       String errorMsg = "Unable to ensure that the table " + tableName
3560           + " will be enabled because of a ZooKeeper issue";
3561       LOG.error(errorMsg);
3562       this.server.abort(errorMsg, e);
3563     }
3564   }
3565 
3566   /**
3567    * Set region as OFFLINED up in zookeeper asynchronously.
3568    * @param state
3569    * @return True if we succeeded, false otherwise (State was incorrect or failed
3570    * updating zk).
3571    */
3572   private boolean asyncSetOfflineInZooKeeper(final RegionState state,
3573       final AsyncCallback.StringCallback cb, final ServerName destination) {
3574     if (!state.isClosed() && !state.isOffline()) {
3575       this.server.abort("Unexpected state trying to OFFLINE; " + state,
3576         new IllegalStateException());
3577       return false;
3578     }
3579     regionStates.updateRegionState(state.getRegion(), State.OFFLINE);
3580     try {
3581       ZKAssign.asyncCreateNodeOffline(watcher, state.getRegion(),
3582         destination, cb, state);
3583     } catch (KeeperException e) {
3584       if (e instanceof NodeExistsException) {
3585         LOG.warn("Node for " + state.getRegion() + " already exists");
3586       } else {
3587         server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
3588       }
3589       return false;
3590     }
3591     return true;
3592   }
3593 
3594   private boolean deleteNodeInStates(String encodedName,
3595       String desc, ServerName sn, EventType... types) {
3596     try {
3597       for (EventType et: types) {
3598         if (ZKAssign.deleteNode(watcher, encodedName, et, sn)) {
3599           return true;
3600         }
3601       }
3602       LOG.info("Failed to delete the " + desc + " node for "
3603         + encodedName + ". The node type may not match");
3604     } catch (NoNodeException e) {
3605       if (LOG.isDebugEnabled()) {
3606         LOG.debug("The " + desc + " node for " + encodedName + " already deleted");
3607       }
3608     } catch (KeeperException ke) {
3609       server.abort("Unexpected ZK exception deleting " + desc
3610         + " node for the region " + encodedName, ke);
3611     }
3612     return false;
3613   }
3614 
3615   private void deleteMergingNode(String encodedName, ServerName sn) {
3616     deleteNodeInStates(encodedName, "merging", sn, EventType.RS_ZK_REGION_MERGING,
3617       EventType.RS_ZK_REQUEST_REGION_MERGE, EventType.RS_ZK_REGION_MERGED);
3618   }
3619 
3620   private void deleteSplittingNode(String encodedName, ServerName sn) {
3621     deleteNodeInStates(encodedName, "splitting", sn, EventType.RS_ZK_REGION_SPLITTING,
3622       EventType.RS_ZK_REQUEST_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT);
3623   }
3624 
3625   private void onRegionFailedOpen(
3626       final HRegionInfo hri, final ServerName sn) {
3627     String encodedName = hri.getEncodedName();
3628     AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
3629     if (failedOpenCount == null) {
3630       failedOpenCount = new AtomicInteger();
3631       // No need to use putIfAbsent, or extra synchronization since
3632       // this whole handleRegion block is locked on the encoded region
3633       // name, and failedOpenTracker is updated only in this block
3634       failedOpenTracker.put(encodedName, failedOpenCount);
3635     }
3636     if (failedOpenCount.incrementAndGet() >= maximumAttempts && !hri.isMetaRegion() ) {
3637       regionStates.updateRegionState(hri, State.FAILED_OPEN);
3638       // remove the tracking info to save memory, also reset
3639       // the count for next open initiative
3640       failedOpenTracker.remove(encodedName);
3641     }
3642     else {
3643       if (hri.isMetaRegion() && failedOpenCount.get() >= maximumAttempts) {
3644         // Log a warning message if a meta region failedOpenCount exceeds maximumAttempts
3645         // so that we are aware of potential problem if it persists for a long time.
3646         LOG.warn("Failed to open the hbase:meta region " +
3647             hri.getRegionNameAsString() + " after " +
3648             failedOpenCount.get() + " retries. Continue retrying.");
3649       }
3650 
3651       // Handle this the same as if it were opened and then closed.
3652       RegionState regionState = regionStates.updateRegionState(hri, State.CLOSED);
3653       if (regionState != null) {
3654         // When there are more than one region server a new RS is selected as the
3655         // destination and the same is updated in the region plan. (HBASE-5546)
3656         Set<TableName> disablingOrDisabled = null;
3657         try {
3658           disablingOrDisabled = ZKTable.getDisablingTables(watcher);
3659           disablingOrDisabled.addAll(ZKTable.getDisabledTables(watcher));
3660         } catch (KeeperException e) {
3661           server.abort("Cannot retrieve info about disabling or disabled tables ", e);
3662         }
3663         if (disablingOrDisabled.contains(hri.getTable())) {
3664           offlineDisabledRegion(hri);
3665           return;
3666         }
3667         // ZK Node is in CLOSED state, assign it.
3668         regionStates.updateRegionState(hri, RegionState.State.CLOSED);
3669         // This below has to do w/ online enable/disable of a table
3670         removeClosedRegion(hri);
3671         try {
3672           getRegionPlan(hri, sn, true);
3673         } catch (HBaseIOException e) {
3674           LOG.warn("Failed to get region plan", e);
3675         }
3676         invokeAssign(hri, false);
3677       }
3678     }
3679   }
3680 
3681   private void onRegionOpen(
3682       final HRegionInfo hri, final ServerName sn, long openSeqNum) {
3683     regionOnline(hri, sn, openSeqNum);
3684     if (useZKForAssignment) {
3685       try {
3686         // Delete the ZNode if exists
3687         ZKAssign.deleteNodeFailSilent(watcher, hri);
3688       } catch (KeeperException ke) {
3689         server.abort("Unexpected ZK exception deleting node " + hri, ke);
3690       }
3691     }
3692 
3693     // reset the count, if any
3694     failedOpenTracker.remove(hri.getEncodedName());
3695     if (isTableDisabledOrDisabling(hri.getTable())) {
3696       invokeUnAssign(hri);
3697     }
3698   }
3699 
3700   private void onRegionClosed(final HRegionInfo hri) {
3701     if (isTableDisabledOrDisabling(hri.getTable())) {
3702       offlineDisabledRegion(hri);
3703       return;
3704     }
3705     regionStates.updateRegionState(hri, RegionState.State.CLOSED);
3706     // This below has to do w/ online enable/disable of a table
3707     removeClosedRegion(hri);
3708     invokeAssign(hri, false);
3709   }
3710 
3711   private String onRegionSplit(ServerName sn, TransitionCode code,
3712       HRegionInfo p, HRegionInfo a, HRegionInfo b) {
3713     RegionState rs_p = regionStates.getRegionState(p);
3714     RegionState rs_a = regionStates.getRegionState(a);
3715     RegionState rs_b = regionStates.getRegionState(b);
3716     if (!(rs_p.isOpenOrSplittingOnServer(sn)
3717         && (rs_a == null || rs_a.isOpenOrSplittingNewOnServer(sn))
3718         && (rs_b == null || rs_b.isOpenOrSplittingNewOnServer(sn)))) {
3719       return "Not in state good for split";
3720     }
3721 
3722     regionStates.updateRegionState(a, State.SPLITTING_NEW, sn);
3723     regionStates.updateRegionState(b, State.SPLITTING_NEW, sn);
3724     regionStates.updateRegionState(p, State.SPLITTING);
3725 
3726     if (code == TransitionCode.SPLIT) {
3727       if (TEST_SKIP_SPLIT_HANDLING) {
3728         return "Skipping split message, TEST_SKIP_SPLIT_HANDLING is set";
3729       }
3730       regionOffline(p, State.SPLIT);
3731       regionOnline(a, sn, 1);
3732       regionOnline(b, sn, 1);
3733 
3734       // User could disable the table before master knows the new region.
3735       if (isTableDisabledOrDisabling(p.getTable())) {
3736         invokeUnAssign(a);
3737         invokeUnAssign(b);
3738       }
3739     } else if (code == TransitionCode.SPLIT_PONR) {
3740       try {
3741         regionStateStore.splitRegion(p, a, b, sn);
3742       } catch (IOException ioe) {
3743         LOG.info("Failed to record split region " + p.getShortNameToLog());
3744         return "Failed to record the splitting in meta";
3745       }
3746     } else if (code == TransitionCode.SPLIT_REVERTED) {
3747       regionOnline(p, sn);
3748       regionOffline(a);
3749       regionOffline(b);
3750 
3751       if (isTableDisabledOrDisabling(p.getTable())) {
3752         invokeUnAssign(p);
3753       }
3754     }
3755     return null;
3756   }
3757 
3758   private boolean isTableDisabledOrDisabling(TableName t) {
3759     Set<TableName> disablingOrDisabled = null;
3760     try {
3761       disablingOrDisabled = ZKTable.getDisablingTables(watcher);
3762       disablingOrDisabled.addAll(ZKTable.getDisabledTables(watcher));
3763     } catch (KeeperException e) {
3764       server.abort("Cannot retrieve info about disabling or disabled tables ", e);
3765     }
3766     return disablingOrDisabled.contains(t);
3767   }
3768 
3769   private String onRegionMerge(ServerName sn, TransitionCode code,
3770       HRegionInfo p, HRegionInfo a, HRegionInfo b) {
3771     RegionState rs_p = regionStates.getRegionState(p);
3772     RegionState rs_a = regionStates.getRegionState(a);
3773     RegionState rs_b = regionStates.getRegionState(b);
3774     if (!(rs_a.isOpenOrMergingOnServer(sn) && rs_b.isOpenOrMergingOnServer(sn)
3775         && (rs_p == null || rs_p.isOpenOrMergingNewOnServer(sn)))) {
3776       return "Not in state good for merge";
3777     }
3778 
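         // Mark the two regions being merged as MERGING and the (to-be-created)
         // merged region as MERGING_NEW on the reporting server before acting on
         // the specific transition code.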
3779     regionStates.updateRegionState(a, State.MERGING);
3780     regionStates.updateRegionState(b, State.MERGING);
3781     regionStates.updateRegionState(p, State.MERGING_NEW, sn);
3782 
3783     String encodedName = p.getEncodedName();
3784     if (code == TransitionCode.READY_TO_MERGE) {
3785       mergingRegions.put(encodedName,
3786         new PairOfSameType<HRegionInfo>(a, b));
3787     } else if (code == TransitionCode.MERGED) {
3788       mergingRegions.remove(encodedName);
3789       regionOffline(a, State.MERGED);
3790       regionOffline(b, State.MERGED);
3791       regionOnline(p, sn, 1);
3792 
3793       // User could disable the table before master knows the new region.
3794       if (isTableDisabledOrDisabling(p.getTable())) {
3795         invokeUnAssign(p);
3796       }
3797     } else if (code == TransitionCode.MERGE_PONR) {
3798       try {
3799         regionStateStore.mergeRegions(p, a, b, sn);
3800       } catch (IOException ioe) {
3801         LOG.info("Failed to record merged region " + p.getShortNameToLog(), ioe);
3802         return "Failed to record the merging in meta";
3803       }
3804     } else {
3805       mergingRegions.remove(encodedName);
3806       regionOnline(a, sn);
3807       regionOnline(b, sn);
3808       regionOffline(p);
3809 
3810       if (isTableDisabledOrDisabling(p.getTable())) {
3811         invokeUnAssign(a);
3812         invokeUnAssign(b);
3813       }
3814     }
3815     return null;
3816   }
3817 
3818   /**
3819    * A helper to handle a region merging transition event.
3820    * It transitions the merging regions to the MERGING state.
3821    */
3822   private boolean handleRegionMerging(final RegionTransition rt, final String encodedName,
3823       final String prettyPrintedRegionName, final ServerName sn) {
3824     if (!serverManager.isServerOnline(sn)) {
3825       LOG.warn("Dropped merging! ServerName=" + sn + " unknown.");
3826       return false;
3827     }
3828     byte [] payloadOfMerging = rt.getPayload();
3829     List<HRegionInfo> mergingRegions;
3830     try {
3831       mergingRegions = HRegionInfo.parseDelimitedFrom(
3832         payloadOfMerging, 0, payloadOfMerging.length);
3833     } catch (IOException e) {
3834       LOG.error("Dropped merging! Failed reading " + rt.getEventType()
3835         + " payload for " + prettyPrintedRegionName, e);
3836       return false;
3837     }
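         // The payload lists the new merged region first, followed by the two
         // regions being merged.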
3838     assert mergingRegions.size() == 3;
3839     HRegionInfo p = mergingRegions.get(0);
3840     HRegionInfo hri_a = mergingRegions.get(1);
3841     HRegionInfo hri_b = mergingRegions.get(2);
3842 
3843     RegionState rs_p = regionStates.getRegionState(p);
3844     RegionState rs_a = regionStates.getRegionState(hri_a);
3845     RegionState rs_b = regionStates.getRegionState(hri_b);
3846 
3847     if (!((rs_a == null || rs_a.isOpenOrMergingOnServer(sn))
3848         && (rs_b == null || rs_b.isOpenOrMergingOnServer(sn))
3849         && (rs_p == null || rs_p.isOpenOrMergingNewOnServer(sn)))) {
3850       LOG.warn("Dropped merging! Not in state good for MERGING; rs_p="
3851         + rs_p + ", rs_a=" + rs_a + ", rs_b=" + rs_b);
3852       return false;
3853     }
3854 
3855     EventType et = rt.getEventType();
3856     if (et == EventType.RS_ZK_REQUEST_REGION_MERGE) {
3857       try {
3858         if (RegionMergeTransaction.transitionMergingNode(watcher, p,
3859             hri_a, hri_b, sn, -1, EventType.RS_ZK_REQUEST_REGION_MERGE,
3860             EventType.RS_ZK_REGION_MERGING) == -1) {
3861           byte[] data = ZKAssign.getData(watcher, encodedName);
3862           EventType currentType = null;
3863           if (data != null) {
3864             RegionTransition newRt = RegionTransition.parseFrom(data);
3865             currentType = newRt.getEventType();
3866           }
3867           if (currentType == null || (currentType != EventType.RS_ZK_REGION_MERGED
3868               && currentType != EventType.RS_ZK_REGION_MERGING)) {
3869             LOG.warn("Failed to transition pending_merge node "
3870               + encodedName + " to merging, it's now " + currentType);
3871             return false;
3872           }
3873         }
3874       } catch (Exception e) {
3875         LOG.warn("Failed to transition pending_merge node "
3876           + encodedName + " to merging", e);
3877         return false;
3878       }
3879     }
3880 
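         // Update in-memory states under the regionStates lock: the merging
         // regions move to MERGING and the new region to MERGING_NEW; on a MERGED
         // event the old regions are offlined and the merged region brought online.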
3881     synchronized (regionStates) {
3882       regionStates.updateRegionState(hri_a, State.MERGING);
3883       regionStates.updateRegionState(hri_b, State.MERGING);
3884       regionStates.updateRegionState(p, State.MERGING_NEW, sn);
3885 
3886       if (et != EventType.RS_ZK_REGION_MERGED) {
3887         this.mergingRegions.put(encodedName,
3888           new PairOfSameType<HRegionInfo>(hri_a, hri_b));
3889       } else {
3890         this.mergingRegions.remove(encodedName);
3891         regionOffline(hri_a, State.MERGED);
3892         regionOffline(hri_b, State.MERGED);
3893         regionOnline(p, sn);
3894       }
3895     }
3896 
3897     if (et == EventType.RS_ZK_REGION_MERGED) {
3898       LOG.debug("Handling MERGED event for " + encodedName + "; deleting node");
3899       // Remove region from ZK
3900       try {
3901         boolean successful = false;
3902         while (!successful) {
3903           // It's possible that the RS touches the znode in between the read
3904           // and the delete, so it's safe to retry.
3905           successful = ZKAssign.deleteNode(watcher, encodedName,
3906             EventType.RS_ZK_REGION_MERGED, sn);
3907         }
3908       } catch (KeeperException e) {
3909         if (e instanceof NoNodeException) {
3910           String znodePath = ZKUtil.joinZNode(watcher.assignmentZNode, encodedName);
3911           LOG.debug("The znode " + znodePath + " does not exist. It may have been deleted already.");
3912         } else {
3913           server.abort("Error deleting MERGED node " + encodedName, e);
3914         }
3915       }
3916       LOG.info("Handled MERGED event; merged=" + p.getRegionNameAsString()
3917         + ", region_a=" + hri_a.getRegionNameAsString() + ", region_b="
3918         + hri_b.getRegionNameAsString() + ", on " + sn);
3919 
3920       // User could disable the table before master knows the new region.
3921       if (zkTable.isDisablingOrDisabledTable(p.getTable())) {
3922         unassign(p);
3923       }
3924     }
3925     return true;
3926   }
3927 
3928   /**
3929    * A helper to handle a region splitting transition event.
3930    */
3931   private boolean handleRegionSplitting(final RegionTransition rt, final String encodedName,
3932       final String prettyPrintedRegionName, final ServerName sn) {
3933     if (!serverManager.isServerOnline(sn)) {
3934       LOG.warn("Dropped splitting! ServerName=" + sn + " unknown.");
3935       return false;
3936     }
3937     byte [] payloadOfSplitting = rt.getPayload();
3938     List<HRegionInfo> splittingRegions;
3939     try {
3940       splittingRegions = HRegionInfo.parseDelimitedFrom(
3941         payloadOfSplitting, 0, payloadOfSplitting.length);
3942     } catch (IOException e) {
3943       LOG.error("Dropped splitting! Failed reading " + rt.getEventType()
3944         + " payload for " + prettyPrintedRegionName, e);
3945       return false;
3946     }
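         // The payload lists the two daughter regions created by the split.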
3947     assert splittingRegions.size() == 2;
3948     HRegionInfo hri_a = splittingRegions.get(0);
3949     HRegionInfo hri_b = splittingRegions.get(1);
3950 
3951     RegionState rs_p = regionStates.getRegionState(encodedName);
3952     RegionState rs_a = regionStates.getRegionState(hri_a);
3953     RegionState rs_b = regionStates.getRegionState(hri_b);
3954 
3955     if (!((rs_p == null || rs_p.isOpenOrSplittingOnServer(sn))
3956         && (rs_a == null || rs_a.isOpenOrSplittingNewOnServer(sn))
3957         && (rs_b == null || rs_b.isOpenOrSplittingNewOnServer(sn)))) {
3958       LOG.warn("Dropped splitting! Not in state good for SPLITTING; rs_p="
3959         + rs_p + ", rs_a=" + rs_a + ", rs_b=" + rs_b);
3960       return false;
3961     }
3962 
3963     if (rs_p == null) {
3964       // Splitting region should be online
3965       rs_p = regionStates.updateRegionState(rt, State.OPEN);
3966       if (rs_p == null) {
3967         LOG.warn("Received splitting for region " + prettyPrintedRegionName
3968           + " from server " + sn + " but it doesn't exist anymore,"
3969           + " probably already processed its split");
3970         return false;
3971       }
3972       regionStates.regionOnline(rs_p.getRegion(), sn);
3973     }
3974 
3975     HRegionInfo p = rs_p.getRegion();
3976     EventType et = rt.getEventType();
3977     if (et == EventType.RS_ZK_REQUEST_REGION_SPLIT) {
3978       try {
3979         if (SplitTransaction.transitionSplittingNode(watcher, p,
3980             hri_a, hri_b, sn, -1, EventType.RS_ZK_REQUEST_REGION_SPLIT,
3981             EventType.RS_ZK_REGION_SPLITTING) == -1) {
3982           byte[] data = ZKAssign.getData(watcher, encodedName);
3983           EventType currentType = null;
3984           if (data != null) {
3985             RegionTransition newRt = RegionTransition.parseFrom(data);
3986             currentType = newRt.getEventType();
3987           }
3988           if (currentType == null || (currentType != EventType.RS_ZK_REGION_SPLIT
3989               && currentType != EventType.RS_ZK_REGION_SPLITTING)) {
3990             LOG.warn("Failed to transition pending_split node "
3991               + encodedName + " to splitting, it's now " + currentType);
3992             return false;
3993           }
3994         }
3995       } catch (Exception e) {
3996         LOG.warn("Failed to transition pending_split node "
3997           + encodedName + " to splitting", e);
3998         return false;
3999       }
4000     }
4001 
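         // Update in-memory states under the regionStates lock: record the pending
         // split, mark the parent SPLITTING and the daughters SPLITTING_NEW; on a
         // SPLIT event the parent is offlined and the daughters brought online.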
4002     synchronized (regionStates) {
4003       splitRegions.put(p, new PairOfSameType<HRegionInfo>(hri_a, hri_b));
4004       regionStates.updateRegionState(hri_a, State.SPLITTING_NEW, sn);
4005       regionStates.updateRegionState(hri_b, State.SPLITTING_NEW, sn);
4006       regionStates.updateRegionState(rt, State.SPLITTING);
4007 
4008       // The below is for testing ONLY!  We can't do fault injection easily, so
4009       // resort to this kind of ugliness -- St.Ack 02/25/2011.
4010       if (TEST_SKIP_SPLIT_HANDLING) {
4011         LOG.warn("Skipping split message, TEST_SKIP_SPLIT_HANDLING is set");
4012         return true; // return true so that the splitting node stays
4013       }
4014 
4015       if (et == EventType.RS_ZK_REGION_SPLIT) {
4016         regionOffline(p, State.SPLIT);
4017         regionOnline(hri_a, sn);
4018         regionOnline(hri_b, sn);
4019         splitRegions.remove(p);
4020       }
4021     }
4022 
4023     if (et == EventType.RS_ZK_REGION_SPLIT) {
4024       LOG.debug("Handling SPLIT event for " + encodedName + "; deleting node");
4025       // Remove region from ZK
4026       try {
4027         boolean successful = false;
4028         while (!successful) {
4029           // It's possible that the RS touches the znode in between the read
4030           // and the delete, so it's safe to retry.
4031           successful = ZKAssign.deleteNode(watcher, encodedName,
4032             EventType.RS_ZK_REGION_SPLIT, sn);
4033         }
4034       } catch (KeeperException e) {
4035         if (e instanceof NoNodeException) {
4036           String znodePath = ZKUtil.joinZNode(watcher.assignmentZNode, encodedName);
4037           LOG.debug("The znode " + znodePath + " does not exist. It may have been deleted already.");
4038         } else {
4039           server.abort("Error deleting SPLIT node " + encodedName, e);
4040         }
4041       }
4042       LOG.info("Handled SPLIT event; parent=" + p.getRegionNameAsString()
4043         + ", daughter a=" + hri_a.getRegionNameAsString() + ", daughter b="
4044         + hri_b.getRegionNameAsString() + ", on " + sn);
4045 
4046       // User could disable the table before master knows the new region.
4047       if (zkTable.isDisablingOrDisabledTable(p.getTable())) {
4048         unassign(hri_a);
4049         unassign(hri_b);
4050       }
4051     }
4052     return true;
4053   }
4054 
4055   /**
4056    * Marks a region offline.  The new state should be the specified one,
4057    * if not null.  If the specified state is null, the new state is Offline.
4058    * The specified state can only be Split/Merged/Offline/null.
4059    */
4060   private void regionOffline(final HRegionInfo regionInfo, final State state) {
4061     regionStates.regionOffline(regionInfo, state);
4062     removeClosedRegion(regionInfo);
4063     // remove the region plan as well just in case.
4064     clearRegionPlan(regionInfo);
4065     balancer.regionOffline(regionInfo);
4066 
4067     // Tell our listeners that a region was closed
4068     sendRegionClosedNotification(regionInfo);
4069   }
4070 
4071   private void sendRegionOpenedNotification(final HRegionInfo regionInfo,
4072       final ServerName serverName) {
4073     if (!this.listeners.isEmpty()) {
4074       for (AssignmentListener listener : this.listeners) {
4075         listener.regionOpened(regionInfo, serverName);
4076       }
4077     }
4078   }
4079 
4080   private void sendRegionClosedNotification(final HRegionInfo regionInfo) {
4081     if (!this.listeners.isEmpty()) {
4082       for (AssignmentListener listener : this.listeners) {
4083         listener.regionClosed(regionInfo);
4084       }
4085     }
4086   }
4087 
4088   /**
4089    * Try to update some region states. If the state machine prevents
4090    * such an update, an error message is returned to explain the reason.
4091    *
4092    * It's expected that each transition carries just one region for
4093    * opening/closing, and three regions for splitting/merging.
4094    * These regions should be on the server that requested the change.
4095    *
4096    * Region state machine. Only these transitions
4097    * are expected to be triggered by a region server.
4098    *
4099    * On the state transition:
4100    *  (1) Open/Close should be initiated by master
4101    *      (a) Master sets the region to pending_open/pending_close
4102    *        in memory and hbase:meta after sending the request
4103    *        to the region server
4104    *      (b) Region server reports back to the master
4105    *        after open/close is done (either success/failure)
4106    *      (c) If the region server has a problem reporting the status
4107    *        to the master, it must be because the master is down or there is
4108    *        a temporary network issue. Otherwise, the region server should
4109    *        abort since it must be a bug. If the master is not accessible,
4110    *        the region server should keep trying until the server is
4111    *        stopped or until the status is reported to the (new) master
4112    *      (d) If region server dies in the middle of opening/closing
4113    *        a region, SSH picks it up and finishes it
4114    *      (e) If master dies in the middle, the new master recovers
4115    *        the state during initialization from hbase:meta. Region server
4116    *        can report any transition that has not been reported to
4117    *        the previous active master yet
4118    *  (2) Split/merge is initiated by region servers
4119    *      (a) To split a region, a region server sends a request
4120    *        to master to try to set a region to splitting, together with
4121    *        two daughters (to be created) to splitting new. If approved
4122    *        by the master, the splitting can then move ahead
4123    *      (b) To merge two regions, a region server sends a request to
4124    *        master to try to set the new merged region (to be created) to
4125    *        merging_new, together with two regions (to be merged) to merging.
4126    *        If it is ok with the master, the merge can then move ahead
4127    *      (c) Once the splitting/merging is done, the region server
4128    *        reports the status back to the master either success/failure.
4129    *      (d) Other scenarios should be handled similarly to
4130    *        region open/close
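        *
        * For example, a split is typically reported as the sequence
        * READY_TO_SPLIT -> SPLIT_PONR -> SPLIT (or SPLIT_REVERTED if the
        * region server rolls the split back), each transition carrying the
        * parent and the two daughters; merges use the analogous READY_TO_MERGE,
        * MERGE_PONR, MERGED and MERGE_REVERTED codes handled below.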
4131    */
4132   protected String onRegionTransition(final ServerName serverName,
4133       final RegionStateTransition transition) {
4134     TransitionCode code = transition.getTransitionCode();
4135     HRegionInfo hri = HRegionInfo.convert(transition.getRegionInfo(0));
4136     RegionState current = regionStates.getRegionState(hri);
4137     if (LOG.isDebugEnabled()) {
4138       LOG.debug("Got transition " + code + " for "
4139         + (current != null ? current.toString() : hri.getShortNameToLog())
4140         + " from " + serverName);
4141     }
4142     String errorMsg = null;
4143     switch (code) {
4144     case OPENED:
4145       if (current != null && current.isOpened() && current.isOnServer(serverName)) {
4146         LOG.info("Region " + hri.getShortNameToLog() + " is already "
4147             + current.getState() + " on " + serverName);
4148         break;
4149       }
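           // Intentional fall-through: OPENED shares the pending-open check below
           // with FAILED_OPEN.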
4150     case FAILED_OPEN:
4151       if (current == null
4152           || !current.isPendingOpenOrOpeningOnServer(serverName)) {
4153         errorMsg = hri.getShortNameToLog()
4154           + " is not pending open on " + serverName;
4155       } else if (code == TransitionCode.FAILED_OPEN) {
4156         onRegionFailedOpen(hri, serverName);
4157       } else {
4158         long openSeqNum = HConstants.NO_SEQNUM;
4159         if (transition.hasOpenSeqNum()) {
4160           openSeqNum = transition.getOpenSeqNum();
4161         }
4162         if (openSeqNum < 0) {
4163           errorMsg = "Newly opened region has invalid open seq num " + openSeqNum;
4164         } else {
4165           onRegionOpen(hri, serverName, openSeqNum);
4166         }
4167       }
4168       break;
4169 
4170     case CLOSED:
4171       if (current == null
4172           || !current.isPendingCloseOrClosingOnServer(serverName)) {
4173         errorMsg = hri.getShortNameToLog()
4174           + " is not pending close on " + serverName;
4175       } else {
4176         onRegionClosed(hri);
4177       }
4178       break;
4179 
4180     case READY_TO_SPLIT:
4181     case SPLIT_PONR:
4182     case SPLIT:
4183     case SPLIT_REVERTED:
4184       errorMsg = onRegionSplit(serverName, code, hri,
4185         HRegionInfo.convert(transition.getRegionInfo(1)),
4186         HRegionInfo.convert(transition.getRegionInfo(2)));
4187       break;
4188 
4189     case READY_TO_MERGE:
4190     case MERGE_PONR:
4191     case MERGED:
4192     case MERGE_REVERTED:
4193       errorMsg = onRegionMerge(serverName, code, hri,
4194         HRegionInfo.convert(transition.getRegionInfo(1)),
4195         HRegionInfo.convert(transition.getRegionInfo(2)));
4196       break;
4197 
4198     default:
4199       errorMsg = "Unexpected transition code " + code;
4200     }
4201     if (errorMsg != null) {
4202       LOG.error("Failed to transition region from " + current + " to "
4203         + code + " by " + serverName + ": " + errorMsg);
4204     }
4205     return errorMsg;
4206   }
4207 
4208   /**
4209    * @return Instance of load balancer
4210    */
4211   public LoadBalancer getBalancer() {
4212     return this.balancer;
4213   }
4214 }