
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Arrays;
24  import java.util.Collections;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.Iterator;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.NavigableMap;
31  import java.util.Set;
32  import java.util.TreeMap;
33  import java.util.concurrent.ConcurrentHashMap;
34  import java.util.concurrent.ConcurrentSkipListSet;
35  import java.util.concurrent.ThreadFactory;
36  import java.util.concurrent.TimeUnit;
37  import java.util.concurrent.atomic.AtomicBoolean;
38  import java.util.concurrent.atomic.AtomicInteger;
39  import java.util.concurrent.locks.Lock;
40  import java.util.concurrent.locks.ReentrantLock;
41  
42  import org.apache.commons.logging.Log;
43  import org.apache.commons.logging.LogFactory;
44  import org.apache.hadoop.classification.InterfaceAudience;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.hbase.Chore;
47  import org.apache.hadoop.hbase.HConstants;
48  import org.apache.hadoop.hbase.HRegionInfo;
49  import org.apache.hadoop.hbase.RegionTransition;
50  import org.apache.hadoop.hbase.Server;
51  import org.apache.hadoop.hbase.ServerName;
52  import org.apache.hadoop.hbase.Stoppable;
53  import org.apache.hadoop.hbase.catalog.CatalogTracker;
54  import org.apache.hadoop.hbase.catalog.MetaReader;
55  import org.apache.hadoop.hbase.client.Result;
56  import org.apache.hadoop.hbase.exceptions.DeserializationException;
57  import org.apache.hadoop.hbase.exceptions.NotServingRegionException;
58  import org.apache.hadoop.hbase.exceptions.RegionAlreadyInTransitionException;
59  import org.apache.hadoop.hbase.exceptions.RegionServerStoppedException;
60  import org.apache.hadoop.hbase.exceptions.ServerNotRunningYetException;
61  import org.apache.hadoop.hbase.exceptions.TableNotFoundException;
62  import org.apache.hadoop.hbase.executor.EventHandler;
63  import org.apache.hadoop.hbase.executor.EventType;
64  import org.apache.hadoop.hbase.executor.ExecutorService;
65  import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
66  import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
67  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
68  import org.apache.hadoop.hbase.master.handler.MergedRegionHandler;
69  import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
70  import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
71  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
72  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
73  import org.apache.hadoop.hbase.util.KeyLocker;
74  import org.apache.hadoop.hbase.util.Pair;
75  import org.apache.hadoop.hbase.util.Threads;
76  import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
77  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
78  import org.apache.hadoop.hbase.zookeeper.ZKTable;
79  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
80  import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
81  import org.apache.hadoop.ipc.RemoteException;
82  import org.apache.zookeeper.AsyncCallback;
83  import org.apache.zookeeper.KeeperException;
84  import org.apache.zookeeper.KeeperException.NoNodeException;
85  import org.apache.zookeeper.KeeperException.NodeExistsException;
86  import org.apache.zookeeper.data.Stat;
87  
88  import com.google.common.base.Preconditions;
89  import com.google.common.collect.LinkedHashMultimap;
90  
91  /**
92   * Manages and performs region assignment.
93   * <p>
94   * Monitors ZooKeeper for events related to regions in transition.
95   * <p>
96   * Handles existing regions in transition during master failover.
97   */
98  @InterfaceAudience.Private
99  public class AssignmentManager extends ZooKeeperListener {
100   private static final Log LOG = LogFactory.getLog(AssignmentManager.class);
101 
102   public static final ServerName HBCK_CODE_SERVERNAME = new ServerName(HConstants.HBCK_CODE_NAME,
103       -1, -1L);
104 
105   protected final Server server;
106 
107   private ServerManager serverManager;
108 
109   private CatalogTracker catalogTracker;
110 
111   protected final TimeoutMonitor timeoutMonitor;
112 
113   private final TimerUpdater timerUpdater;
114 
115   private LoadBalancer balancer;
116 
117   private final TableLockManager tableLockManager;
118 
119   final private KeyLocker<String> locker = new KeyLocker<String>();
120 
121   /**
122    * Map of regions to reopen after the schema of a table is changed. Key -
123    * encoded region name, value - HRegionInfo
124    */
125   private final Map <String, HRegionInfo> regionsToReopen;
126 
127   /*
128    * Maximum times we recurse an assignment/unassignment.
129    * See below in {@link #assign()} and {@link #unassign()}.
130    */
131   private final int maximumAttempts;
132 
133   /** Plans for region movement. Key is the encoded version of a region name*/
134   // TODO: When do plans get cleaned out?  Ever? In server open and in server
135   // shutdown processing -- St.Ack
136   // All access to this Map must be synchronized.
137   final NavigableMap<String, RegionPlan> regionPlans =
138     new TreeMap<String, RegionPlan>();
139 
140   private final ZKTable zkTable;
141 
142   /**
143    * Contains the servers whose timers need to be updated; these servers will be
144    * handled by {@link TimerUpdater}.
145    */
146   private final ConcurrentSkipListSet<ServerName> serversInUpdatingTimer;
147 
148   private final ExecutorService executorService;
149 
150   //Thread pool executor service for timeout monitor
151   private java.util.concurrent.ExecutorService threadPoolExecutorService;
152 
153   // A bunch of ZK events workers. Each is a single thread executor service
154   private final java.util.concurrent.ExecutorService zkEventWorkers;
155 
156   private List<EventType> ignoreStatesRSOffline = Arrays.asList(
157       EventType.RS_ZK_REGION_FAILED_OPEN, EventType.RS_ZK_REGION_CLOSED);
158 
159   // metrics instance to send metrics for RITs
160   MetricsMaster metricsMaster;
161 
162   private final RegionStates regionStates;
163 
164   // The thresholds for using bulk assignment. Bulk assignment is used only if
165   // assigning at least this many regions to at least this many servers; if
166   // assigning fewer regions to fewer servers, bulk assigning may not be as
167   // efficient. See the sketch after the field declarations below.
168   private final int bulkAssignThresholdRegions;
169   private final int bulkAssignThresholdServers;
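  // A minimal sketch of the intended threshold check, assuming the caller has already
  // counted the regions to assign and the candidate servers (the actual comparison
  // lives in the assignment code outside this excerpt; regionCount and serverCount
  // are illustrative names only):
  //
  //   boolean useBulkAssign = regionCount >= bulkAssignThresholdRegions
  //       && serverCount >= bulkAssignThresholdServers;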
170 
171   // Should bulk assignment wait till all regions are assigned,
172   // or until it times out?  This is useful for measuring bulk assignment
173   // performance, but not needed in most use cases.
174   private final boolean bulkAssignWaitTillAllAssigned;
175 
176   /**
177    * Indicator that AssignmentManager has recovered the region states so
178    * that ServerShutdownHandler can be fully enabled and re-assign regions
179    * of dead servers; that way, when re-assignment happens, AssignmentManager
180    * has proper region states.
181    *
182    * Protected to ease testing.
183    */
184   protected final AtomicBoolean failoverCleanupDone = new AtomicBoolean(false);
185 
186   /** Is timeout management (the TimeoutMonitor) activated? */
187   private final boolean tomActivated;
188 
189   /**
190    * A map to track the number of times in a row a region has failed to open,
191    * so that we don't try to open a region forever if the failure is
192    * unrecoverable.  We don't put this information in region states
193    * because we don't expect this to happen frequently; we don't
194    * want to copy this information over during each state transition either.
195    */
196   private final ConcurrentHashMap<String, AtomicInteger>
197     failedOpenTracker = new ConcurrentHashMap<String, AtomicInteger>();
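  // In outline, this tracker is consulted from handleRegion() below when a
  // RS_ZK_REGION_FAILED_OPEN event arrives; a condensed sketch of that flow
  // (no behaviour beyond what the handler already does):
  //
  //   AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
  //   if (failedOpenCount == null) {
  //     failedOpenCount = new AtomicInteger();
  //     failedOpenTracker.put(encodedName, failedOpenCount);
  //   }
  //   if (failedOpenCount.incrementAndGet() >= maximumAttempts) {
  //     // give up: mark the region FAILED_OPEN and drop the tracking entry
  //     failedOpenTracker.remove(encodedName);
  //   } else {
  //     // pick a new region plan and retry the open via ClosedRegionHandler
  //   }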
198 
199   /**
200    * Constructs a new assignment manager.
201    *
202    * @param server
203    * @param serverManager
204    * @param catalogTracker
205    * @param service
206    * @throws KeeperException
207    * @throws IOException
208    */
209   public AssignmentManager(Server server, ServerManager serverManager,
210       CatalogTracker catalogTracker, final LoadBalancer balancer,
211       final ExecutorService service, MetricsMaster metricsMaster,
212       final TableLockManager tableLockManager) throws KeeperException, IOException {
213     super(server.getZooKeeper());
214     this.server = server;
215     this.serverManager = serverManager;
216     this.catalogTracker = catalogTracker;
217     this.executorService = service;
218     this.regionsToReopen = Collections.synchronizedMap
219                            (new HashMap<String, HRegionInfo> ());
220     Configuration conf = server.getConfiguration();
221     this.tomActivated = conf.getBoolean("hbase.assignment.timeout.management", false);
222     if (tomActivated){
223       this.serversInUpdatingTimer =  new ConcurrentSkipListSet<ServerName>();
224       this.timeoutMonitor = new TimeoutMonitor(
225         conf.getInt("hbase.master.assignment.timeoutmonitor.period", 30000),
226         server, serverManager,
227         conf.getInt("hbase.master.assignment.timeoutmonitor.timeout", 600000));
228       this.timerUpdater = new TimerUpdater(conf.getInt(
229         "hbase.master.assignment.timerupdater.period", 10000), server);
230       Threads.setDaemonThreadRunning(timerUpdater.getThread(),
231         server.getServerName() + ".timerUpdater");
232     } else {
233       this.serversInUpdatingTimer =  null;
234       this.timeoutMonitor = null;
235       this.timerUpdater = null;
236     }
237     this.zkTable = new ZKTable(this.watcher);
238     this.maximumAttempts =
239       this.server.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10);
240     this.balancer = balancer;
241     int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
242     this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
243       maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("hbase-am"));
244     this.metricsMaster = metricsMaster;// can be null only with tests.
245     this.regionStates = new RegionStates(server, serverManager);
246 
247     this.bulkAssignWaitTillAllAssigned =
248       conf.getBoolean("hbase.bulk.assignment.waittillallassigned", false);
249     this.bulkAssignThresholdRegions = conf.getInt("hbase.bulk.assignment.threshold.regions", 7);
250     this.bulkAssignThresholdServers = conf.getInt("hbase.bulk.assignment.threshold.servers", 3);
251 
252     int workers = conf.getInt("hbase.assignment.zkevent.workers", 20);
253     ThreadFactory threadFactory = Threads.newDaemonThreadFactory("hbase-am-zkevent-worker");
254     zkEventWorkers = Threads.getBoundedCachedThreadPool(workers, 60L,
255             TimeUnit.SECONDS, threadFactory);
256     this.tableLockManager = tableLockManager;
257   }
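  // The constructor above reads its tuning knobs from the server's Configuration.
  // A minimal sketch of overriding a few of them programmatically; the keys and
  // defaults are taken from the constructor, the chosen values are illustrative only:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setBoolean("hbase.assignment.timeout.management", true); // enable TimeoutMonitor
  //   conf.setInt("hbase.assignment.maximum.attempts", 10);         // open/unassign retries
  //   conf.setInt("hbase.assignment.threads.max", 30);              // assignment thread pool size
  //   conf.setInt("hbase.assignment.zkevent.workers", 20);          // ZK event worker pool size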
258 
259   void startTimeOutMonitor() {
260     if (tomActivated) {
261       Threads.setDaemonThreadRunning(timeoutMonitor.getThread(), server.getServerName()
262           + ".timeoutMonitor");
263     }
264   }
265 
266   /**
267    * @return Instance of ZKTable.
268    */
269   public ZKTable getZKTable() {
270     // These are 'expensive' to make, involving a trip to the zk ensemble, so
271     // allow sharing.
272     return this.zkTable;
273   }
274 
275   /**
276    * This SHOULD not be public. It is public now
277    * because of some unit tests.
278    *
279    * TODO: make it package private and keep RegionStates in the master package
280    */
281   public RegionStates getRegionStates() {
282     return regionStates;
283   }
284 
285   public RegionPlan getRegionReopenPlan(HRegionInfo hri) {
286     return new RegionPlan(hri, null, regionStates.getRegionServerOfRegion(hri));
287   }
288 
289   /**
290    * Add a regionPlan for the specified region.
291    * @param encodedName
292    * @param plan
293    */
294   public void addPlan(String encodedName, RegionPlan plan) {
295     synchronized (regionPlans) {
296       regionPlans.put(encodedName, plan);
297     }
298   }
299 
300   /**
301    * Add a map of region plans.
302    */
303   public void addPlans(Map<String, RegionPlan> plans) {
304     synchronized (regionPlans) {
305       regionPlans.putAll(plans);
306     }
307   }
308 
309   /**
310    * Set the list of regions that will be reopened
311    * because of an update in table schema
312    *
313    * @param regions
314    *          list of regions that should be tracked for reopen
315    */
316   public void setRegionsToReopen(List <HRegionInfo> regions) {
317     for(HRegionInfo hri : regions) {
318       regionsToReopen.put(hri.getEncodedName(), hri);
319     }
320   }
321 
322   /**
323    * Used by the client to identify if all regions have the schema updates
324    *
325    * @param tableName
326    * @return Pair indicating the status of the alter command
327    * @throws IOException
328    */
329   public Pair<Integer, Integer> getReopenStatus(byte[] tableName)
330       throws IOException {
331     List <HRegionInfo> hris =
332       MetaReader.getTableRegions(this.server.getCatalogTracker(), tableName, true);
333     Integer pending = 0;
334     for (HRegionInfo hri : hris) {
335       String name = hri.getEncodedName();
336       // no lock concurrent access ok: sequential consistency respected.
337       if (regionsToReopen.containsKey(name)
338           || regionStates.isRegionInTransition(name)) {
339         pending++;
340       }
341     }
342     return new Pair<Integer, Integer>(pending, hris.size());
343   }
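  // A minimal sketch of how a caller might poll the pair returned above to wait for
  // an alter to finish; the loop, sleep interval and variable names are illustrative
  // only and not part of this class:
  //
  //   Pair<Integer, Integer> status = assignmentManager.getReopenStatus(tableName);
  //   while (status.getFirst() > 0) {       // regions still pending reopen or in transition
  //     Thread.sleep(1000);
  //     status = assignmentManager.getReopenStatus(tableName);
  //   }
  //   // status.getSecond() is the total number of regions in the table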
344 
345   /**
346    * Used by ServerShutdownHandler to make sure AssignmentManager has completed
347    * the failover cleanup before re-assigning regions of dead servers; that way,
348    * when re-assignment happens, AssignmentManager has proper region states.
349    */
350   public boolean isFailoverCleanupDone() {
351     return failoverCleanupDone.get();
352   }
353 
354   /**
355    * Called once failover cleanup is completed. Notifies the server manager to
356    * process queued-up dead servers, if any.
357    */
358   void failoverCleanupDone() {
359     failoverCleanupDone.set(true);
360     serverManager.processQueuedDeadServers();
361   }
362 
363   /**
364    * Called on startup.
365    * Figures out whether this is a fresh cluster start or we are joining an extant running cluster.
366    * @throws IOException
367    * @throws KeeperException
368    * @throws InterruptedException
369    */
370   void joinCluster() throws IOException,
371       KeeperException, InterruptedException {
372     // Concurrency note: Below, the accesses to regionsInTransition are
373     // outside of a synchronization block, whereas usually all accesses to RIT are
374     // synchronized.  The presumption is that in this case it is safe since this
375     // method is run by a single thread on startup.
376 
377     // TODO: Regions that have a null location and are not in regionsInTransitions
378     // need to be handled.
379 
380     // Scan META to build list of existing regions, servers, and assignment
381     // Returns servers that have not checked in (assumed dead) and their regions
382     Map<ServerName, List<HRegionInfo>> deadServers = rebuildUserRegions();
383 
384     // This method will assign all user regions if a clean server startup or
385     // it will reconstruct master state and cleanup any leftovers from
386     // previous master process.
387     processDeadServersAndRegionsInTransition(deadServers);
388 
389     recoverTableInDisablingState();
390     recoverTableInEnablingState();
391   }
392 
393   /**
394    * Processes all regions that are in transition in zookeeper and also
395    * processes the list of dead servers by scanning the META.
396    * Used by a master joining a cluster.  If we figure this is a clean cluster
397    * startup, will assign all user regions.
398    * @param deadServers
399    *          Map of dead servers and their regions. Can be null.
400    * @throws KeeperException
401    * @throws IOException
402    * @throws InterruptedException
403    */
404   void processDeadServersAndRegionsInTransition(
405       final Map<ServerName, List<HRegionInfo>> deadServers)
406           throws KeeperException, IOException, InterruptedException {
407     List<String> nodes = ZKUtil.listChildrenNoWatch(watcher,
408       watcher.assignmentZNode);
409 
410     if (nodes == null) {
411       String errorMessage = "Failed to get the children from ZK";
412       server.abort(errorMessage, new IOException(errorMessage));
413       return;
414     }
415 
416     boolean failover = !serverManager.getDeadServers().isEmpty();
417 
418     if (!failover) {
419       // Run through all regions.  If they are not assigned and not in RIT, then
420       // it's a clean cluster startup, else it's a failover.
421       Map<HRegionInfo, ServerName> regions = regionStates.getRegionAssignments();
422       for (Map.Entry<HRegionInfo, ServerName> e: regions.entrySet()) {
423         if (!e.getKey().isMetaTable() && e.getValue() != null) {
424           LOG.debug("Found " + e + " out on cluster");
425           failover = true;
426           break;
427         }
428         if (nodes.contains(e.getKey().getEncodedName())) {
429           LOG.debug("Found " + e.getKey().getRegionNameAsString() + " in RITs");
430           // Could be a meta region.
431           failover = true;
432           break;
433         }
434       }
435     }
436 
437     // If we found user regions out on the cluster, it's a failover.
438     if (failover) {
439       LOG.info("Found regions out on cluster or in RIT; failover");
440       // Process list of dead servers and regions in RIT.
441       // See HBASE-4580 for more information.
442       processDeadServersAndRecoverLostRegions(deadServers);
443     } else {
444       // Fresh cluster startup.
445       LOG.info("Clean cluster startup. Assigning user regions");
446       assignAllUserRegions();
447     }
448   }
449 
450   /**
451    * If the region is up in zk in transition, then do fixup and block and wait until
452    * the region is assigned and out of transition.  Used on startup for
453    * catalog regions.
454    * @param hri Region to look for.
455    * @return True if we processed a region in transition else false if region
456    * was not up in zk in transition.
457    * @throws InterruptedException
458    * @throws KeeperException
459    * @throws IOException
460    */
461   boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
462       throws InterruptedException, KeeperException, IOException {
463     boolean intransition = processRegionInTransition(hri.getEncodedName(), hri);
464     if (!intransition) return intransition;
465     LOG.debug("Waiting on " + HRegionInfo.prettyPrint(hri.getEncodedName()));
466     while (!this.server.isStopped() &&
467       this.regionStates.isRegionInTransition(hri.getEncodedName())) {
468       // We put a timeout because we may have the region getting in just between the test
469       //  and the waitForUpdate
470       this.regionStates.waitForUpdate(100);
471     }
472     return intransition;
473   }
474 
475   /**
476    * Process failover of new master for region <code>encodedRegionName</code>
477    * up in zookeeper.
478    * @param encodedRegionName Region to process failover for.
479    * @param regionInfo If null we'll go get it from meta table.
480    * @return True if we processed <code>regionInfo</code> as a RIT.
481    * @throws KeeperException
482    * @throws IOException
483    */
484   boolean processRegionInTransition(final String encodedRegionName,
485       final HRegionInfo regionInfo) throws KeeperException, IOException {
486     // We need a lock here to ensure that we will not put the same region twice
487     // It has no reason to be a lock shared with the other operations.
488     // We can do the lock on the region only, instead of a global lock: what we want to ensure
489     // is that we don't have two threads working on the same region.
490     Lock lock = locker.acquireLock(encodedRegionName);
491     try {
492       Stat stat = new Stat();
493       byte [] data = ZKAssign.getDataAndWatch(watcher, encodedRegionName, stat);
494       if (data == null) return false;
495       RegionTransition rt;
496       try {
497         rt = RegionTransition.parseFrom(data);
498       } catch (DeserializationException e) {
499         LOG.warn("Failed to parse znode data", e);
500         return false;
501       }
502       HRegionInfo hri = regionInfo;
503       if (hri == null) {
504         hri = regionStates.getRegionInfo(rt.getRegionName());
505         if (hri == null) return false;
506       }
507       processRegionsInTransition(rt, hri, stat.getVersion());
508       return true;
509     } finally {
510       lock.unlock();
511     }
512   }
513 
514   /**
515    * This call is invoked only (1) when the master assigns meta, and
516    * (2) during failover-mode startup, when processing zk assignment nodes.
517    * The locker is set in the caller.
518    *
519    * It should be private but it is used by some tests too.
520    */
521   void processRegionsInTransition(
522       final RegionTransition rt, final HRegionInfo regionInfo,
523       final int expectedVersion) throws KeeperException {
524     EventType et = rt.getEventType();
525     // Get ServerName.  Cannot be null.
526     final ServerName sn = rt.getServerName();
527     String encodedRegionName = regionInfo.getEncodedName();
528     LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + et);
529 
530 
531     if (regionStates.isRegionInTransition(encodedRegionName)) {
532       // Just return
533       return;
534     }
535     switch (et) {
536       case M_ZK_REGION_CLOSING:
537         // If zk node of the region was updated by a live server skip this
538         // region and just add it into RIT.
539         if (!serverManager.isServerOnline(sn)) {
540           // If it was not online, it's closed now. Force to OFFLINE and this
541           // will get it reassigned if appropriate
542           forceOffline(regionInfo, rt);
543         } else {
544           // Insert into RIT & resend the query to the region server: maybe the previous master
545           // died before sending the query the first time.
546           regionStates.updateRegionState(rt, RegionState.State.CLOSING);
547           final RegionState rs = regionStates.getRegionState(regionInfo);
548           this.executorService.submit(
549               new EventHandler(server, EventType.M_MASTER_RECOVERY) {
550                 @Override
551                 public void process() throws IOException {
552                   ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
553                   try {
554                     unassign(regionInfo, rs, expectedVersion, sn, true, null);
555                   } finally {
556                     lock.unlock();
557                   }
558                 }
559               });
560         }
561         break;
562 
563       case RS_ZK_REGION_CLOSED:
564       case RS_ZK_REGION_FAILED_OPEN:
565         // Region is closed, insert into RIT and handle it
566         addToRITandCallClose(regionInfo, RegionState.State.CLOSED, rt);
567         break;
568 
569       case M_ZK_REGION_OFFLINE:
570         // If zk node of the region was updated by a live server skip this
571         // region and just add it into RIT.
572         if (!serverManager.isServerOnline(sn)) {
573           // Region is offline, insert into RIT and handle it like a closed
574           addToRITandCallClose(regionInfo, RegionState.State.OFFLINE, rt);
575         } else {
576           // Insert in RIT and resend to the regionserver
577           regionStates.updateRegionState(rt, RegionState.State.PENDING_OPEN);
578           final RegionState rs = regionStates.getRegionState(regionInfo);
579           this.executorService.submit(
580               new EventHandler(server, EventType.M_MASTER_RECOVERY) {
581                 @Override
582                 public void process() throws IOException {
583                   ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
584                   try {
585                     assign(rs, false, false);
586                   } finally {
587                     lock.unlock();
588                   }
589                 }
590               });
591         }
592         break;
593 
594       case RS_ZK_REGION_OPENING:
595         if (!serverManager.isServerOnline(sn)) {
596           forceOffline(regionInfo, rt);
597         } else {
598           regionStates.updateRegionState(rt, RegionState.State.OPENING);
599         }
600         break;
601 
602       case RS_ZK_REGION_OPENED:
603         if (!serverManager.isServerOnline(sn)) {
604           forceOffline(regionInfo, rt);
605         } else {
606           // Region is opened, insert into RIT and handle it
607           // This could be done asynchronously, we would need then to acquire the lock in the
608           //  handler.
609           regionStates.updateRegionState(rt, RegionState.State.OPEN);
610           new OpenedRegionHandler(server, this, regionInfo, sn, expectedVersion).process();
611         }
612         break;
613       case RS_ZK_REGION_SPLITTING:
614         if (!serverManager.isServerOnline(sn)) {
615           // The regionserver started the split, but died before updating the status.
616           // It means (hopefully) that the split was not finished
617           // TBD - to study. In the meantime, do nothing as in the past.
618           LOG.warn("Processed region " + regionInfo.getEncodedName() + " in state : " + et +
619               " on a dead regionserver: " + sn + " doing nothing");
620         } else {
621           LOG.info("Processed region " + regionInfo.getEncodedName() + " in state : " +
622               et + " nothing to do.");
623           // We don't do anything. The way the code is written in RS_ZK_REGION_SPLIT management,
624           //  it adds the RS_ZK_REGION_SPLITTING state if needed. So we don't have to do it here.
625         }
626         break;
627       case RS_ZK_REGION_SPLIT:
628         if (!serverManager.isServerOnline(sn)) {
629           forceOffline(regionInfo, rt);
630         } else {
631           LOG.info("Processed region " + regionInfo.getEncodedName() + " in state : " +
632               et + " nothing to do.");
633           // We don't do anything. The regionserver is supposed to update the znode
634           // multiple times so if it's still up we will receive an update soon.
635         }
636         break;
637       case RS_ZK_REGION_MERGING:
638         // nothing to do
639         LOG.info("Processed region " + regionInfo.getEncodedName()
640             + " in state : " + et + " nothing to do.");
641         break;
642       case RS_ZK_REGION_MERGE:
643         if (!serverManager.isServerOnline(sn)) {
644           // ServerShutdownHandler would handle this region
645           LOG.warn("Processed region " + regionInfo.getEncodedName()
646               + " in state : " + et + " on a dead regionserver: " + sn
647               + " doing nothing");
648         } else {
649           LOG.info("Processed region " + regionInfo.getEncodedName() + " in state : " +
650               et + " nothing to do.");
651           // We don't do anything. The regionserver is supposed to update the znode
652           // multiple times so if it's still up we will receive an update soon.
653         }
654         break;
655       default:
656         throw new IllegalStateException("Received region in state :" + et + " is not valid.");
657     }
658   }
659 
660   /**
661    * Put the region <code>hri</code> into an offline state up in zk.
662    *
663    * You need to hold a lock on the region before calling this method.
664    *
665    * @param hri
666    * @param oldRt
667    * @throws KeeperException
668    */
669   private void forceOffline(final HRegionInfo hri, final RegionTransition oldRt)
670       throws KeeperException {
671     // If it was on a dead server, it's closed now.  Force to OFFLINE and then
672     // handle it like a close; this will get it reassigned if appropriate
673     LOG.debug("RIT " + hri.getEncodedName() + " in state=" + oldRt.getEventType() +
674       " was on deadserver; forcing offline");
675     ZKAssign.createOrForceNodeOffline(this.watcher, hri, oldRt.getServerName());
676     addToRITandCallClose(hri, RegionState.State.OFFLINE, oldRt);
677   }
678 
679   /**
680    * Add to the in-memory copy of regions in transition and then call close
681    * handler on passed region <code>hri</code>
682    * @param hri
683    * @param state
684    * @param oldData
685    */
686   private void addToRITandCallClose(final HRegionInfo hri,
687       final RegionState.State state, final RegionTransition oldData) {
688     regionStates.updateRegionState(oldData, state);
689     new ClosedRegionHandler(this.server, this, hri).process();
690   }
691 
692   /**
693    * When a region is closed, it should be removed from the regionsToReopen map.
694    * @param hri HRegionInfo of the region which was closed
695    */
696   public void removeClosedRegion(HRegionInfo hri) {
697     if (regionsToReopen.remove(hri.getEncodedName()) != null) {
698       LOG.debug("Removed region from reopening regions because it was closed");
699     }
700   }
701 
702   /**
703    * Handles various states an unassigned node can be in.
704    * <p>
705    * Method is called when a state change is suspected for an unassigned node.
706    * <p>
707    * This deals with skipped transitions (we got a CLOSED but didn't see CLOSING
708    * yet).
709    * @param rt
710    * @param expectedVersion
711    */
712   private void handleRegion(final RegionTransition rt, int expectedVersion) {
713     if (rt == null) {
714       LOG.warn("Unexpected NULL input for RegionTransition rt");
715       return;
716     }
717     final ServerName sn = rt.getServerName();
718     // Check if this is a special HBCK transition
719     if (sn.equals(HBCK_CODE_SERVERNAME)) {
720       handleHBCK(rt);
721       return;
722     }
723     final long createTime = rt.getCreateTime();
724     final byte[] regionName = rt.getRegionName();
725     String encodedName = HRegionInfo.encodeRegionName(regionName);
726     String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
727     // Verify this is a known server
728     if (!serverManager.isServerOnline(sn)
729       && !ignoreStatesRSOffline.contains(rt.getEventType())) {
730       LOG.warn("Attempted to handle region transition for server but " +
731         "server is not online: " + prettyPrintedRegionName);
732       return;
733     }
734 
735     RegionState regionState =
736       regionStates.getRegionTransitionState(encodedName);
737     long startTime = System.currentTimeMillis();
738     if (LOG.isDebugEnabled()) {
739       boolean lateEvent = createTime < (startTime - 15000);
740       LOG.debug("Handling transition=" + rt.getEventType() +
741         ", server=" + sn + ", region=" +
742         (prettyPrintedRegionName == null ? "null" : prettyPrintedRegionName) +
743         (lateEvent ? ", which is more than 15 seconds late" : "") +
744         ", current state from region state map =" + regionState);
745     }
746     // We don't do anything for this event,
747     // so separate it out, no need to lock/unlock anything
748     if (rt.getEventType() == EventType.M_ZK_REGION_OFFLINE) {
749       return;
750     }
751 
752     // We need a lock on the region as we could update it
753     Lock lock = locker.acquireLock(encodedName);
754     try {
755       RegionState latestState =
756         regionStates.getRegionTransitionState(encodedName);
757       if ((regionState == null && latestState != null)
758           || (regionState != null && latestState == null)
759           || (regionState != null && latestState != null
760             && latestState.getState() != regionState.getState())) {
761         LOG.warn("Region state changed from " + regionState + " to "
762           + latestState + ", while acquiring lock");
763       }
764       long waitedTime = System.currentTimeMillis() - startTime;
765       if (waitedTime > 5000) {
766         LOG.warn("Took " + waitedTime + "ms to acquire the lock");
767       }
768       regionState = latestState;
769       switch (rt.getEventType()) {
770         case RS_ZK_REGION_SPLITTING:
771           if (!isInStateForSplitting(regionState)) break;
772           regionStates.updateRegionState(rt, RegionState.State.SPLITTING);
773           break;
774 
775         case RS_ZK_REGION_SPLIT:
776           // RegionState must be null, or SPLITTING or PENDING_CLOSE.
777           if (!isInStateForSplitting(regionState)) break;
778           // If null, add SPLITTING state before going to SPLIT
779           if (regionState == null) {
780             regionState = regionStates.updateRegionState(rt,
781               RegionState.State.SPLITTING);
782 
783             String message = "Received SPLIT for region " + prettyPrintedRegionName +
784               " from server " + sn;
785             // If still null, it means we cannot find it and it was already processed
786             if (regionState == null) {
787               LOG.warn(message + " but it doesn't exist anymore," +
788                   " probably already processed its split");
789               break;
790             }
791             LOG.info(message +
792                 " but region was not first in SPLITTING state; continuing");
793           }
794           // Check it has daughters.
795           byte [] payload = rt.getPayload();
796           List<HRegionInfo> daughters;
797           try {
798             daughters = HRegionInfo.parseDelimitedFrom(payload, 0, payload.length);
799           } catch (IOException e) {
800             LOG.error("Dropped split! Failed reading split payload for " +
801               prettyPrintedRegionName);
802             break;
803           }
804           assert daughters.size() == 2;
805           // Assert that we can get a serverinfo for this server.
806           if (!this.serverManager.isServerOnline(sn)) {
807             LOG.error("Dropped split! ServerName=" + sn + " unknown.");
808             break;
809           }
810           // Run handler to do the rest of the SPLIT handling.
811           this.executorService.submit(new SplitRegionHandler(server, this,
812             regionState.getRegion(), sn, daughters));
813           break;
814 
815         case RS_ZK_REGION_MERGING:
816           // Merged region is a new region, we can't find it in the region states now.
817           // Do nothing.
818           break;
819 
820         case RS_ZK_REGION_MERGE:
821           // Assert that we can get a serverinfo for this server.
822           if (!this.serverManager.isServerOnline(sn)) {
823             LOG.error("Dropped merge! ServerName=" + sn + " unknown.");
824             break;
825           }
826           // Get merged and merging regions.
827           byte[] payloadOfMerge = rt.getPayload();
828           List<HRegionInfo> mergeRegions;
829           try {
830             mergeRegions = HRegionInfo.parseDelimitedFrom(payloadOfMerge, 0,
831                 payloadOfMerge.length);
832           } catch (IOException e) {
833             LOG.error("Dropped merge! Failed reading merge payload for " +
834               prettyPrintedRegionName);
835             break;
836           }
837           assert mergeRegions.size() == 3;
838           // Run handler to do the rest of the MERGE handling.
839           this.executorService.submit(new MergedRegionHandler(server, this, sn,
840               mergeRegions));
841           break;
842 
843         case M_ZK_REGION_CLOSING:
844           // Should see CLOSING after we have asked it to CLOSE or additional
845           // times after already being in state of CLOSING
846           if (regionState != null
847               && !regionState.isPendingCloseOrClosingOnServer(sn)) {
848             LOG.warn("Received CLOSING for region " + prettyPrintedRegionName
849               + " from server " + sn + " but region was in the state " + regionState
850               + " and not in expected PENDING_CLOSE or CLOSING states,"
851               + " or not on the expected server");
852             return;
853           }
854           // Transition to CLOSING (or update stamp if already CLOSING)
855           regionStates.updateRegionState(rt, RegionState.State.CLOSING);
856           break;
857 
858         case RS_ZK_REGION_CLOSED:
859           // Should see CLOSED after CLOSING but possible after PENDING_CLOSE
860           if (regionState != null
861               && !regionState.isPendingCloseOrClosingOnServer(sn)) {
862             LOG.warn("Received CLOSED for region " + prettyPrintedRegionName
863               + " from server " + sn + " but region was in the state " + regionState
864               + " and not in expected PENDING_CLOSE or CLOSING states,"
865               + " or not on the expected server");
866             return;
867           }
868           // Handle CLOSED by assigning elsewhere or stopping if a disable
869           // If we got here all is good.  Need to update RegionState -- else
870           // what follows will fail because not in expected state.
871           regionState = regionStates.updateRegionState(rt, RegionState.State.CLOSED);
872           if (regionState != null) {
873             removeClosedRegion(regionState.getRegion());
874             this.executorService.submit(new ClosedRegionHandler(server,
875               this, regionState.getRegion()));
876           }
877           break;
878 
879         case RS_ZK_REGION_FAILED_OPEN:
880           if (regionState != null
881               && !regionState.isPendingOpenOrOpeningOnServer(sn)) {
882             LOG.warn("Received FAILED_OPEN for region " + prettyPrintedRegionName
883               + " from server " + sn + " but region was in the state " + regionState
884               + " and not in expected PENDING_OPEN or OPENING states,"
885               + " or not on the expected server");
886             return;
887           }
888           // Handle this the same as if it were opened and then closed.
889           regionState = regionStates.updateRegionState(rt, RegionState.State.CLOSED);
890           // When there is more than one region server, a new RS is selected as the
891           // destination and the region plan is updated accordingly. (HBASE-5546)
892           if (regionState != null) {
893             AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
894             if (failedOpenCount == null) {
895               failedOpenCount = new AtomicInteger();
896               // No need to use putIfAbsent, or extra synchronization since
897               // this whole handleRegion block is locked on the encoded region
898               // name, and failedOpenTracker is updated only in this block
899               failedOpenTracker.put(encodedName, failedOpenCount);
900             }
901             if (failedOpenCount.incrementAndGet() >= maximumAttempts) {
902               regionStates.updateRegionState(
903                 regionState.getRegion(), RegionState.State.FAILED_OPEN);
904               // remove the tracking info to save memory, also reset
905               // the count for next open initiative
906               failedOpenTracker.remove(encodedName);
907             } else {
908               getRegionPlan(regionState.getRegion(), sn, true);
909               this.executorService.submit(new ClosedRegionHandler(server,
910                 this, regionState.getRegion()));
911             }
912           }
913           break;
914 
915         case RS_ZK_REGION_OPENING:
916           // Should see OPENING after we have asked it to OPEN or additional
917           // times after already being in state of OPENING
918           if (regionState != null
919               && !regionState.isPendingOpenOrOpeningOnServer(sn)) {
920             LOG.warn("Received OPENING for region " + prettyPrintedRegionName
921               + " from server " + sn + " but region was in the state " + regionState
922               + " and not in expected PENDING_OPEN or OPENING states,"
923               + " or not on the expected server");
924             return;
925           }
926           // Transition to OPENING (or update stamp if already OPENING)
927           regionStates.updateRegionState(rt, RegionState.State.OPENING);
928           break;
929 
930         case RS_ZK_REGION_OPENED:
931           // Should see OPENED after OPENING but possible after PENDING_OPEN
932           if (regionState != null
933               && !regionState.isPendingOpenOrOpeningOnServer(sn)) {
934             LOG.warn("Received OPENED for region " + prettyPrintedRegionName
935               + " from server " + sn + " but region was in the state " + regionState
936               + " and not in expected PENDING_OPEN or OPENING states,"
937               + " or not on the expected server");
938             // Close it without updating the internal region states,
939             // so as not to create double assignments in unlucky scenarios
940             // mentioned in OpenRegionHandler#process
941             unassign(regionState.getRegion(), null, -1, null, false, sn);
942             return;
943           }
944           // Handle OPENED by removing from transition and deleting the zk node
945           regionState = regionStates.updateRegionState(rt, RegionState.State.OPEN);
946           if (regionState != null) {
947             failedOpenTracker.remove(encodedName); // reset the count, if any
948             this.executorService.submit(new OpenedRegionHandler(
949               server, this, regionState.getRegion(), sn, expectedVersion));
950           }
951           break;
952 
953         default:
954           throw new IllegalStateException("Received event is not valid.");
955       }
956     } finally {
957       lock.unlock();
958     }
959   }
960 
961   /**
962    * @return True if this RegionState is splittable; i.e. the
963    * RegionState is currently in splitting state, pending_close, or
964    * null (anything else will return false).
965    */
966   private boolean isInStateForSplitting(final RegionState rs) {
967     if (rs == null) return true;
968     if (rs.isSplitting()) return true;
969     if (convertPendingCloseToSplitting(rs)) return true;
970     LOG.warn("Dropped region split! Not in state good for SPLITTING; rs=" + rs);
971     return false;
972   }
973 
974   /**
975    * If the passed regionState is in PENDING_CLOSE, clean up PENDING_CLOSE
976    * state and convert it to SPLITTING instead.
977    * This can happen in the case where the master wants to close a region at the
978    * same time a regionserver starts a split.  The split won.  Clean out the old
979    * PENDING_CLOSE state.
980    * @param rs
981    * @return True if we converted from PENDING_CLOSE to SPLITTING
982    */
983   private boolean convertPendingCloseToSplitting(final RegionState rs) {
984     if (!rs.isPendingClose()) return false;
985     LOG.debug("Converting PENDING_CLOSE to SPLITTING; rs=" + rs);
986     regionStates.updateRegionState(
987       rs.getRegion(), RegionState.State.SPLITTING);
988     // Clean up existing state.  Clear from region plans seems all we
989     // have to do here by way of clean up of PENDING_CLOSE.
990     clearRegionPlan(rs.getRegion());
991     return true;
992   }
993 
994   /**
995    * Handle a ZK unassigned node transition triggered by HBCK repair tool.
996    * <p>
997    * This is handled in a separate code path because it breaks the normal rules.
998    * @param rt
999    */
1000   private void handleHBCK(RegionTransition rt) {
1001     String encodedName = HRegionInfo.encodeRegionName(rt.getRegionName());
1002     LOG.info("Handling HBCK triggered transition=" + rt.getEventType() +
1003       ", server=" + rt.getServerName() + ", region=" +
1004       HRegionInfo.prettyPrint(encodedName));
1005     RegionState regionState = regionStates.getRegionTransitionState(encodedName);
1006     switch (rt.getEventType()) {
1007       case M_ZK_REGION_OFFLINE:
1008         HRegionInfo regionInfo;
1009         if (regionState != null) {
1010           regionInfo = regionState.getRegion();
1011         } else {
1012           try {
1013             byte [] name = rt.getRegionName();
1014             Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(catalogTracker, name);
1015             regionInfo = p.getFirst();
1016           } catch (IOException e) {
1017             LOG.info("Exception reading META doing HBCK repair operation", e);
1018             return;
1019           }
1020         }
1021         LOG.info("HBCK repair is triggering assignment of region=" +
1022             regionInfo.getRegionNameAsString());
1023         // trigger assign, node is already in OFFLINE so don't need to update ZK
1024         assign(regionInfo, false);
1025         break;
1026 
1027       default:
1028         LOG.warn("Received unexpected region state from HBCK: " + rt.toString());
1029         break;
1030     }
1031 
1032   }
1033 
1034   // ZooKeeper events
1035 
1036   /**
1037    * New unassigned node has been created.
1038    *
1039    * <p>This happens when an RS begins the OPENING or CLOSING of a region by
1040    * creating an unassigned node.
1041    *
1042    * <p>When this happens we must:
1043    * <ol>
1044    *   <li>Watch the node for further events</li>
1045    *   <li>Read and handle the state in the node</li>
1046    * </ol>
1047    */
1048   @Override
1049   public void nodeCreated(String path) {
1050     handleAssignmentEvent(path);
1051   }
1052 
1053   /**
1054    * Existing unassigned node has had data changed.
1055    *
1056    * <p>This happens when an RS transitions from OFFLINE to OPENING, or between
1057    * OPENING/OPENED and CLOSING/CLOSED.
1058    *
1059    * <p>When this happens we must:
1060    * <ol>
1061    *   <li>Watch the node for further events</li>
1062    *   <li>Read and handle the state in the node</li>
1063    * </ol>
1064    */
1065   @Override
1066   public void nodeDataChanged(String path) {
1067     handleAssignmentEvent(path);
1068   }
1069 
1070 
1071   // We  don't want to have two events on the same region managed simultaneously.
1072   // For this reason, we need to wait if an event on the same region is currently in progress.
1073   // So we track the region names of the events in progress, and we keep a waiting list.
1074   private final Set<String> regionsInProgress = new HashSet<String>();
1075   // In a LinkedHashMultimap, the put order is kept when we retrieve the collection back. We need
1076   //  this as we want the events to be managed in the same order as we received them.
1077   private final LinkedHashMultimap <String, RegionRunnable>
1078       zkEventWorkerWaitingList = LinkedHashMultimap.create();
1079 
1080   /**
1081    * A specific runnable that works only on a region.
1082    */
1083   private static interface RegionRunnable extends Runnable{
1084     /**
1085      * @return - the name of the region it works on.
1086      */
1087     public String getRegionName();
1088   }
1089 
1090   /**
1091    * Submit a task, ensuring that there is only one task at a time working on a given region.
1092    * Order is respected.
1093    */
1094   protected void zkEventWorkersSubmit(final RegionRunnable regRunnable) {
1095 
1096     synchronized (regionsInProgress) {
1097       // If there is already a task for this region, we add it to the
1098       //  waiting list and return.
1099       if (regionsInProgress.contains(regRunnable.getRegionName())) {
1100         synchronized (zkEventWorkerWaitingList){
1101           zkEventWorkerWaitingList.put(regRunnable.getRegionName(), regRunnable);
1102         }
1103         return;
1104       }
1105 
1106       // No event in progress on this region => we can submit a new task immediately.
1107       regionsInProgress.add(regRunnable.getRegionName());
1108       zkEventWorkers.submit(new Runnable() {
1109         @Override
1110         public void run() {
1111           try {
1112             regRunnable.run();
1113           } finally {
1114             // now that we have finished, let's see if there is an event for the same region in the
1115             //  waiting list. If it's the case, we can now submit it to the pool.
1116             synchronized (regionsInProgress) {
1117               regionsInProgress.remove(regRunnable.getRegionName());
1118               synchronized (zkEventWorkerWaitingList) {
1119                 java.util.Set<RegionRunnable> waiting = zkEventWorkerWaitingList.get(
1120                     regRunnable.getRegionName());
1121                 if (!waiting.isEmpty()) {
1122                   // We want the first object only. The only way to get it is through an iterator.
1123                   RegionRunnable toSubmit = waiting.iterator().next();
1124                   zkEventWorkerWaitingList.remove(toSubmit.getRegionName(), toSubmit);
1125                   zkEventWorkersSubmit(toSubmit);
1126                 }
1127               }
1128             }
1129           }
1130         }
1131       });
1132     }
1133   }
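  // A minimal usage sketch: two events submitted for the same region run strictly one
  // after the other, while events for different regions may run in parallel on the
  // pool. The region name below is made up for illustration:
  //
  //   zkEventWorkersSubmit(new RegionRunnable() {
  //     @Override public String getRegionName() { return "abcd1234"; }
  //     @Override public void run() { /* first event for this region */ }
  //   });
  //   zkEventWorkersSubmit(new RegionRunnable() {
  //     @Override public String getRegionName() { return "abcd1234"; }
  //     @Override public void run() { /* queued; runs only after the first completes */ }
  //   });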
1134 
1135   @Override
1136   public void nodeDeleted(final String path) {
1137     if (path.startsWith(watcher.assignmentZNode)) {
1138       final String regionName = ZKAssign.getRegionName(watcher, path);
1139       zkEventWorkersSubmit(new RegionRunnable() {
1140         @Override
1141         public String getRegionName() {
1142           return regionName;
1143         }
1144 
1145         @Override
1146         public void run() {
1147           Lock lock = locker.acquireLock(regionName);
1148           try {
1149             RegionState rs = regionStates.getRegionTransitionState(regionName);
1150             if (rs == null) return;
1151 
1152             HRegionInfo regionInfo = rs.getRegion();
1153             if (rs.isSplit()) {
1154               LOG.debug("Ephemeral node deleted, regionserver crashed?, " +
1155                 "clearing from RIT; rs=" + rs);
1156               regionOffline(rs.getRegion());
1157             } else {
1158               String regionNameStr = regionInfo.getRegionNameAsString();
1159               LOG.debug("The znode of region " + regionNameStr
1160                 + " has been deleted.");
1161               if (rs.isOpened()) {
1162                 ServerName serverName = rs.getServerName();
1163                 regionOnline(regionInfo, serverName);
1164                 LOG.info("The master has opened the region "
1165                   + regionNameStr + " that was online on " + serverName);
1166                 boolean disabled = getZKTable().isDisablingOrDisabledTable(
1167                   regionInfo.getTableNameAsString());
1168                 if (!serverManager.isServerOnline(serverName) && !disabled) {
1169                   LOG.info("Opened region " + regionNameStr
1170                   + " but the region server is offline, reassign the region");
1171                   assign(regionInfo, true);
1172                 } else if (disabled) {
1173                   // if server is offline, no hurt to unassign again
1174                   LOG.info("Opened region " + regionNameStr
1175                   + " but this table is disabled, triggering close of region");
1176                   unassign(regionInfo);
1177                 }
1178               }
1179             }
1180           } finally {
1181             lock.unlock();
1182           }
1183         }
1184       });
1185     }
1186   }
1187 
1188   /**
1189    * New unassigned node has been created.
1190    *
1191    * <p>This happens when an RS begins the OPENING, SPLITTING or CLOSING of a
1192    * region by creating a znode.
1193    *
1194    * <p>When this happens we must:
1195    * <ol>
1196    *   <li>Watch the node for further children changed events</li>
1197    *   <li>Watch all new children for changed events</li>
1198    * </ol>
1199    */
1200   @Override
1201   public void nodeChildrenChanged(String path) {
1202     if (path.equals(watcher.assignmentZNode)) {
1203       zkEventWorkers.submit(new Runnable() {
1204         @Override
1205         public void run() {
1206           try {
1207             // Just make sure we see the changes for the new znodes
1208             List<String> children =
1209               ZKUtil.listChildrenAndWatchForNewChildren(
1210                 watcher, watcher.assignmentZNode);
1211             if (children != null) {
1212               Stat stat = new Stat();
1213               for (String child : children) {
1214                 // if region is in transition, we already have a watch
1215                 // on it, so no need to watch it again. So, as far as I know for now,
1216                 // this is needed to watch splitting nodes only.
1217                 if (!regionStates.isRegionInTransition(child)) {
1218                   stat.setVersion(0);
1219                   byte[] data = ZKAssign.getDataAndWatch(watcher,
1220                     ZKUtil.joinZNode(watcher.assignmentZNode, child), stat);
1221                   if (data != null && stat.getVersion() > 0) {
1222                     try {
1223                       RegionTransition rt = RegionTransition.parseFrom(data);
1224 
1225                       //See HBASE-7551, handle splitting too, in case we miss the node change event
1226                       if (rt.getEventType() == EventType.RS_ZK_REGION_SPLITTING) {
1227                         handleRegion(rt, stat.getVersion());
1228                       }
1229                     } catch (DeserializationException de) {
1230                       LOG.error("error getting data for " + child, de);
1231                     }
1232                   }
1233                 }
1234               }
1235             }
1236           } catch (KeeperException e) {
1237             server.abort("Unexpected ZK exception reading unassigned children", e);
1238           }
1239         }
1240       });
1241     }
1242   }
1243 
1244   /**
1245    * Marks the region as online.  Removes it from regions in transition and
1246    * updates the in-memory assignment information.
1247    * <p>
1248    * Used when a region has been successfully opened on a region server.
1249    * @param regionInfo
1250    * @param sn
1251    */
1252   void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1253     if (!serverManager.isServerOnline(sn)) {
1254       LOG.warn("A region was opened on a dead server, ServerName=" +
1255         sn + ", region=" + regionInfo.getEncodedName());
1256     }
1257 
1258     regionStates.regionOnline(regionInfo, sn);
1259 
1260     // Remove plan if one.
1261     clearRegionPlan(regionInfo);
1262     // Add the server to serversInUpdatingTimer
1263     addToServersInUpdatingTimer(sn);
1264   }
1265 
1266   /**
1267    * Pass the assignment event to a worker for processing.
1268    * Each worker is a single thread executor service.  The reason
1269    * for just one thread is to make sure all events for a given
1270    * region are processed in order.
1271    *
1272    * @param path
1273    */
1274   private void handleAssignmentEvent(final String path) {
1275     if (path.startsWith(watcher.assignmentZNode)) {
1276       final String regionName = ZKAssign.getRegionName(watcher, path);
1277 
1278       zkEventWorkersSubmit(new RegionRunnable() {
1279         @Override
1280         public String getRegionName() {
1281           return regionName;
1282         }
1283 
1284         @Override
1285         public void run() {
1286           try {
1287             Stat stat = new Stat();
1288             byte [] data = ZKAssign.getDataAndWatch(watcher, path, stat);
1289             if (data == null) return;
1290 
1291             RegionTransition rt = RegionTransition.parseFrom(data);
1292             handleRegion(rt, stat.getVersion());
1293           } catch (KeeperException e) {
1294             server.abort("Unexpected ZK exception reading unassigned node data", e);
1295           } catch (DeserializationException e) {
1296             server.abort("Unexpected exception deserializing node data", e);
1297           }
1298         }
1299       });
1300     }
1301   }
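
  // A minimal sketch of how per-region ordering can be preserved while still using
  // several workers (illustrative only; the actual routing performed by
  // zkEventWorkersSubmit is defined elsewhere in this class):
  //
  //   ExecutorService[] workers = ...;   // each element is a single-thread executor
  //   int idx = (regionName.hashCode() & Integer.MAX_VALUE) % workers.length;
  //   workers[idx].submit(runnable);     // same region -> same worker -> events run in order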
1302 
1303   /**
1304    * Add the server to the set serversInUpdatingTimer, then {@link TimerUpdater}
1305    * will update timers for this server in the background.
1306    * @param sn
1307    */
1308   private void addToServersInUpdatingTimer(final ServerName sn) {
1309     if (tomActivated){
1310       this.serversInUpdatingTimer.add(sn);
1311     }
1312   }
1313 
1314   /**
1315    * Touch timers for all regions in transition that have the passed
1316    * <code>sn</code> in common.
1317    * Call this method whenever a server checks in.  Doing so helps the case where
1318    * a new regionserver has joined the cluster and it has been given 1k regions to
1319    * open.  If this method is tickled every time a region reports a
1320    * successful open then the 1k-th region won't be timed out just because it is
1321    * sitting behind the open of 999 other regions.  This method is NOT used
1322    * as part of bulk assign -- there we have a different mechanism for extending
1323    * the regions-in-transition timer (we turn it off temporarily because
1324    * there is no region plan involved when bulk assigning).
1325    * @param sn
1326    */
1327   private void updateTimers(final ServerName sn) {
1328     Preconditions.checkState(tomActivated);
1329     if (sn == null) return;
1330 
1331     // This loop could be expensive.
1332     // First make a copy of the current regionPlans rather than hold the lock while
1333     // looping, because holding it can cause deadlock.  It's OK in this loop
1334     // if the map we're iterating over is a little stale.
1335     List<Map.Entry<String, RegionPlan>> rps;
1336     synchronized(this.regionPlans) {
1337       rps = new ArrayList<Map.Entry<String, RegionPlan>>(regionPlans.entrySet());
1338     }
1339 
1340     for (Map.Entry<String, RegionPlan> e : rps) {
1341       if (e.getValue() != null && e.getKey() != null && sn.equals(e.getValue().getDestination())) {
1342         RegionState regionState = regionStates.getRegionTransitionState(e.getKey());
1343         if (regionState != null) {
1344           regionState.updateTimestampToNow();
1345         }
1346       }
1347     }
1348   }
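
  // A minimal sketch of the copy-then-iterate pattern used above, assuming a map
  // shared between threads and guarded by its own monitor (not a verbatim excerpt):
  //
  //   List<Map.Entry<String, RegionPlan>> snapshot;
  //   synchronized (regionPlans) {
  //     snapshot = new ArrayList<Map.Entry<String, RegionPlan>>(regionPlans.entrySet());
  //   }
  //   for (Map.Entry<String, RegionPlan> e : snapshot) {
  //     // No monitor is held here, so touching other locked state cannot deadlock;
  //     // the snapshot may be slightly stale, which is acceptable for timer updates.
  //   }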
1349 
1350   /**
1351    * Marks the region as offline.  Removes it from regions in transition and
1352    * removes in-memory assignment information.
1353    * <p>
1354    * Used when a region has been closed and should remain closed.
1355    * @param regionInfo
1356    */
1357   public void regionOffline(final HRegionInfo regionInfo) {
1358     regionStates.regionOffline(regionInfo);
1359     removeClosedRegion(regionInfo);
1360     // remove the region plan as well just in case.
1361     clearRegionPlan(regionInfo);
1362   }
1363 
1364   public void offlineDisabledRegion(HRegionInfo regionInfo) {
1365     // Disabling so should not be reassigned, just delete the CLOSED node
1366     LOG.debug("Table being disabled so deleting ZK node and removing from " +
1367         "regions in transition, skipping assignment of region " +
1368           regionInfo.getRegionNameAsString());
1369     try {
1370       if (!ZKAssign.deleteClosedNode(watcher, regionInfo.getEncodedName())) {
1371         // Could also be in OFFLINE mode
1372         ZKAssign.deleteOfflineNode(watcher, regionInfo.getEncodedName());
1373       }
1374     } catch (KeeperException.NoNodeException nne) {
1375       LOG.debug("Tried to delete closed node for " + regionInfo + " but it " +
1376           "does not exist so just offlining");
1377     } catch (KeeperException e) {
1378       this.server.abort("Error deleting CLOSED node in ZK", e);
1379     }
1380     regionOffline(regionInfo);
1381   }
1382 
1383   // Assignment methods
1384 
1385   /**
1386    * Assigns the specified region.
1387    * <p>
1388    * If a RegionPlan is available with a valid destination then it will be used
1389    * to determine what server region is assigned to.  If no RegionPlan is
1390    * available, region will be assigned to a random available server.
1391    * <p>
1392    * Updates the RegionState and sends the OPEN RPC.
1393    * <p>
1394    * This will only succeed if the region is in transition and in a CLOSED or
1395    * OFFLINE state, or not in transition at all (checked in memory, not in ZK);
1396    * the chosen server must also be up and running (it may have just crashed!).  If the
1397    * in-memory checks pass, the zk node is forced to OFFLINE before assigning.
1398    *
1399    * @param region region to be assigned
1400    * @param setOfflineInZK whether ZK node should be created/transitioned to an
1401    *                       OFFLINE state before assigning the region
1402    */
1403   public void assign(HRegionInfo region, boolean setOfflineInZK) {
1404     assign(region, setOfflineInZK, false);
1405   }
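
  // A minimal usage sketch, assuming "am" is an initialized AssignmentManager and
  // "hri" is the HRegionInfo of a user region (both names are illustrative):
  //
  //   am.assign(hri, true);         // force the znode to OFFLINE, then send the OPEN RPC
  //   am.waitForAssignment(hri);    // block until RegionStates reports the region assigned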
1406 
1407   /**
1408    * Use care with forceNewPlan. It could cause double assignment.
1409    */
1410   public void assign(HRegionInfo region,
1411       boolean setOfflineInZK, boolean forceNewPlan) {
1412     if (!setOfflineInZK && isDisabledorDisablingRegionInRIT(region)) {
1413       return;
1414     }
1415     if (this.serverManager.isClusterShutdown()) {
1416       LOG.info("Cluster shutdown is set; skipping assign of " +
1417         region.getRegionNameAsString());
1418       return;
1419     }
1420     String encodedName = region.getEncodedName();
1421     Lock lock = locker.acquireLock(encodedName);
1422     try {
1423       RegionState state = forceRegionStateToOffline(region, forceNewPlan);
1424       if (state != null) {
1425         assign(state, setOfflineInZK, forceNewPlan);
1426       }
1427     } finally {
1428       lock.unlock();
1429     }
1430   }
1431 
1432   /**
1433    * Bulk assign regions to <code>destination</code>.
1434    * @param destination
1435    * @param regions Regions to assign.
1436    * @return true if successful
1437    */
1438   boolean assign(final ServerName destination,
1439       final List<HRegionInfo> regions) {
1440     int regionCount = regions.size();
1441     if (regionCount == 0) {
1442       return true;
1443     }
1444     LOG.debug("Bulk assigning " + regionCount + " region(s) to " +
1445       destination.toString());
1446 
1447     Set<String> encodedNames = new HashSet<String>(regionCount);
1448     for (HRegionInfo region : regions) {
1449       encodedNames.add(region.getEncodedName());
1450     }
1451 
1452     List<HRegionInfo> failedToOpenRegions = new ArrayList<HRegionInfo>();
1453     Map<String, Lock> locks = locker.acquireLocks(encodedNames);
1454     try {
1455       AtomicInteger counter = new AtomicInteger(0);
1456       Map<String, Integer> offlineNodesVersions = new ConcurrentHashMap<String, Integer>();
1457       OfflineCallback cb = new OfflineCallback(
1458         watcher, destination, counter, offlineNodesVersions);
1459       Map<String, RegionPlan> plans = new HashMap<String, RegionPlan>(regions.size());
1460       List<RegionState> states = new ArrayList<RegionState>(regions.size());
1461       for (HRegionInfo region : regions) {
1462         String encodedRegionName = region.getEncodedName();
1463         RegionState state = forceRegionStateToOffline(region, true);
1464         if (state != null && asyncSetOfflineInZooKeeper(state, cb, destination)) {
1465           RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
1466           plans.put(encodedRegionName, plan);
1467           states.add(state);
1468         } else {
1469           LOG.warn("failed to force region state to offline or "
1470             + "failed to set it offline in ZK, will reassign later: " + region);
1471           failedToOpenRegions.add(region); // assign individually later
1472           Lock lock = locks.remove(encodedRegionName);
1473           lock.unlock();
1474         }
1475       }
1476 
1477       // Wait until all unassigned nodes have been put up and watchers set.
1478       int total = states.size();
1479       for (int oldCounter = 0; !server.isStopped();) {
1480         int count = counter.get();
1481         if (oldCounter != count) {
1482           LOG.info(destination.toString() + " unassigned znodes=" + count +
1483             " of total=" + total);
1484           oldCounter = count;
1485         }
1486         if (count >= total) break;
1487         Threads.sleep(5);
1488       }
1489 
1490       if (server.isStopped()) {
1491         return false;
1492       }
1493 
1494       // Add region plans so we can update timers when a region is opened and
1495       // thereby reduce unnecessary timeouts on regions in transition.
1496       this.addPlans(plans);
1497 
1498       List<Pair<HRegionInfo, Integer>> regionOpenInfos =
1499         new ArrayList<Pair<HRegionInfo, Integer>>(states.size());
1500       for (RegionState state: states) {
1501         HRegionInfo region = state.getRegion();
1502         String encodedRegionName = region.getEncodedName();
1503         Integer nodeVersion = offlineNodesVersions.get(encodedRegionName);
1504         if (nodeVersion == null || nodeVersion == -1) {
1505           LOG.warn("failed to offline in zookeeper: " + region);
1506           failedToOpenRegions.add(region); // assign individually later
1507           Lock lock = locks.remove(encodedRegionName);
1508           lock.unlock();
1509         } else {
1510           regionStates.updateRegionState(region,
1511             RegionState.State.PENDING_OPEN, destination);
1512           regionOpenInfos.add(new Pair<HRegionInfo, Integer>(
1513             region, nodeVersion));
1514         }
1515       }
1516 
1517       // Move on to open regions.
1518       try {
1519         // Send OPEN RPC. If it fails on an IOE or RemoteException, the
1520         // TimeoutMonitor will pick up the pieces.
1521         long maxWaitTime = System.currentTimeMillis() +
1522           this.server.getConfiguration().
1523             getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1524         for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
1525           try {
1526             List<RegionOpeningState> regionOpeningStateList = serverManager
1527               .sendRegionOpen(destination, regionOpenInfos);
1528             if (regionOpeningStateList == null) {
1529               // Failed getting RPC connection to this server
1530               return false;
1531             }
1532             for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
1533               RegionOpeningState openingState = regionOpeningStateList.get(k);
1534               if (openingState != RegionOpeningState.OPENED) {
1535                 HRegionInfo region = regionOpenInfos.get(k).getFirst();
1536                 if (openingState == RegionOpeningState.ALREADY_OPENED) {
1537                   processAlreadyOpenedRegion(region, destination);
1538                 } else if (openingState == RegionOpeningState.FAILED_OPENING) {
1539                   // Failed opening this region, reassign it later
1540                   failedToOpenRegions.add(region);
1541                 } else {
1542                   LOG.warn("THIS SHOULD NOT HAPPEN: unknown opening state "
1543                     + openingState + " in assigning region " + region);
1544                 }
1545               }
1546             }
1547             break;
1548           } catch (IOException e) {
1549             if (e instanceof RemoteException) {
1550               e = ((RemoteException)e).unwrapRemoteException();
1551             }
1552             if (e instanceof RegionServerStoppedException) {
1553               LOG.warn("The region server was shut down", e);
1554               // No need to retry, the region server is a goner.
1555               return false;
1556             } else if (e instanceof ServerNotRunningYetException) {
1557               long now = System.currentTimeMillis();
1558               if (now < maxWaitTime) {
1559                 LOG.debug("Server is not yet up; waiting up to " +
1560                   (maxWaitTime - now) + "ms", e);
1561                 Thread.sleep(100);
1562                 i--; // reset the try count
1563                 continue;
1564               }
1565             } else if (e instanceof java.net.SocketTimeoutException
1566                 && this.serverManager.isServerOnline(destination)) {
1567               // In case socket is timed out and the region server is still online,
1568               // the openRegion RPC could have been accepted by the server and
1569               // just the response didn't go through.  So we will retry to
1570               // open the region on the same server.
1571               if (LOG.isDebugEnabled()) {
1572                 LOG.debug("Bulk assigner openRegion() to " + destination
1573                   + " has timed out, but the regions might"
1574                   + " already be opened on it.", e);
1575               }
1576               continue;
1577             }
1578             throw e;
1579           }
1580         }
1581       } catch (IOException e) {
1582         // Can be a socket timeout, EOF, NoRouteToHost, etc
1583         LOG.info("Unable to communicate with the region server in order" +
1584           " to assign regions", e);
1585         return false;
1586       } catch (InterruptedException e) {
1587         throw new RuntimeException(e);
1588       }
1589     } finally {
1590       for (Lock lock : locks.values()) {
1591         lock.unlock();
1592       }
1593     }
1594 
1595     if (!failedToOpenRegions.isEmpty()) {
1596       for (HRegionInfo region : failedToOpenRegions) {
1597         invokeAssign(region);
1598       }
1599     }
1600     LOG.debug("Bulk assigning done for " + destination.toString());
1601     return true;
1602   }
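
  // A minimal sketch of the wait-on-counter rendezvous used above for the asynchronous
  // ZooKeeper callbacks (a generic illustration, not a verbatim excerpt of OfflineCallback):
  //
  //   final AtomicInteger counter = new AtomicInteger(0);
  //   // ... issue n async operations whose callbacks each call counter.incrementAndGet() ...
  //   while (!server.isStopped() && counter.get() < n) {
  //     Threads.sleep(5);           // poll cheaply until every callback has fired
  //   }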
1603 
1604   /**
1605    * Send CLOSE RPC if the server is online, otherwise, offline the region.
1606    *
1607    * The RPC will be sent only to the region server found in the region state
1608    * if it is passed in; otherwise, to the src server specified. If the region
1609    * state is not specified, we don't update the region state at all; instead
1610    * we just send the RPC call. This is useful for some cleanup without
1611    * messing with the region states (see handleRegion, in the region-opened-
1612    * on-an-unexpected-server scenario, for an example).
1613    */
1614   private void unassign(final HRegionInfo region,
1615       final RegionState state, final int versionOfClosingNode,
1616       final ServerName dest, final boolean transitionInZK,
1617       final ServerName src) {
1618     ServerName server = src;
1619     if (state != null) {
1620       server = state.getServerName();
1621     }
1622     for (int i = 1; i <= this.maximumAttempts; i++) {
1623       // ClosedRegionHandler can remove the server from this.regions
1624       if (!serverManager.isServerOnline(server)) {
1625         if (transitionInZK) {
1626           // delete the node. if no node exists need not bother.
1627           deleteClosingOrClosedNode(region);
1628         }
1629         if (state != null) {
1630           regionOffline(region);
1631         }
1632         return;
1633       }
1634       try {
1635         // Send CLOSE RPC
1636         if (serverManager.sendRegionClose(server, region,
1637           versionOfClosingNode, dest, transitionInZK)) {
1638           LOG.debug("Sent CLOSE to " + server + " for region " +
1639             region.getRegionNameAsString());
1640           return;
1641         }
1642         // This should never happen. Currently the regionserver close always returns true.
1643         // TODO: this can now happen (0.96) if there is an exception in a coprocessor
1644         LOG.warn("Server " + server + " region CLOSE RPC returned false for " +
1645           region.getRegionNameAsString());
1646       } catch (Throwable t) {
1647         if (t instanceof RemoteException) {
1648           t = ((RemoteException)t).unwrapRemoteException();
1649         }
1650         if (t instanceof NotServingRegionException
1651             || t instanceof RegionServerStoppedException) {
1652           if (transitionInZK) {
1653             deleteClosingOrClosedNode(region);
1654           }
1655           if (state != null) {
1656             regionOffline(region);
1657           }
1658           return;
1659         } else if (state != null
1660             && t instanceof RegionAlreadyInTransitionException) {
1661           // RS is already processing this region, only need to update the timestamp
1662           LOG.debug("Updating the timestamp for " + state);
1663           state.updateTimestampToNow();
1664         }
1665         LOG.info("Server " + server + " returned " + t + " for "
1666           + region.getRegionNameAsString() + ", try=" + i
1667           + " of " + this.maximumAttempts, t);
1668         // Presume retry or server will expire.
1669       }
1670     }
1671     // Run out of attempts
1672     if (!tomActivated && state != null) {
1673       regionStates.updateRegionState(region, RegionState.State.FAILED_CLOSE);
1674     }
1675   }
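
  // A minimal sketch of the exception classification applied above when a CLOSE RPC
  // fails (illustrative only):
  //
  //   try {
  //     serverManager.sendRegionClose(server, region, version, dest, transitionInZK);
  //   } catch (Throwable t) {
  //     if (t instanceof RemoteException) {
  //       t = ((RemoteException) t).unwrapRemoteException();  // inspect the server-side cause
  //     }
  //     // NotServingRegionException / RegionServerStoppedException: the region is effectively
  //     // closed already, so clean up the znode and region state instead of retrying.
  //     // RegionAlreadyInTransitionException: the RS is already working on it; refresh the timer.
  //   }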
1676 
1677   /**
1678    * Set region to OFFLINE unless it is opening and forceNewPlan is false.
1679    */
1680   private RegionState forceRegionStateToOffline(
1681       final HRegionInfo region, final boolean forceNewPlan) {
1682     RegionState state = regionStates.getRegionState(region);
1683     if (state == null) {
1684       LOG.warn("Assigning a region not in region states: " + region);
1685       state = regionStates.createRegionState(region);
1686     } else {
1687       switch (state.getState()) {
1688       case OPEN:
1689       case OPENING:
1690       case PENDING_OPEN:
1691         if (!forceNewPlan) {
1692           LOG.debug("Attempting to assign region " +
1693             region + " but it is already in transition: " + state);
1694           return null;
1695         } // fall through: forceNewPlan is set, so unassign before forcing OFFLINE
1696       case CLOSING:
1697       case PENDING_CLOSE:
1698       case FAILED_CLOSE:
1699         unassign(region, state, -1, null, false, null);
1700         state = regionStates.getRegionState(region);
1701         if (state.isOffline()) break; // else fall through and force OFFLINE below
1702       case FAILED_OPEN:
1703       case CLOSED:
1704         LOG.debug("Forcing OFFLINE; was=" + state);
1705         state = regionStates.updateRegionState(
1706           region, RegionState.State.OFFLINE);
1707       case OFFLINE:
1708         break;
1709       default:
1710         LOG.error("Trying to assign region " + region
1711           + ", which is in state " + state);
1712         return null;
1713       }
1714     }
1715     return state;
1716   }
1717 
1718   /**
1719    * Caller must hold lock on the passed <code>state</code> object.
1720    * @param state
1721    * @param setOfflineInZK
1722    * @param forceNewPlan
1723    */
1724   private void assign(RegionState state,
1725       final boolean setOfflineInZK, final boolean forceNewPlan) {
1726     RegionState currentState = state;
1727     int versionOfOfflineNode = -1;
1728     RegionPlan plan = null;
1729     long maxRegionServerStartupWaitTime = -1;
1730     HRegionInfo region = state.getRegion();
1731     RegionOpeningState regionOpenState;
1732     for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
1733       if (plan == null) { // Get a server for the region at first
1734         plan = getRegionPlan(region, forceNewPlan);
1735       }
1736       if (plan == null) {
1737         LOG.warn("Unable to determine a plan to assign " + region);
1738         if (tomActivated){
1739           this.timeoutMonitor.setAllRegionServersOffline(true);
1740         } else {
1741           regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
1742         }
1743         return;
1744       }
1745       if (setOfflineInZK && versionOfOfflineNode == -1) {
1746         // get the version of the znode after setting it to OFFLINE.
1747         // versionOfOfflineNode will be -1 if the znode was not set to OFFLINE
1748         versionOfOfflineNode = setOfflineInZooKeeper(currentState, plan.getDestination());
1749         if (versionOfOfflineNode != -1) {
1750           if (isDisabledorDisablingRegionInRIT(region)) {
1751             return;
1752           }
1753           // In case of assignment from EnableTableHandler, the table state is ENABLING. Either way,
1754           // EnableTableHandler will set ENABLED after assigning all the table regions. If we
1755           // set ENABLED directly, the client API may think the table is enabled too early.
1756           // When all the regions are added directly into .META. and we then call
1757           // assignRegion, we need to make the table ENABLED. In such a case the table
1758           // will not be in the ENABLING or ENABLED state.
1759           String tableName = region.getTableNameAsString();
1760           if (!zkTable.isEnablingTable(tableName) && !zkTable.isEnabledTable(tableName)) {
1761             LOG.debug("Setting table " + tableName + " to ENABLED state.");
1762             setEnabledTable(tableName);
1763           }
1764         }
1765       }
1766       if (setOfflineInZK && versionOfOfflineNode == -1) {
1767         LOG.info("Unable to set offline in ZooKeeper to assign " + region);
1768         // Setting offline in ZK must have failed due to ZK racing or some
1769         // exception which may make the server abort. If it is ZK racing,
1770         // we should retry: since we already reset the region state, any
1771         // existing (re)assignment will fail anyway.
1772         if (!server.isAborted()) {
1773           continue;
1774         }
1775       }
1776       if (this.server.isStopped() || this.server.isAborted()) {
1777         LOG.debug("Server stopped/aborted; skipping assign of " + region);
1778         return;
1779       }
1780       LOG.info("Assigning region " + region.getRegionNameAsString() +
1781           " to " + plan.getDestination().toString());
1782       // Transition RegionState to PENDING_OPEN
1783       currentState = regionStates.updateRegionState(region,
1784           RegionState.State.PENDING_OPEN, plan.getDestination());
1785 
1786       boolean needNewPlan;
1787       final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() +
1788           " to " + plan.getDestination();
1789       try {
1790         regionOpenState = serverManager.sendRegionOpen(
1791             plan.getDestination(), region, versionOfOfflineNode);
1792 
1793         if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
1794           // Failed opening this region, looping again on a new server.
1795           needNewPlan = true;
1796           LOG.warn(assignMsg + ", regionserver says 'FAILED_OPENING', " +
1797               " trying to assign elsewhere instead; " +
1798               "try=" + i + " of " + this.maximumAttempts);
1799         } else {
1800           // we're done
1801           if (regionOpenState == RegionOpeningState.ALREADY_OPENED) {
1802             processAlreadyOpenedRegion(region, plan.getDestination());
1803           }
1804           return;
1805         }
1806 
1807       } catch (Throwable t) {
1808         if (t instanceof RemoteException) {
1809           t = ((RemoteException) t).unwrapRemoteException();
1810         }
1811 
1812         // Should we wait a little before retrying? If the server is starting, yes.
1813         // If the region is already in transition, yes as well: we want to be sure that
1814         //  the region will get opened, but we don't want a double assignment.
1815         boolean hold = (t instanceof RegionAlreadyInTransitionException ||
1816             t instanceof ServerNotRunningYetException);
1817 
1818         // In case socket is timed out and the region server is still online,
1819         // the openRegion RPC could have been accepted by the server and
1820         // just the response didn't go through.  So we will retry to
1821         // open the region on the same server to avoid possible
1822         // double assignment.
1823         boolean retry = !hold && (t instanceof java.net.SocketTimeoutException
1824             && this.serverManager.isServerOnline(plan.getDestination()));
1825 
1826 
1827         if (hold) {
1828           LOG.warn(assignMsg + ", waiting a little before trying on the same region server " +
1829               "try=" + i + " of " + this.maximumAttempts, t);
1830 
1831           if (maxRegionServerStartupWaitTime < 0) {
1832             maxRegionServerStartupWaitTime = EnvironmentEdgeManager.currentTimeMillis() +
1833                 this.server.getConfiguration().
1834                     getLong("hbase.regionserver.rpc.startup.waittime", 60000);
1835           }
1836           try {
1837             long now = EnvironmentEdgeManager.currentTimeMillis();
1838             if (now < maxRegionServerStartupWaitTime) {
1839               LOG.debug("Server is not yet up; waiting up to " +
1840                   (maxRegionServerStartupWaitTime - now) + "ms", t);
1841               Thread.sleep(100);
1842               i--; // reset the try count
1843               needNewPlan = false;
1844             } else {
1845               LOG.debug("Server is not up for a while; try a new one", t);
1846               needNewPlan = true;
1847             }
1848           } catch (InterruptedException ie) {
1849             LOG.warn("Failed to assign "
1850                 + region.getRegionNameAsString() + " since interrupted", ie);
1851             Thread.currentThread().interrupt();
1852             if (!tomActivated) {
1853               regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
1854             }
1855             return;
1856           }
1857         } else if (retry) {
1858           needNewPlan = false;
1859           LOG.warn(assignMsg + ", trying to assign to the same region server " +
1860               "try=" + i + " of " + this.maximumAttempts, t);
1861         } else {
1862           needNewPlan = true;
1863           LOG.warn(assignMsg + ", trying to assign elsewhere instead;" +
1864               " try=" + i + " of " + this.maximumAttempts, t);
1865         }
1866       }
1867 
1868       if (i == this.maximumAttempts) {
1869         // Don't reset the region state or get a new plan any more.
1870         // This is the last try.
1871         continue;
1872       }
1873 
1874       // If the region opened on the destination of the present plan, reassigning to a new
1875       // RS may cause double assignments. In case of RegionAlreadyInTransitionException
1876       // we reassign to the same RS.
1877       if (needNewPlan) {
1878         // Force a new plan and reassign. Will return null if no servers.
1879         // The new plan could be the same as the existing plan since we don't
1880         // exclude the server of the original plan, which should not be
1881         // excluded since it could be the only server up now.
1882         RegionPlan newPlan = getRegionPlan(region, true);
1883 
1884         if (newPlan == null) {
1885           if (tomActivated) {
1886             this.timeoutMonitor.setAllRegionServersOffline(true);
1887           } else {
1888             regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
1889           }
1890           LOG.warn("Unable to find a viable location to assign region " +
1891               region.getRegionNameAsString());
1892           return;
1893         }
1894 
1895         if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
1896           // Clean out the plan we failed to execute and that doesn't look like it'll
1897           // succeed anyway; we need a new plan!
1898           // Transition back to OFFLINE
1899           currentState = regionStates.updateRegionState(region, RegionState.State.OFFLINE);
1900           versionOfOfflineNode = -1;
1901           plan = newPlan;
1902         }
1903       }
1904     }
1905     // Run out of attempts
1906     if (!tomActivated) {
1907       regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
1908     }
1909   }
1910 
1911   private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
1912     // Remove region from in-memory transition and unassigned node from ZK
1913     // While trying to enable the table, the regions of the table were
1914     // found to be already open.
1915     LOG.debug("ALREADY_OPENED region " + region.getRegionNameAsString()
1916         + " to " + sn);
1917     String encodedRegionName = region.getEncodedName();
1918     try {
1919       ZKAssign.deleteOfflineNode(watcher, encodedRegionName);
1920     } catch (KeeperException.NoNodeException e) {
1921       if (LOG.isDebugEnabled()) {
1922         LOG.debug("The unassigned node " + encodedRegionName
1923             + " does not exist.");
1924       }
1925     } catch (KeeperException e) {
1926       server.abort(
1927           "Error deleting OFFLINED node in ZK for transition ZK node ("
1928               + encodedRegionName + ")", e);
1929     }
1930 
1931     regionStates.regionOnline(region, sn);
1932   }
1933 
1934   private boolean isDisabledorDisablingRegionInRIT(final HRegionInfo region) {
1935     String tableName = region.getTableNameAsString();
1936     boolean disabled = this.zkTable.isDisabledTable(tableName);
1937     if (disabled || this.zkTable.isDisablingTable(tableName)) {
1938       LOG.info("Table " + tableName + (disabled ? " disabled;" : " disabling;") +
1939         " skipping assign of " + region.getRegionNameAsString());
1940       offlineDisabledRegion(region);
1941       return true;
1942     }
1943     return false;
1944   }
1945 
1946   /**
1947    * Set the region as OFFLINE up in ZooKeeper.
1948    *
1949    * @param state
1950    * @return the version of the offline node if setting of the OFFLINE node was
1951    *         successful, -1 otherwise.
1952    */
1953   private int setOfflineInZooKeeper(final RegionState state, final ServerName destination) {
1954     if (!state.isClosed() && !state.isOffline()) {
1955       String msg = "Unexpected state: " + state + " .. Cannot transition it to OFFLINE.";
1956       this.server.abort(msg, new IllegalStateException(msg));
1957       return -1;
1958     }
1959     regionStates.updateRegionState(state.getRegion(),
1960       RegionState.State.OFFLINE);
1961     int versionOfOfflineNode;
1962     try {
1963       // get the version after setting the znode to OFFLINE
1964       versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(watcher,
1965         state.getRegion(), destination);
1966       if (versionOfOfflineNode == -1) {
1967         LOG.warn("Attempted to create/force node into OFFLINE state before "
1968             + "completing assignment but failed to do so for " + state);
1969         return -1;
1970       }
1971     } catch (KeeperException e) {
1972       server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
1973       return -1;
1974     }
1975     return versionOfOfflineNode;
1976   }
1977 
1978   /**
1979    * @param region the region to assign
1980    * @return Plan for passed <code>region</code> (If none currently, it creates one or
1981    * if no servers to assign, it returns null).
1982    */
1983   private RegionPlan getRegionPlan(final HRegionInfo region,
1984       final boolean forceNewPlan) {
1985     return getRegionPlan(region, null, forceNewPlan);
1986   }
1987 
1988   /**
1989    * @param region the region to assign
1990    * @param serverToExclude Server to exclude (we know its bad). Pass null if
1991    * all servers are thought to be assignable.
1992    * @param forceNewPlan If true, then if an existing plan exists, a new plan
1993    * will be generated.
1994    * @return Plan for passed <code>region</code> (If none currently, it creates one or
1995    * if no servers to assign, it returns null).
1996    */
1997   private RegionPlan getRegionPlan(final HRegionInfo region,
1998       final ServerName serverToExclude, final boolean forceNewPlan) {
1999     // Pickup existing plan or make a new one
2000     final String encodedName = region.getEncodedName();
2001     final List<ServerName> destServers =
2002       serverManager.createDestinationServersList(serverToExclude);
2003 
2004     if (destServers.isEmpty()){
2005       LOG.warn("Can't move the region " + encodedName +
2006         ", there is no destination server available.");
2007       return null;
2008     }
2009 
2010     RegionPlan randomPlan = null;
2011     boolean newPlan = false;
2012     RegionPlan existingPlan;
2013 
2014     synchronized (this.regionPlans) {
2015       existingPlan = this.regionPlans.get(encodedName);
2016 
2017       if (existingPlan != null && existingPlan.getDestination() != null) {
2018         LOG.debug("Found an existing plan for " + region.getRegionNameAsString()
2019           + "; destination server is " + existingPlan.getDestination() +
2020             ", accepted as a dest server = " + destServers.contains(existingPlan.getDestination()));
2021       }
2022 
2023       if (forceNewPlan
2024           || existingPlan == null
2025           || existingPlan.getDestination() == null
2026           || !destServers.contains(existingPlan.getDestination())) {
2027         newPlan = true;
2028         randomPlan = new RegionPlan(region, null,
2029             balancer.randomAssignment(region, destServers));
2030         this.regionPlans.put(encodedName, randomPlan);
2031       }
2032     }
2033 
2034     if (newPlan) {
2035       if (randomPlan.getDestination() == null) {
2036         LOG.warn("Can't find a destination for region " + encodedName);
2037         return null;
2038       }
2039       LOG.debug("No previous transition plan was found (or we are ignoring " +
2040         "an existing plan) for " + region.getRegionNameAsString() +
2041         " so generated a random one; " + randomPlan + "; " +
2042         serverManager.countOfRegionServers() +
2043         " (online=" + serverManager.getOnlineServers().size() +
2044         ", available=" + destServers.size() + ") available servers" +
2045         ", forceNewPlan=" + forceNewPlan);
2046       return randomPlan;
2047     }
2048     LOG.debug("Using pre-existing plan for region " +
2049       region.getRegionNameAsString() + "; plan=" + existingPlan);
2050     return existingPlan;
2051   }
2052 
2053   /**
2054    * Unassign the list of regions. Configuration knobs:
2055    * hbase.bulk.waitbetween.reopen indicates the number of milliseconds to
2056    * wait before unassigning another region from this region server.
2057    *
2058    * @param regions
2059    * @throws InterruptedException
2060    */
2061   public void unassign(List<HRegionInfo> regions) {
2062     int waitTime = this.server.getConfiguration().getInt(
2063         "hbase.bulk.waitbetween.reopen", 0);
2064     for (HRegionInfo region : regions) {
2065       if (regionStates.isRegionInTransition(region))
2066         continue;
2067       unassign(region, false);
2068       while (regionStates.isRegionInTransition(region)) {
2069         try {
2070           Thread.sleep(10);
2071         } catch (InterruptedException e) {
2072           // Do nothing, continue
2073         }
2074       }
2075       if (waitTime > 0)
2076         try {
2077           Thread.sleep(waitTime);
2078         } catch (InterruptedException e) {
2079           // Do nothing, continue
2080         }
2081     }
2082   }
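
  // A minimal configuration sketch for the knob referenced above, assuming a writable
  // Configuration set up before the master is started:
  //
  //   Configuration conf = ...;
  //   conf.setInt("hbase.bulk.waitbetween.reopen", 100);  // pause 100 ms between unassigns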
2083 
2084   /**
2085    * Unassigns the specified region.
2086    * <p>
2087    * Updates the RegionState and sends the CLOSE RPC unless region is being
2088    * split by regionserver; then the unassign fails (silently) because we
2089    * presume the region being unassigned no longer exists (it has been split out
2090    * of existence). TODO: What to do if split fails and is rolled back and
2091    * parent is revivified?
2092    * <p>
2093    * If a RegionPlan is already set, it will remain.
2094    *
2095    * @param region region to be unassigned
2096    */
2097   public void unassign(HRegionInfo region) {
2098     unassign(region, false);
2099   }
2100 
2101 
2102   /**
2103    * Unassigns the specified region.
2104    * <p>
2105    * Updates the RegionState and sends the CLOSE RPC unless region is being
2106    * split by regionserver; then the unassign fails (silently) because we
2107    * presume the region being unassigned no longer exists (it has been split out
2108    * of existence). TODO: What to do if split fails and is rolled back and
2109    * parent is revivified?
2110    * <p>
2111    * If a RegionPlan is already set, it will remain.
2112    *
2113    * @param region region to be unassigned
2114    * @param force if region should be closed even if already closing
2115    */
2116   public void unassign(HRegionInfo region, boolean force, ServerName dest) {
2117     // TODO: Method needs refactoring.  Ugly buried returns throughout.  Beware!
2118     LOG.debug("Starting unassignment of region " +
2119       region.getRegionNameAsString() + " (offlining)");
2120 
2121     String encodedName = region.getEncodedName();
2122     // Grab the state of this region and synchronize on it
2123     int versionOfClosingNode = -1;
2124     // We need a lock here as we're going to do a put later and we don't want
2125     //  multiple states created concurrently.
2126     ReentrantLock lock = locker.acquireLock(encodedName);
2127     RegionState state = regionStates.getRegionTransitionState(encodedName);
2128     try {
2129       if (state == null) {
2130         // Create the znode in CLOSING state
2131         try {
2132           state = regionStates.getRegionState(region);
2133           if (state == null || state.getServerName() == null) {
2134             // We don't know where the region is, offline it.
2135             // No need to send CLOSE RPC
2136             regionOffline(region);
2137             return;
2138           }
2139           versionOfClosingNode = ZKAssign.createNodeClosing(
2140             watcher, region, state.getServerName());
2141           if (versionOfClosingNode == -1) {
2142             LOG.debug("Attempting to unassign region " +
2143                 region.getRegionNameAsString() + " but ZK closing node "
2144                 + "can't be created.");
2145             return;
2146           }
2147         } catch (KeeperException e) {
2148           if (e instanceof NodeExistsException) {
2149             // Handle race between master initiated close and regionserver
2150             // orchestrated splitting. See if existing node is in a
2151             // SPLITTING or SPLIT state.  If so, the regionserver started
2152             // an op on node before we could get our CLOSING in.  Deal.
2153             NodeExistsException nee = (NodeExistsException)e;
2154             String path = nee.getPath();
2155             try {
2156               if (isSplitOrSplittingOrMergeOrMerging(path)) {
2157                 LOG.debug(path + " is SPLIT or SPLITTING or MERGE or MERGING; " +
2158                   "skipping unassign because region no longer exists -- its split or merge");
2159                 return;
2160               }
2161             } catch (KeeperException.NoNodeException ke) {
2162               LOG.warn("Failed getData on SPLITTING/SPLIT at " + path +
2163                 "; presuming split and that the region to unassign, " +
2164                 encodedName + ", no longer exists -- confirm", ke);
2165               return;
2166             } catch (KeeperException ke) {
2167               LOG.error("Unexpected zk state", ke);
2168             } catch (DeserializationException de) {
2169               LOG.error("Failed parse", de);
2170             }
2171           }
2172           // If we get here, we don't understand what's going on -- abort.
2173           server.abort("Unexpected ZK exception creating node CLOSING", e);
2174           return;
2175         }
2176         state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
2177       } else if (state.isFailedOpen()) {
2178         // The region is not open yet
2179         regionOffline(region);
2180         return;
2181       } else if (force && (state.isPendingClose()
2182           || state.isClosing() || state.isFailedClose())) {
2183         LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() +
2184           " which is already " + state.getState()  +
2185           " but forcing to send a CLOSE RPC again ");
2186         if (state.isFailedClose()) {
2187           state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
2188         }
2189         state.updateTimestampToNow();
2190       } else {
2191         LOG.debug("Attempting to unassign region " +
2192           region.getRegionNameAsString() + " but it is " +
2193           "already in transition (" + state.getState() + ", force=" + force + ")");
2194         return;
2195       }
2196 
2197       unassign(region, state, versionOfClosingNode, dest, true, null);
2198     } finally {
2199       lock.unlock();
2200     }
2201   }
2202 
2203   public void unassign(HRegionInfo region, boolean force){
2204      unassign(region, force, null);
2205   }
2206 
2207   /**
2208    * @param region regioninfo of znode to be deleted.
2209    */
2210   public void deleteClosingOrClosedNode(HRegionInfo region) {
2211     String encodedName = region.getEncodedName();
2212     try {
2213       if (!ZKAssign.deleteNode(watcher, encodedName,
2214           EventType.M_ZK_REGION_CLOSING)) {
2215         boolean deleteNode = ZKAssign.deleteNode(watcher,
2216           encodedName, EventType.RS_ZK_REGION_CLOSED);
2217         // TODO: We don't abort if the delete node returns false. Is there any
2218         // such corner case?
2219         if (!deleteNode) {
2220           LOG.error("The deletion of the CLOSED node for the region "
2221             + encodedName + " returned " + deleteNode);
2222         }
2223       }
2224     } catch (NoNodeException e) {
2225       LOG.debug("CLOSING/CLOSED node for the region " + encodedName
2226         + " already deleted");
2227     } catch (KeeperException ke) {
2228       server.abort(
2229         "Unexpected ZK exception deleting node CLOSING/CLOSED for the region "
2230           + encodedName, ke);
2231     }
2232   }
2233 
2234   /**
2235    * @param path
2236    * @return True if znode is in SPLIT or SPLITTING or MERGE or MERGING state.
2237    * @throws KeeperException Can happen if the znode went away in meantime.
2238    * @throws DeserializationException
2239    */
2240   private boolean isSplitOrSplittingOrMergeOrMerging(final String path)
2241       throws KeeperException, DeserializationException {
2242     boolean result = false;
2243     // This may fail if the SPLIT or SPLITTING or MERGE or MERGING znode gets
2244     // cleaned up before we can get data from it.
2245     byte [] data = ZKAssign.getData(watcher, path);
2246     if (data == null) return false;
2247     RegionTransition rt = RegionTransition.parseFrom(data);
2248     switch (rt.getEventType()) {
2249     case RS_ZK_REGION_SPLIT:
2250     case RS_ZK_REGION_SPLITTING:
2251     case RS_ZK_REGION_MERGE:
2252     case RS_ZK_REGION_MERGING:
2253       result = true;
2254       break;
2255     default:
2256       break;
2257     }
2258     return result;
2259   }
2260 
2261   /**
2262    * Waits until the specified region has completed assignment.
2263    * <p>
2264    * If the region is already assigned, returns immediately.  Otherwise, method
2265    * blocks until the region is assigned.
2266    * @param regionInfo region to wait on assignment for
2267    * @throws InterruptedException
2268    */
2269   public boolean waitForAssignment(HRegionInfo regionInfo)
2270       throws InterruptedException {
2271     while (!regionStates.isRegionAssigned(regionInfo)) {
2272       if (regionStates.isRegionFailedToOpen(regionInfo)
2273           || this.server.isStopped()) {
2274         return false;
2275       }
2276 
2277       // We should receive a notification, but it's
2278       //  better to have a timeout to recheck the condition here:
2279       //  it lowers the impact of a race condition if any
2280       regionStates.waitForUpdate(100);
2281     }
2282     return true;
2283   }
2284 
2285   /**
2286    * Assigns the META region.
2287    * <p>
2288    * Assumes that META is currently closed and is not being actively served by
2289    * any RegionServer.
2290    * <p>
2291    * Forcibly unsets the current meta region location in ZooKeeper and assigns
2292    * META to a random RegionServer.
2293    * @throws KeeperException
2294    */
2295   public void assignMeta() throws KeeperException {
2296     MetaRegionTracker.deleteMetaLocation(this.watcher);
2297     assign(HRegionInfo.FIRST_META_REGIONINFO, true);
2298   }
2299 
2300   /**
2301    * Assigns specified regions retaining assignments, if any.
2302    * <p>
2303    * This is a synchronous call and will return once every region has been
2304    * assigned.  If anything fails, an exception is thrown
2305    * @throws InterruptedException
2306    * @throws IOException
2307    */
2308   public void assign(Map<HRegionInfo, ServerName> regions)
2309         throws IOException, InterruptedException {
2310     if (regions == null || regions.isEmpty()) {
2311       return;
2312     }
2313     List<ServerName> servers = serverManager.createDestinationServersList();
2314     if (servers == null || servers.isEmpty()) {
2315       throw new IOException("Found no destination server to assign region(s)");
2316     }
2317 
2318     // Reuse existing assignment info
2319     Map<ServerName, List<HRegionInfo>> bulkPlan =
2320       balancer.retainAssignment(regions, servers);
2321 
2322     assign(regions.size(), servers.size(),
2323       "retainAssignment=true", bulkPlan);
2324   }
2325 
2326   /**
2327    * Assigns specified regions round robin, if any.
2328    * <p>
2329    * This is a synchronous call and will return once every region has been
2330    * assigned.  If anything fails, an exception is thrown
2331    * @throws InterruptedException
2332    * @throws IOException
2333    */
2334   public void assign(List<HRegionInfo> regions)
2335         throws IOException, InterruptedException {
2336     if (regions == null || regions.isEmpty()) {
2337       return;
2338     }
2339 
2340     List<ServerName> servers = serverManager.createDestinationServersList();
2341     if (servers == null || servers.isEmpty()) {
2342       throw new IOException("Found no destination server to assign region(s)");
2343     }
2344 
2345     // Generate a round-robin bulk assignment plan
2346     Map<ServerName, List<HRegionInfo>> bulkPlan
2347       = balancer.roundRobinAssignment(regions, servers);
2348 
2349     assign(regions.size(), servers.size(),
2350       "round-robin=true", bulkPlan);
2351   }
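
  // A minimal sketch contrasting the two bulk entry points above, assuming "am" is this
  // AssignmentManager and the map/list are already populated (names are illustrative):
  //
  //   am.assign(regionsToServers);  // Map<HRegionInfo, ServerName>: retain prior locations
  //   am.assign(regionList);        // List<HRegionInfo>: round-robin across live servers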
2352 
2353   private void assign(int regions, int totalServers,
2354       String message, Map<ServerName, List<HRegionInfo>> bulkPlan)
2355           throws InterruptedException, IOException {
2356 
2357     int servers = bulkPlan.size();
2358     if (servers == 1 || (regions < bulkAssignThresholdRegions
2359         && servers < bulkAssignThresholdServers)) {
2360 
2361       // Not using bulk assignment.  This can be more efficient in a small
2362       // cluster, especially a mini cluster for testing, so that tests won't time out
2363       LOG.info("Not using bulk assigning since we are assigning only "
2364         + regions + " region(s) to " + servers + " server(s)");
2365 
2366       for (Map.Entry<ServerName, List<HRegionInfo>> plan: bulkPlan.entrySet()) {
2367         assign(plan.getKey(), plan.getValue());
2368       }
2369     } else {
2370       LOG.info("Bulk assigning " + regions + " region(s) across "
2371         + totalServers + " server(s), " + message);
2372 
2373       // Use fixed count thread pool assigning.
2374       BulkAssigner ba = new GeneralBulkAssigner(
2375         this.server, bulkPlan, this, bulkAssignWaitTillAllAssigned);
2376       ba.bulkAssign();
2377       LOG.info("Bulk assigning done");
2378     }
2379   }
2380 
2381   /**
2382    * Assigns all user regions, if any exist.  Used during cluster startup.
2383    * <p>
2384    * This is a synchronous call and will return once every region has been
2385    * assigned.  If anything fails, an exception is thrown and the cluster
2386    * should be shutdown.
2387    * @throws InterruptedException
2388    * @throws IOException
2389    * @throws KeeperException
2390    */
2391   private void assignAllUserRegions()
2392       throws IOException, InterruptedException, KeeperException {
2393     // Cleanup any existing ZK nodes and start watching
2394     ZKAssign.deleteAllNodes(watcher);
2395     ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
2396       this.watcher.assignmentZNode);
2397     failoverCleanupDone();
2398 
2399     // Skip assignment for regions of tables in DISABLING state because, during clean cluster startup,
2400     // no RS is alive and the regions map doesn't have any information about the regions either.
2401     // See HBASE-6281.
2402     Set<String> disabledOrDisablingOrEnabling = ZKTable.getDisabledOrDisablingTables(watcher);
2403     disabledOrDisablingOrEnabling.addAll(ZKTable.getEnablingTables(watcher));
2404     // Scan META for all user regions, skipping any disabled tables
2405     Map<HRegionInfo, ServerName> allRegions = MetaReader.fullScan(
2406       catalogTracker, disabledOrDisablingOrEnabling, true);
2407     if (allRegions == null || allRegions.isEmpty()) return;
2408 
2409     // Determine what type of assignment to do on startup
2410     boolean retainAssignment = server.getConfiguration().
2411       getBoolean("hbase.master.startup.retainassign", true);
2412 
2413     if (retainAssignment) {
2414       assign(allRegions);
2415     } else {
2416       List<HRegionInfo> regions = new ArrayList<HRegionInfo>(allRegions.keySet());
2417       assign(regions);
2418     }
2419 
2420     for (HRegionInfo hri : allRegions.keySet()) {
2421       String tableName = hri.getTableNameAsString();
2422       if (!zkTable.isEnabledTable(tableName)) {
2423         setEnabledTable(tableName);
2424       }
2425     }
2426   }
2427 
2428   /**
2429    * Wait until no regions in transition.
2430    * @param timeout How long to wait.
2431    * @return True if nothing in regions in transition.
2432    * @throws InterruptedException
2433    */
2434   boolean waitUntilNoRegionsInTransition(final long timeout)
2435       throws InterruptedException {
2436     // Blocks until there are no regions in transition. It is possible that there
2437     // are regions in transition immediately after this returns, but it guarantees
2438     // that if it returns without an exception, there was a period of time
2439     // with no regions in transition from the point-of-view of the in-memory
2440     // state of the Master.
2442     final long endTime = System.currentTimeMillis() + timeout;
2443 
2444     while (!this.server.isStopped() && regionStates.isRegionsInTransition()
2445         && endTime > System.currentTimeMillis()) {
2446       regionStates.waitForUpdate(100);
2447     }
2448 
2449     return !regionStates.isRegionsInTransition();
2450   }
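
  // A minimal usage sketch (illustrative): give regions up to a minute to settle before
  // proceeding, e.g. as part of an orderly operation such as a rolling restart:
  //
  //   if (!am.waitUntilNoRegionsInTransition(60 * 1000)) {
  //     LOG.warn("Regions are still in transition after waiting; proceeding anyway");
  //   }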
2451 
2452   /**
2453    * Rebuild the list of user regions and assignment information.
2454    * <p>
2455    * Returns a map of servers that are not found to be online and the regions
2456    * they were hosting.
2457    * @return map of servers not online to their assigned regions, as stored
2458    *         in META
2459    * @throws IOException
2460    */
2461   Map<ServerName, List<HRegionInfo>> rebuildUserRegions() throws IOException, KeeperException {
2462     Set<String> enablingTables = ZKTable.getEnablingTables(watcher);
2463     Set<String> disabledOrEnablingTables = ZKTable.getDisabledTables(watcher);
2464     disabledOrEnablingTables.addAll(enablingTables);
2465     Set<String> disabledOrDisablingOrEnabling = ZKTable.getDisablingTables(watcher);
2466     disabledOrDisablingOrEnabling.addAll(disabledOrEnablingTables);
2467 
2468     // Region assignment from META
2469     List<Result> results = MetaReader.fullScan(this.catalogTracker);
2470     // Get any new but slow-to-check-in region servers that joined the cluster
2471     Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
2472     // Map of offline servers and their regions to be returned
2473     Map<ServerName, List<HRegionInfo>> offlineServers =
2474       new TreeMap<ServerName, List<HRegionInfo>>();
2475     // Iterate regions in META
2476     for (Result result : results) {
2477       Pair<HRegionInfo, ServerName> region = HRegionInfo.getHRegionInfoAndServerName(result);
2478       if (region == null) continue;
2479       HRegionInfo regionInfo = region.getFirst();
2480       ServerName regionLocation = region.getSecond();
2481       if (regionInfo == null) continue;
2482       regionStates.createRegionState(regionInfo);
2483       String tableName = regionInfo.getTableNameAsString();
2484       if (regionLocation == null) {
2485         // regionLocation could be null if createTable didn't finish properly.
2486         // This can happen when the HMaster restarts while createTable is in progress:
2487         // Some regions have been added to .META., but have not been assigned.
2488         // When this happens, the region's table must be in ENABLING state.
2489         // It can't be in ENABLED state as that is set when all regions are
2490         // assigned.
2491         // It can't be in DISABLING state, because DISABLING state transitions
2492         // from ENABLED state when application calls disableTable.
2493         // It can't be in DISABLED state, because DISABLED states transitions
2494         // from DISABLING state.
2495         if (!enablingTables.contains(tableName)) {
2496           LOG.warn("Region " + regionInfo.getEncodedName() +
2497             " has null regionLocation." + " But its table " + tableName +
2498             " isn't in ENABLING state.");
2499         }
2500       } else if (!onlineServers.contains(regionLocation)) {
2501         // Region is located on a server that isn't online
2502         List<HRegionInfo> offlineRegions = offlineServers.get(regionLocation);
2503         if (offlineRegions == null) {
2504           offlineRegions = new ArrayList<HRegionInfo>(1);
2505           offlineServers.put(regionLocation, offlineRegions);
2506         }
2507         offlineRegions.add(regionInfo);
2508         // need to enable the table if not disabled or disabling or enabling
2509         // this will be used in rolling restarts
2510         if (!disabledOrDisablingOrEnabling.contains(tableName)
2511             && !getZKTable().isEnabledTable(tableName)) {
2512           setEnabledTable(tableName);
2513         }
2514       } else {
2515         // If the region is in offline and split state, check the znode
2516         if (regionInfo.isOffline() && regionInfo.isSplit()) {
2517           String node = ZKAssign.getNodeName(this.watcher, regionInfo
2518               .getEncodedName());
2519           Stat stat = new Stat();
2520           byte[] data = ZKUtil.getDataNoWatch(this.watcher, node, stat);
2521           // If znode does not exist, don't consider this region
2522           if (data == null) {
2523             LOG.debug("Region " + regionInfo.getRegionNameAsString()
2524                + " split is completed, so there is no need to add it to the regions list");
2525             continue;
2526           }
2527         }
2528         // Region is being served and on an active server
2529         // add only if region not in disabled or enabling table
2530         if (!disabledOrEnablingTables.contains(tableName)) {
2531           regionStates.regionOnline(regionInfo, regionLocation);
2532         }
2533         // need to enable the table if not disabled or disabling or enabling
2534         // this will be used in rolling restarts
2535         if (!disabledOrDisablingOrEnabling.contains(tableName)
2536             && !getZKTable().isEnabledTable(tableName)) {
2537           setEnabledTable(tableName);
2538         }
2539       }
2540     }
2541     return offlineServers;
2542   }
2543 
2544   /**
2545    * Recover the tables that were not fully moved to DISABLED state. These
2546    * tables were in DISABLING state when the master restarted or switched over.
2547    *
2548    * @throws KeeperException
2549    * @throws TableNotFoundException
2550    * @throws IOException
2551    */
2552   private void recoverTableInDisablingState()
2553       throws KeeperException, TableNotFoundException, IOException {
2554     Set<String> disablingTables = ZKTable.getDisablingTables(watcher);
2555     if (disablingTables.size() != 0) {
2556       for (String tableName : disablingTables) {
2557         // Recover by calling DisableTableHandler
2558         LOG.info("The table " + tableName
2559             + " is in DISABLING state.  Hence recovering by moving the table"
2560             + " to DISABLED state.");
2561         new DisableTableHandler(this.server, tableName.getBytes(), catalogTracker,
2562             this, tableLockManager, true).prepare().process();
2563       }
2564     }
2565   }
2566 
2567   /**
2568    * Recover the tables that were not fully moved to ENABLED state. These tables
2569    * were in ENABLING state when the master restarted or switched over.
2570    *
2571    * @throws KeeperException
2572    * @throws org.apache.hadoop.hbase.exceptions.TableNotFoundException
2573    * @throws IOException
2574    */
2575   private void recoverTableInEnablingState()
2576       throws KeeperException, TableNotFoundException, IOException {
2577     Set<String> enablingTables = ZKTable.getEnablingTables(watcher);
2578     if (enablingTables.size() != 0) {
2579       for (String tableName : enablingTables) {
2580         // Recover by calling EnableTableHandler
2581         LOG.info("The table " + tableName
2582             + " is in ENABLING state.  Hence recovering by moving the table"
2583             + " to ENABLED state.");
2584         // enableTable in sync way during master startup,
2585         // no need to invoke coprocessor
2586         new EnableTableHandler(this.server, tableName.getBytes(),
2587             catalogTracker, this, tableLockManager, true).prepare().process();
2588       }
2589     }
2590   }
2591 
2592   /**
2593    * Processes the list of dead servers from the result of a META scan, along
2594    * with regions in transition.
2595    * <p>
2596    * This is used on failover to recover regions that belonged to RegionServers
2597    * which failed while there was no active master, or regions that were in RIT.
2598    *
2601    * @param deadServers
2602    *          The list of dead servers which failed while there was no active
2603    *          master. Can be null.
2604    * @throws IOException
2605    * @throws KeeperException
2606    */
2607   private void processDeadServersAndRecoverLostRegions(
2608       Map<ServerName, List<HRegionInfo>> deadServers)
2609           throws IOException, KeeperException {
2610     if (deadServers != null) {
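           // Expire each dead server not already known to be dead; the resulting
           // ServerShutdownHandler will re-assign the regions it was carrying.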
2611       for (Map.Entry<ServerName, List<HRegionInfo>> server: deadServers.entrySet()) {
2612         ServerName serverName = server.getKey();
2613         if (!serverManager.isServerDead(serverName)) {
2614           serverManager.expireServer(serverName); // Let SSH do region re-assign
2615         }
2616       }
2617     }
2618     List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(
2619       this.watcher, this.watcher.assignmentZNode);
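         // Re-process every region still listed under the assignment znode; these
         // are regions that were in transition when the previous master went away.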
2620     if (!nodes.isEmpty()) {
2621       for (String encodedRegionName : nodes) {
2622         processRegionInTransition(encodedRegionName, null);
2623       }
2624     }
2625 
2626     // Now we can safely claim failover cleanup completed and enable
2627     // ServerShutdownHandler for further processing. The nodes (below)
2628     // in transition, if any, are for regions not related to those
2629     // dead servers at all, and can be done in parallel to SSH.
2630     failoverCleanupDone();
2631   }
2632 
2633   /**
2634    * Set Regions in transitions metrics.
2635    * This takes an iterator on the regions-in-transition map (CLSM), and is not synchronized.
2636    * The iterator is not fail-fast, which may lead to stale reads; but that's better than
2637    * creating a copy of the map for metrics computation, as this method is invoked
2638    * at a frequent interval.
2639    */
2640   public void updateRegionsInTransitionMetrics() {
2641     long currentTime = System.currentTimeMillis();
2642     int totalRITs = 0;
2643     int totalRITsOverThreshold = 0;
2644     long oldestRITTime = 0;
2645     int ritThreshold = this.server.getConfiguration().
2646       getInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 60000);
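         // Regions in transition longer than this threshold (default 60000 ms) are
         // reported separately as "over threshold" in the master metrics below.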
2647     for (RegionState state: regionStates.getRegionsInTransition().values()) {
2648       totalRITs++;
2649       long ritTime = currentTime - state.getStamp();
2650       if (ritTime > ritThreshold) { // more than the threshold
2651         totalRITsOverThreshold++;
2652       }
2653       if (oldestRITTime < ritTime) {
2654         oldestRITTime = ritTime;
2655       }
2656     }
2657     if (this.metricsMaster != null) {
2658       this.metricsMaster.updateRITOldestAge(oldestRITTime);
2659       this.metricsMaster.updateRITCount(totalRITs);
2660       this.metricsMaster.updateRITCountOverThreshold(totalRITsOverThreshold);
2661     }
2662   }
2663 
2664   /**
2665    * @param region Region whose plan we are to clear.
2666    */
2667   void clearRegionPlan(final HRegionInfo region) {
2668     synchronized (this.regionPlans) {
2669       this.regionPlans.remove(region.getEncodedName());
2670     }
2671   }
2672 
2673   /**
2674    * Wait on region to clear regions-in-transition.
2675    * @param hri Region to wait on.
2676    * @throws IOException
2677    */
2678   public void waitOnRegionToClearRegionsInTransition(final HRegionInfo hri)
2679       throws IOException, InterruptedException {
2680     if (!regionStates.isRegionInTransition(hri)) return;
2681     // There is already a timeout monitor on regions in transition, so we
2682     // should not need another one here.
2683     while (!this.server.isStopped() && regionStates.isRegionInTransition(hri)) {
2684       LOG.info("Waiting on " + hri.getRegionNameAsString()
2685         + " to clear regions-in-transition");
2686       regionStates.waitForUpdate(100);
2687     }
2688     if (this.server.isStopped()) {
2689       LOG.info("Giving up wait on regions in " +
2690         "transition because stoppable.isStopped is set");
2691     }
2692   }
2693 
2694   /**
2695    * Update timers for all regions in transition that involve a server listed in
2696    * serversInUpdatingTimer.
2697    */
2698   public class TimerUpdater extends Chore {
2699 
2700     public TimerUpdater(final int period, final Stoppable stopper) {
2701       super("AssignmentTimerUpdater", period, stopper);
2702     }
2703 
2704     @Override
2705     protected void chore() {
2706       Preconditions.checkState(tomActivated);
2707       ServerName serverToUpdateTimer = null;
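           // Walk serversInUpdatingTimer in ascending order, updating the in-transition
           // timers for each server and removing it once it has been handled.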
2708       while (!serversInUpdatingTimer.isEmpty() && !stopper.isStopped()) {
2709         if (serverToUpdateTimer == null) {
2710           serverToUpdateTimer = serversInUpdatingTimer.first();
2711         } else {
2712           serverToUpdateTimer = serversInUpdatingTimer
2713               .higher(serverToUpdateTimer);
2714         }
2715         if (serverToUpdateTimer == null) {
2716           break;
2717         }
2718         updateTimers(serverToUpdateTimer);
2719         serversInUpdatingTimer.remove(serverToUpdateTimer);
2720       }
2721     }
2722   }
2723 
2724   /**
2725    * Monitor to check for time outs on region transition operations
2726    */
2727   public class TimeoutMonitor extends Chore {
2728     private boolean allRegionServersOffline = false;
2729     private ServerManager serverManager;
2730     private final int timeout;
2731 
2732     /**
2733      * Creates a periodic monitor to check for time outs on region transition
2734      * operations.  This will deal with retries if for some reason something
2735      * doesn't happen within the specified timeout.
2736      * @param period
2737      * @param stopper When {@link Stoppable#isStopped()} is true, this thread will
2738      *          clean up and exit cleanly.
2739      * @param timeout
2740      */
2741     public TimeoutMonitor(final int period, final Stoppable stopper,
2742         ServerManager serverManager,
2743         final int timeout) {
2744       super("AssignmentTimeoutMonitor", period, stopper);
2745       this.timeout = timeout;
2746       this.serverManager = serverManager;
2747     }
2748 
2749     private synchronized void setAllRegionServersOffline(
2750       boolean allRegionServersOffline) {
2751       this.allRegionServersOffline = allRegionServersOffline;
2752     }
2753 
2754     @Override
2755     protected void chore() {
2756       Preconditions.checkState(tomActivated);
2757       boolean noRSAvailable = this.serverManager.createDestinationServersList().isEmpty();
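           // allRegionServersOffline holds the value from the previous run; if region
           // servers have come back online since then, assignments are retried below
           // without waiting for the full timeout.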
2758 
2759       // Iterate all regions in transition checking for time outs
2760       long now = System.currentTimeMillis();
2761       // No lock needed, concurrent access is ok: we will be working on a copy, and
2762       // it's valid in Java to take a copy while another thread is adding/removing items
2763       for (String regionName : regionStates.getRegionsInTransition().keySet()) {
2764         RegionState regionState = regionStates.getRegionTransitionState(regionName);
2765         if (regionState == null) continue;
2766 
2767         if (regionState.getStamp() + timeout <= now) {
2768           // decide on action upon timeout
2769           actOnTimeOut(regionState);
2770         } else if (this.allRegionServersOffline && !noRSAvailable) {
2771           RegionPlan existingPlan = regionPlans.get(regionName);
2772           if (existingPlan == null
2773               || !this.serverManager.isServerOnline(existingPlan
2774                   .getDestination())) {
2775             // if some RSs just came back online, we can start the assignment
2776             // right away
2777             actOnTimeOut(regionState);
2778           }
2779         }
2780       }
2781       setAllRegionServersOffline(noRSAvailable);
2782     }
2783 
2784     private void actOnTimeOut(RegionState regionState) {
2785       HRegionInfo regionInfo = regionState.getRegion();
2786       LOG.info("Region in transition timed out: " + regionState);
2787       // Expired! Do a retry.
2788       switch (regionState.getState()) {
2789       case CLOSED:
2790         LOG.info("Region " + regionInfo.getEncodedName()
2791             + " has been CLOSED for too long, waiting on queued "
2792             + "ClosedRegionHandler to run or server shutdown");
2793         // Update our timestamp.
2794         regionState.updateTimestampToNow();
2795         break;
2796       case OFFLINE:
2797         LOG.info("Region has been OFFLINE for too long, " + "reassigning "
2798             + regionInfo.getRegionNameAsString() + " to a random server");
2799         invokeAssign(regionInfo);
2800         break;
2801       case PENDING_OPEN:
2802         LOG.info("Region has been PENDING_OPEN for too "
2803             + "long, reassigning region=" + regionInfo.getRegionNameAsString());
2804         invokeAssign(regionInfo);
2805         break;
2806       case OPENING:
2807         processOpeningState(regionInfo);
2808         break;
2809       case OPEN:
2810         LOG.error("Region has been OPEN for too long, " +
2811             "we don't know where the region was opened, so we can't do anything");
2812         regionState.updateTimestampToNow();
2813         break;
2814 
2815       case PENDING_CLOSE:
2816         LOG.info("Region has been PENDING_CLOSE for too "
2817             + "long, running forced unassign again on region="
2818             + regionInfo.getRegionNameAsString());
2819         invokeUnassign(regionInfo);
2820         break;
2821       case CLOSING:
2822         LOG.info("Region has been CLOSING for too " +
2823           "long; this should eventually complete or the server will " +
2824           "expire; sending the unassign RPC again");
2825         invokeUnassign(regionInfo);
2826         break;
2827 
2828       case SPLIT:
2829       case SPLITTING:
2830       case FAILED_OPEN:
2831       case FAILED_CLOSE:
2832         break;
2833 
2834       default:
2835         throw new IllegalStateException("Unexpected region state: " + regionState.getState());
2836       }
2837     }
2838   }
2839 
2840   private void processOpeningState(HRegionInfo regionInfo) {
2841     LOG.info("Region has been OPENING for too long, reassigning region="
2842         + regionInfo.getRegionNameAsString());
2843     // Should have a ZK node in OPENING state
2844     try {
2845       String node = ZKAssign.getNodeName(watcher, regionInfo.getEncodedName());
2846       Stat stat = new Stat();
2847       byte [] data = ZKAssign.getDataNoWatch(watcher, node, stat);
2848       if (data == null) {
2849         LOG.warn("Data is null, node " + node + " no longer exists");
2850         return;
2851       }
2852       RegionTransition rt = RegionTransition.parseFrom(data);
2853       EventType et = rt.getEventType();
2854       if (et == EventType.RS_ZK_REGION_OPENED) {
2855         LOG.debug("Region has transitioned to OPENED, allowing "
2856             + "watched event handlers to process");
2857         return;
2858       } else if (et != EventType.RS_ZK_REGION_OPENING && et != EventType.RS_ZK_REGION_FAILED_OPEN ) {
2859         LOG.warn("While timing out a region, found ZK node in unexpected state: " + et);
2860         return;
2861       }
2862       invokeAssign(regionInfo);
2863     } catch (KeeperException ke) {
2864       LOG.error("Unexpected ZK exception timing out OPENING region", ke);
2865     } catch (DeserializationException e) {
2866       LOG.error("Unexpected exception parsing OPENING region", e);
2867     }
2868   }
2869 
2870   void invokeAssign(HRegionInfo regionInfo) {
2871     threadPoolExecutorService.submit(new AssignCallable(this, regionInfo));
2872   }
2873 
2874   private void invokeUnassign(HRegionInfo regionInfo) {
2875     threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
2876   }
2877 
2878   public boolean isCarryingMeta(ServerName serverName) {
2879     return isCarryingRegion(serverName, HRegionInfo.FIRST_META_REGIONINFO);
2880   }
2881 
2882   /**
2883    * Check if the shutdown server carries the specified region.
2884    * We have a bunch of places that store the region location, and
2885    * those values aren't always consistent because notification is delayed.
2886    * The location from the ZooKeeper unassigned node has the most recent data,
2887    * but the node could be deleted after the region is opened by the AM.
2888    * The AM's info could be stale if OpenedRegionHandler
2889    * processing hasn't finished yet when the server shutdown occurs.
2890    * @return whether the serverName currently hosts the region
2891    */
2892   private boolean isCarryingRegion(ServerName serverName, HRegionInfo hri) {
2893     RegionTransition rt = null;
2894     try {
2895       byte [] data = ZKAssign.getData(watcher, hri.getEncodedName());
2896       // This call can legitimately return null
2897       rt = data == null? null: RegionTransition.parseFrom(data);
2898     } catch (KeeperException e) {
2899       server.abort("Exception reading unassigned node for region=" + hri.getEncodedName(), e);
2900     } catch (DeserializationException e) {
2901       server.abort("Exception parsing unassigned node for region=" + hri.getEncodedName(), e);
2902     }
2903 
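         // Prefer the server recorded in the region's unassigned znode, since it has
         // the most recent data; otherwise fall back to the AM's in-memory state.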
2904     ServerName addressFromZK = rt != null? rt.getServerName():  null;
2905     if (addressFromZK != null) {
2906       // if we get something from ZK, we will use the data
2907       boolean matchZK = addressFromZK.equals(serverName);
2908       LOG.debug("based on ZK, current region=" + hri.getRegionNameAsString() +
2909           " is on server=" + addressFromZK +
2910           " server being checked: " + serverName);
2911       return matchZK;
2912     }
2913 
2914     ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
2915     boolean matchAM = (addressFromAM != null &&
2916       addressFromAM.equals(serverName));
2917     LOG.debug("based on AM, current region=" + hri.getRegionNameAsString() +
2918       " is on server=" + (addressFromAM != null ? addressFromAM : "null") +
2919       " server being checked: " + serverName);
2920 
2921     return matchAM;
2922   }
2923 
2924   /**
2925    * Process shutdown server removing any assignments.
2926    * @param sn Server that went down.
2927    * @return list of regions in transition on this server
2928    */
2929   public List<HRegionInfo> processServerShutdown(final ServerName sn) {
2930     // Clean out any existing assignment plans for this server
2931     synchronized (this.regionPlans) {
2932       for (Iterator <Map.Entry<String, RegionPlan>> i =
2933           this.regionPlans.entrySet().iterator(); i.hasNext();) {
2934         Map.Entry<String, RegionPlan> e = i.next();
2935         ServerName otherSn = e.getValue().getDestination();
2936         // The destination will be null if the region is planned for a random assign.
2937         if (otherSn != null && otherSn.equals(sn)) {
2938           // Use iterator's remove else we'll get CME
2939           i.remove();
2940         }
2941       }
2942     }
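         // Mark the server offline in regionStates; the returned regions were in
         // transition on it and are filtered below before being returned to the caller.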
2943     List<HRegionInfo> regions = regionStates.serverOffline(sn);
2944     for (Iterator<HRegionInfo> it = regions.iterator(); it.hasNext(); ) {
2945       HRegionInfo hri = it.next();
2946       String encodedName = hri.getEncodedName();
2947 
2948       // We need a lock on the region as we could update it
2949       Lock lock = locker.acquireLock(encodedName);
2950       try {
2951         RegionState regionState =
2952           regionStates.getRegionTransitionState(encodedName);
2953         if (regionState == null
2954             || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
2955           LOG.info("Skip region " + hri
2956             + " since it is not opening on the dead server any more: " + sn);
2957           it.remove();
2958         } else {
2959           try {
2960             // Delete the ZK node if it exists
2961             ZKAssign.deleteNodeFailSilent(watcher, hri);
2962           } catch (KeeperException ke) {
2963             server.abort("Unexpected ZK exception deleting node " + hri, ke);
2964           }
2965           // Mark the region closed and assign it again by SSH
2966           regionStates.updateRegionState(hri, RegionState.State.CLOSED);
2967         }
2968       } finally {
2969         lock.unlock();
2970       }
2971     }
2972     return regions;
2973   }
2974 
2975   /**
2976    * Update in-memory structures.
2977    * @param sn Server that reported the split
2978    * @param parent Parent region that was split
2979    * @param a Daughter region A
2980    * @param b Daughter region B
2981    */
2982   public void handleSplitReport(final ServerName sn, final HRegionInfo parent,
2983       final HRegionInfo a, final HRegionInfo b) {
2984     regionOffline(parent);
2985     regionOnline(a, sn);
2986     regionOnline(b, sn);
2987 
2988     // There's a possibility that the region was splitting while a user asked
2989     // the master to disable, we need to make sure we close those regions in
2990     // that case. This is not racing with the region server itself since RS
2991     // report is done after the split transaction completed.
2992     if (this.zkTable.isDisablingOrDisabledTable(
2993         parent.getTableNameAsString())) {
2994       unassign(a);
2995       unassign(b);
2996     }
2997   }
2998 
2999   /**
3000    * Update in-memory structures.
3001    * @param sn Server that reported the merge
3002    * @param merged regioninfo of merged
3003    * @param a region a
3004    * @param b region b
3005    */
3006   public void handleRegionsMergeReport(final ServerName sn,
3007       final HRegionInfo merged, final HRegionInfo a, final HRegionInfo b) {
3008     regionOffline(a);
3009     regionOffline(b);
3010     regionOnline(merged, sn);
3011 
3012     // There's a possibility that the region was merging while a user asked
3013     // the master to disable, we need to make sure we close those regions in
3014     // that case. This is not racing with the region server itself since RS
3015     // report is done after the regions merge transaction completed.
3016     if (this.zkTable.isDisablingOrDisabledTable(merged.getTableNameAsString())) {
3017       unassign(merged);
3018     }
3019   }
3020 
3021   /**
3022    * @param plan Plan to execute.
3023    */
3024   public void balance(final RegionPlan plan) {
3025     synchronized (this.regionPlans) {
3026       this.regionPlans.put(plan.getRegionName(), plan);
3027     }
3028     unassign(plan.getRegionInfo(), false, plan.getDestination());
3029   }
3030 
3031   public void stop() {
3032     if (tomActivated) {
3033       this.timeoutMonitor.interrupt();
3034       this.timerUpdater.interrupt();
3035     }
3036   }
3037 
3038   /**
3039    * Shutdown the threadpool executor service
3040    */
3041   public void shutdown() {
3042     // It's an immediate shutdown, so we're clearing the remaining tasks.
3043     synchronized (zkEventWorkerWaitingList) {
3044       zkEventWorkerWaitingList.clear();
3045     }
3046     threadPoolExecutorService.shutdownNow();
3047     zkEventWorkers.shutdownNow();
3048   }
3049 
3050   protected void setEnabledTable(String tableName) {
3051     try {
3052       this.zkTable.setEnabledTable(tableName);
3053     } catch (KeeperException e) {
3054       // Here we can abort as this is the startup flow
3055       String errorMsg = "Unable to ensure that the table " + tableName
3056           + " will be enabled because of a ZooKeeper issue";
3057       LOG.error(errorMsg);
3058       this.server.abort(errorMsg, e);
3059     }
3060   }
3061 
3062   /**
3063    * Set region as OFFLINE up in ZooKeeper asynchronously.
3064    * @param state
3065    * @return True if we succeeded, false otherwise (state was incorrect or we
3066    * failed updating ZK).
3067    */
3068   private boolean asyncSetOfflineInZooKeeper(final RegionState state,
3069       final AsyncCallback.StringCallback cb, final ServerName destination) {
3070     if (!state.isClosed() && !state.isOffline()) {
3071       this.server.abort("Unexpected state trying to OFFLINE; " + state,
3072         new IllegalStateException());
3073       return false;
3074     }
3075     regionStates.updateRegionState(
3076       state.getRegion(), RegionState.State.OFFLINE);
3077     try {
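           // Asynchronously create the OFFLINE unassigned znode for this region; the
           // supplied callback is invoked once the create call completes.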
3078       ZKAssign.asyncCreateNodeOffline(watcher, state.getRegion(),
3079         destination, cb, state);
3080     } catch (KeeperException e) {
3081       if (e instanceof NodeExistsException) {
3082         LOG.warn("Node for " + state.getRegion() + " already exists");
3083       } else {
3084         server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
3085       }
3086       return false;
3087     }
3088     return true;
3089   }
3090 }