
/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.EOFException;
import java.io.IOException;
import java.net.ConnectException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.catalog.RootLocationEditor;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.executor.RegionTransitionData;
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKTable;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.zookeeper.AsyncCallback;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.data.Stat;

/**
 * Manages and performs region assignment.
 * <p>
 * Monitors ZooKeeper for events related to regions in transition.
 * <p>
 * Handles existing regions in transition during master failover.
 */
public class AssignmentManager extends ZooKeeperListener {
  private static final Log LOG = LogFactory.getLog(AssignmentManager.class);

  protected Server master;

  private ServerManager serverManager;

  private CatalogTracker catalogTracker;

  private TimeoutMonitor timeoutMonitor;

  /*
   * Maximum times we recurse an assignment.  See below in {@link #assign()}.
   */
  private final int maximumAssignmentAttempts;

  /**
   * Regions currently in transition.  Map of encoded region names to the master
   * in-memory state for that region.
   */
  final ConcurrentSkipListMap<String, RegionState> regionsInTransition =
    new ConcurrentSkipListMap<String, RegionState>();

  /** Plans for region movement. Key is the encoded version of a region name */
  // TODO: When do plans get cleaned out?  Ever? In server open and in server
  // shutdown processing -- St.Ack
  // All access to this Map must be synchronized.
  final NavigableMap<String, RegionPlan> regionPlans =
    new TreeMap<String, RegionPlan>();

  private final ZKTable zkTable;

  /**
   * Server to regions assignment map.
   * Contains the set of regions currently assigned to a given server.
   * This Map and {@link #regions} are tied.  Always update this in tandem
   * with the other under a lock on {@link #regions}.
   * @see #regions
   */
  private final NavigableMap<HServerInfo, List<HRegionInfo>> servers =
    new TreeMap<HServerInfo, List<HRegionInfo>>();

  /**
   * Region to server assignment map.
   * Contains the server a given region is currently assigned to.
   * This Map and {@link #servers} are tied.  Always update this in tandem
   * with the other under a lock on {@link #regions}.
   * @see #servers
   */
  private final SortedMap<HRegionInfo,HServerInfo> regions =
    new TreeMap<HRegionInfo,HServerInfo>();

  private final ExecutorService executorService;

  /**
   * Constructs a new assignment manager.
   *
   * @param master server interface of the master this manager runs inside
   * @param serverManager manager used to interact with online regionservers
   * @param catalogTracker tracker of the -ROOT- and .META. catalog regions
   * @param service executor service used to run event handlers
   * @throws KeeperException
   */
  public AssignmentManager(Server master, ServerManager serverManager,
      CatalogTracker catalogTracker, final ExecutorService service)
  throws KeeperException {
    super(master.getZooKeeper());
    this.master = master;
    this.serverManager = serverManager;
    this.catalogTracker = catalogTracker;
    this.executorService = service;
    Configuration conf = master.getConfiguration();
    this.timeoutMonitor = new TimeoutMonitor(
      conf.getInt("hbase.master.assignment.timeoutmonitor.period", 10000),
      master,
      conf.getInt("hbase.master.assignment.timeoutmonitor.timeout", 30000));
    Threads.setDaemonThreadRunning(timeoutMonitor,
      master.getServerName() + ".timeoutMonitor");
    this.zkTable = new ZKTable(this.master.getZooKeeper());
    this.maximumAssignmentAttempts =
      this.master.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10);
  }
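
  /*
   * Illustrative sketch, not part of the original class: the constructor
   * above reads its tuning from the master Configuration.  A deployment could
   * adjust the knobs like so before starting the master (the values shown are
   * the defaults used above; the method name is hypothetical).
   */
  static Configuration exampleAssignmentManagerConf() {
    Configuration conf = new Configuration();
    // How often the TimeoutMonitor chore wakes up to check for stuck regions.
    conf.setInt("hbase.master.assignment.timeoutmonitor.period", 10000);
    // How long a region may sit in transition before the monitor acts on it.
    conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 30000);
    // How many times assign() will retry before giving up on a region.
    conf.setInt("hbase.assignment.maximum.attempts", 10);
    return conf;
  }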

  /**
   * @return Instance of ZKTable.
   */
  public ZKTable getZKTable() {
    // ZKTable instances are 'expensive' to make because construction involves
    // a trip to the zk ensemble, so allow sharing this one.
    return this.zkTable;
  }

  /**
   * Reset all unassigned znodes.  Called on startup of master.
   * Call {@link #assignAllUserRegions()} after root and meta have been assigned.
   * @throws IOException
   * @throws KeeperException
   */
  void cleanoutUnassigned() throws IOException, KeeperException {
    // Cleanup any existing ZK nodes and start watching
    ZKAssign.deleteAllNodes(watcher);
    ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
      this.watcher.assignmentZNode);
  }

  /**
   * Handle failover.  Restore state from META and ZK.  Handle any regions in
   * transition.  Presumes <code>.META.</code> and <code>-ROOT-</code> deployed.
   * @throws KeeperException
   * @throws IOException
   */
  void processFailover() throws KeeperException, IOException {
    // Concurrency note: In the below the accesses on regionsInTransition are
    // outside of a synchronization block where usually all accesses to RIT are
    // synchronized.  The presumption is that in this case it is safe since this
    // method is run by a single thread on startup.

    // TODO: Check list of user regions and their assignments against regionservers.
    // TODO: Regions that have a null location and are not in regionsInTransition
    // need to be handled.

    // Scan META to build list of existing regions, servers, and assignment.
    // Returns servers who have not checked in (assumed dead) and their regions.
    Map<HServerInfo,List<Pair<HRegionInfo,Result>>> deadServers =
      rebuildUserRegions();
    // Process list of dead servers
    processDeadServers(deadServers);
    // Check existing regions in transition
    List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(watcher,
        watcher.assignmentZNode);
    if (nodes.isEmpty()) {
      LOG.info("No regions in transition in ZK to process on failover");
      return;
    }
    LOG.info("Failed-over master needs to process " + nodes.size() +
        " regions in transition");
    for (String encodedRegionName: nodes) {
      processRegionInTransition(encodedRegionName, null);
    }
  }

  /**
   * If region is up in zk in transition, then do fixup and block and wait until
   * the region is assigned and out of transition.  Used on startup for
   * catalog regions.
   * @param hri Region to look for.
   * @return True if we processed a region in transition else false if region
   * was not up in zk in transition.
   * @throws InterruptedException
   * @throws KeeperException
   * @throws IOException
   */
  boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
  throws InterruptedException, KeeperException, IOException {
    boolean inTransition = processRegionInTransition(hri.getEncodedName(), hri);
    if (!inTransition) return inTransition;
    synchronized(this.regionsInTransition) {
      while (!this.master.isStopped() &&
          this.regionsInTransition.containsKey(hri.getEncodedName())) {
        this.regionsInTransition.wait();
      }
    }
    return inTransition;
  }
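
  /*
   * Illustrative sketch, not part of the original class: how a failed-over
   * master might use the method above to settle the catalog regions before
   * moving on to user regions (hypothetical caller; the real call sites live
   * in the master startup code).
   */
  void exampleProcessCatalogRegionsInTransition()
  throws InterruptedException, KeeperException, IOException {
    // Returns false if -ROOT- was not up in zk in transition; otherwise it
    // blocks until the region is assigned and out of transition.
    processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
    processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
  }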

  /**
   * Process failover of <code>encodedRegionName</code>.  Looks in zk for the
   * region's transition data and, if the region is in transition, replays the
   * transition into the master's in-memory state.
   * @param encodedRegionName Region to process failover for.
   * @param regionInfo RegionInfo.  If null we'll go get it from meta table.
   * @return True if the region was found in transition up in zk, else false.
   * @throws KeeperException
   * @throws IOException
   */
  boolean processRegionInTransition(final String encodedRegionName,
      final HRegionInfo regionInfo)
  throws KeeperException, IOException {
    RegionTransitionData data = ZKAssign.getData(watcher, encodedRegionName);
    if (data == null) return false;
    HRegionInfo hri = regionInfo;
    if (hri == null) {
      Pair<HRegionInfo, HServerAddress> p =
        MetaReader.getRegion(catalogTracker, data.getRegionName());
      if (p == null) return false;
      hri = p.getFirst();
    }
    processRegionsInTransition(data, hri);
    return true;
  }

  void processRegionsInTransition(final RegionTransitionData data,
      final HRegionInfo regionInfo)
  throws KeeperException {
    String encodedRegionName = regionInfo.getEncodedName();
    LOG.info("Processing region " + regionInfo.getRegionNameAsString() +
      " in state " + data.getEventType());
    synchronized (regionsInTransition) {
      switch (data.getEventType()) {
      case RS_ZK_REGION_CLOSING:
        // Just insert region into RIT.
        // If this never updates, the timeout will trigger new assignment
        regionsInTransition.put(encodedRegionName, new RegionState(
            regionInfo, RegionState.State.CLOSING, data.getStamp()));
        break;

      case RS_ZK_REGION_CLOSED:
        // Region is closed, insert into RIT and handle it
        regionsInTransition.put(encodedRegionName, new RegionState(
            regionInfo, RegionState.State.CLOSED, data.getStamp()));
        new ClosedRegionHandler(master, this, regionInfo).process();
        break;

      case M_ZK_REGION_OFFLINE:
        // Region is offline, insert into RIT and handle it like a closed
        regionsInTransition.put(encodedRegionName, new RegionState(
            regionInfo, RegionState.State.OFFLINE, data.getStamp()));
        new ClosedRegionHandler(master, this, regionInfo).process();
        break;

      case RS_ZK_REGION_OPENING:
        // Just insert region into RIT.
        // If this never updates, the timeout will trigger new assignment
        regionsInTransition.put(encodedRegionName, new RegionState(
            regionInfo, RegionState.State.OPENING, data.getStamp()));
        break;

      case RS_ZK_REGION_OPENED:
        // Region is opened, insert into RIT and handle it
        regionsInTransition.put(encodedRegionName, new RegionState(
            regionInfo, RegionState.State.OPENING, data.getStamp()));
        HServerInfo hsi = serverManager.getServerInfo(data.getServerName());
        // hsi could be null if this server is no longer online.  If that is
        // the case, just let this RIT time out; the region will then be
        // assigned to a new server.
        if (hsi == null) {
          LOG.warn("Region in transition " + regionInfo.getEncodedName() +
            " references a server no longer up " + data.getServerName() +
            "; letting RIT timeout so will be assigned elsewhere");
          break;
        }
        new OpenedRegionHandler(master, this, regionInfo, hsi).process();
        break;
      }
    }
  }

  /**
   * Handles various states an unassigned node can be in.
   * <p>
   * Method is called when a state change is suspected for an unassigned node.
   * <p>
   * This deals with skipped transitions (we got a CLOSED but didn't see CLOSING
   * yet).
   * @param data
   */
  private void handleRegion(final RegionTransitionData data) {
    synchronized(regionsInTransition) {
      if (data == null || data.getServerName() == null) {
        LOG.warn("Unexpected NULL input " + data);
        return;
      }
      // Check if this is a special HBCK transition
      if (data.getServerName().equals(HConstants.HBCK_CODE_NAME)) {
        handleHBCK(data);
        return;
      }
      // Verify this is a known server
      if (!serverManager.isServerOnline(data.getServerName()) &&
          !this.master.getServerName().equals(data.getServerName())) {
        LOG.warn("Attempted to handle region transition for region " +
          Bytes.toString(data.getRegionName()) + " but server " +
          data.getServerName() + " is not online");
        return;
      }
      String encodedName = HRegionInfo.encodeRegionName(data.getRegionName());
      String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedName);
      LOG.debug("Handling transition=" + data.getEventType() +
        ", server=" + data.getServerName() + ", region=" + prettyPrintedRegionName);
      RegionState regionState = regionsInTransition.get(encodedName);
      switch (data.getEventType()) {
        case M_ZK_REGION_OFFLINE:
          // Nothing to do.
          break;

        case RS_ZK_REGION_CLOSING:
          // Should see CLOSING after we have asked it to CLOSE or additional
          // times after already being in state of CLOSING
          if (regionState == null ||
              (!regionState.isPendingClose() && !regionState.isClosing())) {
            LOG.warn("Received CLOSING for region " + prettyPrintedRegionName +
              " from server " + data.getServerName() + " but region was in" +
              " the state " + regionState + " and not in expected" +
              " PENDING_CLOSE or CLOSING states");
            return;
          }
          // Transition to CLOSING (or update stamp if already CLOSING)
          regionState.update(RegionState.State.CLOSING, data.getStamp());
          break;

        case RS_ZK_REGION_CLOSED:
          // Should see CLOSED after CLOSING but possible after PENDING_CLOSE
          if (regionState == null ||
              (!regionState.isPendingClose() && !regionState.isClosing())) {
            LOG.warn("Received CLOSED for region " + prettyPrintedRegionName +
              " from server " + data.getServerName() + " but region was in" +
              " the state " + regionState + " and not in expected" +
              " PENDING_CLOSE or CLOSING states");
            return;
          }
          // Handle CLOSED by assigning elsewhere or stopping if a disable.
          // If we got here all is good.  Need to update RegionState -- else
          // what follows will fail because not in expected state.
          regionState.update(RegionState.State.CLOSED, data.getStamp());
          this.executorService.submit(new ClosedRegionHandler(master,
            this, regionState.getRegion()));
          break;

        case RS_ZK_REGION_OPENING:
          // Should see OPENING after we have asked it to OPEN or additional
          // times after already being in state of OPENING
          if (regionState == null ||
              (!regionState.isPendingOpen() && !regionState.isOpening())) {
            LOG.warn("Received OPENING for region " + prettyPrintedRegionName +
              " from server " + data.getServerName() + " but region was in" +
              " the state " + regionState + " and not in expected" +
              " PENDING_OPEN or OPENING states");
            return;
          }
          // Transition to OPENING (or update stamp if already OPENING)
          regionState.update(RegionState.State.OPENING, data.getStamp());
          break;

        case RS_ZK_REGION_OPENED:
          // Should see OPENED after OPENING but possible after PENDING_OPEN
          if (regionState == null ||
              (!regionState.isPendingOpen() && !regionState.isOpening())) {
            LOG.warn("Received OPENED for region " + prettyPrintedRegionName +
              " from server " + data.getServerName() + " but region was in" +
              " the state " + regionState + " and not in expected" +
              " PENDING_OPEN or OPENING states");
            return;
          }
          // Handle OPENED by removing from transition and deleting the zk node
          regionState.update(RegionState.State.OPEN, data.getStamp());
          this.executorService.submit(
            new OpenedRegionHandler(master, this, regionState.getRegion(),
              this.serverManager.getServerInfo(data.getServerName())));
          break;
      }
    }
  }

  /**
   * Handle a ZK unassigned node transition triggered by HBCK repair tool.
   * <p>
   * This is handled in a separate code path because it breaks the normal rules.
   * @param data
   */
  private void handleHBCK(RegionTransitionData data) {
    String encodedName = HRegionInfo.encodeRegionName(data.getRegionName());
    LOG.info("Handling HBCK triggered transition=" + data.getEventType() +
      ", server=" + data.getServerName() + ", region=" +
      HRegionInfo.prettyPrint(encodedName));
    RegionState regionState = regionsInTransition.get(encodedName);
    switch (data.getEventType()) {
      case M_ZK_REGION_OFFLINE:
        HRegionInfo regionInfo = null;
        if (regionState != null) {
          regionInfo = regionState.getRegion();
        } else {
          try {
            regionInfo = MetaReader.getRegion(catalogTracker,
                data.getRegionName()).getFirst();
          } catch (IOException e) {
            LOG.info("Exception reading META doing HBCK repair operation", e);
            return;
          }
        }
        LOG.info("HBCK repair is triggering assignment of region=" +
            regionInfo.getRegionNameAsString());
        // trigger assign; node is already in OFFLINE so don't need to update ZK
        assign(regionInfo, false);
        break;

      default:
        LOG.warn("Received unexpected region state from HBCK (" +
            data.getEventType() + ")");
        break;
    }
  }

  // ZooKeeper events

  /**
   * New unassigned node has been created.
   *
   * <p>This happens when an RS begins the OPENING or CLOSING of a region by
   * creating an unassigned node.
   *
   * <p>When this happens we must:
   * <ol>
   *   <li>Watch the node for further events</li>
   *   <li>Read and handle the state in the node</li>
   * </ol>
   */
  @Override
  public void nodeCreated(String path) {
    if (path.startsWith(watcher.assignmentZNode)) {
      synchronized(regionsInTransition) {
        try {
          RegionTransitionData data = ZKAssign.getData(watcher, path);
          if (data == null) {
            return;
          }
          handleRegion(data);
        } catch (KeeperException e) {
          master.abort("Unexpected ZK exception reading unassigned node data", e);
        }
      }
    }
  }

  /**
   * Existing unassigned node has had data changed.
   *
   * <p>This happens when an RS transitions from OFFLINE to OPENING, or between
   * OPENING/OPENED and CLOSING/CLOSED.
   *
   * <p>When this happens we must:
   * <ol>
   *   <li>Watch the node for further events</li>
   *   <li>Read and handle the state in the node</li>
   * </ol>
   */
  @Override
  public void nodeDataChanged(String path) {
    if (path.startsWith(watcher.assignmentZNode)) {
      synchronized(regionsInTransition) {
        try {
          RegionTransitionData data = ZKAssign.getData(watcher, path);
          if (data == null) {
            return;
          }
          handleRegion(data);
        } catch (KeeperException e) {
          master.abort("Unexpected ZK exception reading unassigned node data", e);
        }
      }
    }
  }

  /**
   * New unassigned node(s) have been created under the assignment znode.
   *
   * <p>This happens when an RS begins the OPENING or CLOSING of a region by
   * creating an unassigned node.
   *
   * <p>When this happens we must:
   * <ol>
   *   <li>Watch the node for further children changed events</li>
   *   <li>Watch all new children for changed events</li>
   *   <li>Read all children and handle them</li>
   * </ol>
   */
  @Override
  public void nodeChildrenChanged(String path) {
    if (path.equals(watcher.assignmentZNode)) {
      synchronized(regionsInTransition) {
        try {
          List<NodeAndData> newNodes = ZKUtil.watchAndGetNewChildren(watcher,
              watcher.assignmentZNode);
          for (NodeAndData newNode : newNodes) {
            LOG.debug("Handling new unassigned node: " + newNode);
            handleRegion(RegionTransitionData.fromBytes(newNode.getData()));
          }
        } catch (KeeperException e) {
          master.abort("Unexpected ZK exception reading unassigned children", e);
        }
      }
    }
  }

  /**
   * Marks the region as online.  Removes it from regions in transition and
   * updates the in-memory assignment information.
   * <p>
   * Used when a region has been successfully opened on a region server.
   * @param regionInfo
   * @param serverInfo
   */
  public void regionOnline(HRegionInfo regionInfo, HServerInfo serverInfo) {
    synchronized (this.regionsInTransition) {
      RegionState rs =
        this.regionsInTransition.remove(regionInfo.getEncodedName());
      if (rs != null) {
        this.regionsInTransition.notifyAll();
      }
    }
    synchronized (this.regions) {
      // Sanity check: the region should not already be assigned elsewhere
      HServerInfo hsi = this.regions.get(regionInfo);
      if (hsi != null) LOG.warn("Overwriting " + regionInfo.getEncodedName() +
        " on " + hsi);
      this.regions.put(regionInfo, serverInfo);
      addToServers(serverInfo, regionInfo);
      this.regions.notifyAll();
    }
    // Remove plan if one.
    clearRegionPlan(regionInfo);
    // Update timers for all regions in transition going against this server.
    updateTimers(serverInfo);
  }

  /**
   * Touch timers for all regions in transition that have the passed
   * <code>hsi</code> in common.
   * Call this method whenever a server checks in.  Doing so helps the case where
   * a new regionserver has joined the cluster and it has been given 1k regions
   * to open.  If this method is tickled every time a region reports in a
   * successful open then the 1k-th region won't be timed out just because it's
   * sitting behind the open of 999 other regions.  This method is NOT used
   * as part of bulk assign -- there we have a different mechanism for extending
   * the regions in transition timer (we turn it off temporarily because
   * there is no regionplan involved when bulk assigning).
   * @param hsi
   */
  private void updateTimers(final HServerInfo hsi) {
    // This loop could be expensive.
    // First make a copy of current regionPlan rather than hold sync while
    // looping because holding sync can cause deadlock.  It's ok in this loop
    // if the Map we're going against is a little stale.
    Map<String, RegionPlan> copy = new HashMap<String, RegionPlan>();
    synchronized(this.regionPlans) {
      copy.putAll(this.regionPlans);
    }
    for (Map.Entry<String, RegionPlan> e: copy.entrySet()) {
      if (!e.getValue().getDestination().equals(hsi)) continue;
      RegionState rs = null;
      synchronized (this.regionsInTransition) {
        rs = this.regionsInTransition.get(e.getKey());
      }
      if (rs == null) continue;
      synchronized (rs) {
        rs.update(rs.getState());
      }
    }
  }

  /**
   * Marks the region as offline.  Removes it from regions in transition and
   * removes in-memory assignment information.
   * <p>
   * Used when a region has been closed and should remain closed.
   * @param regionInfo
   */
  public void regionOffline(final HRegionInfo regionInfo) {
    synchronized(this.regionsInTransition) {
      if (this.regionsInTransition.remove(regionInfo.getEncodedName()) != null) {
        this.regionsInTransition.notifyAll();
      }
    }
    // remove the region plan as well just in case.
    clearRegionPlan(regionInfo);
    setOffline(regionInfo);
  }

  /**
   * Sets the region as offline by removing in-memory assignment information but
   * retaining transition information.
   * <p>
   * Used when a region has been closed but should be reassigned.
   * @param regionInfo
   */
  public void setOffline(HRegionInfo regionInfo) {
    synchronized (this.regions) {
      HServerInfo serverInfo = this.regions.remove(regionInfo);
      if (serverInfo == null) return;
      List<HRegionInfo> serverRegions = this.servers.get(serverInfo);
      if (!serverRegions.remove(regionInfo)) {
        LOG.warn("No " + regionInfo + " on " + serverInfo);
      }
    }
  }

  public void offlineDisabledRegion(HRegionInfo regionInfo) {
    // Disabling so should not be reassigned, just delete the CLOSED node
    LOG.debug("Table being disabled so deleting ZK node and removing from " +
        "regions in transition, skipping assignment of region " +
          regionInfo.getRegionNameAsString());
    try {
      if (!ZKAssign.deleteClosedNode(watcher, regionInfo.getEncodedName())) {
        // Could also be in OFFLINE mode
        ZKAssign.deleteOfflineNode(watcher, regionInfo.getEncodedName());
      }
    } catch (KeeperException.NoNodeException nne) {
      LOG.debug("Tried to delete closed node for " + regionInfo + " but it " +
          "does not exist so just offlining");
    } catch (KeeperException e) {
      this.master.abort("Error deleting CLOSED node in ZK", e);
    }
    regionOffline(regionInfo);
  }

  // Assignment methods

  /**
   * Assigns the specified region.
   * <p>
   * If a RegionPlan is available with a valid destination then it will be used
   * to determine which server the region is assigned to.  If no RegionPlan is
   * available, the region will be assigned to a random available server.
   * <p>
   * Updates the RegionState and sends the OPEN RPC.
   * <p>
   * This will only succeed if the region is in transition and in a CLOSED or
   * OFFLINE state or not in transition (in-memory not zk), and of course, the
   * chosen server is up and running (It may have just crashed!).  If the
   * in-memory checks pass, the zk node is forced to OFFLINE before assigning.
   *
   * @param region region to be assigned
   * @param setOfflineInZK whether ZK node should be created/transitioned to an
   *                       OFFLINE state before assigning the region
   */
  public void assign(HRegionInfo region, boolean setOfflineInZK) {
    assign(region, setOfflineInZK, false);
  }

  public void assign(HRegionInfo region, boolean setOfflineInZK,
      boolean forceNewPlan) {
    String tableName = region.getTableDesc().getNameAsString();
    boolean disabled = this.zkTable.isDisabledTable(tableName);
    if (disabled || this.zkTable.isDisablingTable(tableName)) {
      LOG.info("Table " + tableName + (disabled? " disabled;": " disabling;") +
        " skipping assign of " + region.getRegionNameAsString());
      offlineDisabledRegion(region);
      return;
    }
    if (this.serverManager.isClusterShutdown()) {
      LOG.info("Cluster shutdown is set; skipping assign of " +
        region.getRegionNameAsString());
      return;
    }
    RegionState state = addToRegionsInTransition(region);
    synchronized (state) {
      assign(state, setOfflineInZK, forceNewPlan);
    }
  }
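
  /*
   * Illustrative sketch, not part of the original class: typical single-region
   * uses of assign() (hypothetical caller; the real call sites are spread
   * across the master and its handlers).
   */
  void exampleAssign(final HRegionInfo hri) {
    // Common case: force the unassigned znode to OFFLINE, then send the OPEN;
    // any existing RegionPlan is reused.
    assign(hri, true);
    // After a failed open on the planned server, a caller could instead force
    // a fresh plan so the region goes elsewhere.
    assign(hri, true, true);
  }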

  /**
   * Bulk assign regions to <code>destination</code>.  If we fail in any way,
   * we'll abort the server.
   * @param destination
   * @param regions Regions to assign.
   */
  void assign(final HServerInfo destination,
      final List<HRegionInfo> regions) {
    LOG.debug("Bulk assigning " + regions.size() + " region(s) to " +
      destination.getServerName());

    List<RegionState> states = new ArrayList<RegionState>(regions.size());
    synchronized (this.regionsInTransition) {
      for (HRegionInfo region: regions) {
        states.add(forceRegionStateToOffline(region));
      }
    }
    // Presumption is that only this thread will be updating the state at this
    // time; i.e. handlers on backend won't be trying to set it to OPEN, etc.
    AtomicInteger counter = new AtomicInteger(0);
    CreateUnassignedAsyncCallback cb =
      new CreateUnassignedAsyncCallback(this.watcher, destination, counter);
    for (RegionState state: states) {
      if (!asyncSetOfflineInZooKeeper(state, cb, state)) {
        return;
      }
    }
    // Wait until all unassigned nodes have been put up and watchers set.
    int total = regions.size();
    for (int oldCounter = 0; true;) {
      int count = counter.get();
      if (oldCounter != count) {
        LOG.info(destination.getServerName() + " unassigned znodes=" + count +
          " of total=" + total);
        oldCounter = count;
      }
      if (count == total) break;
      Threads.sleep(1);
    }
    // Move on to open regions.
    try {
      // Send OPEN RPC. This can fail if the server on the other end is not up.
      // If we fail, fail the startup by aborting the server.  There is one
      // exception we will tolerate: ServerNotRunningException.  This is thrown
      // between report of regionserver being up and it being ready to take
      // requests, so retry for a bounded period.
      long maxWaitTime = System.currentTimeMillis() +
        this.master.getConfiguration().getLong("hbase.regionserver.rpc.startup.waittime", 60000);
      while (!this.master.isStopped()) {
        try {
          this.serverManager.sendRegionOpen(destination, regions);
          break;
        } catch (org.apache.hadoop.hbase.ipc.ServerNotRunningException e) {
          // This is the one exception to retry.  For all else we should just fail
          // the startup.
          long now = System.currentTimeMillis();
          if (now > maxWaitTime) throw e;
          LOG.debug("Server is not yet up; waiting up to " +
              (maxWaitTime - now) + "ms", e);
          Thread.sleep(1000);
        }
      }
    } catch (Throwable t) {
      this.master.abort("Failed assignment of regions to " + destination +
        "; bulk assign FAILED", t);
      return;
    }
    LOG.debug("Bulk assigning done for " + destination.getServerName());
  }

  /**
   * Callback handler for create unassigned znodes used during bulk assign.
   */
  static class CreateUnassignedAsyncCallback implements AsyncCallback.StringCallback {
    private final Log LOG = LogFactory.getLog(CreateUnassignedAsyncCallback.class);
    private final ZooKeeperWatcher zkw;
    private final HServerInfo destination;
    private final AtomicInteger counter;

    CreateUnassignedAsyncCallback(final ZooKeeperWatcher zkw,
        final HServerInfo destination, final AtomicInteger counter) {
      this.zkw = zkw;
      this.destination = destination;
      this.counter = counter;
    }

    @Override
    public void processResult(int rc, String path, Object ctx, String name) {
      if (rc != 0) {
        // This is the result code.  If non-zero, we need to resubmit.
        LOG.warn("rc != 0 for " + path + " -- retryable connectionloss -- " +
          "FIX see http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A2");
        this.zkw.abort("Connectionloss writing unassigned at " + path +
          ", rc=" + rc, null);
        return;
      }
      LOG.debug("rs=" + (RegionState)ctx + ", server=" + this.destination.getServerName());
      // Async exists to set a watcher so we'll get triggered when the
      // unassigned node changes.
      this.zkw.getZooKeeper().exists(path, this.zkw,
        new ExistsUnassignedAsyncCallback(this.counter), ctx);
    }
  }

  /**
   * Callback handler for the exists call that sets watcher on unassigned znodes.
   * Used during bulk assign on startup.
   */
  static class ExistsUnassignedAsyncCallback implements AsyncCallback.StatCallback {
    private final Log LOG = LogFactory.getLog(ExistsUnassignedAsyncCallback.class);
    private final AtomicInteger counter;

    ExistsUnassignedAsyncCallback(final AtomicInteger counter) {
      this.counter = counter;
    }

    @Override
    public void processResult(int rc, String path, Object ctx, Stat stat) {
      if (rc != 0) {
        // This is the result code.  If non-zero, we need to resubmit.
        LOG.warn("rc != 0 for " + path + " -- retryable connectionloss -- " +
          "FIX see http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A2");
        return;
      }
      RegionState state = (RegionState)ctx;
      LOG.debug("rs=" + state);
      // Transition RegionState to PENDING_OPEN here in master; means we've
      // sent the open.  We're a little ahead of ourselves here since we've not
      // yet sent out the actual open but putting this state change after the
      // call to open risks our writing PENDING_OPEN after state has been moved
      // to OPENING by the regionserver.
      state.update(RegionState.State.PENDING_OPEN);
      this.counter.addAndGet(1);
    }
  }
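
  /*
   * Illustrative sketch, not part of the original class: the shape of the
   * async handshake the two callbacks above implement.  For each region we
   * (1) fire an async create of the OFFLINE unassigned znode, (2) on success
   * set a watcher via an async exists() call, and (3) bump a shared counter
   * once the watcher is in place; the bulk assign(destination, regions) above
   * spins until the counter reaches the region count before sending the OPEN
   * RPC.  This hypothetical helper mirrors that loop.
   */
  void exampleAsyncOfflineHandshake(final HServerInfo destination,
      final List<RegionState> states) {
    AtomicInteger counter = new AtomicInteger(0);
    CreateUnassignedAsyncCallback cb =
      new CreateUnassignedAsyncCallback(this.watcher, destination, counter);
    for (RegionState state: states) {
      // Step 1; steps 2 and 3 happen inside the callbacks.
      asyncSetOfflineInZooKeeper(state, cb, state);
    }
    while (counter.get() < states.size()) Threads.sleep(1);
  }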

  /**
   * @param region Region to set OFFLINE in {@link #regionsInTransition}.
   * @return The {@link RegionState} for the region, forced to OFFLINE.
   */
  private RegionState addToRegionsInTransition(final HRegionInfo region) {
    synchronized (regionsInTransition) {
      return forceRegionStateToOffline(region);
    }
  }

  /**
   * Sets the region's {@link RegionState} to {@link RegionState.State#OFFLINE}.
   * Caller must hold lock on this.regionsInTransition.
   * @param region
   * @return Amended RegionState.
   */
  private RegionState forceRegionStateToOffline(final HRegionInfo region) {
    String encodedName = region.getEncodedName();
    RegionState state = this.regionsInTransition.get(encodedName);
    if (state == null) {
      state = new RegionState(region, RegionState.State.OFFLINE);
      this.regionsInTransition.put(encodedName, state);
    } else {
      LOG.debug("Forcing OFFLINE; was=" + state);
      state.update(RegionState.State.OFFLINE);
    }
    return state;
  }

  /**
   * Caller must hold lock on the passed <code>state</code> object.
   * @param state
   * @param setOfflineInZK
   * @param forceNewPlan
   */
  private void assign(final RegionState state, final boolean setOfflineInZK,
      final boolean forceNewPlan) {
    for (int i = 0; i < this.maximumAssignmentAttempts; i++) {
      if (setOfflineInZK && !setOfflineInZooKeeper(state)) return;
      if (this.master.isStopped()) {
        LOG.debug("Server stopped; skipping assign of " + state);
        return;
      }
      RegionPlan plan = getRegionPlan(state, forceNewPlan);
      if (plan == null) return; // Should get reassigned later when RIT times out.
      try {
        LOG.debug("Assigning region " + state.getRegion().getRegionNameAsString() +
          " to " + plan.getDestination().getServerName());
        // Transition RegionState to PENDING_OPEN
        state.update(RegionState.State.PENDING_OPEN);
        // Send OPEN RPC. This can fail if the server on the other end is not up.
        serverManager.sendRegionOpen(plan.getDestination(), state.getRegion());
        break;
      } catch (Throwable t) {
        LOG.warn("Failed assignment of " +
          state.getRegion().getRegionNameAsString() + " to " +
          plan.getDestination() + ", trying to assign elsewhere instead; " +
          "retry=" + i, t);
        // Clean out the plan we failed to execute; it doesn't look like it'll
        // succeed anyways, so we need a new plan.
        // Transition back to OFFLINE
        state.update(RegionState.State.OFFLINE);
        // Force a new plan and reassign.  Will return null if no servers.
        if (getRegionPlan(state, plan.getDestination(), true) == null) {
          LOG.warn("Unable to find a viable location to assign region " +
            state.getRegion().getRegionNameAsString());
          return;
        }
      }
    }
  }

  /**
   * Set region as OFFLINED up in zookeeper.
   * @param state
   * @return True if we succeeded, false otherwise (State was incorrect or failed
   * updating zk).
   */
  boolean setOfflineInZooKeeper(final RegionState state) {
    if (!state.isClosed() && !state.isOffline()) {
      this.master.abort("Unexpected state trying to OFFLINE; " + state,
        new IllegalStateException("Unexpected state trying to OFFLINE; " + state));
      return false;
    }
    state.update(RegionState.State.OFFLINE);
    try {
      if (!ZKAssign.createOrForceNodeOffline(master.getZooKeeper(),
          state.getRegion(), master.getServerName())) {
        LOG.warn("Attempted to create/force node into OFFLINE state before " +
          "completing assignment but failed to do so for " + state);
        return false;
      }
    } catch (KeeperException e) {
      master.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
      return false;
    }
    return true;
  }

  /**
   * Set region as OFFLINED up in zookeeper asynchronously.
   * @param state
   * @return True if we succeeded, false otherwise (State was incorrect or failed
   * updating zk).
   */
  boolean asyncSetOfflineInZooKeeper(final RegionState state,
      final AsyncCallback.StringCallback cb, final Object ctx) {
    if (!state.isClosed() && !state.isOffline()) {
      this.master.abort("Unexpected state trying to OFFLINE; " + state,
        new IllegalStateException("Unexpected state trying to OFFLINE; " + state));
      return false;
    }
    state.update(RegionState.State.OFFLINE);
    try {
      ZKAssign.asyncCreateNodeOffline(master.getZooKeeper(), state.getRegion(),
        master.getServerName(), cb, ctx);
    } catch (KeeperException e) {
      master.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
      return false;
    }
    return true;
  }

  /**
   * @param state
   * @return Plan for the passed <code>state</code>.  If no plan currently
   * exists, one is created; if there are no servers to assign to, returns null.
   */
  RegionPlan getRegionPlan(final RegionState state,
      final boolean forceNewPlan) {
    return getRegionPlan(state, null, forceNewPlan);
  }

  /**
   * @param state
   * @param serverToExclude Server to exclude (we know it's bad). Pass null if
   * all servers are thought to be assignable.
   * @param forceNewPlan If true, then if an existing plan exists, a new plan
   * will be generated.
   * @return Plan for the passed <code>state</code>.  If no plan currently
   * exists, one is created; if there are no servers to assign to, returns null.
   */
  RegionPlan getRegionPlan(final RegionState state,
      final HServerInfo serverToExclude, final boolean forceNewPlan) {
    // Pickup existing plan or make a new one
    String encodedName = state.getRegion().getEncodedName();
    List<HServerInfo> servers = this.serverManager.getOnlineServersList();
    // The remove below hinges on the fact that the call to
    // serverManager.getOnlineServersList() returns a copy
    if (serverToExclude != null) servers.remove(serverToExclude);
    if (servers.isEmpty()) return null;
    RegionPlan randomPlan = new RegionPlan(state.getRegion(), null,
      LoadBalancer.randomAssignment(servers));
    boolean newPlan = false;
    RegionPlan existingPlan = null;
    synchronized (this.regionPlans) {
      existingPlan = this.regionPlans.get(encodedName);
      if (forceNewPlan || existingPlan == null
          || existingPlan.getDestination() == null
          || existingPlan.getDestination().equals(serverToExclude)) {
        newPlan = true;
        this.regionPlans.put(encodedName, randomPlan);
      }
    }
    if (newPlan) {
      LOG.debug("No previous transition plan was found (or we are ignoring " +
        "an existing plan) for " + state.getRegion().getRegionNameAsString() +
        " so generated a random one; " + randomPlan + "; " +
        serverManager.countOfRegionServers() +
        " (online=" + serverManager.getOnlineServers().size() +
        ", exclude=" + serverToExclude + ") available servers");
      return randomPlan;
    }
    LOG.debug("Using pre-existing plan for region " +
      state.getRegion().getRegionNameAsString() + "; plan=" + existingPlan);
    return existingPlan;
  }
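
  /*
   * Illustrative sketch, not part of the original class: how the plan lookup
   * above behaves for a retried assignment (hypothetical caller).
   */
  RegionPlan exampleGetPlanForRetry(final RegionState state,
      final HServerInfo failedServer) {
    // Excluding the failed server and forcing a new plan ensures we do not
    // hand the region straight back to the server that just failed the open.
    return getRegionPlan(state, failedServer, true);
  }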

  /**
   * Unassigns the specified region.
   * <p>
   * Updates the RegionState and sends the CLOSE RPC.
   * <p>
   * If a RegionPlan is already set, it will remain.
   *
   * @param region region to be unassigned
   */
  public void unassign(HRegionInfo region) {
    unassign(region, false);
  }

  /**
   * Unassigns the specified region.
   * <p>
   * Updates the RegionState and sends the CLOSE RPC.
   * <p>
   * If a RegionPlan is already set, it will remain.
   *
   * @param region region to be unassigned
   * @param force if region should be closed even if already closing
   */
  public void unassign(HRegionInfo region, boolean force) {
    LOG.debug("Starting unassignment of region " +
      region.getRegionNameAsString() + " (offlining)");
    synchronized (this.regions) {
      // Check if this region is currently assigned
      if (!regions.containsKey(region)) {
        LOG.debug("Attempted to unassign region " +
          region.getRegionNameAsString() + " but it is not " +
          "currently assigned anywhere");
        return;
      }
    }
    String encodedName = region.getEncodedName();
    // Grab the state of this region and synchronize on it
    RegionState state;
    synchronized (regionsInTransition) {
      state = regionsInTransition.get(encodedName);
      if (state == null) {
        state = new RegionState(region, RegionState.State.PENDING_CLOSE);
        regionsInTransition.put(encodedName, state);
      } else if (force && state.isPendingClose()) {
        LOG.debug("Attempting to unassign region " +
            region.getRegionNameAsString() + " which is already pending close "
            + "but forcing an additional close");
        state.update(RegionState.State.PENDING_CLOSE);
      } else {
        LOG.debug("Attempting to unassign region " +
          region.getRegionNameAsString() + " but it is " +
          "already in transition (" + state.getState() + ")");
        return;
      }
    }
    // Send CLOSE RPC
    HServerInfo server = null;
    synchronized (this.regions) {
      server = regions.get(region);
    }
    try {
      // TODO: We should consider making this look more like it does for the
      // region open where we catch all throwables and never abort
      if (serverManager.sendRegionClose(server, state.getRegion())) {
        LOG.debug("Sent CLOSE to " + server + " for region " +
          region.getRegionNameAsString());
        return;
      }
      // This never happens.  Currently the regionserver close always returns true.
      LOG.debug("Server " + server + " region CLOSE RPC returned false for " +
        region.getEncodedName());
    } catch (NotServingRegionException nsre) {
      LOG.info("Server " + server + " returned " + nsre + " for " +
        region.getEncodedName());
      // Presume that master has stale data.  Presume remote side just split.
      // Presume that the split message when it comes in will fix up the master's
      // in memory cluster state.
      return;
    } catch (ConnectException e) {
      LOG.info("Failed connect to " + server + ", message=" + e.getMessage() +
        ", region=" + region.getEncodedName());
      // Presume that regionserver just failed and we haven't got expired
      // server from zk yet.  Let expired server deal with clean up.
    } catch (java.net.SocketTimeoutException e) {
      LOG.info("Server " + server + " returned " + e.getMessage() + " for " +
        region.getEncodedName());
      // Presume retry or server will expire.
    } catch (EOFException e) {
      LOG.info("Server " + server + " returned " + e.getMessage() + " for " +
        region.getEncodedName());
      // Presume retry or server will expire.
    } catch (RemoteException re) {
      IOException ioe = re.unwrapRemoteException();
      if (ioe instanceof NotServingRegionException) {
        // Failed to close, so pass through and reassign
        LOG.debug("Server " + server + " returned " + ioe + " for " +
          region.getEncodedName());
      } else if (ioe instanceof EOFException) {
        // Failed to close, so pass through and reassign
        LOG.debug("Server " + server + " returned " + ioe + " for " +
          region.getEncodedName());
      } else {
        this.master.abort("Remote unexpected exception", ioe);
      }
    } catch (Throwable t) {
      // For now call abort if unexpected exception -- radical, but will get
      // fellas' attention.  St.Ack 20101012
      this.master.abort("Remote unexpected exception", t);
    }
  }
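
  /*
   * Illustrative sketch, not part of the original class: a region "move"
   * expressed with the primitives above (hypothetical helper).  Putting a
   * plan in place before the unassign means the close handling should reopen
   * the region on the chosen destination rather than a random server, since
   * the plan survives the unassign.
   */
  void exampleMove(final HRegionInfo hri, final HServerInfo source,
      final HServerInfo destination) {
    synchronized (this.regionPlans) {
      this.regionPlans.put(hri.getEncodedName(),
        new RegionPlan(hri, source, destination));
    }
    unassign(hri);
  }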

  /**
   * Waits until the specified region has completed assignment.
   * <p>
   * If the region is already assigned, returns immediately.  Otherwise, method
   * blocks until the region is assigned.
   * @param regionInfo region to wait on assignment for
   * @throws InterruptedException
   */
  public void waitForAssignment(HRegionInfo regionInfo)
  throws InterruptedException {
    synchronized(regions) {
      while (!regions.containsKey(regionInfo)) {
        regions.wait();
      }
    }
  }

  /**
   * Assigns the ROOT region.
   * <p>
   * Assumes that ROOT is currently closed and is not being actively served by
   * any RegionServer.
   * <p>
   * Forcibly unsets the current root region location in ZooKeeper and assigns
   * ROOT to a random RegionServer.
   * @throws KeeperException
   */
  public void assignRoot() throws KeeperException {
    RootLocationEditor.deleteRootLocation(this.master.getZooKeeper());
    assign(HRegionInfo.ROOT_REGIONINFO, true);
  }

  /**
   * Assigns the META region.
   * <p>
   * Assumes that META is currently closed and is not being actively served by
   * any RegionServer.
   * <p>
   * Forcibly assigns META to a random RegionServer.
   */
  public void assignMeta() {
    // Force assignment to a random server
    assign(HRegionInfo.FIRST_META_REGIONINFO, true);
  }
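
  /*
   * Illustrative sketch, not part of the original class: catalog assignment
   * as a master startup might drive it -- kick off the assigns, blocking
   * after each until the region shows up in the in-memory assignment map
   * (hypothetical caller; the real startup sequence lives in the master and
   * also leans on the CatalogTracker).
   */
  void exampleAssignCatalog() throws KeeperException, InterruptedException {
    assignRoot();
    waitForAssignment(HRegionInfo.ROOT_REGIONINFO);
    assignMeta();
    waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
  }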

  /**
   * Assigns the list of user regions in round-robin fashion, if any exist.
   * <p>
   * This is a synchronous call and will return once every region has been
   * assigned.  If anything fails, an exception is thrown.
   * @param regions regions to assign
   * @param servers servers to spread the regions across
   * @throws InterruptedException
   * @throws IOException
   */
  public void assignUserRegions(List<HRegionInfo> regions,
      List<HServerInfo> servers)
  throws IOException, InterruptedException {
    if (regions == null)
      return;
    // Generate a round-robin bulk assignment plan
    Map<HServerInfo, List<HRegionInfo>> bulkPlan =
      LoadBalancer.roundRobinAssignment(regions, servers);
    LOG.info("Bulk assigning " + regions.size() + " region(s) round-robin across " +
      servers.size() + " server(s)");
    // Use fixed count thread pool assigning.
    BulkAssigner ba = new BulkStartupAssigner(this.master, bulkPlan, this);
    ba.bulkAssign();
    LOG.info("Bulk assigning done");
  }

  /**
   * Assigns all user regions, if any exist.  Used during cluster startup.
   * <p>
   * This is a synchronous call and will return once every region has been
   * assigned.  If anything fails, an exception is thrown and the cluster
   * should be shutdown.
   * @throws InterruptedException
   * @throws IOException
   */
  public void assignAllUserRegions() throws IOException, InterruptedException {
    // Get all available servers
    List<HServerInfo> servers = serverManager.getOnlineServersList();

    // Scan META for all user regions, skipping any disabled tables
    Map<HRegionInfo,HServerAddress> allRegions =
      MetaReader.fullScan(catalogTracker, this.zkTable.getDisabledTables(), true);
    if (allRegions == null || allRegions.isEmpty()) return;

    // Determine what type of assignment to do on startup
    boolean retainAssignment = master.getConfiguration().
      getBoolean("hbase.master.startup.retainassign", true);

    Map<HServerInfo, List<HRegionInfo>> bulkPlan = null;
    if (retainAssignment) {
      // Reuse existing assignment info
      bulkPlan = LoadBalancer.retainAssignment(allRegions, servers);
    } else {
      // Assign regions in round-robin fashion
      assignUserRegions(new ArrayList<HRegionInfo>(allRegions.keySet()), servers);
      return;
    }
    LOG.info("Bulk assigning " + allRegions.size() + " region(s) across " +
      servers.size() + " server(s), retainAssignment=" + retainAssignment);

    // Use fixed count thread pool assigning.
    BulkAssigner ba = new BulkStartupAssigner(this.master, bulkPlan, this);
    ba.bulkAssign();
    LOG.info("Bulk assigning done");
  }
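
  /*
   * Illustrative sketch, not part of the original class: the startup knob read
   * above.  Setting it false scatters regions round-robin on a full cluster
   * restart instead of reopening each region where META last saw it.
   */
  static void exampleDisableRetainAssignment(final Configuration conf) {
    conf.setBoolean("hbase.master.startup.retainassign", false);
  }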

  /**
   * Run bulk assign on startup.
   */
  static class BulkStartupAssigner extends BulkAssigner {
    private final Map<HServerInfo, List<HRegionInfo>> bulkPlan;
    private final AssignmentManager assignmentManager;

    BulkStartupAssigner(final Server server,
        final Map<HServerInfo, List<HRegionInfo>> bulkPlan,
        final AssignmentManager am) {
      super(server);
      this.bulkPlan = bulkPlan;
      this.assignmentManager = am;
    }

    @Override
    public boolean bulkAssign() throws InterruptedException {
      // Disable timing out regions in transition up in zk while bulk assigning.
      this.assignmentManager.timeoutMonitor.bulkAssign(true);
      try {
        return super.bulkAssign();
      } finally {
        // Reenable timing out regions in transition up in zk.
        this.assignmentManager.timeoutMonitor.bulkAssign(false);
      }
    }

    @Override
    protected String getThreadNamePrefix() {
      return super.getThreadNamePrefix() + "-startup";
    }

    @Override
    protected void populatePool(java.util.concurrent.ExecutorService pool) {
      for (Map.Entry<HServerInfo, List<HRegionInfo>> e: this.bulkPlan.entrySet()) {
        pool.execute(new SingleServerBulkAssigner(e.getKey(), e.getValue(),
          this.assignmentManager));
      }
    }

    protected boolean waitUntilDone(final long timeout)
    throws InterruptedException {
      return this.assignmentManager.waitUntilNoRegionsInTransition(timeout);
    }
  }

1313   /**
1314    * Manage bulk assigning to a server.
1315    */
1316   static class SingleServerBulkAssigner implements Runnable {
1317     private final HServerInfo regionserver;
1318     private final List<HRegionInfo> regions;
1319     private final AssignmentManager assignmentManager;
1320 
1321     SingleServerBulkAssigner(final HServerInfo regionserver,
1322         final List<HRegionInfo> regions, final AssignmentManager am) {
1323       this.regionserver = regionserver;
1324       this.regions = regions;
1325       this.assignmentManager = am;
1326     }
1327     @Override
1328     public void run() {
1329       this.assignmentManager.assign(this.regionserver, this.regions);
1330     }
1331   }
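
  // A minimal usage sketch for the two helpers above, assuming a bulk plan
  // already computed as in assignAllUserRegions.  bulkAssign() fans out one
  // SingleServerBulkAssigner per server via populatePool, then blocks in
  // waitUntilDone:
  //
  //   Map<HServerInfo, List<HRegionInfo>> plan =
  //     LoadBalancer.retainAssignment(allRegions, servers);
  //   BulkAssigner ba = new BulkStartupAssigner(master, plan, assignmentManager);
  //   boolean done = ba.bulkAssign(); // false if regions were still in transition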
1332 
1333   /**
1334    * Wait until no regions in transition.
1335    * @param timeout How long to wait.
1336    * @return True if no regions are in transition.
1337    * @throws InterruptedException
1338    */
1339   boolean waitUntilNoRegionsInTransition(final long timeout)
1340   throws InterruptedException {
1341     // Blocks until there are no regions in transition.  It is possible
1342     // that there are regions in transition immediately after this returns,
1343     // but this method guarantees that if it returns without an exception,
1344     // there was a period of time with no regions in transition from the
1345     // point-of-view of the in-memory state of the Master.
1347     long startTime = System.currentTimeMillis();
1348     long remaining = timeout;
1349     synchronized (regionsInTransition) {
1350       while (regionsInTransition.size() > 0 && !this.master.isStopped()
1351           && remaining > 0) {
1352         regionsInTransition.wait(remaining);
1353         remaining = timeout - (System.currentTimeMillis() - startTime);
1354       }
1355     }
1356     return regionsInTransition.isEmpty();
1357   }
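
  // Caller sketch with a hypothetical one-minute timeout; a false return
  // means the timeout elapsed (or the master stopped) while regions were
  // still in transition:
  //
  //   if (!waitUntilNoRegionsInTransition(60 * 1000)) {
  //     LOG.warn("Regions still in transition after one minute");
  //   }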
1358 
1359   /**
1360    * Rebuild the list of user regions and assignment information.
1361    * <p>
1362    * Returns a map of servers that are not found to be online and the regions
1363    * they were hosting.
1364    * @return map of servers not online to their assigned regions, as stored
1365    *         in META
1366    * @throws IOException
1367    */
1368   private Map<HServerInfo,List<Pair<HRegionInfo,Result>>> rebuildUserRegions()
1369   throws IOException {
1370     // Region assignment from META
1371     List<Result> results = MetaReader.fullScanOfResults(catalogTracker);
1372     // Map of offline servers and their regions to be returned
1373     Map<HServerInfo,List<Pair<HRegionInfo,Result>>> offlineServers =
1374       new TreeMap<HServerInfo,List<Pair<HRegionInfo,Result>>>();
1375     // Iterate regions in META
1376     for (Result result : results) {
1377       Pair<HRegionInfo,HServerInfo> region =
1378         MetaReader.metaRowToRegionPairWithInfo(result);
1379       if (region == null) continue;
1380       HServerInfo regionLocation = region.getSecond();
1381       HRegionInfo regionInfo = region.getFirst();
1382       if (regionLocation == null) {
1383         // Region not being served, add to region map with no assignment
1384         // If this needs to be assigned out, it will also be in ZK as RIT
1385         this.regions.put(regionInfo, null);
1386       } else if (!serverManager.isServerOnline(
1387           regionLocation.getServerName())) {
1388         // Region is located on a server that isn't online
1389         List<Pair<HRegionInfo,Result>> offlineRegions =
1390           offlineServers.get(regionLocation);
1391         if (offlineRegions == null) {
1392           offlineRegions = new ArrayList<Pair<HRegionInfo,Result>>(1);
1393           offlineServers.put(regionLocation, offlineRegions);
1394         }
1395         offlineRegions.add(new Pair<HRegionInfo,Result>(regionInfo, result));
1396       } else {
1397         // Region is being served and on an active server
1398         regions.put(regionInfo, regionLocation);
1399         addToServers(regionLocation, regionInfo);
1400       }
1401     }
1402     return offlineServers;
1403   }
1404 
1405   /**
1406    * Processes list of dead servers from result of META scan.
1407    * <p>
1408    * This is used as part of failover to handle RegionServers which failed
1409    * while there was no active master.
1410    * <p>
1411    * This method stubs the in-memory data into the state expected by the
1412    * normal server shutdown handler.
1413    *
1414    * @param deadServers Map of dead servers to the regions they were hosting
1415    * @throws IOException
1416    * @throws KeeperException
1417    */
1418   private void processDeadServers(
1419       Map<HServerInfo, List<Pair<HRegionInfo, Result>>> deadServers)
1420   throws IOException, KeeperException {
1421     for (Map.Entry<HServerInfo, List<Pair<HRegionInfo,Result>>> deadServer :
1422       deadServers.entrySet()) {
1423       List<Pair<HRegionInfo,Result>> regions = deadServer.getValue();
1424       for (Pair<HRegionInfo,Result> region : regions) {
1425         HRegionInfo regionInfo = region.getFirst();
1426         Result result = region.getSecond();
1427         // If region was in transition (was in zk) force it offline for reassign
1428         try {
1429           ZKAssign.createOrForceNodeOffline(watcher, regionInfo,
1430               master.getServerName());
1431         } catch (KeeperException.NoNodeException nne) {
1432           // This is fine
1433         }
1434         // Process with existing RS shutdown code
1435         ServerShutdownHandler.processDeadRegion(regionInfo, result, this,
1436             this.catalogTracker);
1437       }
1438     }
1439   }
1440 
1441   /*
1442    * Presumes the caller has taken care of the locking needed to modify the servers Map.
1443    * @param hsi
1444    * @param hri
1445    */
1446   private void addToServers(final HServerInfo hsi, final HRegionInfo hri) {
1447     List<HRegionInfo> hris = servers.get(hsi);
1448     if (hris == null) {
1449       hris = new ArrayList<HRegionInfo>();
1450       servers.put(hsi, hris);
1451     }
1452     hris.add(hri);
1453   }
1454 
1455   /**
1456    * @return A copy of the Map of regions currently in transition.
1457    */
1458   public NavigableMap<String, RegionState> getRegionsInTransition() {
1459     synchronized (this.regionsInTransition) {
1460       return new TreeMap<String, RegionState>(this.regionsInTransition);
1461     }
1462   }
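
  // Note: the TreeMap returned above is a point-in-time copy; it does not
  // reflect transitions that start or finish after the call returns.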
1463 
1464   /**
1465    * @return True if regions in transition.
1466    */
1467   public boolean isRegionsInTransition() {
1468     synchronized (this.regionsInTransition) {
1469       return !this.regionsInTransition.isEmpty();
1470     }
1471   }
1472 
1473   /**
1474    * @param hri Region to check.
1475    * @return null if the passed region is not in transition, else the current
1476    * RegionState.
1477    */
1478   public RegionState isRegionInTransition(final HRegionInfo hri) {
1479     synchronized (this.regionsInTransition) {
1480       return this.regionsInTransition.get(hri.getEncodedName());
1481     }
1482   }
1483 
1484   /**
1485    * Clears the specified region from being in transition.
1486    * <p>
1487    * Used only by HBCK tool.
1488    * @param hri
1489    */
1490   public void clearRegionFromTransition(HRegionInfo hri) {
1491     synchronized (this.regionsInTransition) {
1492       this.regionsInTransition.remove(hri.getEncodedName());
1493     }
1494     synchronized (this.regions) {
1495       this.regions.remove(hri);
1496       for (List<HRegionInfo> regions : this.servers.values()) {
1497         for (int i = 0; i < regions.size(); i++) {
1498           if (regions.get(i).equals(hri)) {
1499             regions.remove(i);
1500             break;
1501           }
1502         }
1503       }
1504     }
1505     clearRegionPlan(hri);
1506   }
1507 
1508   /**
1509    * @param region Region whose plan we are to clear.
1510    */
1511   void clearRegionPlan(final HRegionInfo region) {
1512     synchronized (this.regionPlans) {
1513       this.regionPlans.remove(region.getEncodedName());
1514     }
1515   }
1516 
1517   /**
1518    * Wait on region to clear regions-in-transition.
1519    * @param hri Region to wait on.
1520    * @throws IOException
1521    */
1522   public void waitOnRegionToClearRegionsInTransition(final HRegionInfo hri)
1523   throws IOException {
1524     if (isRegionInTransition(hri) == null) return;
1525     RegionState rs = null;
1526     // There is already a timeout monitor on regions in transition so I
1527     // should not have to have one here too?
1528     while(!this.master.isStopped() && (rs = isRegionInTransition(hri)) != null) {
1529       Threads.sleep(1000);
1530       LOG.info("Waiting on " + rs + " to clear regions-in-transition");
1531     }
1532     if (this.master.isStopped()) {
1533       LOG.info("Giving up wait on regions in " +
1534         "transition because stoppable.isStopped is set");
1535     }
1536   }
1537 
1538 
1539   /**
1540    * Gets the online regions of the specified table.
1541    * This method looks at the in-memory state.  It does not go to <code>.META.</code>.
1542    * Only returns <em>online</em> regions.  If a region on this table has been
1543    * closed during a disable, etc., it will not be included in the returned list.
1544    * So, the returned list may not necessarily be ALL regions in this table; it is
1545    * only the ONLINE regions in the table.
1546    * @param tableName
1547    * @return Online regions from <code>tableName</code>
1548    */
1549   public List<HRegionInfo> getRegionsOfTable(byte[] tableName) {
1550     List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
1551     HRegionInfo boundary =
1552       new HRegionInfo(new HTableDescriptor(tableName), null, null);
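    // The boundary above has empty start and end keys; HRegionInfo's natural
    // ordering groups a table's regions together and orders them by start key,
    // so the boundary sorts at or before the first region of tableName.  The
    // tailMap below therefore begins at this table's first region, and the
    // break exits the loop at the first region of a different table.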
1553     synchronized (this.regions) {
1554       for (HRegionInfo regionInfo: this.regions.tailMap(boundary).keySet()) {
1555         if(Bytes.equals(regionInfo.getTableDesc().getName(), tableName)) {
1556           tableRegions.add(regionInfo);
1557         } else {
1558           break;
1559         }
1560       }
1561     }
1562     return tableRegions;
1563   }
1564 
1565   /**
1566    * Monitor to check for time outs on region transition operations
1567    */
1568   public class TimeoutMonitor extends Chore {
1569     private final int timeout;
1570     private boolean bulkAssign = false;
1571 
1572     /**
1573      * Creates a periodic monitor to check for time outs on region transition
1574      * operations.  This will deal with retries if for some reason something
1575      * doesn't happen within the specified timeout.
1576      * @param period
1577      * @param stopper When {@link Stoppable#isStopped()} is true, this thread
1578      * will clean up and exit cleanly.
1579      * @param timeout
1580      */
1581     public TimeoutMonitor(final int period, final Stoppable stopper,
1582         final int timeout) {
1583       super("AssignmentTimeoutMonitor", period, stopper);
1584       this.timeout = timeout;
1585     }
1586 
1587     /**
1588      * @param bulkAssign If true, we'll suspend checking regions in transition
1589      * up in zookeeper.  If false, will reenable check.
1590      * @return Old setting for bulkAssign.
1591      */
1592     public boolean bulkAssign(final boolean bulkAssign) {
1593       boolean result = this.bulkAssign;
1594       this.bulkAssign = bulkAssign;
1595       return result;
1596     }
1597 
1598     @Override
1599     protected void chore() {
1600       // If bulkAssign in progress, suspend checks
1601       if (this.bulkAssign) return;
1602       List<HRegionInfo> unassigns = new ArrayList<HRegionInfo>();
1603       Map<HRegionInfo, Boolean> assigns =
1604         new HashMap<HRegionInfo, Boolean>();
1605       synchronized (regionsInTransition) {
1606         // Iterate all regions in transition checking for time outs
1607         long now = System.currentTimeMillis();
1608         for (RegionState regionState : regionsInTransition.values()) {
1609           if (regionState.getStamp() + timeout <= now) {
1610             HRegionInfo regionInfo = regionState.getRegion();
1611             LOG.info("Regions in transition timed out:  " + regionState);
1612             // Expired!  Do a retry.
1613             switch (regionState.getState()) {
1614               case CLOSED:
1615                 LOG.info("Region " + regionInfo.getEncodedName() +
1616                   " has been CLOSED for too long, waiting on queued " +
1617                   "ClosedRegionHandler to run or server shutdown");
1618                 // Update our timestamp.
1619                 synchronized(regionState) {
1620                   regionState.update(regionState.getState());
1621                 }
1622                 break;
1623               case OFFLINE:
1624                 LOG.info("Region has been OFFLINE for too long, " +
1625                   "reassigning " + regionInfo.getRegionNameAsString() +
1626                   " to a random server");
1627                 assigns.put(regionState.getRegion(), Boolean.FALSE);
1628                 break;
1629               case PENDING_OPEN:
1630                 LOG.info("Region has been PENDING_OPEN for too " +
1631                     "long, reassigning region=" +
1632                     regionInfo.getRegionNameAsString());
1633                 assigns.put(regionState.getRegion(), Boolean.TRUE);
1634                 break;
1635               case OPENING:
1636                 LOG.info("Region has been OPENING for too " +
1637                   "long, reassigning region=" +
1638                   regionInfo.getRegionNameAsString());
1639                 // Should have a ZK node in OPENING state
1640                 try {
1641                   String node = ZKAssign.getNodeName(watcher,
1642                       regionInfo.getEncodedName());
1643                   Stat stat = new Stat();
1644                   RegionTransitionData data = ZKAssign.getDataNoWatch(watcher,
1645                       node, stat);
1646                   if (data == null) {
1647                     LOG.warn("Data is null, node " + node + " no longer exists");
1648                     break;
1649                   }
1650                   if (data.getEventType() == EventType.RS_ZK_REGION_OPENED) {
1651                     LOG.debug("Region has transitioned to OPENED, allowing " +
1652                         "watched event handlers to process");
1653                     break;
1654                   } else if (data.getEventType() !=
1655                       EventType.RS_ZK_REGION_OPENING) {
1656                     LOG.warn("While timing out a region in state OPENING, " +
1657                         "found ZK node in unexpected state: " +
1658                         data.getEventType());
1659                     break;
1660                   }
1661                   // Attempt to transition node into OFFLINE
1662                   try {
1663                     data = new RegionTransitionData(
1664                       EventType.M_ZK_REGION_OFFLINE, regionInfo.getRegionName(),
1665                       master.getServerName());
1666                     if (ZKUtil.setData(watcher, node, data.getBytes(),
1667                         stat.getVersion())) {
1668                       // Node is now OFFLINE, let's trigger another assignment
1669                       ZKUtil.getDataAndWatch(watcher, node); // re-set the watch
1670                       LOG.info("Successfully transitioned region=" +
1671                           regionInfo.getRegionNameAsString() + " into OFFLINE" +
1672                           " and forcing a new assignment");
1673                       assigns.put(regionState.getRegion(), Boolean.TRUE);
1674                     }
1675                   } catch (KeeperException.NoNodeException nne) {
1676                     // Node did not exist, can't time this out
1677                   }
1678                 } catch (KeeperException ke) {
1679                   LOG.error("Unexpected ZK exception timing out CLOSING region",
1680                       ke);
1681                   break;
1682                 }
1683                 break;
1684               case OPEN:
1685                 LOG.error("Region has been OPEN for too long, " +
1686                 "we don't know where region was opened so can't do anything");
1687                 break;
1688               case PENDING_CLOSE:
1689                 LOG.info("Region has been PENDING_CLOSE for too " +
1690                     "long, running forced unassign again on region=" +
1691                     regionInfo.getRegionNameAsString());
1692                 try {
1693                   // If the server got the RPC, it will transition the node
1694                   // to CLOSING, so only do something here if no node exists
1695                   if (!ZKUtil.watchAndCheckExists(watcher,
1696                     ZKAssign.getNodeName(watcher, regionInfo.getEncodedName()))) {
1697                     // Queue running of an unassign -- do actual unassign
1698                     // outside of the regionsInTransition lock.
1699                     unassigns.add(regionInfo);
1700                   }
1701                 } catch (NoNodeException e) {
1702                   LOG.debug("Node no longer existed so not forcing another " +
1703                     "unassignment");
1704                 } catch (KeeperException e) {
1705                   LOG.warn("Unexpected ZK exception timing out a region " +
1706                     "close", e);
1707                 }
1708                 break;
1709               case CLOSING:
1710                 LOG.info("Region has been CLOSING for too " +
1711                   "long, this should eventually complete or the server will " +
1712                   "expire, doing nothing");
1713                 break;
1714             }
1715           }
1716         }
1717       }
1718       // Finish the work for regions in PENDING_CLOSE state
1719       for (HRegionInfo hri: unassigns) {
1720         unassign(hri, true);
1721       }
1722       for (Map.Entry<HRegionInfo, Boolean> e: assigns.entrySet()){
1723         assign(e.getKey(), false, e.getValue());
1724       }
1725     }
1726   }
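
  // Summary of the timeout dispositions in the chore above, one per state:
  //   CLOSED        -> refresh the timestamp; keep waiting on the handler
  //   OFFLINE       -> reassign to a random server
  //   PENDING_OPEN  -> force a reassign
  //   OPENING       -> flip the ZK node back to OFFLINE, then reassign
  //   OPEN          -> log an error; the opening server is unknown
  //   PENDING_CLOSE -> re-run the unassign if no ZK node exists
  //   CLOSING       -> do nothing; the close or server expiry will finish it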
1727 
1728   /**
1729    * Process shutdown server removing any assignments.
1730    * @param hsi Server that went down.
1731    * @return list of regions in transition on this server
1732    */
1733   public List<RegionState> processServerShutdown(final HServerInfo hsi) {
1734     // Clean out any existing assignment plans for this server
1735     synchronized (this.regionPlans) {
1736       for (Iterator <Map.Entry<String, RegionPlan>> i =
1737           this.regionPlans.entrySet().iterator(); i.hasNext();) {
1738         Map.Entry<String, RegionPlan> e = i.next();
1739         if (e.getValue().getDestination().equals(hsi)) {
1740           // Use the iterator's remove, else we'll get a ConcurrentModificationException.
1741           i.remove();
1742         }
1743       }
1744     }
1745     // TODO: Do we want to sync on RIT here?
1746     // Remove this server from map of servers to regions, and remove all regions
1747     // of this server from online map of regions.
1748     Set<HRegionInfo> deadRegions = null;
1749     List<RegionState> rits = new ArrayList<RegionState>();
1750     synchronized (this.regions) {
1751       List<HRegionInfo> assignedRegions = this.servers.remove(hsi);
1752       if (assignedRegions == null || assignedRegions.isEmpty()) {
1753         // No regions on this server, we are done, return empty list of RITs
1754         return rits;
1755       }
1756       deadRegions = new TreeSet<HRegionInfo>(assignedRegions);
1757       for (HRegionInfo region : deadRegions) {
1758         this.regions.remove(region);
1759       }
1760     }
1761     // See if any of the regions that were online on this server were in RIT
1762     // If they are, normal timeouts will deal with them appropriately so
1763     // let's skip a manual re-assignment.
1764     synchronized (regionsInTransition) {
1765       for (RegionState region : this.regionsInTransition.values()) {
1766         if (deadRegions.remove(region.getRegion())) {
1767           rits.add(region);
1768         }
1769       }
1770     }
1771     return rits;
1772   }
1773 
1774   /**
1775    * Update in-memory structures to reflect a region split.
1776    * @param hsi Server that reported the split
1777    * @param parent Parent region that was split
1778    * @param a Daughter region A
1779    * @param b Daughter region B
1780    */
1781   public void handleSplitReport(final HServerInfo hsi, final HRegionInfo parent,
1782       final HRegionInfo a, final HRegionInfo b) {
1783     regionOffline(parent);
1784     // Remove any CLOSING node, if exists, due to race between master & rs
1785     // for close & split.  Not putting into regionOffline method because it is
1786     // called from various locations.
1787     try {
1788       RegionTransitionData node = ZKAssign.getDataNoWatch(this.watcher,
1789         parent.getEncodedName(), null);
1790       if (node != null) {
1791         if (node.getEventType().equals(EventType.RS_ZK_REGION_CLOSING)) {
1792           ZKAssign.deleteClosingNode(this.watcher, parent);
1793         } else {
1794           LOG.warn("Split report has RIT node (shouldnt have one): " +
1795             parent + " node: " + node);
1796         }
1797       }
1798     } catch (KeeperException e) {
1799       LOG.warn("Exception while validating RIT during split report", e);
1800     }
1801 
1802     regionOnline(a, hsi);
1803     regionOnline(b, hsi);
1804 
1805     // There's a possibility that the region was splitting while a user asked
1806     // the master to disable the table; we need to make sure we close those
1807     // regions in that case.  This is not racing with the region server itself
1808     // since the RS report is done after the split transaction completes.
1809     if (this.zkTable.isDisablingOrDisabledTable(
1810         parent.getTableDesc().getNameAsString())) {
1811       unassign(a);
1812       unassign(b);
1813     }
1814   }
1815 
1816   /**
1817    * @return A clone of current assignments. Note, this is assignments only.
1818    * If a new server has come in and it has no regions, it will not be included
1819    * in the returned Map.
1820    */
1821   Map<HServerInfo, List<HRegionInfo>> getAssignments() {
1822     // This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
1823     // We can't hand out the original since it can change, and at least the
1824     // load balancer wants to iterate this exported list.  We synchronize on
1825     // regions since all access to this.servers is under a lock on this.regions.
1826     Map<HServerInfo, List<HRegionInfo>> result = null;
1827     synchronized (this.regions) {
1828       result = new HashMap<HServerInfo, List<HRegionInfo>>(this.servers.size());
1829       for (Map.Entry<HServerInfo, List<HRegionInfo>> e: this.servers.entrySet()) {
1830         List<HRegionInfo> shallowCopy = new ArrayList<HRegionInfo>(e.getValue());
1831         HServerInfo clone = new HServerInfo(e.getKey());
1832         // Set into server load the number of regions this server is carrying
1833         // The load balancer calculation needs it at least, and it's handy.
1834         clone.getLoad().setNumberOfRegions(e.getValue().size());
1835         result.put(clone, shallowCopy);
1836       }
1837     }
1838     return result;
1839   }
1840 
1841   /**
1842    * @param encodedRegionName Region encoded name.
1843    * @return Null or a {@link Pair} instance that holds the full {@link HRegionInfo}
1844    * and the hosting server's {@link HServerInfo}.
1845    */
1846   Pair<HRegionInfo, HServerInfo> getAssignment(final byte [] encodedRegionName) {
1847     String name = Bytes.toString(encodedRegionName);
1848     synchronized(this.regions) {
1849       for (Map.Entry<HRegionInfo, HServerInfo> e: this.regions.entrySet()) {
1850         if (e.getKey().getEncodedName().equals(name)) {
1851           return new Pair<HRegionInfo, HServerInfo>(e.getKey(), e.getValue());
1852         }
1853       }
1854     }
1855     return null;
1856   }
1857 
1858   /**
1859    * @param plan Plan to execute.
1860    */
1861   void balance(final RegionPlan plan) {
1862     synchronized (this.regionPlans) {
1863       this.regionPlans.put(plan.getRegionName(), plan);
1864     }
1865     unassign(plan.getRegionInfo());
1866   }
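
  // A hedged caller sketch, assuming RegionPlan's (region, source,
  // destination) constructor: the plan stored above is presumably consulted
  // when the unassigned region comes back around for assignment, which is how
  // the balancer-chosen destination wins over a random one.
  //
  //   balance(new RegionPlan(hri, currentServer, destinationServer));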
1867 
1868   /**
1869    * State of a Region while undergoing transitions.
1870    */
1871   public static class RegionState implements Writable {
1872     private HRegionInfo region;
1873 
1874     public enum State {
1875       OFFLINE,        // region is in an offline state
1876       PENDING_OPEN,   // sent rpc to server to open but has not begun
1877       OPENING,        // server has begun to open but not yet done
1878       OPEN,           // server opened region and updated meta
1879       PENDING_CLOSE,  // sent rpc to server to close but has not begun
1880       CLOSING,        // server has begun to close but not yet done
1881       CLOSED          // server closed region and updated meta
1882     }
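
    // Typical lifecycles implied by the comments above:
    //   open:  OFFLINE -> PENDING_OPEN -> OPENING -> OPEN
    //   close: OPEN -> PENDING_CLOSE -> CLOSING -> CLOSED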
1883 
1884     private State state;
1885     private long stamp;
1886 
1887     public RegionState() {}
1888 
1889     RegionState(HRegionInfo region, State state) {
1890       this(region, state, System.currentTimeMillis());
1891     }
1892 
1893     RegionState(HRegionInfo region, State state, long stamp) {
1894       this.region = region;
1895       this.state = state;
1896       this.stamp = stamp;
1897     }
1898 
1899     public void update(State state, long stamp) {
1900       this.state = state;
1901       this.stamp = stamp;
1902     }
1903 
1904     public void update(State state) {
1905       this.state = state;
1906       this.stamp = System.currentTimeMillis();
1907     }
1908 
1909     public State getState() {
1910       return state;
1911     }
1912 
1913     public long getStamp() {
1914       return stamp;
1915     }
1916 
1917     public HRegionInfo getRegion() {
1918       return region;
1919     }
1920 
1921     public boolean isClosing() {
1922       return state == State.CLOSING;
1923     }
1924 
1925     public boolean isClosed() {
1926       return state == State.CLOSED;
1927     }
1928 
1929     public boolean isPendingClose() {
1930       return state == State.PENDING_CLOSE;
1931     }
1932 
1933     public boolean isOpening() {
1934       return state == State.OPENING;
1935     }
1936 
1937     public boolean isOpened() {
1938       return state == State.OPEN;
1939     }
1940 
1941     public boolean isPendingOpen() {
1942       return state == State.PENDING_OPEN;
1943     }
1944 
1945     public boolean isOffline() {
1946       return state == State.OFFLINE;
1947     }
1948 
1949     @Override
1950     public String toString() {
1951       return region.getRegionNameAsString() + " state=" + state +
1952         ", ts=" + stamp;
1953     }
1954 
1955     @Override
1956     public void readFields(DataInput in) throws IOException {
1957       region = new HRegionInfo();
1958       region.readFields(in);
1959       state = State.valueOf(in.readUTF());
1960       stamp = in.readLong();
1961     }
1962 
1963     @Override
1964     public void write(DataOutput out) throws IOException {
1965       region.write(out);
1966       out.writeUTF(state.name());
1967       out.writeLong(stamp);
1968     }
1969   }
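
  // A minimal Writable round-trip sketch for RegionState; the java.io stream
  // setup is illustrative and exercises only the write/readFields pair above:
  //
  //   ByteArrayOutputStream bos = new ByteArrayOutputStream();
  //   regionState.write(new DataOutputStream(bos));
  //   RegionState copy = new RegionState();
  //   copy.readFields(new DataInputStream(
  //     new ByteArrayInputStream(bos.toByteArray())));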
1970 
1971   public void stop() {
1972     this.timeoutMonitor.interrupt();
1973   }
1974 }