View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.zookeeper;
21  
22  import java.util.List;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.executor.RegionTransitionData;
28  import org.apache.hadoop.hbase.executor.EventHandler.EventType;
29  import org.apache.zookeeper.AsyncCallback;
30  import org.apache.zookeeper.KeeperException;
31  import org.apache.zookeeper.KeeperException.Code;
32  import org.apache.zookeeper.KeeperException.NoNodeException;
33  import org.apache.zookeeper.KeeperException.NodeExistsException;
34  import org.apache.zookeeper.data.Stat;
35  
36  /**
37   * Utility class for doing region assignment in ZooKeeper.  This class extends
38   * stuff done in {@link ZKUtil} to cover specific assignment operations.
39   * <p>
40   * Contains only static methods and constants.
41   * <p>
42   * Used by both the Master and RegionServer.
43   * <p>
44   * All valid transitions outlined below:
45   * <p>
46   * <b>MASTER</b>
47   * <ol>
48   *   <li>
49   *     Master creates an unassigned node as OFFLINE.
50   *     - Cluster startup and table enabling.
51   *   </li>
52   *   <li>
53   *     Master forces an existing unassigned node to OFFLINE.
54   *     - RegionServer failure.
55   *     - Allows transitions from all states to OFFLINE.
56   *   </li>
57   *   <li>
58   *     Master deletes an unassigned node that was in an OPENED state.
59   *     - Normal region transitions.  Besides cluster startup, no other deletions
60   *     of unassigned nodes are allowed.
61   *   </li>
62   *   <li>
63   *     Master deletes all unassigned nodes regardless of state.
64   *     - Cluster startup before any assignment happens.
65   *   </li>
66   * </ol>
67   * <p>
68   * <b>REGIONSERVER</b>
69   * <ol>
70   *   <li>
71   *     RegionServer creates an unassigned node as CLOSING.
72   *     - All region closes will do this in response to a CLOSE RPC from Master.
73   *     - A node can never be transitioned to CLOSING, only created.
74   *   </li>
75   *   <li>
76   *     RegionServer transitions an unassigned node from CLOSING to CLOSED.
77   *     - Normal region closes.  CAS operation.
78   *   </li>
79   *   <li>
80   *     RegionServer transitions an unassigned node from OFFLINE to OPENING.
81   *     - All region opens will do this in response to an OPEN RPC from the Master.
82   *     - Normal region opens.  CAS operation.
83   *   </li>
84   *   <li>
85   *     RegionServer transitions an unassigned node from OPENING to OPENED.
86   *     - Normal region opens.  CAS operation.
87   *   </li>
88   * </ol>
89   */
90  public class ZKAssign {
91    private static final Log LOG = LogFactory.getLog(ZKAssign.class);
92  
93    /**
94     * Gets the full path node name for the unassigned node for the specified
95     * region.
96     * @param zkw zk reference
97     * @param regionName region name
98     * @return full path node name
99     */
100   public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
101     return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
102   }
103 
104   /**
105    * Gets the region name from the full path node name of an unassigned node.
106    * @param path full zk path
107    * @return region name
108    */
109   public static String getRegionName(ZooKeeperWatcher zkw, String path) {
110     return path.substring(zkw.assignmentZNode.length()+1);
111   }
112 
113   // Master methods
114 
115   /**
116    * Creates a new unassigned node in the OFFLINE state for the specified region.
117    *
118    * <p>Does not transition nodes from other states.  If a node already exists
119    * for this region, a {@link NodeExistsException} will be thrown.
120    *
121    * <p>Sets a watcher on the unassigned region node if the method is successful.
122    *
123    * <p>This method should only be used during cluster startup and the enabling
124    * of a table.
125    *
126    * @param zkw zk reference
127    * @param region region to be created as offline
128    * @param serverName server event originates from
129    * @throws KeeperException if unexpected zookeeper exception
130    * @throws KeeperException.NodeExistsException if node already exists
131    */
132   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
133       String serverName)
134   throws KeeperException, KeeperException.NodeExistsException {
135     createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
136   }
137 
138   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
139       String serverName, final EventType event)
140   throws KeeperException, KeeperException.NodeExistsException {
141     LOG.debug(zkw.prefix("Creating unassigned node for " +
142       region.getEncodedName() + " in OFFLINE state"));
143     RegionTransitionData data = new RegionTransitionData(event,
144       region.getRegionName(), serverName);
145     synchronized(zkw.getNodes()) {
146       String node = getNodeName(zkw, region.getEncodedName());
147       zkw.getNodes().add(node);
148       ZKUtil.createAndWatch(zkw, node, data.getBytes());
149     }
150   }
151 
152   /**
153    * Creates an unassigned node in the OFFLINE state for the specified region.
154    * <p>
155    * Runs asynchronously.  Depends on no pre-existing znode.
156    *
157    * <p>Sets a watcher on the unassigned region node.
158    *
159    * @param zkw zk reference
160    * @param region region to be created as offline
161    * @param serverName server event originates from
162    * @param cb
163    * @param ctx
164    * @throws KeeperException if unexpected zookeeper exception
165    * @throws KeeperException.NodeExistsException if node already exists
166    */
167   public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
168       HRegionInfo region, String serverName,
169       final AsyncCallback.StringCallback cb, final Object ctx)
170   throws KeeperException {
171     LOG.debug(zkw.prefix("Async create of unassigned node for " +
172       region.getEncodedName() + " with OFFLINE state"));
173     RegionTransitionData data = new RegionTransitionData(
174         EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
175     synchronized(zkw.getNodes()) {
176       String node = getNodeName(zkw, region.getEncodedName());
177       zkw.getNodes().add(node);
178       ZKUtil.asyncCreate(zkw, node, data.getBytes(), cb, ctx);
179     }
180   }
181 
182   /**
183    * Forces an existing unassigned node to the OFFLINE state for the specified
184    * region.
185    *
186    * <p>Does not create a new node.  If a node does not already exist for this
187    * region, a {@link NoNodeException} will be thrown.
188    *
189    * <p>Sets a watcher on the unassigned region node if the method is
190    * successful.
191    *
192    * <p>This method should only be used during recovery of regionserver failure.
193    *
194    * @param zkw zk reference
195    * @param region region to be forced as offline
196    * @param serverName server event originates from
197    * @throws KeeperException if unexpected zookeeper exception
198    * @throws KeeperException.NoNodeException if node does not exist
199    */
200   public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
201       String serverName)
202   throws KeeperException, KeeperException.NoNodeException {
203     LOG.debug(zkw.prefix("Forcing existing unassigned node for " +
204       region.getEncodedName() + " to OFFLINE state"));
205     RegionTransitionData data = new RegionTransitionData(
206         EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
207     synchronized(zkw.getNodes()) {
208       String node = getNodeName(zkw, region.getEncodedName());
209       zkw.getNodes().add(node);
210       ZKUtil.setData(zkw, node, data.getBytes());
211     }
212   }
213 
214 
215   /**
216    * Creates or force updates an unassigned node to the OFFLINE state for the
217    * specified region.
218    * <p>
219    * Attempts to create the node but if it exists will force it to transition to
220    * and OFFLINE state.
221    *
222    * <p>Sets a watcher on the unassigned region node if the method is
223    * successful.
224    *
225    * <p>This method should be used when assigning a region.
226    *
227    * @param zkw zk reference
228    * @param region region to be created as offline
229    * @param serverName server event originates from
230    * @throws KeeperException if unexpected zookeeper exception
231    * @throws KeeperException.NodeExistsException if node already exists
232    */
233   public static boolean createOrForceNodeOffline(ZooKeeperWatcher zkw,
234       HRegionInfo region, String serverName)
235   throws KeeperException {
236     LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
237       region.getEncodedName() + " with OFFLINE state"));
238     RegionTransitionData data = new RegionTransitionData(
239         EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
240     synchronized(zkw.getNodes()) {
241       String node = getNodeName(zkw, region.getEncodedName());
242       zkw.sync(node);
243       zkw.getNodes().add(node);
244       int version = ZKUtil.checkExists(zkw, node);
245       if(version == -1) {
246         ZKUtil.createAndWatch(zkw, node, data.getBytes());
247       } else {
248         if (!ZKUtil.setData(zkw, node, data.getBytes(), version)) {
249           return false;
250         } else {
251           // We successfully forced to OFFLINE, reset watch and handle if
252           // the state changed in between our set and the watch
253           RegionTransitionData curData =
254             ZKAssign.getData(zkw, region.getEncodedName());
255           if (curData.getEventType() != data.getEventType()) {
256             // state changed, need to process
257             return false;
258           }
259         }
260       }
261     }
262     return true;
263   }
264 
265   /**
266    * Deletes an existing unassigned node that is in the OPENED state for the
267    * specified region.
268    *
269    * <p>If a node does not already exist for this region, a
270    * {@link NoNodeException} will be thrown.
271    *
272    * <p>No watcher is set whether this succeeds or not.
273    *
274    * <p>Returns false if the node was not in the proper state but did exist.
275    *
276    * <p>This method is used during normal region transitions when a region
277    * finishes successfully opening.  This is the Master acknowledging completion
278    * of the specified regions transition.
279    *
280    * @param zkw zk reference
281    * @param regionName opened region to be deleted from zk
282    * @throws KeeperException if unexpected zookeeper exception
283    * @throws KeeperException.NoNodeException if node does not exist
284    */
285   public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
286       String regionName)
287   throws KeeperException, KeeperException.NoNodeException {
288     return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_OPENED);
289   }
290 
291   /**
292    * Deletes an existing unassigned node that is in the OFFLINE state for the
293    * specified region.
294    *
295    * <p>If a node does not already exist for this region, a
296    * {@link NoNodeException} will be thrown.
297    *
298    * <p>No watcher is set whether this succeeds or not.
299    *
300    * <p>Returns false if the node was not in the proper state but did exist.
301    *
302    * <p>This method is used during master failover when the regions on an RS
303    * that has died are all set to OFFLINE before being processed.
304    *
305    * @param zkw zk reference
306    * @param regionName closed region to be deleted from zk
307    * @throws KeeperException if unexpected zookeeper exception
308    * @throws KeeperException.NoNodeException if node does not exist
309    */
310   public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
311       String regionName)
312   throws KeeperException, KeeperException.NoNodeException {
313     return deleteNode(zkw, regionName, EventType.M_ZK_REGION_OFFLINE);
314   }
315 
316   /**
317    * Deletes an existing unassigned node that is in the CLOSED state for the
318    * specified region.
319    *
320    * <p>If a node does not already exist for this region, a
321    * {@link NoNodeException} will be thrown.
322    *
323    * <p>No watcher is set whether this succeeds or not.
324    *
325    * <p>Returns false if the node was not in the proper state but did exist.
326    *
327    * <p>This method is used during table disables when a region finishes
328    * successfully closing.  This is the Master acknowledging completion
329    * of the specified regions transition to being closed.
330    *
331    * @param zkw zk reference
332    * @param regionName closed region to be deleted from zk
333    * @throws KeeperException if unexpected zookeeper exception
334    * @throws KeeperException.NoNodeException if node does not exist
335    */
336   public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
337       String regionName)
338   throws KeeperException, KeeperException.NoNodeException {
339     return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSED);
340   }
341 
342   /**
343    * Deletes an existing unassigned node that is in the CLOSING state for the
344    * specified region.
345    *
346    * <p>If a node does not already exist for this region, a
347    * {@link NoNodeException} will be thrown.
348    *
349    * <p>No watcher is set whether this succeeds or not.
350    *
351    * <p>Returns false if the node was not in the proper state but did exist.
352    *
353    * <p>This method is used during table disables when a region finishes
354    * successfully closing.  This is the Master acknowledging completion
355    * of the specified regions transition to being closed.
356    *
357    * @param zkw zk reference
358    * @param region closing region to be deleted from zk
359    * @throws KeeperException if unexpected zookeeper exception
360    * @throws KeeperException.NoNodeException if node does not exist
361    */
362   public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
363       HRegionInfo region)
364   throws KeeperException, KeeperException.NoNodeException {
365     String regionName = region.getEncodedName();
366     return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSING);
367   }
368 
369   /**
370    * Deletes an existing unassigned node that is in the specified state for the
371    * specified region.
372    *
373    * <p>If a node does not already exist for this region, a
374    * {@link NoNodeException} will be thrown.
375    *
376    * <p>No watcher is set whether this succeeds or not.
377    *
378    * <p>Returns false if the node was not in the proper state but did exist.
379    *
380    * <p>This method is used during table disables when a region finishes
381    * successfully closing.  This is the Master acknowledging completion
382    * of the specified regions transition to being closed.
383    *
384    * @param zkw zk reference
385    * @param regionName region to be deleted from zk
386    * @param expectedState state region must be in for delete to complete
387    * @throws KeeperException if unexpected zookeeper exception
388    * @throws KeeperException.NoNodeException if node does not exist
389    */
390   private static boolean deleteNode(ZooKeeperWatcher zkw, String regionName,
391       EventType expectedState)
392   throws KeeperException, KeeperException.NoNodeException {
393     LOG.debug(zkw.prefix("Deleting existing unassigned " +
394       "node for " + regionName + " that is in expected state " + expectedState));
395     String node = getNodeName(zkw, regionName);
396     zkw.sync(node);
397     Stat stat = new Stat();
398     byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
399     if (bytes == null) {
400       // If it came back null, node does not exist.
401       throw KeeperException.create(Code.NONODE);
402     }
403     RegionTransitionData data = RegionTransitionData.fromBytes(bytes);
404     if (!data.getEventType().equals(expectedState)) {
405       LOG.warn(zkw.prefix("Attempting to delete unassigned " +
406         "node " + regionName + " in " + expectedState +
407         " state but node is in " + data.getEventType() + " state"));
408       return false;
409     }
410     synchronized(zkw.getNodes()) {
411       // TODO: Does this go here or only if we successfully delete node?
412       zkw.getNodes().remove(node);
413       if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
414         LOG.warn(zkw.prefix("Attempting to delete " +
415           "unassigned node in " + expectedState +
416             " state but " +
417             "after verifying it was in OPENED state, we got a version mismatch"));
418         return false;
419       }
420       LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
421           regionName + " in expected state " + expectedState));
422       return true;
423     }
424   }
425 
426   /**
427    * Deletes all unassigned nodes regardless of their state.
428    *
429    * <p>No watchers are set.
430    *
431    * <p>This method is used by the Master during cluster startup to clear out
432    * any existing state from other cluster runs.
433    *
434    * @param zkw zk reference
435    * @throws KeeperException if unexpected zookeeper exception
436    */
437   public static void deleteAllNodes(ZooKeeperWatcher zkw)
438   throws KeeperException {
439     LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
440     ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
441   }
442 
443   // RegionServer methods
444 
445   /**
446    * Creates a new unassigned node in the CLOSING state for the specified
447    * region.
448    *
449    * <p>Does not transition nodes from any states.  If a node already exists
450    * for this region, a {@link NodeExistsException} will be thrown.
451    *
452    * <p>If creation is successful, returns the version number of the CLOSING
453    * node created.
454    *
455    * <p>Does not set any watches.
456    *
457    * <p>This method should only be used by a RegionServer when initiating a
458    * close of a region after receiving a CLOSE RPC from the Master.
459    *
460    * @param zkw zk reference
461    * @param region region to be created as closing
462    * @param serverName server event originates from
463    * @return version of node after transition, -1 if unsuccessful transition
464    * @throws KeeperException if unexpected zookeeper exception
465    * @throws KeeperException.NodeExistsException if node already exists
466    */
467   public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
468       String serverName)
469   throws KeeperException, KeeperException.NodeExistsException {
470     LOG.debug(zkw.prefix("Creating unassigned node for " +
471       region.getEncodedName() + " in a CLOSING state"));
472 
473     RegionTransitionData data = new RegionTransitionData(
474         EventType.RS_ZK_REGION_CLOSING, region.getRegionName(), serverName);
475 
476     synchronized (zkw.getNodes()) {
477       String node = getNodeName(zkw, region.getEncodedName());
478       zkw.getNodes().add(node);
479       return ZKUtil.createAndWatch(zkw, node, data.getBytes());
480     }
481   }
482 
483   /**
484    * Transitions an existing unassigned node for the specified region which is
485    * currently in the CLOSING state to be in the CLOSED state.
486    *
487    * <p>Does not transition nodes from other states.  If for some reason the
488    * node could not be transitioned, the method returns -1.  If the transition
489    * is successful, the version of the node after transition is returned.
490    *
491    * <p>This method can fail and return false for three different reasons:
492    * <ul><li>Unassigned node for this region does not exist</li>
493    * <li>Unassigned node for this region is not in CLOSING state</li>
494    * <li>After verifying CLOSING state, update fails because of wrong version
495    * (someone else already transitioned the node)</li>
496    * </ul>
497    *
498    * <p>Does not set any watches.
499    *
500    * <p>This method should only be used by a RegionServer when initiating a
501    * close of a region after receiving a CLOSE RPC from the Master.
502    *
503    * @param zkw zk reference
504    * @param region region to be transitioned to closed
505    * @param serverName server event originates from
506    * @return version of node after transition, -1 if unsuccessful transition
507    * @throws KeeperException if unexpected zookeeper exception
508    */
509   public static int transitionNodeClosed(ZooKeeperWatcher zkw,
510       HRegionInfo region, String serverName, int expectedVersion)
511   throws KeeperException {
512     return transitionNode(zkw, region, serverName,
513         EventType.RS_ZK_REGION_CLOSING,
514         EventType.RS_ZK_REGION_CLOSED, expectedVersion);
515   }
516 
517   /**
518    * Transitions an existing unassigned node for the specified region which is
519    * currently in the OFFLINE state to be in the OPENING state.
520    *
521    * <p>Does not transition nodes from other states.  If for some reason the
522    * node could not be transitioned, the method returns -1.  If the transition
523    * is successful, the version of the node written as OPENING is returned.
524    *
525    * <p>This method can fail and return -1 for three different reasons:
526    * <ul><li>Unassigned node for this region does not exist</li>
527    * <li>Unassigned node for this region is not in OFFLINE state</li>
528    * <li>After verifying OFFLINE state, update fails because of wrong version
529    * (someone else already transitioned the node)</li>
530    * </ul>
531    *
532    * <p>Does not set any watches.
533    *
534    * <p>This method should only be used by a RegionServer when initiating an
535    * open of a region after receiving an OPEN RPC from the Master.
536    *
537    * @param zkw zk reference
538    * @param region region to be transitioned to opening
539    * @param serverName server event originates from
540    * @return version of node after transition, -1 if unsuccessful transition
541    * @throws KeeperException if unexpected zookeeper exception
542    */
543   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
544       HRegionInfo region, String serverName)
545   throws KeeperException {
546     return transitionNodeOpening(zkw, region, serverName,
547       EventType.M_ZK_REGION_OFFLINE);
548   }
549 
550   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
551       HRegionInfo region, String serverName, final EventType beginState)
552   throws KeeperException {
553     return transitionNode(zkw, region, serverName, beginState,
554       EventType.RS_ZK_REGION_OPENING, -1);
555   }
556 
557   /**
558    * Retransitions an existing unassigned node for the specified region which is
559    * currently in the OPENING state to be in the OPENING state.
560    *
561    * <p>Does not transition nodes from other states.  If for some reason the
562    * node could not be transitioned, the method returns -1.  If the transition
563    * is successful, the version of the node rewritten as OPENING is returned.
564    *
565    * <p>This method can fail and return -1 for three different reasons:
566    * <ul><li>Unassigned node for this region does not exist</li>
567    * <li>Unassigned node for this region is not in OPENING state</li>
568    * <li>After verifying OPENING state, update fails because of wrong version
569    * (someone else already transitioned the node)</li>
570    * </ul>
571    *
572    * <p>Does not set any watches.
573    *
574    * <p>This method should only be used by a RegionServer when initiating an
575    * open of a region after receiving an OPEN RPC from the Master.
576    *
577    * @param zkw zk reference
578    * @param region region to be transitioned to opening
579    * @param serverName server event originates from
580    * @return version of node after transition, -1 if unsuccessful transition
581    * @throws KeeperException if unexpected zookeeper exception
582    */
583   public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
584       HRegionInfo region, String serverName, int expectedVersion)
585   throws KeeperException {
586     return transitionNode(zkw, region, serverName,
587         EventType.RS_ZK_REGION_OPENING,
588         EventType.RS_ZK_REGION_OPENING, expectedVersion);
589   }
590 
591   /**
592    * Transitions an existing unassigned node for the specified region which is
593    * currently in the OPENING state to be in the OPENED state.
594    *
595    * <p>Does not transition nodes from other states.  If for some reason the
596    * node could not be transitioned, the method returns -1.  If the transition
597    * is successful, the version of the node after transition is returned.
598    *
599    * <p>This method can fail and return false for three different reasons:
600    * <ul><li>Unassigned node for this region does not exist</li>
601    * <li>Unassigned node for this region is not in OPENING state</li>
602    * <li>After verifying OPENING state, update fails because of wrong version
603    * (this should never actually happen since an RS only does this transition
604    * following a transition to OPENING.  if two RS are conflicting, one would
605    * fail the original transition to OPENING and not this transition)</li>
606    * </ul>
607    *
608    * <p>Does not set any watches.
609    *
610    * <p>This method should only be used by a RegionServer when completing the
611    * open of a region.
612    *
613    * @param zkw zk reference
614    * @param region region to be transitioned to opened
615    * @param serverName server event originates from
616    * @return version of node after transition, -1 if unsuccessful transition
617    * @throws KeeperException if unexpected zookeeper exception
618    */
619   public static int transitionNodeOpened(ZooKeeperWatcher zkw,
620       HRegionInfo region, String serverName, int expectedVersion)
621   throws KeeperException {
622     return transitionNode(zkw, region, serverName,
623         EventType.RS_ZK_REGION_OPENING,
624         EventType.RS_ZK_REGION_OPENED, expectedVersion);
625   }
626 
627   /**
628    * Private method that actually performs unassigned node transitions.
629    *
630    * <p>Attempts to transition the unassigned node for the specified region
631    * from the expected state to the state in the specified transition data.
632    *
633    * <p>Method first reads existing data and verifies it is in the expected
634    * state.  If the node does not exist or the node is not in the expected
635    * state, the method returns -1.  If the transition is successful, the
636    * version number of the node following the transition is returned.
637    *
638    * <p>If the read state is what is expected, it attempts to write the new
639    * state and data into the node.  When doing this, it includes the expected
640    * version (determined when the existing state was verified) to ensure that
641    * only one transition is successful.  If there is a version mismatch, the
642    * method returns -1.
643    *
644    * <p>If the write is successful, no watch is set and the method returns true.
645    *
646    * @param zkw zk reference
647    * @param region region to be transitioned to opened
648    * @param serverName server event originates from
649    * @param endState state to transition node to if all checks pass
650    * @param beginState state the node must currently be in to do transition
651    * @param expectedVersion expected version of data before modification, or -1
652    * @return version of node after transition, -1 if unsuccessful transition
653    * @throws KeeperException if unexpected zookeeper exception
654    */
655   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
656       String serverName, EventType beginState, EventType endState,
657       int expectedVersion)
658   throws KeeperException {
659     String encoded = region.getEncodedName();
660     if(LOG.isDebugEnabled()) {
661       LOG.debug(zkw.prefix("Attempting to transition node " +
662         HRegionInfo.prettyPrint(encoded) +
663         " from " + beginState.toString() + " to " + endState.toString()));
664     }
665 
666     String node = getNodeName(zkw, encoded);
667     zkw.sync(node);
668 
669     // Read existing data of the node
670     Stat stat = new Stat();
671     byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
672     if (existingBytes == null) {
673       // Node no longer exists.  Return -1. It means unsuccessful transition.
674       return -1;
675     }
676     RegionTransitionData existingData =
677       RegionTransitionData.fromBytes(existingBytes);
678 
679     // Verify it is the expected version
680     if(expectedVersion != -1 && stat.getVersion() != expectedVersion) {
681       LOG.warn(zkw.prefix("Attempt to transition the " +
682         "unassigned node for " + encoded +
683         " from " + beginState + " to " + endState + " failed, " +
684         "the node existed but was version " + stat.getVersion() +
685         " not the expected version " + expectedVersion));
686         return -1;
687     }
688 
689     // Verify it is in expected state
690     if(!existingData.getEventType().equals(beginState)) {
691       LOG.warn(zkw.prefix("Attempt to transition the " +
692         "unassigned node for " + encoded +
693         " from " + beginState + " to " + endState + " failed, " +
694         "the node existed but was in the state " + existingData.getEventType() +
695         " set by the server " + existingData.getServerName()));
696       return -1;
697     }
698 
699     // Write new data, ensuring data has not changed since we last read it
700     try {
701       RegionTransitionData data = new RegionTransitionData(endState,
702           region.getRegionName(), serverName);
703       if(!ZKUtil.setData(zkw, node, data.getBytes(), stat.getVersion())) {
704         LOG.warn(zkw.prefix("Attempt to transition the " +
705         "unassigned node for " + encoded +
706         " from " + beginState + " to " + endState + " failed, " +
707         "the node existed and was in the expected state but then when " +
708         "setting data we got a version mismatch"));
709         return -1;
710       }
711       if(LOG.isDebugEnabled()) {
712         LOG.debug(zkw.prefix("Successfully transitioned node " + encoded +
713           " from " + beginState + " to " + endState));
714       }
715       return stat.getVersion() + 1;
716     } catch (KeeperException.NoNodeException nne) {
717       LOG.warn(zkw.prefix("Attempt to transition the " +
718         "unassigned node for " + encoded +
719         " from " + beginState + " to " + endState + " failed, " +
720         "the node existed and was in the expected state but then when " +
721         "setting data it no longer existed"));
722       return -1;
723     }
724   }
725 
726   /**
727    * Gets the current data in the unassigned node for the specified region name
728    * or fully-qualified path.
729    *
730    * <p>Returns null if the region does not currently have a node.
731    *
732    * <p>Sets a watch on the node if the node exists.
733    *
734    * @param zkw zk reference
735    * @param pathOrRegionName fully-specified path or region name
736    * @return data for the unassigned node
737    * @throws KeeperException if unexpected zookeeper exception
738    */
739   public static RegionTransitionData getData(ZooKeeperWatcher zkw,
740       String pathOrRegionName)
741   throws KeeperException {
742     String node = pathOrRegionName.startsWith("/") ?
743         pathOrRegionName : getNodeName(zkw, pathOrRegionName);
744     byte [] data = ZKUtil.getDataAndWatch(zkw, node);
745     if(data == null) {
746       return null;
747     }
748     return RegionTransitionData.fromBytes(data);
749   }
750 
751   /**
752    * Gets the current data in the unassigned node for the specified region name
753    * or fully-qualified path.
754    *
755    * <p>Returns null if the region does not currently have a node.
756    *
757    * <p>Does not set a watch.
758    *
759    * @param zkw zk reference
760    * @param pathOrRegionName fully-specified path or region name
761    * @param stat object to store node info into on getData call
762    * @return data for the unassigned node or null if node does not exist
763    * @throws KeeperException if unexpected zookeeper exception
764    */
765   public static RegionTransitionData getDataNoWatch(ZooKeeperWatcher zkw,
766       String pathOrRegionName, Stat stat)
767   throws KeeperException {
768     String node = pathOrRegionName.startsWith("/") ?
769         pathOrRegionName : getNodeName(zkw, pathOrRegionName);
770     byte [] data = ZKUtil.getDataNoWatch(zkw, node, stat);
771     if (data == null) {
772       return null;
773     }
774     return RegionTransitionData.fromBytes(data);
775   }
776 
777   /**
778    * Delete the assignment node regardless of its current state.
779    * <p>
780    * Fail silent even if the node does not exist at all.
781    * @param watcher
782    * @param regionInfo
783    * @throws KeeperException
784    */
785   public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
786       HRegionInfo regionInfo)
787   throws KeeperException {
788     String node = getNodeName(watcher, regionInfo.getEncodedName());
789     ZKUtil.deleteNodeFailSilent(watcher, node);
790   }
791 
792   /**
793    * Blocks until there are no node in regions in transition.
794    * <p>
795    * Used in testing only.
796    * @param zkw zk reference
797    * @throws KeeperException
798    * @throws InterruptedException
799    */
800   public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
801   throws KeeperException, InterruptedException {
802     while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
803       List<String> znodes =
804         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
805       if (znodes != null && !znodes.isEmpty()) {
806         for (String znode : znodes) {
807           LOG.debug("ZK RIT -> " + znode);
808         }
809       }
810       Thread.sleep(100);
811     }
812   }
813 
814   /**
815    * Blocks until there is at least one node in regions in transition.
816    * <p>
817    * Used in testing only.
818    * @param zkw zk reference
819    * @throws KeeperException
820    * @throws InterruptedException
821    */
822   public static void blockUntilRIT(ZooKeeperWatcher zkw)
823   throws KeeperException, InterruptedException {
824     while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
825       List<String> znodes =
826         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
827       if (znodes == null || znodes.isEmpty()) {
828         LOG.debug("No RIT in ZK");
829       }
830       Thread.sleep(100);
831     }
832   }
833 
834   /**
835    * Verifies that the specified region is in the specified state in ZooKeeper.
836    * <p>
837    * Returns true if region is in transition and in the specified state in
838    * ZooKeeper.  Returns false if the region does not exist in ZK or is in
839    * a different state.
840    * <p>
841    * Method synchronizes() with ZK so will yield an up-to-date result but is
842    * a slow read.
843    * @param zkw
844    * @param region
845    * @param expectedState
846    * @return true if region exists and is in expected state
847    */
848   public static boolean verifyRegionState(ZooKeeperWatcher zkw,
849       HRegionInfo region, EventType expectedState)
850   throws KeeperException {
851     String encoded = region.getEncodedName();
852 
853     String node = getNodeName(zkw, encoded);
854     zkw.sync(node);
855 
856     // Read existing data of the node
857     byte [] existingBytes = null;
858     try {
859       existingBytes = ZKUtil.getDataAndWatch(zkw, node);
860     } catch (KeeperException.NoNodeException nne) {
861       return false;
862     } catch (KeeperException e) {
863       throw e;
864     }
865     if (existingBytes == null) return false;
866     RegionTransitionData existingData =
867       RegionTransitionData.fromBytes(existingBytes);
868     if (existingData.getEventType() == expectedState){
869       return true;
870     }
871     return false;
872   }
873 }