View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.zookeeper;
20  
21  import org.apache.commons.logging.Log;
22  import org.apache.commons.logging.LogFactory;
23  import org.apache.hadoop.classification.InterfaceAudience;
24  import org.apache.hadoop.classification.InterfaceStability;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.RegionTransition;
28  import org.apache.hadoop.hbase.ServerName;
29  import org.apache.hadoop.hbase.exceptions.DeserializationException;
30  import org.apache.hadoop.hbase.executor.EventType;
31  import org.apache.zookeeper.AsyncCallback;
32  import org.apache.zookeeper.KeeperException;
33  import org.apache.zookeeper.KeeperException.Code;
34  import org.apache.zookeeper.KeeperException.NoNodeException;
35  import org.apache.zookeeper.KeeperException.NodeExistsException;
36  import org.apache.zookeeper.data.Stat;
37  
38  import java.util.List;
39  
40  // We should not be importing this Type here, nor a RegionTransition, etc.  This class should be
41  // about zk and bytes only.
42  
43  /**
44   * Utility class for doing region assignment in ZooKeeper.  This class extends
45   * stuff done in {@link ZKUtil} to cover specific assignment operations.
46   * <p>
47   * Contains only static methods and constants.
48   * <p>
49   * Used by both the Master and RegionServer.
50   * <p>
51   * All valid transitions outlined below:
52   * <p>
53   * <b>MASTER</b>
54   * <ol>
55   *   <li>
56   *     Master creates an unassigned node as OFFLINE.
57   *     - Cluster startup and table enabling.
58   *   </li>
59   *   <li>
60   *     Master forces an existing unassigned node to OFFLINE.
61   *     - RegionServer failure.
62   *     - Allows transitions from all states to OFFLINE.
63   *   </li>
64   *   <li>
65   *     Master deletes an unassigned node that was in an OPENED state.
66   *     - Normal region transitions.  Besides cluster startup, no other deletions
67   *     of unassigned nodes are allowed.
68   *   </li>
69   *   <li>
70   *     Master deletes all unassigned nodes regardless of state.
71   *     - Cluster startup before any assignment happens.
72   *   </li>
73   * </ol>
74   * <p>
75   * <b>REGIONSERVER</b>
76   * <ol>
77   *   <li>
78   *     RegionServer creates an unassigned node as CLOSING.
79   *     - All region closes will do this in response to a CLOSE RPC from Master.
80   *     - A node can never be transitioned to CLOSING, only created.
81   *   </li>
82   *   <li>
83   *     RegionServer transitions an unassigned node from CLOSING to CLOSED.
84   *     - Normal region closes.  CAS operation.
85   *   </li>
86   *   <li>
87   *     RegionServer transitions an unassigned node from OFFLINE to OPENING.
88   *     - All region opens will do this in response to an OPEN RPC from the Master.
89   *     - Normal region opens.  CAS operation.
90   *   </li>
91   *   <li>
92   *     RegionServer transitions an unassigned node from OPENING to OPENED.
93   *     - Normal region opens.  CAS operation.
94   *   </li>
95   * </ol>
96   */
97  @InterfaceAudience.Public
98  @InterfaceStability.Evolving
99  public class ZKAssign {
100   private static final Log LOG = LogFactory.getLog(ZKAssign.class);
101 
102   /**
103    * Gets the full path node name for the unassigned node for the specified
104    * region.
105    * @param zkw zk reference
106    * @param regionName region name
107    * @return full path node name
108    */
109   public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
110     return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
111   }
112 
113   /**
114    * Gets the region name from the full path node name of an unassigned node.
115    * @param path full zk path
116    * @return region name
117    */
118   public static String getRegionName(ZooKeeperWatcher zkw, String path) {
119     return path.substring(zkw.assignmentZNode.length()+1);
120   }
121 
122   // Master methods
123 
124   /**
125    * Creates a new unassigned node in the OFFLINE state for the specified region.
126    *
127    * <p>Does not transition nodes from other states.  If a node already exists
128    * for this region, a {@link NodeExistsException} will be thrown.
129    *
130    * <p>Sets a watcher on the unassigned region node if the method is successful.
131    *
132    * <p>This method should only be used during cluster startup and the enabling
133    * of a table.
134    *
135    * @param zkw zk reference
136    * @param region region to be created as offline
137    * @param serverName server transition will happen on
138    * @throws KeeperException if unexpected zookeeper exception
139    * @throws KeeperException.NodeExistsException if node already exists
140    */
141   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
142       ServerName serverName)
143   throws KeeperException, KeeperException.NodeExistsException {
144     createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
145   }
146 
147   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
148       ServerName serverName, final EventType event)
149   throws KeeperException, KeeperException.NodeExistsException {
150     LOG.debug(zkw.prefix("Creating unassigned node for " +
151       region.getEncodedName() + " in OFFLINE state"));
152     RegionTransition rt =
153       RegionTransition.createRegionTransition(event, region.getRegionName(), serverName);
154     String node = getNodeName(zkw, region.getEncodedName());
155     ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
156   }
157 
158   /**
159    * Creates an unassigned node in the OFFLINE state for the specified region.
160    * <p>
161    * Runs asynchronously.  Depends on no pre-existing znode.
162    *
163    * <p>Sets a watcher on the unassigned region node.
164    *
165    * @param zkw zk reference
166    * @param region region to be created as offline
167    * @param serverName server transition will happen on
168    * @param cb
169    * @param ctx
170    * @throws KeeperException if unexpected zookeeper exception
171    * @throws KeeperException.NodeExistsException if node already exists
172    */
173   public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
174       HRegionInfo region, ServerName serverName,
175       final AsyncCallback.StringCallback cb, final Object ctx)
176   throws KeeperException {
177     LOG.debug(zkw.prefix("Async create of unassigned node for " +
178       region.getEncodedName() + " with OFFLINE state"));
179     RegionTransition rt =
180       RegionTransition.createRegionTransition(
181           EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
182     String node = getNodeName(zkw, region.getEncodedName());
183     ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx);
184   }
185 
186   /**
187    * Creates or force updates an unassigned node to the OFFLINE state for the
188    * specified region.
189    * <p>
190    * Attempts to create the node but if it exists will force it to transition to
191    * and OFFLINE state.
192    *
193    * <p>Sets a watcher on the unassigned region node if the method is
194    * successful.
195    *
196    * <p>This method should be used when assigning a region.
197    *
198    * @param zkw zk reference
199    * @param region region to be created as offline
200    * @param serverName server transition will happen on
201    * @return the version of the znode created in OFFLINE state, -1 if
202    *         unsuccessful.
203    * @throws KeeperException if unexpected zookeeper exception
204    * @throws KeeperException.NodeExistsException if node already exists
205    */
206   public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
207       HRegionInfo region, ServerName serverName) throws KeeperException {
208     LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
209       region.getEncodedName() + " with OFFLINE state"));
210     RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE,
211       region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
212     byte [] data = rt.toByteArray();
213     String node = getNodeName(zkw, region.getEncodedName());
214     zkw.sync(node);
215     int version = ZKUtil.checkExists(zkw, node);
216     if (version == -1) {
217       return ZKUtil.createAndWatch(zkw, node, data);
218     } else {
219       boolean setData = false;
220       try {
221         setData = ZKUtil.setData(zkw, node, data, version);
222         // Setdata throws KeeperException which aborts the Master. So we are
223         // catching it here.
224         // If just before setting the znode to OFFLINE if the RS has made any
225         // change to the
226         // znode state then we need to return -1.
227       } catch (KeeperException kpe) {
228         LOG.info("Version mismatch while setting the node to OFFLINE state.");
229         return -1;
230       }
231       if (!setData) {
232         return -1;
233       } else {
234         // We successfully forced to OFFLINE, reset watch and handle if
235         // the state changed in between our set and the watch
236         byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
237         rt = getRegionTransition(bytes);
238         if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) {
239           // state changed, need to process
240           return -1;
241         }
242       }
243     }
244     return version + 1;
245   }
246 
247   /**
248    * Deletes an existing unassigned node that is in the OPENED state for the
249    * specified region.
250    *
251    * <p>If a node does not already exist for this region, a
252    * {@link NoNodeException} will be thrown.
253    *
254    * <p>No watcher is set whether this succeeds or not.
255    *
256    * <p>Returns false if the node was not in the proper state but did exist.
257    *
258    * <p>This method is used during normal region transitions when a region
259    * finishes successfully opening.  This is the Master acknowledging completion
260    * of the specified regions transition.
261    *
262    * @param zkw zk reference
263    * @param encodedRegionName opened region to be deleted from zk
264    * @throws KeeperException if unexpected zookeeper exception
265    * @throws KeeperException.NoNodeException if node does not exist
266    */
267   public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
268       String encodedRegionName)
269   throws KeeperException, KeeperException.NoNodeException {
270     return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_OPENED);
271   }
272 
273   /**
274    * Deletes an existing unassigned node that is in the OFFLINE state for the
275    * specified region.
276    *
277    * <p>If a node does not already exist for this region, a
278    * {@link NoNodeException} will be thrown.
279    *
280    * <p>No watcher is set whether this succeeds or not.
281    *
282    * <p>Returns false if the node was not in the proper state but did exist.
283    *
284    * <p>This method is used during master failover when the regions on an RS
285    * that has died are all set to OFFLINE before being processed.
286    *
287    * @param zkw zk reference
288    * @param encodedRegionName closed region to be deleted from zk
289    * @throws KeeperException if unexpected zookeeper exception
290    * @throws KeeperException.NoNodeException if node does not exist
291    */
292   public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
293       String encodedRegionName)
294   throws KeeperException, KeeperException.NoNodeException {
295     return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_OFFLINE);
296   }
297 
298   /**
299    * Deletes an existing unassigned node that is in the CLOSED state for the
300    * specified region.
301    *
302    * <p>If a node does not already exist for this region, a
303    * {@link NoNodeException} will be thrown.
304    *
305    * <p>No watcher is set whether this succeeds or not.
306    *
307    * <p>Returns false if the node was not in the proper state but did exist.
308    *
309    * <p>This method is used during table disables when a region finishes
310    * successfully closing.  This is the Master acknowledging completion
311    * of the specified regions transition to being closed.
312    *
313    * @param zkw zk reference
314    * @param encodedRegionName closed region to be deleted from zk
315    * @throws KeeperException if unexpected zookeeper exception
316    * @throws KeeperException.NoNodeException if node does not exist
317    */
318   public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
319       String encodedRegionName)
320   throws KeeperException, KeeperException.NoNodeException {
321     return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_CLOSED);
322   }
323 
324   /**
325    * Deletes an existing unassigned node that is in the CLOSING state for the
326    * specified region.
327    *
328    * <p>If a node does not already exist for this region, a
329    * {@link NoNodeException} will be thrown.
330    *
331    * <p>No watcher is set whether this succeeds or not.
332    *
333    * <p>Returns false if the node was not in the proper state but did exist.
334    *
335    * <p>This method is used during table disables when a region finishes
336    * successfully closing.  This is the Master acknowledging completion
337    * of the specified regions transition to being closed.
338    *
339    * @param zkw zk reference
340    * @param region closing region to be deleted from zk
341    * @throws KeeperException if unexpected zookeeper exception
342    * @throws KeeperException.NoNodeException if node does not exist
343    */
344   public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
345       HRegionInfo region)
346   throws KeeperException, KeeperException.NoNodeException {
347     String encodedRegionName = region.getEncodedName();
348     return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_CLOSING);
349   }
350 
351   /**
352    * Deletes an existing unassigned node that is in the specified state for the
353    * specified region.
354    *
355    * <p>If a node does not already exist for this region, a
356    * {@link NoNodeException} will be thrown.
357    *
358    * <p>No watcher is set whether this succeeds or not.
359    *
360    * <p>Returns false if the node was not in the proper state but did exist.
361    *
362    * <p>This method is used when a region finishes opening/closing.
363    * The Master acknowledges completion
364    * of the specified regions transition to being closed/opened.
365    *
366    * @param zkw zk reference
367    * @param encodedRegionName region to be deleted from zk
368    * @param expectedState state region must be in for delete to complete
369    * @throws KeeperException if unexpected zookeeper exception
370    * @throws KeeperException.NoNodeException if node does not exist
371    */
372   public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
373       EventType expectedState)
374   throws KeeperException, KeeperException.NoNodeException {
375     return deleteNode(zkw, encodedRegionName, expectedState, -1);
376   }
377 
378   /**
379    * Deletes an existing unassigned node that is in the specified state for the
380    * specified region.
381    *
382    * <p>If a node does not already exist for this region, a
383    * {@link NoNodeException} will be thrown.
384    *
385    * <p>No watcher is set whether this succeeds or not.
386    *
387    * <p>Returns false if the node was not in the proper state but did exist.
388    *
389    * <p>This method is used when a region finishes opening/closing.
390    * The Master acknowledges completion
391    * of the specified regions transition to being closed/opened.
392    *
393    * @param zkw zk reference
394    * @param encodedRegionName region to be deleted from zk
395    * @param expectedState state region must be in for delete to complete
396    * @param expectedVersion of the znode that is to be deleted.
397    *        If expectedVersion need not be compared while deleting the znode
398    *        pass -1
399    * @throws KeeperException if unexpected zookeeper exception
400    * @throws KeeperException.NoNodeException if node does not exist
401    */
402   public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
403       EventType expectedState, int expectedVersion)
404   throws KeeperException, KeeperException.NoNodeException {
405     LOG.debug(zkw.prefix("Deleting existing unassigned " +
406       "node for " + encodedRegionName + " that is in expected state " + expectedState));
407     String node = getNodeName(zkw, encodedRegionName);
408     zkw.sync(node);
409     Stat stat = new Stat();
410     byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
411     if (bytes == null) {
412       // If it came back null, node does not exist.
413       throw KeeperException.create(Code.NONODE);
414     }
415     RegionTransition rt = getRegionTransition(bytes);
416     EventType et = rt.getEventType();
417     if (!et.equals(expectedState)) {
418       LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " +
419         expectedState + " state but node is in " + et + " state"));
420       return false;
421     }
422     if (expectedVersion != -1
423         && stat.getVersion() != expectedVersion) {
424       LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" +
425         " the expected one. Got a version mismatch");
426       return false;
427     }
428     if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
429       LOG.warn(zkw.prefix("Attempting to delete " +
430           "unassigned node " + encodedRegionName + " in " + expectedState +
431           " state but after verifying state, we got a version mismatch"));
432       return false;
433     }
434     LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
435         encodedRegionName + " in expected state " + expectedState));
436     return true;
437   }
438 
439   /**
440    * Deletes all unassigned nodes regardless of their state.
441    *
442    * <p>No watchers are set.
443    *
444    * <p>This method is used by the Master during cluster startup to clear out
445    * any existing state from other cluster runs.
446    *
447    * @param zkw zk reference
448    * @throws KeeperException if unexpected zookeeper exception
449    */
450   public static void deleteAllNodes(ZooKeeperWatcher zkw)
451   throws KeeperException {
452     LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
453     ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
454   }
455 
456   /**
457    * Creates a new unassigned node in the CLOSING state for the specified
458    * region.
459    *
460    * <p>Does not transition nodes from any states.  If a node already exists
461    * for this region, a {@link NodeExistsException} will be thrown.
462    *
463    * <p>If creation is successful, returns the version number of the CLOSING
464    * node created.
465    *
466    * <p>Set a watch.
467    *
468    * <p>This method should only be used by a Master when initiating a
469    * close of a region before sending a close request to the region server.
470    *
471    * @param zkw zk reference
472    * @param region region to be created as closing
473    * @param serverName server transition will happen on
474    * @return version of node after transition, -1 if unsuccessful transition
475    * @throws KeeperException if unexpected zookeeper exception
476    * @throws KeeperException.NodeExistsException if node already exists
477    */
478   public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
479       ServerName serverName)
480   throws KeeperException, KeeperException.NodeExistsException {
481     LOG.debug(zkw.prefix("Creating unassigned node for " +
482       region.getEncodedName() + " in a CLOSING state"));
483     RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
484       region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
485     String node = getNodeName(zkw, region.getEncodedName());
486     return ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
487   }
488 
489   // RegionServer methods
490 
491   /**
492    * Transitions an existing unassigned node for the specified region which is
493    * currently in the CLOSING state to be in the CLOSED state.
494    *
495    * <p>Does not transition nodes from other states.  If for some reason the
496    * node could not be transitioned, the method returns -1.  If the transition
497    * is successful, the version of the node after transition is returned.
498    *
499    * <p>This method can fail and return false for three different reasons:
500    * <ul><li>Unassigned node for this region does not exist</li>
501    * <li>Unassigned node for this region is not in CLOSING state</li>
502    * <li>After verifying CLOSING state, update fails because of wrong version
503    * (someone else already transitioned the node)</li>
504    * </ul>
505    *
506    * <p>Does not set any watches.
507    *
508    * <p>This method should only be used by a RegionServer when initiating a
509    * close of a region after receiving a CLOSE RPC from the Master.
510    *
511    * @param zkw zk reference
512    * @param region region to be transitioned to closed
513    * @param serverName server transition happens on
514    * @return version of node after transition, -1 if unsuccessful transition
515    * @throws KeeperException if unexpected zookeeper exception
516    */
517   public static int transitionNodeClosed(ZooKeeperWatcher zkw,
518       HRegionInfo region, ServerName serverName, int expectedVersion)
519   throws KeeperException {
520     return transitionNode(zkw, region, serverName,
521         EventType.M_ZK_REGION_CLOSING,
522         EventType.RS_ZK_REGION_CLOSED, expectedVersion);
523   }
524 
525   /**
526    * Transitions an existing unassigned node for the specified region which is
527    * currently in the OFFLINE state to be in the OPENING state.
528    *
529    * <p>Does not transition nodes from other states.  If for some reason the
530    * node could not be transitioned, the method returns -1.  If the transition
531    * is successful, the version of the node written as OPENING is returned.
532    *
533    * <p>This method can fail and return -1 for three different reasons:
534    * <ul><li>Unassigned node for this region does not exist</li>
535    * <li>Unassigned node for this region is not in OFFLINE state</li>
536    * <li>After verifying OFFLINE state, update fails because of wrong version
537    * (someone else already transitioned the node)</li>
538    * </ul>
539    *
540    * <p>Does not set any watches.
541    *
542    * <p>This method should only be used by a RegionServer when initiating an
543    * open of a region after receiving an OPEN RPC from the Master.
544    *
545    * @param zkw zk reference
546    * @param region region to be transitioned to opening
547    * @param serverName server transition happens on
548    * @return version of node after transition, -1 if unsuccessful transition
549    * @throws KeeperException if unexpected zookeeper exception
550    */
551   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
552       HRegionInfo region, ServerName serverName)
553   throws KeeperException {
554     return transitionNodeOpening(zkw, region, serverName,
555       EventType.M_ZK_REGION_OFFLINE);
556   }
557 
558   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
559       HRegionInfo region, ServerName serverName, final EventType beginState)
560   throws KeeperException {
561     return transitionNode(zkw, region, serverName, beginState,
562       EventType.RS_ZK_REGION_OPENING, -1);
563   }
564 
565   /**
566    * Retransitions an existing unassigned node for the specified region which is
567    * currently in the OPENING state to be in the OPENING state.
568    *
569    * <p>Does not transition nodes from other states.  If for some reason the
570    * node could not be transitioned, the method returns -1.  If the transition
571    * is successful, the version of the node rewritten as OPENING is returned.
572    *
573    * <p>This method can fail and return -1 for three different reasons:
574    * <ul><li>Unassigned node for this region does not exist</li>
575    * <li>Unassigned node for this region is not in OPENING state</li>
576    * <li>After verifying OPENING state, update fails because of wrong version
577    * (someone else already transitioned the node)</li>
578    * </ul>
579    *
580    * <p>Does not set any watches.
581    *
582    * <p>This method should only be used by a RegionServer when initiating an
583    * open of a region after receiving an OPEN RPC from the Master.
584    *
585    * @param zkw zk reference
586    * @param region region to be transitioned to opening
587    * @param serverName server transition happens on
588    * @return version of node after transition, -1 if unsuccessful transition
589    * @throws KeeperException if unexpected zookeeper exception
590    */
591   public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
592       HRegionInfo region, ServerName serverName, int expectedVersion)
593   throws KeeperException {
594     return transitionNode(zkw, region, serverName,
595         EventType.RS_ZK_REGION_OPENING,
596         EventType.RS_ZK_REGION_OPENING, expectedVersion);
597   }
598 
599   /**
600    * Transitions an existing unassigned node for the specified region which is
601    * currently in the OPENING state to be in the OPENED state.
602    *
603    * <p>Does not transition nodes from other states.  If for some reason the
604    * node could not be transitioned, the method returns -1.  If the transition
605    * is successful, the version of the node after transition is returned.
606    *
607    * <p>This method can fail and return false for three different reasons:
608    * <ul><li>Unassigned node for this region does not exist</li>
609    * <li>Unassigned node for this region is not in OPENING state</li>
610    * <li>After verifying OPENING state, update fails because of wrong version
611    * (this should never actually happen since an RS only does this transition
612    * following a transition to OPENING.  if two RS are conflicting, one would
613    * fail the original transition to OPENING and not this transition)</li>
614    * </ul>
615    *
616    * <p>Does not set any watches.
617    *
618    * <p>This method should only be used by a RegionServer when completing the
619    * open of a region.
620    *
621    * @param zkw zk reference
622    * @param region region to be transitioned to opened
623    * @param serverName server transition happens on
624    * @return version of node after transition, -1 if unsuccessful transition
625    * @throws KeeperException if unexpected zookeeper exception
626    */
627   public static int transitionNodeOpened(ZooKeeperWatcher zkw,
628       HRegionInfo region, ServerName serverName, int expectedVersion)
629   throws KeeperException {
630     return transitionNode(zkw, region, serverName,
631         EventType.RS_ZK_REGION_OPENING,
632         EventType.RS_ZK_REGION_OPENED, expectedVersion);
633   }
634 
635   /**
636    *
637    * @param zkw zk reference
638    * @param region region to be closed
639    * @param expectedVersion expected version of the znode
640    * @return true if the znode exists, has the right version and the right state. False otherwise.
641    * @throws KeeperException
642    */
643   public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region,
644                                           int expectedVersion) throws KeeperException {
645 
646     final String encoded = getNodeName(zkw, region.getEncodedName());
647     zkw.sync(encoded);
648 
649     // Read existing data of the node
650     Stat stat = new Stat();
651     byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat);
652 
653     if (existingBytes == null) {
654       LOG.warn(zkw.prefix("Attempt to check the " +
655           "closing node for " + encoded +
656           ". The node does not exist"));
657       return false;
658     }
659 
660     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
661       LOG.warn(zkw.prefix("Attempt to check the " +
662           "closing node for " + encoded +
663           ". The node existed but was version " + stat.getVersion() +
664           " not the expected version " + expectedVersion));
665       return false;
666     }
667 
668     RegionTransition rt = getRegionTransition(existingBytes);
669 
670     if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) {
671       LOG.warn(zkw.prefix("Attempt to check the " +
672           "closing node for " + encoded +
673           ". The node existed but was in an unexpected state: " + rt.getEventType()));
674       return false;
675     }
676 
677     return true;
678   }
679 
680   /**
681    * Method that actually performs unassigned node transitions.
682    *
683    * <p>Attempts to transition the unassigned node for the specified region
684    * from the expected state to the state in the specified transition data.
685    *
686    * <p>Method first reads existing data and verifies it is in the expected
687    * state.  If the node does not exist or the node is not in the expected
688    * state, the method returns -1.  If the transition is successful, the
689    * version number of the node following the transition is returned.
690    *
691    * <p>If the read state is what is expected, it attempts to write the new
692    * state and data into the node.  When doing this, it includes the expected
693    * version (determined when the existing state was verified) to ensure that
694    * only one transition is successful.  If there is a version mismatch, the
695    * method returns -1.
696    *
697    * <p>If the write is successful, no watch is set and the method returns true.
698    *
699    * @param zkw zk reference
700    * @param region region to be transitioned to opened
701    * @param serverName server transition happens on
702    * @param endState state to transition node to if all checks pass
703    * @param beginState state the node must currently be in to do transition
704    * @param expectedVersion expected version of data before modification, or -1
705    * @return version of node after transition, -1 if unsuccessful transition
706    * @throws KeeperException if unexpected zookeeper exception
707    */
708   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
709       ServerName serverName, EventType beginState, EventType endState,
710       int expectedVersion)
711   throws KeeperException {
712     return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null);
713   }
714 
715   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
716       ServerName serverName, EventType beginState, EventType endState,
717       int expectedVersion, final byte [] payload)
718   throws KeeperException {
719     String encoded = region.getEncodedName();
720     if(LOG.isDebugEnabled()) {
721       LOG.debug(zkw.prefix("Attempting to transition node " +
722         HRegionInfo.prettyPrint(encoded) +
723         " from " + beginState.toString() + " to " + endState.toString()));
724     }
725 
726     String node = getNodeName(zkw, encoded);
727     zkw.sync(node);
728 
729     // Read existing data of the node
730     Stat stat = new Stat();
731     byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
732     if (existingBytes == null) {
733       // Node no longer exists.  Return -1. It means unsuccessful transition.
734       return -1;
735     }
736     RegionTransition rt = getRegionTransition(existingBytes);
737 
738     // Verify it is the expected version
739     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
740       LOG.warn(zkw.prefix("Attempt to transition the " +
741         "unassigned node for " + encoded +
742         " from " + beginState + " to " + endState + " failed, " +
743         "the node existed but was version " + stat.getVersion() +
744         " not the expected version " + expectedVersion));
745         return -1;
746     } else if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
747         && endState.equals(EventType.RS_ZK_REGION_OPENING)
748         && expectedVersion == -1 && stat.getVersion() != 0) {
749       // the below check ensures that double assignment doesnot happen.
750       // When the node is created for the first time then the expected version
751       // that is passed will be -1 and the version in znode will be 0.
752       // In all other cases the version in znode will be > 0.
753       LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
754           + encoded + " from " + beginState + " to " + endState + " failed, "
755           + "the node existed but was version " + stat.getVersion()
756           + " not the expected version " + expectedVersion));
757       return -1;
758     }
759 
760     // Verify it is in expected state
761     EventType et = rt.getEventType();
762     if (!et.equals(beginState)) {
763       String existingServer = (rt.getServerName() == null)
764         ? "<unknown>" : rt.getServerName().toString();
765       LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded
766         + " from " + beginState + " to " + endState + " failed, the node existed but"
767         + " was in the state " + et + " set by the server " + existingServer));
768       return -1;
769     }
770 
771     // Write new data, ensuring data has not changed since we last read it
772     try {
773       rt = RegionTransition.createRegionTransition(
774           endState, region.getRegionName(), serverName, payload);
775       if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
776         LOG.warn(zkw.prefix("Attempt to transition the " +
777         "unassigned node for " + encoded +
778         " from " + beginState + " to " + endState + " failed, " +
779         "the node existed and was in the expected state but then when " +
780         "setting data we got a version mismatch"));
781         return -1;
782       }
783       if(LOG.isDebugEnabled()) {
784         LOG.debug(zkw.prefix("Successfully transitioned node " + encoded +
785           " from " + beginState + " to " + endState));
786       }
787       return stat.getVersion() + 1;
788     } catch (KeeperException.NoNodeException nne) {
789       LOG.warn(zkw.prefix("Attempt to transition the " +
790         "unassigned node for " + encoded +
791         " from " + beginState + " to " + endState + " failed, " +
792         "the node existed and was in the expected state but then when " +
793         "setting data it no longer existed"));
794       return -1;
795     }
796   }
797 
798   private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException {
799     try {
800       return RegionTransition.parseFrom(bytes);
801     } catch (DeserializationException e) {
802       // Convert to a zk exception for now.  Otherwise have to change API
803       throw ZKUtil.convert(e);
804     }
805   }
806 
807   /**
808    * Gets the current data in the unassigned node for the specified region name
809    * or fully-qualified path.
810    *
811    * <p>Returns null if the region does not currently have a node.
812    *
813    * <p>Sets a watch on the node if the node exists.
814    *
815    * @param zkw zk reference
816    * @param pathOrRegionName fully-specified path or region name
817    * @return znode content
818    * @throws KeeperException if unexpected zookeeper exception
819    */
820   public static byte [] getData(ZooKeeperWatcher zkw,
821       String pathOrRegionName)
822   throws KeeperException {
823     String node = getPath(zkw, pathOrRegionName);
824     return ZKUtil.getDataAndWatch(zkw, node);
825   }
826 
827   /**
828    * Gets the current data in the unassigned node for the specified region name
829    * or fully-qualified path.
830    *
831    * <p>Returns null if the region does not currently have a node.
832    *
833    * <p>Sets a watch on the node if the node exists.
834    *
835    * @param zkw zk reference
836    * @param pathOrRegionName fully-specified path or region name
837    * @param stat object to populate the version.
838    * @return znode content
839    * @throws KeeperException if unexpected zookeeper exception
840    */
841   public static byte [] getDataAndWatch(ZooKeeperWatcher zkw,
842       String pathOrRegionName, Stat stat)
843   throws KeeperException {
844     String node = getPath(zkw, pathOrRegionName);
845     return ZKUtil.getDataAndWatch(zkw, node, stat);
846   }
847 
848   /**
849    * Gets the current data in the unassigned node for the specified region name
850    * or fully-qualified path.
851    *
852    * <p>Returns null if the region does not currently have a node.
853    *
854    * <p>Does not set a watch.
855    *
856    * @param zkw zk reference
857    * @param pathOrRegionName fully-specified path or region name
858    * @param stat object to store node info into on getData call
859    * @return znode content
860    * @throws KeeperException if unexpected zookeeper exception
861    */
862   public static byte [] getDataNoWatch(ZooKeeperWatcher zkw,
863       String pathOrRegionName, Stat stat)
864   throws KeeperException {
865     String node = getPath(zkw, pathOrRegionName);
866     return ZKUtil.getDataNoWatch(zkw, node, stat);
867   }
868 
869   /**
870    * @param zkw
871    * @param pathOrRegionName
872    * @return Path to znode
873    */
874   public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) {
875     return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName);
876   }
877 
878   /**
879    * Get the version of the specified znode
880    * @param zkw zk reference
881    * @param region region's info
882    * @return the version of the znode, -1 if it doesn't exist
883    * @throws KeeperException
884    */
885   public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
886     throws KeeperException {
887     String znode = getNodeName(zkw, region.getEncodedName());
888     return ZKUtil.checkExists(zkw, znode);
889   }
890 
891   /**
892    * Delete the assignment node regardless of its current state.
893    * <p>
894    * Fail silent even if the node does not exist at all.
895    * @param watcher
896    * @param regionInfo
897    * @throws KeeperException
898    */
899   public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
900       HRegionInfo regionInfo)
901   throws KeeperException {
902     String node = getNodeName(watcher, regionInfo.getEncodedName());
903     ZKUtil.deleteNodeFailSilent(watcher, node);
904   }
905 
906   /**
907    * Blocks until there are no node in regions in transition.
908    * <p>
909    * Used in testing only.
910    * @param zkw zk reference
911    * @throws KeeperException
912    * @throws InterruptedException
913    */
914   public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
915   throws KeeperException, InterruptedException {
916     while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
917       List<String> znodes =
918         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
919       if (znodes != null && !znodes.isEmpty()) {
920         for (String znode : znodes) {
921           LOG.debug("ZK RIT -> " + znode);
922         }
923       }
924       Thread.sleep(100);
925     }
926   }
927 
928   /**
929    * Blocks until there is at least one node in regions in transition.
930    * <p>
931    * Used in testing only.
932    * @param zkw zk reference
933    * @throws KeeperException
934    * @throws InterruptedException
935    */
936   public static void blockUntilRIT(ZooKeeperWatcher zkw)
937   throws KeeperException, InterruptedException {
938     while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
939       List<String> znodes =
940         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
941       if (znodes == null || znodes.isEmpty()) {
942         LOG.debug("No RIT in ZK");
943       }
944       Thread.sleep(100);
945     }
946   }
947 
948   /**
949    * Presume bytes are serialized unassigned data structure
950    * @param znodeBytes
951    * @return String of the deserialized znode bytes.
952    */
953   static String toString(final byte[] znodeBytes) {
954     // This method should not exist.  Used by ZKUtil stringifying RegionTransition.  Have the
955     // method in here so RegionTransition does not leak into ZKUtil.
956     try {
957       RegionTransition rt = RegionTransition.parseFrom(znodeBytes);
958       return rt.toString();
959     } catch (DeserializationException e) {
960       return "";
961     }
962   }
963 }