View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.zookeeper;
20  
21  import org.apache.commons.logging.Log;
22  import org.apache.commons.logging.LogFactory;
23  import org.apache.hadoop.classification.InterfaceAudience;
24  import org.apache.hadoop.classification.InterfaceStability;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.RegionTransition;
28  import org.apache.hadoop.hbase.ServerName;
29  import org.apache.hadoop.hbase.exceptions.DeserializationException;
30  import org.apache.hadoop.hbase.executor.EventType;
31  import org.apache.zookeeper.AsyncCallback;
32  import org.apache.zookeeper.KeeperException;
33  import org.apache.zookeeper.KeeperException.Code;
34  import org.apache.zookeeper.KeeperException.NoNodeException;
35  import org.apache.zookeeper.KeeperException.NodeExistsException;
36  import org.apache.zookeeper.data.Stat;
37  
38  import java.util.List;
39  
40  // We should not be importing this Type here, nor a RegionTransition, etc.  This class should be
41  // about zk and bytes only.
42  
43  /**
44   * Utility class for doing region assignment in ZooKeeper.  This class extends
45   * stuff done in {@link ZKUtil} to cover specific assignment operations.
46   * <p>
47   * Contains only static methods and constants.
48   * <p>
49   * Used by both the Master and RegionServer.
50   * <p>
51   * All valid transitions outlined below:
52   * <p>
53   * <b>MASTER</b>
54   * <ol>
55   *   <li>
56   *     Master creates an unassigned node as OFFLINE.
57   *     - Cluster startup and table enabling.
58   *   </li>
59   *   <li>
60   *     Master forces an existing unassigned node to OFFLINE.
61   *     - RegionServer failure.
62   *     - Allows transitions from all states to OFFLINE.
63   *   </li>
64   *   <li>
65   *     Master deletes an unassigned node that was in an OPENED state.
66   *     - Normal region transitions.  Besides cluster startup, no other deletions
67   *     of unassigned nodes are allowed.
68   *   </li>
69   *   <li>
70   *     Master deletes all unassigned nodes regardless of state.
71   *     - Cluster startup before any assignment happens.
72   *   </li>
73   * </ol>
74   * <p>
75   * <b>REGIONSERVER</b>
76   * <ol>
77   *   <li>
78   *     RegionServer creates an unassigned node as CLOSING.
79   *     - All region closes will do this in response to a CLOSE RPC from Master.
80   *     - A node can never be transitioned to CLOSING, only created.
81   *   </li>
82   *   <li>
83   *     RegionServer transitions an unassigned node from CLOSING to CLOSED.
84   *     - Normal region closes.  CAS operation.
85   *   </li>
86   *   <li>
87   *     RegionServer transitions an unassigned node from OFFLINE to OPENING.
88   *     - All region opens will do this in response to an OPEN RPC from the Master.
89   *     - Normal region opens.  CAS operation.
90   *   </li>
91   *   <li>
92   *     RegionServer transitions an unassigned node from OPENING to OPENED.
93   *     - Normal region opens.  CAS operation.
94   *   </li>
95   * </ol>
96   */
97  @InterfaceAudience.Public
98  @InterfaceStability.Evolving
99  public class ZKAssign {
100   private static final Log LOG = LogFactory.getLog(ZKAssign.class);
101 
102   /**
103    * Gets the full path node name for the unassigned node for the specified
104    * region.
105    * @param zkw zk reference
106    * @param regionName region name
107    * @return full path node name
108    */
109   public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
110     return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
111   }
112 
113   /**
114    * Gets the region name from the full path node name of an unassigned node.
115    * @param path full zk path
116    * @return region name
117    */
118   public static String getRegionName(ZooKeeperWatcher zkw, String path) {
119     return path.substring(zkw.assignmentZNode.length()+1);
120   }
121 
122   // Master methods
123 
124   /**
125    * Creates a new unassigned node in the OFFLINE state for the specified region.
126    *
127    * <p>Does not transition nodes from other states.  If a node already exists
128    * for this region, a {@link NodeExistsException} will be thrown.
129    *
130    * <p>Sets a watcher on the unassigned region node if the method is successful.
131    *
132    * <p>This method should only be used during cluster startup and the enabling
133    * of a table.
134    *
135    * @param zkw zk reference
136    * @param region region to be created as offline
137    * @param serverName server transition will happen on
138    * @throws KeeperException if unexpected zookeeper exception
139    * @throws KeeperException.NodeExistsException if node already exists
140    */
141   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
142       ServerName serverName)
143   throws KeeperException, KeeperException.NodeExistsException {
144     createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
145   }
146 
147   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
148       ServerName serverName, final EventType event)
149   throws KeeperException, KeeperException.NodeExistsException {
150     LOG.debug(zkw.prefix("Creating unassigned node for " +
151       region.getEncodedName() + " in OFFLINE state"));
152     RegionTransition rt =
153       RegionTransition.createRegionTransition(event, region.getRegionName(), serverName);
154     String node = getNodeName(zkw, region.getEncodedName());
155     ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
156   }
157 
158   /**
159    * Creates an unassigned node in the OFFLINE state for the specified region.
160    * <p>
161    * Runs asynchronously.  Depends on no pre-existing znode.
162    *
163    * <p>Sets a watcher on the unassigned region node.
164    *
165    * @param zkw zk reference
166    * @param region region to be created as offline
167    * @param serverName server transition will happen on
168    * @param cb
169    * @param ctx
170    * @throws KeeperException if unexpected zookeeper exception
171    * @throws KeeperException.NodeExistsException if node already exists
172    */
173   public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
174       HRegionInfo region, ServerName serverName,
175       final AsyncCallback.StringCallback cb, final Object ctx)
176   throws KeeperException {
177     LOG.debug(zkw.prefix("Async create of unassigned node for " +
178       region.getEncodedName() + " with OFFLINE state"));
179     RegionTransition rt =
180       RegionTransition.createRegionTransition(
181           EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
182     String node = getNodeName(zkw, region.getEncodedName());
183     ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx);
184   }
185 
186   /**
187    * Creates or force updates an unassigned node to the OFFLINE state for the
188    * specified region.
189    * <p>
190    * Attempts to create the node but if it exists will force it to transition to
191    * and OFFLINE state.
192    *
193    * <p>Sets a watcher on the unassigned region node if the method is
194    * successful.
195    *
196    * <p>This method should be used when assigning a region.
197    *
198    * @param zkw zk reference
199    * @param region region to be created as offline
200    * @param serverName server transition will happen on
201    * @return the version of the znode created in OFFLINE state, -1 if
202    *         unsuccessful.
203    * @throws KeeperException if unexpected zookeeper exception
204    * @throws KeeperException.NodeExistsException if node already exists
205    */
206   public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
207       HRegionInfo region, ServerName serverName) throws KeeperException {
208     LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
209       region.getEncodedName() + " with OFFLINE state"));
210     RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE,
211       region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
212     byte [] data = rt.toByteArray();
213     String node = getNodeName(zkw, region.getEncodedName());
214     zkw.sync(node);
215     int version = ZKUtil.checkExists(zkw, node);
216     if (version == -1) {
217       return ZKUtil.createAndWatch(zkw, node, data);
218     } else {
219       boolean setData = false;
220       try {
221         setData = ZKUtil.setData(zkw, node, data, version);
222         // Setdata throws KeeperException which aborts the Master. So we are
223         // catching it here.
224         // If just before setting the znode to OFFLINE if the RS has made any
225         // change to the
226         // znode state then we need to return -1.
227       } catch (KeeperException kpe) {
228         LOG.info("Version mismatch while setting the node to OFFLINE state.");
229         return -1;
230       }
231       if (!setData) {
232         return -1;
233       } else {
234         // We successfully forced to OFFLINE, reset watch and handle if
235         // the state changed in between our set and the watch
236         byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
237         rt = getRegionTransition(bytes);
238         if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) {
239           // state changed, need to process
240           return -1;
241         }
242       }
243     }
244     return version + 1;
245   }
246 
247   /**
248    * Deletes an existing unassigned node that is in the OPENED state for the
249    * specified region.
250    *
251    * <p>If a node does not already exist for this region, a
252    * {@link NoNodeException} will be thrown.
253    *
254    * <p>No watcher is set whether this succeeds or not.
255    *
256    * <p>Returns false if the node was not in the proper state but did exist.
257    *
258    * <p>This method is used during normal region transitions when a region
259    * finishes successfully opening.  This is the Master acknowledging completion
260    * of the specified regions transition.
261    *
262    * @param zkw zk reference
263    * @param encodedRegionName opened region to be deleted from zk
264    * @throws KeeperException if unexpected zookeeper exception
265    * @throws KeeperException.NoNodeException if node does not exist
266    */
267   public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
268       String encodedRegionName)
269   throws KeeperException, KeeperException.NoNodeException {
270     return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_OPENED);
271   }
272 
273   /**
274    * Deletes an existing unassigned node that is in the OFFLINE state for the
275    * specified region.
276    *
277    * <p>If a node does not already exist for this region, a
278    * {@link NoNodeException} will be thrown.
279    *
280    * <p>No watcher is set whether this succeeds or not.
281    *
282    * <p>Returns false if the node was not in the proper state but did exist.
283    *
284    * <p>This method is used during master failover when the regions on an RS
285    * that has died are all set to OFFLINE before being processed.
286    *
287    * @param zkw zk reference
288    * @param encodedRegionName closed region to be deleted from zk
289    * @throws KeeperException if unexpected zookeeper exception
290    * @throws KeeperException.NoNodeException if node does not exist
291    */
292   public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
293       String encodedRegionName)
294   throws KeeperException, KeeperException.NoNodeException {
295     return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_OFFLINE);
296   }
297 
298   /**
299    * Deletes an existing unassigned node that is in the CLOSED state for the
300    * specified region.
301    *
302    * <p>If a node does not already exist for this region, a
303    * {@link NoNodeException} will be thrown.
304    *
305    * <p>No watcher is set whether this succeeds or not.
306    *
307    * <p>Returns false if the node was not in the proper state but did exist.
308    *
309    * <p>This method is used during table disables when a region finishes
310    * successfully closing.  This is the Master acknowledging completion
311    * of the specified regions transition to being closed.
312    *
313    * @param zkw zk reference
314    * @param encodedRegionName closed region to be deleted from zk
315    * @throws KeeperException if unexpected zookeeper exception
316    * @throws KeeperException.NoNodeException if node does not exist
317    */
318   public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
319       String encodedRegionName)
320   throws KeeperException, KeeperException.NoNodeException {
321     return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_CLOSED);
322   }
323 
324   /**
325    * Deletes an existing unassigned node that is in the CLOSING state for the
326    * specified region.
327    *
328    * <p>If a node does not already exist for this region, a
329    * {@link NoNodeException} will be thrown.
330    *
331    * <p>No watcher is set whether this succeeds or not.
332    *
333    * <p>Returns false if the node was not in the proper state but did exist.
334    *
335    * <p>This method is used during table disables when a region finishes
336    * successfully closing.  This is the Master acknowledging completion
337    * of the specified regions transition to being closed.
338    *
339    * @param zkw zk reference
340    * @param region closing region to be deleted from zk
341    * @throws KeeperException if unexpected zookeeper exception
342    * @throws KeeperException.NoNodeException if node does not exist
343    */
344   public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
345       HRegionInfo region)
346   throws KeeperException, KeeperException.NoNodeException {
347     String encodedRegionName = region.getEncodedName();
348     return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_CLOSING);
349   }
350 
351   /**
352    * Deletes an existing unassigned node that is in the specified state for the
353    * specified region.
354    *
355    * <p>If a node does not already exist for this region, a
356    * {@link NoNodeException} will be thrown.
357    *
358    * <p>No watcher is set whether this succeeds or not.
359    *
360    * <p>Returns false if the node was not in the proper state but did exist.
361    *
362    * <p>This method is used when a region finishes opening/closing.
363    * The Master acknowledges completion
364    * of the specified regions transition to being closed/opened.
365    *
366    * @param zkw zk reference
367    * @param encodedRegionName region to be deleted from zk
368    * @param expectedState state region must be in for delete to complete
369    * @throws KeeperException if unexpected zookeeper exception
370    * @throws KeeperException.NoNodeException if node does not exist
371    */
372   public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
373       EventType expectedState)
374   throws KeeperException, KeeperException.NoNodeException {
375     return deleteNode(zkw, encodedRegionName, expectedState, -1);
376   }
377 
378   /**
379    * Deletes an existing unassigned node that is in the specified state for the
380    * specified region.
381    *
382    * <p>If a node does not already exist for this region, a
383    * {@link NoNodeException} will be thrown.
384    *
385    * <p>No watcher is set whether this succeeds or not.
386    *
387    * <p>Returns false if the node was not in the proper state but did exist.
388    *
389    * <p>This method is used when a region finishes opening/closing.
390    * The Master acknowledges completion
391    * of the specified regions transition to being closed/opened.
392    *
393    * @param zkw zk reference
394    * @param encodedRegionName region to be deleted from zk
395    * @param expectedState state region must be in for delete to complete
396    * @param expectedVersion of the znode that is to be deleted.
397    *        If expectedVersion need not be compared while deleting the znode
398    *        pass -1
399    * @throws KeeperException if unexpected zookeeper exception
400    * @throws KeeperException.NoNodeException if node does not exist
401    */
402   public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
403       EventType expectedState, int expectedVersion)
404   throws KeeperException, KeeperException.NoNodeException {
405     LOG.debug(zkw.prefix("Deleting existing unassigned " +
406       "node for " + encodedRegionName + " that is in expected state " + expectedState));
407     String node = getNodeName(zkw, encodedRegionName);
408     zkw.sync(node);
409     Stat stat = new Stat();
410     byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
411     if (bytes == null) {
412       // If it came back null, node does not exist.
413       throw KeeperException.create(Code.NONODE);
414     }
415     RegionTransition rt = getRegionTransition(bytes);
416     EventType et = rt.getEventType();
417     if (!et.equals(expectedState)) {
418       LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " +
419         expectedState + " state but node is in " + et + " state"));
420       return false;
421     }
422     if (expectedVersion != -1
423         && stat.getVersion() != expectedVersion) {
424       LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" +
425         " the expected one. Got a version mismatch");
426       return false;
427     }
428     if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
429       LOG.warn(zkw.prefix("Attempting to delete " +
430           "unassigned node " + encodedRegionName + " in " + expectedState +
431           " state but after verifying state, we got a version mismatch"));
432       return false;
433     }
434     LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
435         encodedRegionName + " in expected state " + expectedState));
436     return true;
437   }
438 
439   /**
440    * Deletes all unassigned nodes regardless of their state.
441    *
442    * <p>No watchers are set.
443    *
444    * <p>This method is used by the Master during cluster startup to clear out
445    * any existing state from other cluster runs.
446    *
447    * @param zkw zk reference
448    * @throws KeeperException if unexpected zookeeper exception
449    */
450   public static void deleteAllNodes(ZooKeeperWatcher zkw)
451   throws KeeperException {
452     LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
453     ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
454   }
455 
456   /**
457    * Creates a new unassigned node in the CLOSING state for the specified
458    * region.
459    *
460    * <p>Does not transition nodes from any states.  If a node already exists
461    * for this region, a {@link NodeExistsException} will be thrown.
462    *
463    * <p>If creation is successful, returns the version number of the CLOSING
464    * node created.
465    *
466    * <p>Set a watch.
467    *
468    * <p>This method should only be used by a Master when initiating a
469    * close of a region before sending a close request to the region server.
470    *
471    * @param zkw zk reference
472    * @param region region to be created as closing
473    * @param serverName server transition will happen on
474    * @return version of node after transition, -1 if unsuccessful transition
475    * @throws KeeperException if unexpected zookeeper exception
476    * @throws KeeperException.NodeExistsException if node already exists
477    */
478   public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
479       ServerName serverName)
480   throws KeeperException, KeeperException.NodeExistsException {
481     LOG.debug(zkw.prefix("Creating unassigned node for " +
482       region.getEncodedName() + " in a CLOSING state"));
483     RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
484       region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
485     String node = getNodeName(zkw, region.getEncodedName());
486     return ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
487   }
488 
489   // RegionServer methods
490 
491   /**
492    * Transitions an existing unassigned node for the specified region which is
493    * currently in the CLOSING state to be in the CLOSED state.
494    *
495    * <p>Does not transition nodes from other states.  If for some reason the
496    * node could not be transitioned, the method returns -1.  If the transition
497    * is successful, the version of the node after transition is returned.
498    *
499    * <p>This method can fail and return false for three different reasons:
500    * <ul><li>Unassigned node for this region does not exist</li>
501    * <li>Unassigned node for this region is not in CLOSING state</li>
502    * <li>After verifying CLOSING state, update fails because of wrong version
503    * (someone else already transitioned the node)</li>
504    * </ul>
505    *
506    * <p>Does not set any watches.
507    *
508    * <p>This method should only be used by a RegionServer when initiating a
509    * close of a region after receiving a CLOSE RPC from the Master.
510    *
511    * @param zkw zk reference
512    * @param region region to be transitioned to closed
513    * @param serverName server transition happens on
514    * @return version of node after transition, -1 if unsuccessful transition
515    * @throws KeeperException if unexpected zookeeper exception
516    */
517   public static int transitionNodeClosed(ZooKeeperWatcher zkw,
518       HRegionInfo region, ServerName serverName, int expectedVersion)
519   throws KeeperException {
520     return transitionNode(zkw, region, serverName,
521         EventType.M_ZK_REGION_CLOSING,
522         EventType.RS_ZK_REGION_CLOSED, expectedVersion);
523   }
524 
525   /**
526    * Transitions an existing unassigned node for the specified region which is
527    * currently in the OFFLINE state to be in the OPENING state.
528    *
529    * <p>Does not transition nodes from other states.  If for some reason the
530    * node could not be transitioned, the method returns -1.  If the transition
531    * is successful, the version of the node written as OPENING is returned.
532    *
533    * <p>This method can fail and return -1 for three different reasons:
534    * <ul><li>Unassigned node for this region does not exist</li>
535    * <li>Unassigned node for this region is not in OFFLINE state</li>
536    * <li>After verifying OFFLINE state, update fails because of wrong version
537    * (someone else already transitioned the node)</li>
538    * </ul>
539    *
540    * <p>Does not set any watches.
541    *
542    * <p>This method should only be used by a RegionServer when initiating an
543    * open of a region after receiving an OPEN RPC from the Master.
544    *
545    * @param zkw zk reference
546    * @param region region to be transitioned to opening
547    * @param serverName server transition happens on
548    * @return version of node after transition, -1 if unsuccessful transition
549    * @throws KeeperException if unexpected zookeeper exception
550    */
551   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
552       HRegionInfo region, ServerName serverName)
553   throws KeeperException {
554     return transitionNodeOpening(zkw, region, serverName,
555       EventType.M_ZK_REGION_OFFLINE);
556   }
557 
558   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
559       HRegionInfo region, ServerName serverName, final EventType beginState)
560   throws KeeperException {
561     return transitionNode(zkw, region, serverName, beginState,
562       EventType.RS_ZK_REGION_OPENING, -1);
563   }
564 
565   /**
566    * Retransitions an existing unassigned node for the specified region which is
567    * currently in the OPENING state to be in the OPENING state.
568    *
569    * <p>Does not transition nodes from other states.  If for some reason the
570    * node could not be transitioned, the method returns -1.  If the transition
571    * is successful, the version of the node rewritten as OPENING is returned.
572    *
573    * <p>This method can fail and return -1 for three different reasons:
574    * <ul><li>Unassigned node for this region does not exist</li>
575    * <li>Unassigned node for this region is not in OPENING state</li>
576    * <li>After verifying OPENING state, update fails because of wrong version
577    * (someone else already transitioned the node)</li>
578    * </ul>
579    *
580    * <p>Does not set any watches.
581    *
582    * <p>This method should only be used by a RegionServer when initiating an
583    * open of a region after receiving an OPEN RPC from the Master.
584    *
585    * @param zkw zk reference
586    * @param region region to be transitioned to opening
587    * @param serverName server transition happens on
588    * @param updateZNode write the znode. If false, we only check.
589    * @return version of node after transition, -1 if unsuccessful transition
590    * @throws KeeperException if unexpected zookeeper exception
591    */
592   public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
593       HRegionInfo region, ServerName serverName, int expectedVersion, boolean updateZNode)
594   throws KeeperException {
595 
596     String encoded = region.getEncodedName();
597     if(LOG.isDebugEnabled()) {
598       LOG.debug(zkw.prefix("Attempting to retransition the opening state of node " +
599           HRegionInfo.prettyPrint(encoded)));
600     }
601 
602     String node = getNodeName(zkw, encoded);
603     zkw.sync(node);
604 
605     // Read existing data of the node
606     Stat stat = new Stat();
607     byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
608     if (existingBytes == null) {
609       // Node no longer exists.  Return -1. It means unsuccessful transition.
610       return -1;
611     }
612     RegionTransition rt = getRegionTransition(existingBytes);
613 
614     // Verify it is the expected version
615     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
616       LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
617           "unassigned node for " + encoded + " failed, " +
618           "the node existed but was version " + stat.getVersion() +
619           " not the expected version " + expectedVersion));
620       return -1;
621     }
622 
623     // Verify it is in expected state
624     EventType et = rt.getEventType();
625     if (!et.equals(EventType.RS_ZK_REGION_OPENING)) {
626       String existingServer = (rt.getServerName() == null)
627           ? "<unknown>" : rt.getServerName().toString();
628       LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for "
629           + encoded + " failed, the node existed but was in the state " + et +
630           " set by the server " + existingServer));
631       return -1;
632     }
633 
634     // We don't have to write the new state: the check is complete.
635     if (!updateZNode){
636       return expectedVersion;
637     }
638 
639     // Write new data, ensuring data has not changed since we last read it
640     try {
641       rt = RegionTransition.createRegionTransition(
642           EventType.RS_ZK_REGION_OPENING, region.getRegionName(), serverName, null);
643       if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
644         LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
645             "unassigned node for " + encoded + " failed, " +
646             "the node existed and was in the expected state but then when " +
647             "setting data we got a version mismatch"));
648         return -1;
649       }
650       if(LOG.isDebugEnabled()) {
651         LOG.debug(zkw.prefix("Successfully retransition the opening state of node " + encoded));
652       }
653       return stat.getVersion() + 1;
654     } catch (KeeperException.NoNodeException nne) {
655       LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
656           "unassigned node for " + encoded + " failed, " +
657           "the node existed and was in the expected state but then when " +
658           "setting data it no longer existed"));
659       return -1;
660     }
661   }
662 
663   /**
664    * Transitions an existing unassigned node for the specified region which is
665    * currently in the OPENING state to be in the OPENED state.
666    *
667    * <p>Does not transition nodes from other states.  If for some reason the
668    * node could not be transitioned, the method returns -1.  If the transition
669    * is successful, the version of the node after transition is returned.
670    *
671    * <p>This method can fail and return false for three different reasons:
672    * <ul><li>Unassigned node for this region does not exist</li>
673    * <li>Unassigned node for this region is not in OPENING state</li>
674    * <li>After verifying OPENING state, update fails because of wrong version
675    * (this should never actually happen since an RS only does this transition
676    * following a transition to OPENING.  if two RS are conflicting, one would
677    * fail the original transition to OPENING and not this transition)</li>
678    * </ul>
679    *
680    * <p>Does not set any watches.
681    *
682    * <p>This method should only be used by a RegionServer when completing the
683    * open of a region.
684    *
685    * @param zkw zk reference
686    * @param region region to be transitioned to opened
687    * @param serverName server transition happens on
688    * @return version of node after transition, -1 if unsuccessful transition
689    * @throws KeeperException if unexpected zookeeper exception
690    */
691   public static int transitionNodeOpened(ZooKeeperWatcher zkw,
692       HRegionInfo region, ServerName serverName, int expectedVersion)
693   throws KeeperException {
694     return transitionNode(zkw, region, serverName,
695         EventType.RS_ZK_REGION_OPENING,
696         EventType.RS_ZK_REGION_OPENED, expectedVersion);
697   }
698 
699   /**
700    *
701    * @param zkw zk reference
702    * @param region region to be closed
703    * @param expectedVersion expected version of the znode
704    * @return true if the znode exists, has the right version and the right state. False otherwise.
705    * @throws KeeperException
706    */
707   public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region,
708                                           int expectedVersion) throws KeeperException {
709 
710     final String encoded = getNodeName(zkw, region.getEncodedName());
711     zkw.sync(encoded);
712 
713     // Read existing data of the node
714     Stat stat = new Stat();
715     byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat);
716 
717     if (existingBytes == null) {
718       LOG.warn(zkw.prefix("Attempt to check the " +
719           "closing node for " + encoded +
720           ". The node does not exist"));
721       return false;
722     }
723 
724     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
725       LOG.warn(zkw.prefix("Attempt to check the " +
726           "closing node for " + encoded +
727           ". The node existed but was version " + stat.getVersion() +
728           " not the expected version " + expectedVersion));
729       return false;
730     }
731 
732     RegionTransition rt = getRegionTransition(existingBytes);
733 
734     if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) {
735       LOG.warn(zkw.prefix("Attempt to check the " +
736           "closing node for " + encoded +
737           ". The node existed but was in an unexpected state: " + rt.getEventType()));
738       return false;
739     }
740 
741     return true;
742   }
743 
744   /**
745    * Method that actually performs unassigned node transitions.
746    *
747    * <p>Attempts to transition the unassigned node for the specified region
748    * from the expected state to the state in the specified transition data.
749    *
750    * <p>Method first reads existing data and verifies it is in the expected
751    * state.  If the node does not exist or the node is not in the expected
752    * state, the method returns -1.  If the transition is successful, the
753    * version number of the node following the transition is returned.
754    *
755    * <p>If the read state is what is expected, it attempts to write the new
756    * state and data into the node.  When doing this, it includes the expected
757    * version (determined when the existing state was verified) to ensure that
758    * only one transition is successful.  If there is a version mismatch, the
759    * method returns -1.
760    *
761    * <p>If the write is successful, no watch is set and the method returns true.
762    *
763    * @param zkw zk reference
764    * @param region region to be transitioned to opened
765    * @param serverName server transition happens on
766    * @param endState state to transition node to if all checks pass
767    * @param beginState state the node must currently be in to do transition
768    * @param expectedVersion expected version of data before modification, or -1
769    * @return version of node after transition, -1 if unsuccessful transition
770    * @throws KeeperException if unexpected zookeeper exception
771    */
772   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
773       ServerName serverName, EventType beginState, EventType endState,
774       int expectedVersion)
775   throws KeeperException {
776     return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null);
777   }
778 
779 
780   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
781       ServerName serverName, EventType beginState, EventType endState,
782       int expectedVersion, final byte [] payload)
783   throws KeeperException {
784     String encoded = region.getEncodedName();
785     if(LOG.isDebugEnabled()) {
786       LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) +
787         " from " + beginState.toString() + " to " + endState.toString()));
788     }
789 
790     String node = getNodeName(zkw, encoded);
791     zkw.sync(node);
792 
793     // Read existing data of the node
794     Stat stat = new Stat();
795     byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
796     if (existingBytes == null) {
797       // Node no longer exists.  Return -1. It means unsuccessful transition.
798       return -1;
799     }
800 
801     // Verify it is the expected version
802     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
803       LOG.warn(zkw.prefix("Attempt to transition the " +
804         "unassigned node for " + encoded +
805         " from " + beginState + " to " + endState + " failed, " +
806         "the node existed but was version " + stat.getVersion() +
807         " not the expected version " + expectedVersion));
808         return -1;
809     }
810 
811     if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
812         && endState.equals(EventType.RS_ZK_REGION_OPENING)
813         && expectedVersion == -1 && stat.getVersion() != 0) {
814       // the below check ensures that double assignment doesnot happen.
815       // When the node is created for the first time then the expected version
816       // that is passed will be -1 and the version in znode will be 0.
817       // In all other cases the version in znode will be > 0.
818       LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
819           + encoded + " from " + beginState + " to " + endState + " failed, "
820           + "the node existed but was version " + stat.getVersion()
821           + " not the expected version " + expectedVersion));
822       return -1;
823     }
824 
825     RegionTransition rt = getRegionTransition(existingBytes);
826 
827     // Verify the server transition happens on is not changed
828     if (!rt.getServerName().equals(serverName)) {
829       LOG.warn(zkw.prefix("Attempt to transition the " +
830         "unassigned node for " + encoded +
831         " from " + beginState + " to " + endState + " failed, " +
832         "the server that tried to transition was " + serverName +
833         " not the expected " + rt.getServerName()));
834       return -1;
835     }
836 
837     // Verify it is in expected state
838     EventType et = rt.getEventType();
839     if (!et.equals(beginState)) {
840       String existingServer = (rt.getServerName() == null)
841         ? "<unknown>" : rt.getServerName().toString();
842       LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded
843         + " from " + beginState + " to " + endState + " failed, the node existed but"
844         + " was in the state " + et + " set by the server " + existingServer));
845       return -1;
846     }
847 
848     // Write new data, ensuring data has not changed since we last read it
849     try {
850       rt = RegionTransition.createRegionTransition(
851           endState, region.getRegionName(), serverName, payload);
852       if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
853         LOG.warn(zkw.prefix("Attempt to transition the " +
854         "unassigned node for " + encoded +
855         " from " + beginState + " to " + endState + " failed, " +
856         "the node existed and was in the expected state but then when " +
857         "setting data we got a version mismatch"));
858         return -1;
859       }
860       if(LOG.isDebugEnabled()) {
861         LOG.debug(zkw.prefix("Successfully transitioned node " + encoded +
862           " from " + beginState + " to " + endState));
863       }
864       return stat.getVersion() + 1;
865     } catch (KeeperException.NoNodeException nne) {
866       LOG.warn(zkw.prefix("Attempt to transition the " +
867         "unassigned node for " + encoded +
868         " from " + beginState + " to " + endState + " failed, " +
869         "the node existed and was in the expected state but then when " +
870         "setting data it no longer existed"));
871       return -1;
872     }
873   }
874 
875   private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException {
876     try {
877       return RegionTransition.parseFrom(bytes);
878     } catch (DeserializationException e) {
879       // Convert to a zk exception for now.  Otherwise have to change API
880       throw ZKUtil.convert(e);
881     }
882   }
883 
884   /**
885    * Gets the current data in the unassigned node for the specified region name
886    * or fully-qualified path.
887    *
888    * <p>Returns null if the region does not currently have a node.
889    *
890    * <p>Sets a watch on the node if the node exists.
891    *
892    * @param zkw zk reference
893    * @param pathOrRegionName fully-specified path or region name
894    * @return znode content
895    * @throws KeeperException if unexpected zookeeper exception
896    */
897   public static byte [] getData(ZooKeeperWatcher zkw,
898       String pathOrRegionName)
899   throws KeeperException {
900     String node = getPath(zkw, pathOrRegionName);
901     return ZKUtil.getDataAndWatch(zkw, node);
902   }
903 
904   /**
905    * Gets the current data in the unassigned node for the specified region name
906    * or fully-qualified path.
907    *
908    * <p>Returns null if the region does not currently have a node.
909    *
910    * <p>Sets a watch on the node if the node exists.
911    *
912    * @param zkw zk reference
913    * @param pathOrRegionName fully-specified path or region name
914    * @param stat object to populate the version.
915    * @return znode content
916    * @throws KeeperException if unexpected zookeeper exception
917    */
918   public static byte [] getDataAndWatch(ZooKeeperWatcher zkw,
919       String pathOrRegionName, Stat stat)
920   throws KeeperException {
921     String node = getPath(zkw, pathOrRegionName);
922     return ZKUtil.getDataAndWatch(zkw, node, stat);
923   }
924 
925   /**
926    * Gets the current data in the unassigned node for the specified region name
927    * or fully-qualified path.
928    *
929    * <p>Returns null if the region does not currently have a node.
930    *
931    * <p>Does not set a watch.
932    *
933    * @param zkw zk reference
934    * @param pathOrRegionName fully-specified path or region name
935    * @param stat object to store node info into on getData call
936    * @return znode content
937    * @throws KeeperException if unexpected zookeeper exception
938    */
939   public static byte [] getDataNoWatch(ZooKeeperWatcher zkw,
940       String pathOrRegionName, Stat stat)
941   throws KeeperException {
942     String node = getPath(zkw, pathOrRegionName);
943     return ZKUtil.getDataNoWatch(zkw, node, stat);
944   }
945 
946   /**
947    * @param zkw
948    * @param pathOrRegionName
949    * @return Path to znode
950    */
951   public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) {
952     return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName);
953   }
954 
955   /**
956    * Get the version of the specified znode
957    * @param zkw zk reference
958    * @param region region's info
959    * @return the version of the znode, -1 if it doesn't exist
960    * @throws KeeperException
961    */
962   public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
963     throws KeeperException {
964     String znode = getNodeName(zkw, region.getEncodedName());
965     return ZKUtil.checkExists(zkw, znode);
966   }
967 
968   /**
969    * Delete the assignment node regardless of its current state.
970    * <p>
971    * Fail silent even if the node does not exist at all.
972    * @param watcher
973    * @param regionInfo
974    * @throws KeeperException
975    */
976   public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
977       HRegionInfo regionInfo)
978   throws KeeperException {
979     String node = getNodeName(watcher, regionInfo.getEncodedName());
980     ZKUtil.deleteNodeFailSilent(watcher, node);
981   }
982 
983   /**
984    * Blocks until there are no node in regions in transition.
985    * <p>
986    * Used in testing only.
987    * @param zkw zk reference
988    * @throws KeeperException
989    * @throws InterruptedException
990    */
991   public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
992   throws KeeperException, InterruptedException {
993     while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
994       List<String> znodes =
995         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
996       if (znodes != null && !znodes.isEmpty()) {
997         LOG.debug("Waiting on RIT: " + znodes);
998       }
999       Thread.sleep(100);
1000     }
1001   }
1002 
1003   /**
1004    * Blocks until there is at least one node in regions in transition.
1005    * <p>
1006    * Used in testing only.
1007    * @param zkw zk reference
1008    * @throws KeeperException
1009    * @throws InterruptedException
1010    */
1011   public static void blockUntilRIT(ZooKeeperWatcher zkw)
1012   throws KeeperException, InterruptedException {
1013     while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1014       List<String> znodes =
1015         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1016       if (znodes == null || znodes.isEmpty()) {
1017         LOG.debug("No RIT in ZK");
1018       }
1019       Thread.sleep(100);
1020     }
1021   }
1022 
1023   /**
1024    * Presume bytes are serialized unassigned data structure
1025    * @param znodeBytes
1026    * @return String of the deserialized znode bytes.
1027    */
1028   static String toString(final byte[] znodeBytes) {
1029     // This method should not exist.  Used by ZKUtil stringifying RegionTransition.  Have the
1030     // method in here so RegionTransition does not leak into ZKUtil.
1031     try {
1032       RegionTransition rt = RegionTransition.parseFrom(znodeBytes);
1033       return rt.toString();
1034     } catch (DeserializationException e) {
1035       return "";
1036     }
1037   }
1038 }