View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.zookeeper;
21  
22  import java.util.List;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.executor.RegionTransitionData;
28  import org.apache.hadoop.hbase.executor.EventHandler.EventType;
29  import org.apache.zookeeper.AsyncCallback;
30  import org.apache.zookeeper.KeeperException;
31  import org.apache.zookeeper.KeeperException.Code;
32  import org.apache.zookeeper.KeeperException.NoNodeException;
33  import org.apache.zookeeper.KeeperException.NodeExistsException;
34  import org.apache.zookeeper.data.Stat;
35  
36  /**
37   * Utility class for doing region assignment in ZooKeeper.  This class extends
38   * stuff done in {@link ZKUtil} to cover specific assignment operations.
39   * <p>
40   * Contains only static methods and constants.
41   * <p>
42   * Used by both the Master and RegionServer.
43   * <p>
44   * All valid transitions outlined below:
45   * <p>
46   * <b>MASTER</b>
47   * <ol>
48   *   <li>
49   *     Master creates an unassigned node as OFFLINE.
50   *     - Cluster startup and table enabling.
51   *   </li>
52   *   <li>
53   *     Master forces an existing unassigned node to OFFLINE.
54   *     - RegionServer failure.
55   *     - Allows transitions from all states to OFFLINE.
56   *   </li>
57   *   <li>
58   *     Master deletes an unassigned node that was in an OPENED state.
59   *     - Normal region transitions.  Besides cluster startup, no other deletions
60   *     of unassigned nodes are allowed.
61   *   </li>
62   *   <li>
63   *     Master deletes all unassigned nodes regardless of state.
64   *     - Cluster startup before any assignment happens.
65   *   </li>
66   * </ol>
67   * <p>
68   * <b>REGIONSERVER</b>
69   * <ol>
70   *   <li>
71   *     RegionServer creates an unassigned node as CLOSING.
72   *     - All region closes will do this in response to a CLOSE RPC from Master.
73   *     - A node can never be transitioned to CLOSING, only created.
74   *   </li>
75   *   <li>
76   *     RegionServer transitions an unassigned node from CLOSING to CLOSED.
77   *     - Normal region closes.  CAS operation.
78   *   </li>
79   *   <li>
80   *     RegionServer transitions an unassigned node from OFFLINE to OPENING.
81   *     - All region opens will do this in response to an OPEN RPC from the Master.
82   *     - Normal region opens.  CAS operation.
83   *   </li>
84   *   <li>
85   *     RegionServer transitions an unassigned node from OPENING to OPENED.
86   *     - Normal region opens.  CAS operation.
87   *   </li>
88   * </ol>
89   */
90  public class ZKAssign {
91    private static final Log LOG = LogFactory.getLog(ZKAssign.class);
92  
93    /**
94     * Gets the full path node name for the unassigned node for the specified
95     * region.
96     * @param zkw zk reference
97     * @param regionName region name
98     * @return full path node name
99     */
100   public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
101     return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
102   }
103 
104   /**
105    * Gets the region name from the full path node name of an unassigned node.
106    * @param path full zk path
107    * @return region name
108    */
109   public static String getRegionName(ZooKeeperWatcher zkw, String path) {
110     return path.substring(zkw.assignmentZNode.length()+1);
111   }
112 
113   // Master methods
114 
115   /**
116    * Creates a new unassigned node in the OFFLINE state for the specified region.
117    *
118    * <p>Does not transition nodes from other states.  If a node already exists
119    * for this region, a {@link NodeExistsException} will be thrown.
120    *
121    * <p>Sets a watcher on the unassigned region node if the method is successful.
122    *
123    * <p>This method should only be used during cluster startup and the enabling
124    * of a table.
125    *
126    * @param zkw zk reference
127    * @param region region to be created as offline
128    * @param serverName server event originates from
129    * @throws KeeperException if unexpected zookeeper exception
130    * @throws KeeperException.NodeExistsException if node already exists
131    */
132   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
133       String serverName)
134   throws KeeperException, KeeperException.NodeExistsException {
135     createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
136   }
137 
138   public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
139       String serverName, final EventType event)
140   throws KeeperException, KeeperException.NodeExistsException {
141     LOG.debug(zkw.prefix("Creating unassigned node for " +
142       region.getEncodedName() + " in OFFLINE state"));
143     RegionTransitionData data = new RegionTransitionData(event,
144       region.getRegionName(), serverName);
145     synchronized(zkw.getNodes()) {
146       String node = getNodeName(zkw, region.getEncodedName());
147       zkw.getNodes().add(node);
148       ZKUtil.createAndWatch(zkw, node, data.getBytes());
149     }
150   }
151 
152   /**
153    * Creates an unassigned node in the OFFLINE state for the specified region.
154    * <p>
155    * Runs asynchronously.  Depends on no pre-existing znode.
156    *
157    * <p>Sets a watcher on the unassigned region node.
158    *
159    * @param zkw zk reference
160    * @param region region to be created as offline
161    * @param serverName server event originates from
162    * @param cb
163    * @param ctx
164    * @throws KeeperException if unexpected zookeeper exception
165    * @throws KeeperException.NodeExistsException if node already exists
166    */
167   public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
168       HRegionInfo region, String serverName,
169       final AsyncCallback.StringCallback cb, final Object ctx)
170   throws KeeperException {
171     LOG.debug(zkw.prefix("Async create of unassigned node for " +
172       region.getEncodedName() + " with OFFLINE state"));
173     RegionTransitionData data = new RegionTransitionData(
174         EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
175     synchronized(zkw.getNodes()) {
176       String node = getNodeName(zkw, region.getEncodedName());
177       zkw.getNodes().add(node);
178       ZKUtil.asyncCreate(zkw, node, data.getBytes(), cb, ctx);
179     }
180   }
181 
182   /**
183    * Forces an existing unassigned node to the OFFLINE state for the specified
184    * region.
185    *
186    * <p>Does not create a new node.  If a node does not already exist for this
187    * region, a {@link NoNodeException} will be thrown.
188    *
189    * <p>Sets a watcher on the unassigned region node if the method is
190    * successful.
191    *
192    * <p>This method should only be used during recovery of regionserver failure.
193    *
194    * @param zkw zk reference
195    * @param region region to be forced as offline
196    * @param serverName server event originates from
197    * @throws KeeperException if unexpected zookeeper exception
198    * @throws KeeperException.NoNodeException if node does not exist
199    */
200   public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
201       String serverName)
202   throws KeeperException, KeeperException.NoNodeException {
203     LOG.debug(zkw.prefix("Forcing existing unassigned node for " +
204       region.getEncodedName() + " to OFFLINE state"));
205     RegionTransitionData data = new RegionTransitionData(
206         EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
207     synchronized(zkw.getNodes()) {
208       String node = getNodeName(zkw, region.getEncodedName());
209       zkw.getNodes().add(node);
210       ZKUtil.setData(zkw, node, data.getBytes());
211     }
212   }
213 
214 
215   /**
216    * Creates or force updates an unassigned node to the OFFLINE state for the
217    * specified region.
218    * <p>
219    * Attempts to create the node but if it exists will force it to transition to
220    * and OFFLINE state.
221    *
222    * <p>Sets a watcher on the unassigned region node if the method is
223    * successful.
224    *
225    * <p>This method should be used when assigning a region.
226    *
227    * @param zkw zk reference
228    * @param region region to be created as offline
229    * @param serverName server event originates from
230    * @throws KeeperException if unexpected zookeeper exception
231    * @throws KeeperException.NodeExistsException if node already exists
232    */
233   public static boolean createOrForceNodeOffline(ZooKeeperWatcher zkw,
234       HRegionInfo region, String serverName)
235   throws KeeperException {
236     LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
237       region.getEncodedName() + " with OFFLINE state"));
238     RegionTransitionData data = new RegionTransitionData(
239         EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
240     synchronized(zkw.getNodes()) {
241       String node = getNodeName(zkw, region.getEncodedName());
242       zkw.sync(node);
243       zkw.getNodes().add(node);
244       int version = ZKUtil.checkExists(zkw, node);
245       if(version == -1) {
246         ZKUtil.createAndWatch(zkw, node, data.getBytes());
247       } else {
248         if (!ZKUtil.setData(zkw, node, data.getBytes(), version)) {
249           return false;
250         } else {
251           // We successfully forced to OFFLINE, reset watch and handle if
252           // the state changed in between our set and the watch
253           RegionTransitionData curData =
254             ZKAssign.getData(zkw, region.getEncodedName());
255           if (curData.getEventType() != data.getEventType()) {
256             // state changed, need to process
257             return false;
258           }
259         }
260       }
261     }
262     return true;
263   }
264 
265   /**
266    * Deletes an existing unassigned node that is in the OPENED state for the
267    * specified region.
268    *
269    * <p>If a node does not already exist for this region, a
270    * {@link NoNodeException} will be thrown.
271    *
272    * <p>No watcher is set whether this succeeds or not.
273    *
274    * <p>Returns false if the node was not in the proper state but did exist.
275    *
276    * <p>This method is used during normal region transitions when a region
277    * finishes successfully opening.  This is the Master acknowledging completion
278    * of the specified regions transition.
279    *
280    * @param zkw zk reference
281    * @param regionName opened region to be deleted from zk
282    * @throws KeeperException if unexpected zookeeper exception
283    * @throws KeeperException.NoNodeException if node does not exist
284    */
285   public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
286       String regionName)
287   throws KeeperException, KeeperException.NoNodeException {
288     return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_OPENED);
289   }
290 
291   /**
292    * Deletes an existing unassigned node that is in the OFFLINE state for the
293    * specified region.
294    *
295    * <p>If a node does not already exist for this region, a
296    * {@link NoNodeException} will be thrown.
297    *
298    * <p>No watcher is set whether this succeeds or not.
299    *
300    * <p>Returns false if the node was not in the proper state but did exist.
301    *
302    * <p>This method is used during master failover when the regions on an RS
303    * that has died are all set to OFFLINE before being processed.
304    *
305    * @param zkw zk reference
306    * @param regionName closed region to be deleted from zk
307    * @throws KeeperException if unexpected zookeeper exception
308    * @throws KeeperException.NoNodeException if node does not exist
309    */
310   public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
311       String regionName)
312   throws KeeperException, KeeperException.NoNodeException {
313     return deleteNode(zkw, regionName, EventType.M_ZK_REGION_OFFLINE);
314   }
315 
316   /**
317    * Deletes an existing unassigned node that is in the CLOSED state for the
318    * specified region.
319    *
320    * <p>If a node does not already exist for this region, a
321    * {@link NoNodeException} will be thrown.
322    *
323    * <p>No watcher is set whether this succeeds or not.
324    *
325    * <p>Returns false if the node was not in the proper state but did exist.
326    *
327    * <p>This method is used during table disables when a region finishes
328    * successfully closing.  This is the Master acknowledging completion
329    * of the specified regions transition to being closed.
330    *
331    * @param zkw zk reference
332    * @param regionName closed region to be deleted from zk
333    * @throws KeeperException if unexpected zookeeper exception
334    * @throws KeeperException.NoNodeException if node does not exist
335    */
336   public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
337       String regionName)
338   throws KeeperException, KeeperException.NoNodeException {
339     return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSED);
340   }
341 
342   /**
343    * Deletes an existing unassigned node that is in the CLOSING state for the
344    * specified region.
345    *
346    * <p>If a node does not already exist for this region, a
347    * {@link NoNodeException} will be thrown.
348    *
349    * <p>No watcher is set whether this succeeds or not.
350    *
351    * <p>Returns false if the node was not in the proper state but did exist.
352    *
353    * <p>This method is used during table disables when a region finishes
354    * successfully closing.  This is the Master acknowledging completion
355    * of the specified regions transition to being closed.
356    *
357    * @param zkw zk reference
358    * @param region closing region to be deleted from zk
359    * @throws KeeperException if unexpected zookeeper exception
360    * @throws KeeperException.NoNodeException if node does not exist
361    */
362   public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
363       HRegionInfo region)
364   throws KeeperException, KeeperException.NoNodeException {
365     String regionName = region.getEncodedName();
366     return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSING);
367   }
368 
369   /**
370    * Deletes an existing unassigned node that is in the specified state for the
371    * specified region.
372    *
373    * <p>If a node does not already exist for this region, a
374    * {@link NoNodeException} will be thrown.
375    *
376    * <p>No watcher is set whether this succeeds or not.
377    *
378    * <p>Returns false if the node was not in the proper state but did exist.
379    *
380    * <p>This method is used during table disables when a region finishes
381    * successfully closing.  This is the Master acknowledging completion
382    * of the specified regions transition to being closed.
383    *
384    * @param zkw zk reference
385    * @param regionName region to be deleted from zk
386    * @param expectedState state region must be in for delete to complete
387    * @throws KeeperException if unexpected zookeeper exception
388    * @throws KeeperException.NoNodeException if node does not exist
389    */
390   private static boolean deleteNode(ZooKeeperWatcher zkw, String regionName,
391       EventType expectedState)
392   throws KeeperException, KeeperException.NoNodeException {
393     LOG.debug(zkw.prefix("Deleting existing unassigned " +
394       "node for " + regionName + " that is in expected state " + expectedState));
395     String node = getNodeName(zkw, regionName);
396     zkw.sync(node);
397     Stat stat = new Stat();
398     byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
399     if(bytes == null) {
400       throw KeeperException.create(Code.NONODE);
401     }
402     RegionTransitionData data = RegionTransitionData.fromBytes(bytes);
403     if(!data.getEventType().equals(expectedState)) {
404       LOG.warn(zkw.prefix("Attempting to delete unassigned " +
405         "node in " + expectedState +
406         " state but node is in " + data.getEventType() + " state"));
407       return false;
408     }
409     synchronized(zkw.getNodes()) {
410       // TODO: Does this go here or only if we successfully delete node?
411       zkw.getNodes().remove(node);
412       if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
413         LOG.warn(zkw.prefix("Attempting to delete " +
414           "unassigned node in " + expectedState +
415             " state but " +
416             "after verifying it was in OPENED state, we got a version mismatch"));
417         return false;
418       }
419       LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
420           regionName + " in expected state " + expectedState));
421       return true;
422     }
423   }
424 
425   /**
426    * Deletes all unassigned nodes regardless of their state.
427    *
428    * <p>No watchers are set.
429    *
430    * <p>This method is used by the Master during cluster startup to clear out
431    * any existing state from other cluster runs.
432    *
433    * @param zkw zk reference
434    * @throws KeeperException if unexpected zookeeper exception
435    */
436   public static void deleteAllNodes(ZooKeeperWatcher zkw)
437   throws KeeperException {
438     LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
439     ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
440   }
441 
442   // RegionServer methods
443 
444   /**
445    * Creates a new unassigned node in the CLOSING state for the specified
446    * region.
447    *
448    * <p>Does not transition nodes from any states.  If a node already exists
449    * for this region, a {@link NodeExistsException} will be thrown.
450    *
451    * <p>If creation is successful, returns the version number of the CLOSING
452    * node created.
453    *
454    * <p>Does not set any watches.
455    *
456    * <p>This method should only be used by a RegionServer when initiating a
457    * close of a region after receiving a CLOSE RPC from the Master.
458    *
459    * @param zkw zk reference
460    * @param region region to be created as closing
461    * @param serverName server event originates from
462    * @return version of node after transition, -1 if unsuccessful transition
463    * @throws KeeperException if unexpected zookeeper exception
464    * @throws KeeperException.NodeExistsException if node already exists
465    */
466   public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
467       String serverName)
468   throws KeeperException, KeeperException.NodeExistsException {
469     LOG.debug(zkw.prefix("Creating unassigned node for " +
470       region.getEncodedName() + " in a CLOSING state"));
471 
472     RegionTransitionData data = new RegionTransitionData(
473         EventType.RS_ZK_REGION_CLOSING, region.getRegionName(), serverName);
474 
475     synchronized (zkw.getNodes()) {
476       String node = getNodeName(zkw, region.getEncodedName());
477       zkw.getNodes().add(node);
478       return ZKUtil.createAndWatch(zkw, node, data.getBytes());
479     }
480   }
481 
482   /**
483    * Transitions an existing unassigned node for the specified region which is
484    * currently in the CLOSING state to be in the CLOSED state.
485    *
486    * <p>Does not transition nodes from other states.  If for some reason the
487    * node could not be transitioned, the method returns -1.  If the transition
488    * is successful, the version of the node after transition is returned.
489    *
490    * <p>This method can fail and return false for three different reasons:
491    * <ul><li>Unassigned node for this region does not exist</li>
492    * <li>Unassigned node for this region is not in CLOSING state</li>
493    * <li>After verifying CLOSING state, update fails because of wrong version
494    * (someone else already transitioned the node)</li>
495    * </ul>
496    *
497    * <p>Does not set any watches.
498    *
499    * <p>This method should only be used by a RegionServer when initiating a
500    * close of a region after receiving a CLOSE RPC from the Master.
501    *
502    * @param zkw zk reference
503    * @param region region to be transitioned to closed
504    * @param serverName server event originates from
505    * @return version of node after transition, -1 if unsuccessful transition
506    * @throws KeeperException if unexpected zookeeper exception
507    */
508   public static int transitionNodeClosed(ZooKeeperWatcher zkw,
509       HRegionInfo region, String serverName, int expectedVersion)
510   throws KeeperException {
511     return transitionNode(zkw, region, serverName,
512         EventType.RS_ZK_REGION_CLOSING,
513         EventType.RS_ZK_REGION_CLOSED, expectedVersion);
514   }
515 
516   /**
517    * Transitions an existing unassigned node for the specified region which is
518    * currently in the OFFLINE state to be in the OPENING state.
519    *
520    * <p>Does not transition nodes from other states.  If for some reason the
521    * node could not be transitioned, the method returns -1.  If the transition
522    * is successful, the version of the node written as OPENING is returned.
523    *
524    * <p>This method can fail and return -1 for three different reasons:
525    * <ul><li>Unassigned node for this region does not exist</li>
526    * <li>Unassigned node for this region is not in OFFLINE state</li>
527    * <li>After verifying OFFLINE state, update fails because of wrong version
528    * (someone else already transitioned the node)</li>
529    * </ul>
530    *
531    * <p>Does not set any watches.
532    *
533    * <p>This method should only be used by a RegionServer when initiating an
534    * open of a region after receiving an OPEN RPC from the Master.
535    *
536    * @param zkw zk reference
537    * @param region region to be transitioned to opening
538    * @param serverName server event originates from
539    * @return version of node after transition, -1 if unsuccessful transition
540    * @throws KeeperException if unexpected zookeeper exception
541    */
542   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
543       HRegionInfo region, String serverName)
544   throws KeeperException {
545     return transitionNodeOpening(zkw, region, serverName,
546       EventType.M_ZK_REGION_OFFLINE);
547   }
548 
549   public static int transitionNodeOpening(ZooKeeperWatcher zkw,
550       HRegionInfo region, String serverName, final EventType beginState)
551   throws KeeperException {
552     return transitionNode(zkw, region, serverName, beginState,
553       EventType.RS_ZK_REGION_OPENING, -1);
554   }
555 
556   /**
557    * Retransitions an existing unassigned node for the specified region which is
558    * currently in the OPENING state to be in the OPENING state.
559    *
560    * <p>Does not transition nodes from other states.  If for some reason the
561    * node could not be transitioned, the method returns -1.  If the transition
562    * is successful, the version of the node rewritten as OPENING is returned.
563    *
564    * <p>This method can fail and return -1 for three different reasons:
565    * <ul><li>Unassigned node for this region does not exist</li>
566    * <li>Unassigned node for this region is not in OPENING state</li>
567    * <li>After verifying OPENING state, update fails because of wrong version
568    * (someone else already transitioned the node)</li>
569    * </ul>
570    *
571    * <p>Does not set any watches.
572    *
573    * <p>This method should only be used by a RegionServer when initiating an
574    * open of a region after receiving an OPEN RPC from the Master.
575    *
576    * @param zkw zk reference
577    * @param region region to be transitioned to opening
578    * @param serverName server event originates from
579    * @return version of node after transition, -1 if unsuccessful transition
580    * @throws KeeperException if unexpected zookeeper exception
581    */
582   public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
583       HRegionInfo region, String serverName, int expectedVersion)
584   throws KeeperException {
585     return transitionNode(zkw, region, serverName,
586         EventType.RS_ZK_REGION_OPENING,
587         EventType.RS_ZK_REGION_OPENING, expectedVersion);
588   }
589 
590   /**
591    * Transitions an existing unassigned node for the specified region which is
592    * currently in the OPENING state to be in the OPENED state.
593    *
594    * <p>Does not transition nodes from other states.  If for some reason the
595    * node could not be transitioned, the method returns -1.  If the transition
596    * is successful, the version of the node after transition is returned.
597    *
598    * <p>This method can fail and return false for three different reasons:
599    * <ul><li>Unassigned node for this region does not exist</li>
600    * <li>Unassigned node for this region is not in OPENING state</li>
601    * <li>After verifying OPENING state, update fails because of wrong version
602    * (this should never actually happen since an RS only does this transition
603    * following a transition to OPENING.  if two RS are conflicting, one would
604    * fail the original transition to OPENING and not this transition)</li>
605    * </ul>
606    *
607    * <p>Does not set any watches.
608    *
609    * <p>This method should only be used by a RegionServer when completing the
610    * open of a region.
611    *
612    * @param zkw zk reference
613    * @param region region to be transitioned to opened
614    * @param serverName server event originates from
615    * @return version of node after transition, -1 if unsuccessful transition
616    * @throws KeeperException if unexpected zookeeper exception
617    */
618   public static int transitionNodeOpened(ZooKeeperWatcher zkw,
619       HRegionInfo region, String serverName, int expectedVersion)
620   throws KeeperException {
621     return transitionNode(zkw, region, serverName,
622         EventType.RS_ZK_REGION_OPENING,
623         EventType.RS_ZK_REGION_OPENED, expectedVersion);
624   }
625 
626   /**
627    * Private method that actually performs unassigned node transitions.
628    *
629    * <p>Attempts to transition the unassigned node for the specified region
630    * from the expected state to the state in the specified transition data.
631    *
632    * <p>Method first reads existing data and verifies it is in the expected
633    * state.  If the node does not exist or the node is not in the expected
634    * state, the method returns -1.  If the transition is successful, the
635    * version number of the node following the transition is returned.
636    *
637    * <p>If the read state is what is expected, it attempts to write the new
638    * state and data into the node.  When doing this, it includes the expected
639    * version (determined when the existing state was verified) to ensure that
640    * only one transition is successful.  If there is a version mismatch, the
641    * method returns -1.
642    *
643    * <p>If the write is successful, no watch is set and the method returns true.
644    *
645    * @param zkw zk reference
646    * @param region region to be transitioned to opened
647    * @param serverName server event originates from
648    * @param endState state to transition node to if all checks pass
649    * @param beginState state the node must currently be in to do transition
650    * @param expectedVersion expected version of data before modification, or -1
651    * @return version of node after transition, -1 if unsuccessful transition
652    * @throws KeeperException if unexpected zookeeper exception
653    */
654   public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
655       String serverName, EventType beginState, EventType endState,
656       int expectedVersion)
657   throws KeeperException {
658     String encoded = region.getEncodedName();
659     if(LOG.isDebugEnabled()) {
660       LOG.debug(zkw.prefix("Attempting to transition node " +
661         HRegionInfo.prettyPrint(encoded) +
662         " from " + beginState.toString() + " to " + endState.toString()));
663     }
664 
665     String node = getNodeName(zkw, encoded);
666     zkw.sync(node);
667 
668     // Read existing data of the node
669     Stat stat = new Stat();
670     byte [] existingBytes =
671       ZKUtil.getDataNoWatch(zkw, node, stat);
672     RegionTransitionData existingData =
673       RegionTransitionData.fromBytes(existingBytes);
674 
675     // Verify it is the expected version
676     if(expectedVersion != -1 && stat.getVersion() != expectedVersion) {
677       LOG.warn(zkw.prefix("Attempt to transition the " +
678         "unassigned node for " + encoded +
679         " from " + beginState + " to " + endState + " failed, " +
680         "the node existed but was version " + stat.getVersion() +
681         " not the expected version " + expectedVersion));
682         return -1;
683     }
684 
685     // Verify it is in expected state
686     if(!existingData.getEventType().equals(beginState)) {
687       LOG.warn(zkw.prefix("Attempt to transition the " +
688         "unassigned node for " + encoded +
689         " from " + beginState + " to " + endState + " failed, " +
690         "the node existed but was in the state " + existingData.getEventType()));
691       return -1;
692     }
693 
694     // Write new data, ensuring data has not changed since we last read it
695     try {
696       RegionTransitionData data = new RegionTransitionData(endState,
697           region.getRegionName(), serverName);
698       if(!ZKUtil.setData(zkw, node, data.getBytes(), stat.getVersion())) {
699         LOG.warn(zkw.prefix("Attempt to transition the " +
700         "unassigned node for " + encoded +
701         " from " + beginState + " to " + endState + " failed, " +
702         "the node existed and was in the expected state but then when " +
703         "setting data we got a version mismatch"));
704         return -1;
705       }
706       if(LOG.isDebugEnabled()) {
707         LOG.debug(zkw.prefix("Successfully transitioned node " + encoded +
708           " from " + beginState + " to " + endState));
709       }
710       return stat.getVersion() + 1;
711     } catch (KeeperException.NoNodeException nne) {
712       LOG.warn(zkw.prefix("Attempt to transition the " +
713         "unassigned node for " + encoded +
714         " from " + beginState + " to " + endState + " failed, " +
715         "the node existed and was in the expected state but then when " +
716         "setting data it no longer existed"));
717       return -1;
718     }
719   }
720 
721   /**
722    * Gets the current data in the unassigned node for the specified region name
723    * or fully-qualified path.
724    *
725    * <p>Returns null if the region does not currently have a node.
726    *
727    * <p>Sets a watch on the node if the node exists.
728    *
729    * @param zkw zk reference
730    * @param pathOrRegionName fully-specified path or region name
731    * @return data for the unassigned node
732    * @throws KeeperException if unexpected zookeeper exception
733    */
734   public static RegionTransitionData getData(ZooKeeperWatcher zkw,
735       String pathOrRegionName)
736   throws KeeperException {
737     String node = pathOrRegionName.startsWith("/") ?
738         pathOrRegionName : getNodeName(zkw, pathOrRegionName);
739     byte [] data = ZKUtil.getDataAndWatch(zkw, node);
740     if(data == null) {
741       return null;
742     }
743     return RegionTransitionData.fromBytes(data);
744   }
745 
746   /**
747    * Gets the current data in the unassigned node for the specified region name
748    * or fully-qualified path.
749    *
750    * <p>Returns null if the region does not currently have a node.
751    *
752    * <p>Does not set a watch.
753    *
754    * @param zkw zk reference
755    * @param pathOrRegionName fully-specified path or region name
756    * @param stat object to store node info into on getData call
757    * @return data for the unassigned node
758    * @throws KeeperException if unexpected zookeeper exception
759    */
760   public static RegionTransitionData getDataNoWatch(ZooKeeperWatcher zkw,
761       String pathOrRegionName, Stat stat)
762   throws KeeperException {
763     String node = pathOrRegionName.startsWith("/") ?
764         pathOrRegionName : getNodeName(zkw, pathOrRegionName);
765     byte [] data = ZKUtil.getDataNoWatch(zkw, node, stat);
766     if(data == null) {
767       return null;
768     }
769     return RegionTransitionData.fromBytes(data);
770   }
771 
772   /**
773    * Delete the assignment node regardless of its current state.
774    * <p>
775    * Fail silent even if the node does not exist at all.
776    * @param watcher
777    * @param regionInfo
778    * @throws KeeperException
779    */
780   public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
781       HRegionInfo regionInfo)
782   throws KeeperException {
783     String node = getNodeName(watcher, regionInfo.getEncodedName());
784     ZKUtil.deleteNodeFailSilent(watcher, node);
785   }
786 
787   /**
788    * Blocks until there are no node in regions in transition.
789    * <p>
790    * Used in testing only.
791    * @param zkw zk reference
792    * @throws KeeperException
793    * @throws InterruptedException
794    */
795   public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
796   throws KeeperException, InterruptedException {
797     while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
798       List<String> znodes =
799         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
800       if (znodes != null && !znodes.isEmpty()) {
801         for (String znode : znodes) {
802           LOG.debug("ZK RIT -> " + znode);
803         }
804       }
805       Thread.sleep(100);
806     }
807   }
808 
809   /**
810    * Blocks until there is at least one node in regions in transition.
811    * <p>
812    * Used in testing only.
813    * @param zkw zk reference
814    * @throws KeeperException
815    * @throws InterruptedException
816    */
817   public static void blockUntilRIT(ZooKeeperWatcher zkw)
818   throws KeeperException, InterruptedException {
819     while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
820       List<String> znodes =
821         ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
822       if (znodes == null || znodes.isEmpty()) {
823         LOG.debug("No RIT in ZK");
824       }
825       Thread.sleep(100);
826     }
827   }
828 
829   /**
830    * Verifies that the specified region is in the specified state in ZooKeeper.
831    * <p>
832    * Returns true if region is in transition and in the specified state in
833    * ZooKeeper.  Returns false if the region does not exist in ZK or is in
834    * a different state.
835    * <p>
836    * Method synchronizes() with ZK so will yield an up-to-date result but is
837    * a slow read.
838    * @param zkw
839    * @param region
840    * @param expectedState
841    * @return true if region exists and is in expected state
842    */
843   public static boolean verifyRegionState(ZooKeeperWatcher zkw,
844       HRegionInfo region, EventType expectedState)
845   throws KeeperException {
846     String encoded = region.getEncodedName();
847 
848     String node = getNodeName(zkw, encoded);
849     zkw.sync(node);
850 
851     // Read existing data of the node
852     byte [] existingBytes = null;
853     try {
854       existingBytes = ZKUtil.getDataAndWatch(zkw, node);
855     } catch (KeeperException.NoNodeException nne) {
856       return false;
857     } catch (KeeperException e) {
858       throw e;
859     }
860     if (existingBytes == null) return false;
861     RegionTransitionData existingData =
862       RegionTransitionData.fromBytes(existingBytes);
863     if (existingData.getEventType() == expectedState){
864       return true;
865     }
866     return false;
867   }
868 }