1 /** 2 * Copyright 2010 The Apache Software Foundation 3 * 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 package org.apache.hadoop.hbase.zookeeper; 21 22 import java.util.List; 23 24 import org.apache.commons.logging.Log; 25 import org.apache.commons.logging.LogFactory; 26 import org.apache.hadoop.hbase.HRegionInfo; 27 import org.apache.hadoop.hbase.executor.RegionTransitionData; 28 import org.apache.hadoop.hbase.executor.EventHandler.EventType; 29 import org.apache.zookeeper.AsyncCallback; 30 import org.apache.zookeeper.KeeperException; 31 import org.apache.zookeeper.KeeperException.Code; 32 import org.apache.zookeeper.KeeperException.NoNodeException; 33 import org.apache.zookeeper.KeeperException.NodeExistsException; 34 import org.apache.zookeeper.data.Stat; 35 36 /** 37 * Utility class for doing region assignment in ZooKeeper. This class extends 38 * stuff done in {@link ZKUtil} to cover specific assignment operations. 39 * <p> 40 * Contains only static methods and constants. 41 * <p> 42 * Used by both the Master and RegionServer. 43 * <p> 44 * All valid transitions outlined below: 45 * <p> 46 * <b>MASTER</b> 47 * <ol> 48 * <li> 49 * Master creates an unassigned node as OFFLINE. 50 * - Cluster startup and table enabling. 51 * </li> 52 * <li> 53 * Master forces an existing unassigned node to OFFLINE. 54 * - RegionServer failure. 55 * - Allows transitions from all states to OFFLINE. 56 * </li> 57 * <li> 58 * Master deletes an unassigned node that was in a OPENED state. 59 * - Normal region transitions. Besides cluster startup, no other deletions 60 * of unassigned nodes is allowed. 61 * </li> 62 * <li> 63 * Master deletes all unassigned nodes regardless of state. 64 * - Cluster startup before any assignment happens. 65 * </li> 66 * </ol> 67 * <p> 68 * <b>REGIONSERVER</b> 69 * <ol> 70 * <li> 71 * RegionServer creates an unassigned node as CLOSING. 72 * - All region closes will do this in response to a CLOSE RPC from Master. 73 * - A node can never be transitioned to CLOSING, only created. 74 * </li> 75 * <li> 76 * RegionServer transitions an unassigned node from CLOSING to CLOSED. 77 * - Normal region closes. CAS operation. 78 * </li> 79 * <li> 80 * RegionServer transitions an unassigned node from OFFLINE to OPENING. 81 * - All region opens will do this in response to an OPEN RPC from the Master. 82 * - Normal region opens. CAS operation. 83 * </li> 84 * <li> 85 * RegionServer transitions an unassigned node from OPENING to OPENED. 86 * - Normal region opens. CAS operation. 87 * </li> 88 * </ol> 89 */ 90 public class ZKAssign { 91 private static final Log LOG = LogFactory.getLog(ZKAssign.class); 92 93 /** 94 * Gets the full path node name for the unassigned node for the specified 95 * region. 96 * @param zkw zk reference 97 * @param regionName region name 98 * @return full path node name 99 */ 100 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) { 101 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName); 102 } 103 104 /** 105 * Gets the region name from the full path node name of an unassigned node. 106 * @param path full zk path 107 * @return region name 108 */ 109 public static String getRegionName(ZooKeeperWatcher zkw, String path) { 110 return path.substring(zkw.assignmentZNode.length()+1); 111 } 112 113 // Master methods 114 115 /** 116 * Creates a new unassigned node in the OFFLINE state for the specified region. 117 * 118 * <p>Does not transition nodes from other states. If a node already exists 119 * for this region, a {@link NodeExistsException} will be thrown. 120 * 121 * <p>Sets a watcher on the unassigned region node if the method is successful. 122 * 123 * <p>This method should only be used during cluster startup and the enabling 124 * of a table. 125 * 126 * @param zkw zk reference 127 * @param region region to be created as offline 128 * @param serverName server event originates from 129 * @throws KeeperException if unexpected zookeeper exception 130 * @throws KeeperException.NodeExistsException if node already exists 131 */ 132 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 133 String serverName) 134 throws KeeperException, KeeperException.NodeExistsException { 135 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE); 136 } 137 138 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 139 String serverName, final EventType event) 140 throws KeeperException, KeeperException.NodeExistsException { 141 LOG.debug(zkw.prefix("Creating unassigned node for " + 142 region.getEncodedName() + " in OFFLINE state")); 143 RegionTransitionData data = new RegionTransitionData(event, 144 region.getRegionName(), serverName); 145 synchronized(zkw.getNodes()) { 146 String node = getNodeName(zkw, region.getEncodedName()); 147 zkw.getNodes().add(node); 148 ZKUtil.createAndWatch(zkw, node, data.getBytes()); 149 } 150 } 151 152 /** 153 * Creates an unassigned node in the OFFLINE state for the specified region. 154 * <p> 155 * Runs asynchronously. Depends on no pre-existing znode. 156 * 157 * <p>Sets a watcher on the unassigned region node. 158 * 159 * @param zkw zk reference 160 * @param region region to be created as offline 161 * @param serverName server event originates from 162 * @param cb 163 * @param ctx 164 * @throws KeeperException if unexpected zookeeper exception 165 * @throws KeeperException.NodeExistsException if node already exists 166 */ 167 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw, 168 HRegionInfo region, String serverName, 169 final AsyncCallback.StringCallback cb, final Object ctx) 170 throws KeeperException { 171 LOG.debug(zkw.prefix("Async create of unassigned node for " + 172 region.getEncodedName() + " with OFFLINE state")); 173 RegionTransitionData data = new RegionTransitionData( 174 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 175 synchronized(zkw.getNodes()) { 176 String node = getNodeName(zkw, region.getEncodedName()); 177 zkw.getNodes().add(node); 178 ZKUtil.asyncCreate(zkw, node, data.getBytes(), cb, ctx); 179 } 180 } 181 182 /** 183 * Forces an existing unassigned node to the OFFLINE state for the specified 184 * region. 185 * 186 * <p>Does not create a new node. If a node does not already exist for this 187 * region, a {@link NoNodeException} will be thrown. 188 * 189 * <p>Sets a watcher on the unassigned region node if the method is 190 * successful. 191 * 192 * <p>This method should only be used during recovery of regionserver failure. 193 * 194 * @param zkw zk reference 195 * @param region region to be forced as offline 196 * @param serverName server event originates from 197 * @throws KeeperException if unexpected zookeeper exception 198 * @throws KeeperException.NoNodeException if node does not exist 199 */ 200 public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 201 String serverName) 202 throws KeeperException, KeeperException.NoNodeException { 203 LOG.debug(zkw.prefix("Forcing existing unassigned node for " + 204 region.getEncodedName() + " to OFFLINE state")); 205 RegionTransitionData data = new RegionTransitionData( 206 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 207 synchronized(zkw.getNodes()) { 208 String node = getNodeName(zkw, region.getEncodedName()); 209 zkw.getNodes().add(node); 210 ZKUtil.setData(zkw, node, data.getBytes()); 211 } 212 } 213 214 215 /** 216 * Creates or force updates an unassigned node to the OFFLINE state for the 217 * specified region. 218 * <p> 219 * Attempts to create the node but if it exists will force it to transition to 220 * and OFFLINE state. 221 * 222 * <p>Sets a watcher on the unassigned region node if the method is 223 * successful. 224 * 225 * <p>This method should be used when assigning a region. 226 * 227 * @param zkw zk reference 228 * @param region region to be created as offline 229 * @param serverName server event originates from 230 * @throws KeeperException if unexpected zookeeper exception 231 * @throws KeeperException.NodeExistsException if node already exists 232 */ 233 public static boolean createOrForceNodeOffline(ZooKeeperWatcher zkw, 234 HRegionInfo region, String serverName) 235 throws KeeperException { 236 LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " + 237 region.getEncodedName() + " with OFFLINE state")); 238 RegionTransitionData data = new RegionTransitionData( 239 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 240 synchronized(zkw.getNodes()) { 241 String node = getNodeName(zkw, region.getEncodedName()); 242 zkw.sync(node); 243 zkw.getNodes().add(node); 244 int version = ZKUtil.checkExists(zkw, node); 245 if(version == -1) { 246 ZKUtil.createAndWatch(zkw, node, data.getBytes()); 247 } else { 248 if (!ZKUtil.setData(zkw, node, data.getBytes(), version)) { 249 return false; 250 } else { 251 // We successfully forced to OFFLINE, reset watch and handle if 252 // the state changed in between our set and the watch 253 RegionTransitionData curData = 254 ZKAssign.getData(zkw, region.getEncodedName()); 255 if (curData.getEventType() != data.getEventType()) { 256 // state changed, need to process 257 return false; 258 } 259 } 260 } 261 } 262 return true; 263 } 264 265 /** 266 * Deletes an existing unassigned node that is in the OPENED state for the 267 * specified region. 268 * 269 * <p>If a node does not already exist for this region, a 270 * {@link NoNodeException} will be thrown. 271 * 272 * <p>No watcher is set whether this succeeds or not. 273 * 274 * <p>Returns false if the node was not in the proper state but did exist. 275 * 276 * <p>This method is used during normal region transitions when a region 277 * finishes successfully opening. This is the Master acknowledging completion 278 * of the specified regions transition. 279 * 280 * @param zkw zk reference 281 * @param regionName opened region to be deleted from zk 282 * @throws KeeperException if unexpected zookeeper exception 283 * @throws KeeperException.NoNodeException if node does not exist 284 */ 285 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw, 286 String regionName) 287 throws KeeperException, KeeperException.NoNodeException { 288 return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_OPENED); 289 } 290 291 /** 292 * Deletes an existing unassigned node that is in the OFFLINE state for the 293 * specified region. 294 * 295 * <p>If a node does not already exist for this region, a 296 * {@link NoNodeException} will be thrown. 297 * 298 * <p>No watcher is set whether this succeeds or not. 299 * 300 * <p>Returns false if the node was not in the proper state but did exist. 301 * 302 * <p>This method is used during master failover when the regions on an RS 303 * that has died are all set to OFFLINE before being processed. 304 * 305 * @param zkw zk reference 306 * @param regionName closed region to be deleted from zk 307 * @throws KeeperException if unexpected zookeeper exception 308 * @throws KeeperException.NoNodeException if node does not exist 309 */ 310 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw, 311 String regionName) 312 throws KeeperException, KeeperException.NoNodeException { 313 return deleteNode(zkw, regionName, EventType.M_ZK_REGION_OFFLINE); 314 } 315 316 /** 317 * Deletes an existing unassigned node that is in the CLOSED state for the 318 * specified region. 319 * 320 * <p>If a node does not already exist for this region, a 321 * {@link NoNodeException} will be thrown. 322 * 323 * <p>No watcher is set whether this succeeds or not. 324 * 325 * <p>Returns false if the node was not in the proper state but did exist. 326 * 327 * <p>This method is used during table disables when a region finishes 328 * successfully closing. This is the Master acknowledging completion 329 * of the specified regions transition to being closed. 330 * 331 * @param zkw zk reference 332 * @param regionName closed region to be deleted from zk 333 * @throws KeeperException if unexpected zookeeper exception 334 * @throws KeeperException.NoNodeException if node does not exist 335 */ 336 public static boolean deleteClosedNode(ZooKeeperWatcher zkw, 337 String regionName) 338 throws KeeperException, KeeperException.NoNodeException { 339 return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSED); 340 } 341 342 /** 343 * Deletes an existing unassigned node that is in the CLOSING state for the 344 * specified region. 345 * 346 * <p>If a node does not already exist for this region, a 347 * {@link NoNodeException} will be thrown. 348 * 349 * <p>No watcher is set whether this succeeds or not. 350 * 351 * <p>Returns false if the node was not in the proper state but did exist. 352 * 353 * <p>This method is used during table disables when a region finishes 354 * successfully closing. This is the Master acknowledging completion 355 * of the specified regions transition to being closed. 356 * 357 * @param zkw zk reference 358 * @param region closing region to be deleted from zk 359 * @throws KeeperException if unexpected zookeeper exception 360 * @throws KeeperException.NoNodeException if node does not exist 361 */ 362 public static boolean deleteClosingNode(ZooKeeperWatcher zkw, 363 HRegionInfo region) 364 throws KeeperException, KeeperException.NoNodeException { 365 String regionName = region.getEncodedName(); 366 return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSING); 367 } 368 369 /** 370 * Deletes an existing unassigned node that is in the specified state for the 371 * specified region. 372 * 373 * <p>If a node does not already exist for this region, a 374 * {@link NoNodeException} will be thrown. 375 * 376 * <p>No watcher is set whether this succeeds or not. 377 * 378 * <p>Returns false if the node was not in the proper state but did exist. 379 * 380 * <p>This method is used during table disables when a region finishes 381 * successfully closing. This is the Master acknowledging completion 382 * of the specified regions transition to being closed. 383 * 384 * @param zkw zk reference 385 * @param regionName region to be deleted from zk 386 * @param expectedState state region must be in for delete to complete 387 * @throws KeeperException if unexpected zookeeper exception 388 * @throws KeeperException.NoNodeException if node does not exist 389 */ 390 private static boolean deleteNode(ZooKeeperWatcher zkw, String regionName, 391 EventType expectedState) 392 throws KeeperException, KeeperException.NoNodeException { 393 LOG.debug(zkw.prefix("Deleting existing unassigned " + 394 "node for " + regionName + " that is in expected state " + expectedState)); 395 String node = getNodeName(zkw, regionName); 396 zkw.sync(node); 397 Stat stat = new Stat(); 398 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat); 399 if (bytes == null) { 400 // If it came back null, node does not exist. 401 throw KeeperException.create(Code.NONODE); 402 } 403 RegionTransitionData data = RegionTransitionData.fromBytes(bytes); 404 if (!data.getEventType().equals(expectedState)) { 405 LOG.warn(zkw.prefix("Attempting to delete unassigned " + 406 "node " + regionName + " in " + expectedState + 407 " state but node is in " + data.getEventType() + " state")); 408 return false; 409 } 410 synchronized(zkw.getNodes()) { 411 // TODO: Does this go here or only if we successfully delete node? 412 zkw.getNodes().remove(node); 413 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) { 414 LOG.warn(zkw.prefix("Attempting to delete " + 415 "unassigned node in " + expectedState + 416 " state but " + 417 "after verifying it was in OPENED state, we got a version mismatch")); 418 return false; 419 } 420 LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " + 421 regionName + " in expected state " + expectedState)); 422 return true; 423 } 424 } 425 426 /** 427 * Deletes all unassigned nodes regardless of their state. 428 * 429 * <p>No watchers are set. 430 * 431 * <p>This method is used by the Master during cluster startup to clear out 432 * any existing state from other cluster runs. 433 * 434 * @param zkw zk reference 435 * @throws KeeperException if unexpected zookeeper exception 436 */ 437 public static void deleteAllNodes(ZooKeeperWatcher zkw) 438 throws KeeperException { 439 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes")); 440 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode); 441 } 442 443 // RegionServer methods 444 445 /** 446 * Creates a new unassigned node in the CLOSING state for the specified 447 * region. 448 * 449 * <p>Does not transition nodes from any states. If a node already exists 450 * for this region, a {@link NodeExistsException} will be thrown. 451 * 452 * <p>If creation is successful, returns the version number of the CLOSING 453 * node created. 454 * 455 * <p>Does not set any watches. 456 * 457 * <p>This method should only be used by a RegionServer when initiating a 458 * close of a region after receiving a CLOSE RPC from the Master. 459 * 460 * @param zkw zk reference 461 * @param region region to be created as closing 462 * @param serverName server event originates from 463 * @return version of node after transition, -1 if unsuccessful transition 464 * @throws KeeperException if unexpected zookeeper exception 465 * @throws KeeperException.NodeExistsException if node already exists 466 */ 467 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region, 468 String serverName) 469 throws KeeperException, KeeperException.NodeExistsException { 470 LOG.debug(zkw.prefix("Creating unassigned node for " + 471 region.getEncodedName() + " in a CLOSING state")); 472 473 RegionTransitionData data = new RegionTransitionData( 474 EventType.RS_ZK_REGION_CLOSING, region.getRegionName(), serverName); 475 476 synchronized (zkw.getNodes()) { 477 String node = getNodeName(zkw, region.getEncodedName()); 478 zkw.getNodes().add(node); 479 return ZKUtil.createAndWatch(zkw, node, data.getBytes()); 480 } 481 } 482 483 /** 484 * Transitions an existing unassigned node for the specified region which is 485 * currently in the CLOSING state to be in the CLOSED state. 486 * 487 * <p>Does not transition nodes from other states. If for some reason the 488 * node could not be transitioned, the method returns -1. If the transition 489 * is successful, the version of the node after transition is returned. 490 * 491 * <p>This method can fail and return false for three different reasons: 492 * <ul><li>Unassigned node for this region does not exist</li> 493 * <li>Unassigned node for this region is not in CLOSING state</li> 494 * <li>After verifying CLOSING state, update fails because of wrong version 495 * (someone else already transitioned the node)</li> 496 * </ul> 497 * 498 * <p>Does not set any watches. 499 * 500 * <p>This method should only be used by a RegionServer when initiating a 501 * close of a region after receiving a CLOSE RPC from the Master. 502 * 503 * @param zkw zk reference 504 * @param region region to be transitioned to closed 505 * @param serverName server event originates from 506 * @return version of node after transition, -1 if unsuccessful transition 507 * @throws KeeperException if unexpected zookeeper exception 508 */ 509 public static int transitionNodeClosed(ZooKeeperWatcher zkw, 510 HRegionInfo region, String serverName, int expectedVersion) 511 throws KeeperException { 512 return transitionNode(zkw, region, serverName, 513 EventType.RS_ZK_REGION_CLOSING, 514 EventType.RS_ZK_REGION_CLOSED, expectedVersion); 515 } 516 517 /** 518 * Transitions an existing unassigned node for the specified region which is 519 * currently in the OFFLINE state to be in the OPENING state. 520 * 521 * <p>Does not transition nodes from other states. If for some reason the 522 * node could not be transitioned, the method returns -1. If the transition 523 * is successful, the version of the node written as OPENING is returned. 524 * 525 * <p>This method can fail and return -1 for three different reasons: 526 * <ul><li>Unassigned node for this region does not exist</li> 527 * <li>Unassigned node for this region is not in OFFLINE state</li> 528 * <li>After verifying OFFLINE state, update fails because of wrong version 529 * (someone else already transitioned the node)</li> 530 * </ul> 531 * 532 * <p>Does not set any watches. 533 * 534 * <p>This method should only be used by a RegionServer when initiating an 535 * open of a region after receiving an OPEN RPC from the Master. 536 * 537 * @param zkw zk reference 538 * @param region region to be transitioned to opening 539 * @param serverName server event originates from 540 * @return version of node after transition, -1 if unsuccessful transition 541 * @throws KeeperException if unexpected zookeeper exception 542 */ 543 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 544 HRegionInfo region, String serverName) 545 throws KeeperException { 546 return transitionNodeOpening(zkw, region, serverName, 547 EventType.M_ZK_REGION_OFFLINE); 548 } 549 550 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 551 HRegionInfo region, String serverName, final EventType beginState) 552 throws KeeperException { 553 return transitionNode(zkw, region, serverName, beginState, 554 EventType.RS_ZK_REGION_OPENING, -1); 555 } 556 557 /** 558 * Retransitions an existing unassigned node for the specified region which is 559 * currently in the OPENING state to be in the OPENING state. 560 * 561 * <p>Does not transition nodes from other states. If for some reason the 562 * node could not be transitioned, the method returns -1. If the transition 563 * is successful, the version of the node rewritten as OPENING is returned. 564 * 565 * <p>This method can fail and return -1 for three different reasons: 566 * <ul><li>Unassigned node for this region does not exist</li> 567 * <li>Unassigned node for this region is not in OPENING state</li> 568 * <li>After verifying OPENING state, update fails because of wrong version 569 * (someone else already transitioned the node)</li> 570 * </ul> 571 * 572 * <p>Does not set any watches. 573 * 574 * <p>This method should only be used by a RegionServer when initiating an 575 * open of a region after receiving an OPEN RPC from the Master. 576 * 577 * @param zkw zk reference 578 * @param region region to be transitioned to opening 579 * @param serverName server event originates from 580 * @return version of node after transition, -1 if unsuccessful transition 581 * @throws KeeperException if unexpected zookeeper exception 582 */ 583 public static int retransitionNodeOpening(ZooKeeperWatcher zkw, 584 HRegionInfo region, String serverName, int expectedVersion) 585 throws KeeperException { 586 return transitionNode(zkw, region, serverName, 587 EventType.RS_ZK_REGION_OPENING, 588 EventType.RS_ZK_REGION_OPENING, expectedVersion); 589 } 590 591 /** 592 * Transitions an existing unassigned node for the specified region which is 593 * currently in the OPENING state to be in the OPENED state. 594 * 595 * <p>Does not transition nodes from other states. If for some reason the 596 * node could not be transitioned, the method returns -1. If the transition 597 * is successful, the version of the node after transition is returned. 598 * 599 * <p>This method can fail and return false for three different reasons: 600 * <ul><li>Unassigned node for this region does not exist</li> 601 * <li>Unassigned node for this region is not in OPENING state</li> 602 * <li>After verifying OPENING state, update fails because of wrong version 603 * (this should never actually happen since an RS only does this transition 604 * following a transition to OPENING. if two RS are conflicting, one would 605 * fail the original transition to OPENING and not this transition)</li> 606 * </ul> 607 * 608 * <p>Does not set any watches. 609 * 610 * <p>This method should only be used by a RegionServer when completing the 611 * open of a region. 612 * 613 * @param zkw zk reference 614 * @param region region to be transitioned to opened 615 * @param serverName server event originates from 616 * @return version of node after transition, -1 if unsuccessful transition 617 * @throws KeeperException if unexpected zookeeper exception 618 */ 619 public static int transitionNodeOpened(ZooKeeperWatcher zkw, 620 HRegionInfo region, String serverName, int expectedVersion) 621 throws KeeperException { 622 return transitionNode(zkw, region, serverName, 623 EventType.RS_ZK_REGION_OPENING, 624 EventType.RS_ZK_REGION_OPENED, expectedVersion); 625 } 626 627 /** 628 * Private method that actually performs unassigned node transitions. 629 * 630 * <p>Attempts to transition the unassigned node for the specified region 631 * from the expected state to the state in the specified transition data. 632 * 633 * <p>Method first reads existing data and verifies it is in the expected 634 * state. If the node does not exist or the node is not in the expected 635 * state, the method returns -1. If the transition is successful, the 636 * version number of the node following the transition is returned. 637 * 638 * <p>If the read state is what is expected, it attempts to write the new 639 * state and data into the node. When doing this, it includes the expected 640 * version (determined when the existing state was verified) to ensure that 641 * only one transition is successful. If there is a version mismatch, the 642 * method returns -1. 643 * 644 * <p>If the write is successful, no watch is set and the method returns true. 645 * 646 * @param zkw zk reference 647 * @param region region to be transitioned to opened 648 * @param serverName server event originates from 649 * @param endState state to transition node to if all checks pass 650 * @param beginState state the node must currently be in to do transition 651 * @param expectedVersion expected version of data before modification, or -1 652 * @return version of node after transition, -1 if unsuccessful transition 653 * @throws KeeperException if unexpected zookeeper exception 654 */ 655 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 656 String serverName, EventType beginState, EventType endState, 657 int expectedVersion) 658 throws KeeperException { 659 String encoded = region.getEncodedName(); 660 if(LOG.isDebugEnabled()) { 661 LOG.debug(zkw.prefix("Attempting to transition node " + 662 HRegionInfo.prettyPrint(encoded) + 663 " from " + beginState.toString() + " to " + endState.toString())); 664 } 665 666 String node = getNodeName(zkw, encoded); 667 zkw.sync(node); 668 669 // Read existing data of the node 670 Stat stat = new Stat(); 671 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat); 672 if (existingBytes == null) { 673 // Node no longer exists. Return -1. It means unsuccessful transition. 674 return -1; 675 } 676 RegionTransitionData existingData = 677 RegionTransitionData.fromBytes(existingBytes); 678 679 // Verify it is the expected version 680 if(expectedVersion != -1 && stat.getVersion() != expectedVersion) { 681 LOG.warn(zkw.prefix("Attempt to transition the " + 682 "unassigned node for " + encoded + 683 " from " + beginState + " to " + endState + " failed, " + 684 "the node existed but was version " + stat.getVersion() + 685 " not the expected version " + expectedVersion)); 686 return -1; 687 } 688 689 // Verify it is in expected state 690 if(!existingData.getEventType().equals(beginState)) { 691 LOG.warn(zkw.prefix("Attempt to transition the " + 692 "unassigned node for " + encoded + 693 " from " + beginState + " to " + endState + " failed, " + 694 "the node existed but was in the state " + existingData.getEventType() + 695 " set by the server " + existingData.getServerName())); 696 return -1; 697 } 698 699 // Write new data, ensuring data has not changed since we last read it 700 try { 701 RegionTransitionData data = new RegionTransitionData(endState, 702 region.getRegionName(), serverName); 703 if(!ZKUtil.setData(zkw, node, data.getBytes(), stat.getVersion())) { 704 LOG.warn(zkw.prefix("Attempt to transition the " + 705 "unassigned node for " + encoded + 706 " from " + beginState + " to " + endState + " failed, " + 707 "the node existed and was in the expected state but then when " + 708 "setting data we got a version mismatch")); 709 return -1; 710 } 711 if(LOG.isDebugEnabled()) { 712 LOG.debug(zkw.prefix("Successfully transitioned node " + encoded + 713 " from " + beginState + " to " + endState)); 714 } 715 return stat.getVersion() + 1; 716 } catch (KeeperException.NoNodeException nne) { 717 LOG.warn(zkw.prefix("Attempt to transition the " + 718 "unassigned node for " + encoded + 719 " from " + beginState + " to " + endState + " failed, " + 720 "the node existed and was in the expected state but then when " + 721 "setting data it no longer existed")); 722 return -1; 723 } 724 } 725 726 /** 727 * Gets the current data in the unassigned node for the specified region name 728 * or fully-qualified path. 729 * 730 * <p>Returns null if the region does not currently have a node. 731 * 732 * <p>Sets a watch on the node if the node exists. 733 * 734 * @param zkw zk reference 735 * @param pathOrRegionName fully-specified path or region name 736 * @return data for the unassigned node 737 * @throws KeeperException if unexpected zookeeper exception 738 */ 739 public static RegionTransitionData getData(ZooKeeperWatcher zkw, 740 String pathOrRegionName) 741 throws KeeperException { 742 String node = pathOrRegionName.startsWith("/") ? 743 pathOrRegionName : getNodeName(zkw, pathOrRegionName); 744 byte [] data = ZKUtil.getDataAndWatch(zkw, node); 745 if(data == null) { 746 return null; 747 } 748 return RegionTransitionData.fromBytes(data); 749 } 750 751 /** 752 * Gets the current data in the unassigned node for the specified region name 753 * or fully-qualified path. 754 * 755 * <p>Returns null if the region does not currently have a node. 756 * 757 * <p>Does not set a watch. 758 * 759 * @param zkw zk reference 760 * @param pathOrRegionName fully-specified path or region name 761 * @param stat object to store node info into on getData call 762 * @return data for the unassigned node or null if node does not exist 763 * @throws KeeperException if unexpected zookeeper exception 764 */ 765 public static RegionTransitionData getDataNoWatch(ZooKeeperWatcher zkw, 766 String pathOrRegionName, Stat stat) 767 throws KeeperException { 768 String node = pathOrRegionName.startsWith("/") ? 769 pathOrRegionName : getNodeName(zkw, pathOrRegionName); 770 byte [] data = ZKUtil.getDataNoWatch(zkw, node, stat); 771 if (data == null) { 772 return null; 773 } 774 return RegionTransitionData.fromBytes(data); 775 } 776 777 /** 778 * Delete the assignment node regardless of its current state. 779 * <p> 780 * Fail silent even if the node does not exist at all. 781 * @param watcher 782 * @param regionInfo 783 * @throws KeeperException 784 */ 785 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher, 786 HRegionInfo regionInfo) 787 throws KeeperException { 788 String node = getNodeName(watcher, regionInfo.getEncodedName()); 789 ZKUtil.deleteNodeFailSilent(watcher, node); 790 } 791 792 /** 793 * Blocks until there are no node in regions in transition. 794 * <p> 795 * Used in testing only. 796 * @param zkw zk reference 797 * @throws KeeperException 798 * @throws InterruptedException 799 */ 800 public static void blockUntilNoRIT(ZooKeeperWatcher zkw) 801 throws KeeperException, InterruptedException { 802 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 803 List<String> znodes = 804 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 805 if (znodes != null && !znodes.isEmpty()) { 806 for (String znode : znodes) { 807 LOG.debug("ZK RIT -> " + znode); 808 } 809 } 810 Thread.sleep(100); 811 } 812 } 813 814 /** 815 * Blocks until there is at least one node in regions in transition. 816 * <p> 817 * Used in testing only. 818 * @param zkw zk reference 819 * @throws KeeperException 820 * @throws InterruptedException 821 */ 822 public static void blockUntilRIT(ZooKeeperWatcher zkw) 823 throws KeeperException, InterruptedException { 824 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 825 List<String> znodes = 826 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 827 if (znodes == null || znodes.isEmpty()) { 828 LOG.debug("No RIT in ZK"); 829 } 830 Thread.sleep(100); 831 } 832 } 833 834 /** 835 * Verifies that the specified region is in the specified state in ZooKeeper. 836 * <p> 837 * Returns true if region is in transition and in the specified state in 838 * ZooKeeper. Returns false if the region does not exist in ZK or is in 839 * a different state. 840 * <p> 841 * Method synchronizes() with ZK so will yield an up-to-date result but is 842 * a slow read. 843 * @param zkw 844 * @param region 845 * @param expectedState 846 * @return true if region exists and is in expected state 847 */ 848 public static boolean verifyRegionState(ZooKeeperWatcher zkw, 849 HRegionInfo region, EventType expectedState) 850 throws KeeperException { 851 String encoded = region.getEncodedName(); 852 853 String node = getNodeName(zkw, encoded); 854 zkw.sync(node); 855 856 // Read existing data of the node 857 byte [] existingBytes = null; 858 try { 859 existingBytes = ZKUtil.getDataAndWatch(zkw, node); 860 } catch (KeeperException.NoNodeException nne) { 861 return false; 862 } catch (KeeperException e) { 863 throw e; 864 } 865 if (existingBytes == null) return false; 866 RegionTransitionData existingData = 867 RegionTransitionData.fromBytes(existingBytes); 868 if (existingData.getEventType() == expectedState){ 869 return true; 870 } 871 return false; 872 } 873 }