1 /** 2 * Copyright 2010 The Apache Software Foundation 3 * 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 package org.apache.hadoop.hbase.zookeeper; 21 22 import java.util.List; 23 24 import org.apache.commons.logging.Log; 25 import org.apache.commons.logging.LogFactory; 26 import org.apache.hadoop.hbase.HRegionInfo; 27 import org.apache.hadoop.hbase.executor.RegionTransitionData; 28 import org.apache.hadoop.hbase.executor.EventHandler.EventType; 29 import org.apache.zookeeper.AsyncCallback; 30 import org.apache.zookeeper.KeeperException; 31 import org.apache.zookeeper.KeeperException.Code; 32 import org.apache.zookeeper.KeeperException.NoNodeException; 33 import org.apache.zookeeper.KeeperException.NodeExistsException; 34 import org.apache.zookeeper.data.Stat; 35 36 /** 37 * Utility class for doing region assignment in ZooKeeper. This class extends 38 * stuff done in {@link ZKUtil} to cover specific assignment operations. 39 * <p> 40 * Contains only static methods and constants. 41 * <p> 42 * Used by both the Master and RegionServer. 43 * <p> 44 * All valid transitions outlined below: 45 * <p> 46 * <b>MASTER</b> 47 * <ol> 48 * <li> 49 * Master creates an unassigned node as OFFLINE. 50 * - Cluster startup and table enabling. 51 * </li> 52 * <li> 53 * Master forces an existing unassigned node to OFFLINE. 54 * - RegionServer failure. 55 * - Allows transitions from all states to OFFLINE. 56 * </li> 57 * <li> 58 * Master deletes an unassigned node that was in a OPENED state. 59 * - Normal region transitions. Besides cluster startup, no other deletions 60 * of unassigned nodes is allowed. 61 * </li> 62 * <li> 63 * Master deletes all unassigned nodes regardless of state. 64 * - Cluster startup before any assignment happens. 65 * </li> 66 * </ol> 67 * <p> 68 * <b>REGIONSERVER</b> 69 * <ol> 70 * <li> 71 * RegionServer creates an unassigned node as CLOSING. 72 * - All region closes will do this in response to a CLOSE RPC from Master. 73 * - A node can never be transitioned to CLOSING, only created. 74 * </li> 75 * <li> 76 * RegionServer transitions an unassigned node from CLOSING to CLOSED. 77 * - Normal region closes. CAS operation. 78 * </li> 79 * <li> 80 * RegionServer transitions an unassigned node from OFFLINE to OPENING. 81 * - All region opens will do this in response to an OPEN RPC from the Master. 82 * - Normal region opens. CAS operation. 83 * </li> 84 * <li> 85 * RegionServer transitions an unassigned node from OPENING to OPENED. 86 * - Normal region opens. CAS operation. 87 * </li> 88 * </ol> 89 */ 90 public class ZKAssign { 91 private static final Log LOG = LogFactory.getLog(ZKAssign.class); 92 93 /** 94 * Gets the full path node name for the unassigned node for the specified 95 * region. 96 * @param zkw zk reference 97 * @param regionName region name 98 * @return full path node name 99 */ 100 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) { 101 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName); 102 } 103 104 /** 105 * Gets the region name from the full path node name of an unassigned node. 106 * @param path full zk path 107 * @return region name 108 */ 109 public static String getRegionName(ZooKeeperWatcher zkw, String path) { 110 return path.substring(zkw.assignmentZNode.length()+1); 111 } 112 113 // Master methods 114 115 /** 116 * Creates a new unassigned node in the OFFLINE state for the specified region. 117 * 118 * <p>Does not transition nodes from other states. If a node already exists 119 * for this region, a {@link NodeExistsException} will be thrown. 120 * 121 * <p>Sets a watcher on the unassigned region node if the method is successful. 122 * 123 * <p>This method should only be used during cluster startup and the enabling 124 * of a table. 125 * 126 * @param zkw zk reference 127 * @param region region to be created as offline 128 * @param serverName server event originates from 129 * @throws KeeperException if unexpected zookeeper exception 130 * @throws KeeperException.NodeExistsException if node already exists 131 */ 132 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 133 String serverName) 134 throws KeeperException, KeeperException.NodeExistsException { 135 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE); 136 } 137 138 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 139 String serverName, final EventType event) 140 throws KeeperException, KeeperException.NodeExistsException { 141 LOG.debug(zkw.prefix("Creating unassigned node for " + 142 region.getEncodedName() + " in OFFLINE state")); 143 RegionTransitionData data = new RegionTransitionData(event, 144 region.getRegionName(), serverName); 145 synchronized(zkw.getNodes()) { 146 String node = getNodeName(zkw, region.getEncodedName()); 147 zkw.getNodes().add(node); 148 ZKUtil.createAndWatch(zkw, node, data.getBytes()); 149 } 150 } 151 152 /** 153 * Creates an unassigned node in the OFFLINE state for the specified region. 154 * <p> 155 * Runs asynchronously. Depends on no pre-existing znode. 156 * 157 * <p>Sets a watcher on the unassigned region node. 158 * 159 * @param zkw zk reference 160 * @param region region to be created as offline 161 * @param serverName server event originates from 162 * @param cb 163 * @param ctx 164 * @throws KeeperException if unexpected zookeeper exception 165 * @throws KeeperException.NodeExistsException if node already exists 166 */ 167 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw, 168 HRegionInfo region, String serverName, 169 final AsyncCallback.StringCallback cb, final Object ctx) 170 throws KeeperException { 171 LOG.debug(zkw.prefix("Async create of unassigned node for " + 172 region.getEncodedName() + " with OFFLINE state")); 173 RegionTransitionData data = new RegionTransitionData( 174 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 175 synchronized(zkw.getNodes()) { 176 String node = getNodeName(zkw, region.getEncodedName()); 177 zkw.getNodes().add(node); 178 ZKUtil.asyncCreate(zkw, node, data.getBytes(), cb, ctx); 179 } 180 } 181 182 /** 183 * Forces an existing unassigned node to the OFFLINE state for the specified 184 * region. 185 * 186 * <p>Does not create a new node. If a node does not already exist for this 187 * region, a {@link NoNodeException} will be thrown. 188 * 189 * <p>Sets a watcher on the unassigned region node if the method is 190 * successful. 191 * 192 * <p>This method should only be used during recovery of regionserver failure. 193 * 194 * @param zkw zk reference 195 * @param region region to be forced as offline 196 * @param serverName server event originates from 197 * @throws KeeperException if unexpected zookeeper exception 198 * @throws KeeperException.NoNodeException if node does not exist 199 */ 200 public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 201 String serverName) 202 throws KeeperException, KeeperException.NoNodeException { 203 LOG.debug(zkw.prefix("Forcing existing unassigned node for " + 204 region.getEncodedName() + " to OFFLINE state")); 205 RegionTransitionData data = new RegionTransitionData( 206 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 207 synchronized(zkw.getNodes()) { 208 String node = getNodeName(zkw, region.getEncodedName()); 209 zkw.getNodes().add(node); 210 ZKUtil.setData(zkw, node, data.getBytes()); 211 } 212 } 213 214 215 /** 216 * Creates or force updates an unassigned node to the OFFLINE state for the 217 * specified region. 218 * <p> 219 * Attempts to create the node but if it exists will force it to transition to 220 * and OFFLINE state. 221 * 222 * <p>Sets a watcher on the unassigned region node if the method is 223 * successful. 224 * 225 * <p>This method should be used when assigning a region. 226 * 227 * @param zkw zk reference 228 * @param region region to be created as offline 229 * @param serverName server event originates from 230 * @throws KeeperException if unexpected zookeeper exception 231 * @throws KeeperException.NodeExistsException if node already exists 232 */ 233 public static boolean createOrForceNodeOffline(ZooKeeperWatcher zkw, 234 HRegionInfo region, String serverName) 235 throws KeeperException { 236 LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " + 237 region.getEncodedName() + " with OFFLINE state")); 238 RegionTransitionData data = new RegionTransitionData( 239 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 240 synchronized(zkw.getNodes()) { 241 String node = getNodeName(zkw, region.getEncodedName()); 242 zkw.sync(node); 243 zkw.getNodes().add(node); 244 int version = ZKUtil.checkExists(zkw, node); 245 if(version == -1) { 246 ZKUtil.createAndWatch(zkw, node, data.getBytes()); 247 } else { 248 if (!ZKUtil.setData(zkw, node, data.getBytes(), version)) { 249 return false; 250 } else { 251 // We successfully forced to OFFLINE, reset watch and handle if 252 // the state changed in between our set and the watch 253 RegionTransitionData curData = 254 ZKAssign.getData(zkw, region.getEncodedName()); 255 if (curData.getEventType() != data.getEventType()) { 256 // state changed, need to process 257 return false; 258 } 259 } 260 } 261 } 262 return true; 263 } 264 265 /** 266 * Deletes an existing unassigned node that is in the OPENED state for the 267 * specified region. 268 * 269 * <p>If a node does not already exist for this region, a 270 * {@link NoNodeException} will be thrown. 271 * 272 * <p>No watcher is set whether this succeeds or not. 273 * 274 * <p>Returns false if the node was not in the proper state but did exist. 275 * 276 * <p>This method is used during normal region transitions when a region 277 * finishes successfully opening. This is the Master acknowledging completion 278 * of the specified regions transition. 279 * 280 * @param zkw zk reference 281 * @param regionName opened region to be deleted from zk 282 * @throws KeeperException if unexpected zookeeper exception 283 * @throws KeeperException.NoNodeException if node does not exist 284 */ 285 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw, 286 String regionName) 287 throws KeeperException, KeeperException.NoNodeException { 288 return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_OPENED); 289 } 290 291 /** 292 * Deletes an existing unassigned node that is in the OFFLINE state for the 293 * specified region. 294 * 295 * <p>If a node does not already exist for this region, a 296 * {@link NoNodeException} will be thrown. 297 * 298 * <p>No watcher is set whether this succeeds or not. 299 * 300 * <p>Returns false if the node was not in the proper state but did exist. 301 * 302 * <p>This method is used during master failover when the regions on an RS 303 * that has died are all set to OFFLINE before being processed. 304 * 305 * @param zkw zk reference 306 * @param regionName closed region to be deleted from zk 307 * @throws KeeperException if unexpected zookeeper exception 308 * @throws KeeperException.NoNodeException if node does not exist 309 */ 310 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw, 311 String regionName) 312 throws KeeperException, KeeperException.NoNodeException { 313 return deleteNode(zkw, regionName, EventType.M_ZK_REGION_OFFLINE); 314 } 315 316 /** 317 * Deletes an existing unassigned node that is in the CLOSED state for the 318 * specified region. 319 * 320 * <p>If a node does not already exist for this region, a 321 * {@link NoNodeException} will be thrown. 322 * 323 * <p>No watcher is set whether this succeeds or not. 324 * 325 * <p>Returns false if the node was not in the proper state but did exist. 326 * 327 * <p>This method is used during table disables when a region finishes 328 * successfully closing. This is the Master acknowledging completion 329 * of the specified regions transition to being closed. 330 * 331 * @param zkw zk reference 332 * @param regionName closed region to be deleted from zk 333 * @throws KeeperException if unexpected zookeeper exception 334 * @throws KeeperException.NoNodeException if node does not exist 335 */ 336 public static boolean deleteClosedNode(ZooKeeperWatcher zkw, 337 String regionName) 338 throws KeeperException, KeeperException.NoNodeException { 339 return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSED); 340 } 341 342 /** 343 * Deletes an existing unassigned node that is in the CLOSING state for the 344 * specified region. 345 * 346 * <p>If a node does not already exist for this region, a 347 * {@link NoNodeException} will be thrown. 348 * 349 * <p>No watcher is set whether this succeeds or not. 350 * 351 * <p>Returns false if the node was not in the proper state but did exist. 352 * 353 * <p>This method is used during table disables when a region finishes 354 * successfully closing. This is the Master acknowledging completion 355 * of the specified regions transition to being closed. 356 * 357 * @param zkw zk reference 358 * @param region closing region to be deleted from zk 359 * @throws KeeperException if unexpected zookeeper exception 360 * @throws KeeperException.NoNodeException if node does not exist 361 */ 362 public static boolean deleteClosingNode(ZooKeeperWatcher zkw, 363 HRegionInfo region) 364 throws KeeperException, KeeperException.NoNodeException { 365 String regionName = region.getEncodedName(); 366 return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSING); 367 } 368 369 /** 370 * Deletes an existing unassigned node that is in the specified state for the 371 * specified region. 372 * 373 * <p>If a node does not already exist for this region, a 374 * {@link NoNodeException} will be thrown. 375 * 376 * <p>No watcher is set whether this succeeds or not. 377 * 378 * <p>Returns false if the node was not in the proper state but did exist. 379 * 380 * <p>This method is used during table disables when a region finishes 381 * successfully closing. This is the Master acknowledging completion 382 * of the specified regions transition to being closed. 383 * 384 * @param zkw zk reference 385 * @param regionName region to be deleted from zk 386 * @param expectedState state region must be in for delete to complete 387 * @throws KeeperException if unexpected zookeeper exception 388 * @throws KeeperException.NoNodeException if node does not exist 389 */ 390 private static boolean deleteNode(ZooKeeperWatcher zkw, String regionName, 391 EventType expectedState) 392 throws KeeperException, KeeperException.NoNodeException { 393 LOG.debug(zkw.prefix("Deleting existing unassigned " + 394 "node for " + regionName + " that is in expected state " + expectedState)); 395 String node = getNodeName(zkw, regionName); 396 zkw.sync(node); 397 Stat stat = new Stat(); 398 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat); 399 if(bytes == null) { 400 throw KeeperException.create(Code.NONODE); 401 } 402 RegionTransitionData data = RegionTransitionData.fromBytes(bytes); 403 if(!data.getEventType().equals(expectedState)) { 404 LOG.warn(zkw.prefix("Attempting to delete unassigned " + 405 "node in " + expectedState + 406 " state but node is in " + data.getEventType() + " state")); 407 return false; 408 } 409 synchronized(zkw.getNodes()) { 410 // TODO: Does this go here or only if we successfully delete node? 411 zkw.getNodes().remove(node); 412 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) { 413 LOG.warn(zkw.prefix("Attempting to delete " + 414 "unassigned node in " + expectedState + 415 " state but " + 416 "after verifying it was in OPENED state, we got a version mismatch")); 417 return false; 418 } 419 LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " + 420 regionName + " in expected state " + expectedState)); 421 return true; 422 } 423 } 424 425 /** 426 * Deletes all unassigned nodes regardless of their state. 427 * 428 * <p>No watchers are set. 429 * 430 * <p>This method is used by the Master during cluster startup to clear out 431 * any existing state from other cluster runs. 432 * 433 * @param zkw zk reference 434 * @throws KeeperException if unexpected zookeeper exception 435 */ 436 public static void deleteAllNodes(ZooKeeperWatcher zkw) 437 throws KeeperException { 438 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes")); 439 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode); 440 } 441 442 // RegionServer methods 443 444 /** 445 * Creates a new unassigned node in the CLOSING state for the specified 446 * region. 447 * 448 * <p>Does not transition nodes from any states. If a node already exists 449 * for this region, a {@link NodeExistsException} will be thrown. 450 * 451 * <p>If creation is successful, returns the version number of the CLOSING 452 * node created. 453 * 454 * <p>Does not set any watches. 455 * 456 * <p>This method should only be used by a RegionServer when initiating a 457 * close of a region after receiving a CLOSE RPC from the Master. 458 * 459 * @param zkw zk reference 460 * @param region region to be created as closing 461 * @param serverName server event originates from 462 * @return version of node after transition, -1 if unsuccessful transition 463 * @throws KeeperException if unexpected zookeeper exception 464 * @throws KeeperException.NodeExistsException if node already exists 465 */ 466 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region, 467 String serverName) 468 throws KeeperException, KeeperException.NodeExistsException { 469 LOG.debug(zkw.prefix("Creating unassigned node for " + 470 region.getEncodedName() + " in a CLOSING state")); 471 472 RegionTransitionData data = new RegionTransitionData( 473 EventType.RS_ZK_REGION_CLOSING, region.getRegionName(), serverName); 474 475 synchronized (zkw.getNodes()) { 476 String node = getNodeName(zkw, region.getEncodedName()); 477 zkw.getNodes().add(node); 478 return ZKUtil.createAndWatch(zkw, node, data.getBytes()); 479 } 480 } 481 482 /** 483 * Transitions an existing unassigned node for the specified region which is 484 * currently in the CLOSING state to be in the CLOSED state. 485 * 486 * <p>Does not transition nodes from other states. If for some reason the 487 * node could not be transitioned, the method returns -1. If the transition 488 * is successful, the version of the node after transition is returned. 489 * 490 * <p>This method can fail and return false for three different reasons: 491 * <ul><li>Unassigned node for this region does not exist</li> 492 * <li>Unassigned node for this region is not in CLOSING state</li> 493 * <li>After verifying CLOSING state, update fails because of wrong version 494 * (someone else already transitioned the node)</li> 495 * </ul> 496 * 497 * <p>Does not set any watches. 498 * 499 * <p>This method should only be used by a RegionServer when initiating a 500 * close of a region after receiving a CLOSE RPC from the Master. 501 * 502 * @param zkw zk reference 503 * @param region region to be transitioned to closed 504 * @param serverName server event originates from 505 * @return version of node after transition, -1 if unsuccessful transition 506 * @throws KeeperException if unexpected zookeeper exception 507 */ 508 public static int transitionNodeClosed(ZooKeeperWatcher zkw, 509 HRegionInfo region, String serverName, int expectedVersion) 510 throws KeeperException { 511 return transitionNode(zkw, region, serverName, 512 EventType.RS_ZK_REGION_CLOSING, 513 EventType.RS_ZK_REGION_CLOSED, expectedVersion); 514 } 515 516 /** 517 * Transitions an existing unassigned node for the specified region which is 518 * currently in the OFFLINE state to be in the OPENING state. 519 * 520 * <p>Does not transition nodes from other states. If for some reason the 521 * node could not be transitioned, the method returns -1. If the transition 522 * is successful, the version of the node written as OPENING is returned. 523 * 524 * <p>This method can fail and return -1 for three different reasons: 525 * <ul><li>Unassigned node for this region does not exist</li> 526 * <li>Unassigned node for this region is not in OFFLINE state</li> 527 * <li>After verifying OFFLINE state, update fails because of wrong version 528 * (someone else already transitioned the node)</li> 529 * </ul> 530 * 531 * <p>Does not set any watches. 532 * 533 * <p>This method should only be used by a RegionServer when initiating an 534 * open of a region after receiving an OPEN RPC from the Master. 535 * 536 * @param zkw zk reference 537 * @param region region to be transitioned to opening 538 * @param serverName server event originates from 539 * @return version of node after transition, -1 if unsuccessful transition 540 * @throws KeeperException if unexpected zookeeper exception 541 */ 542 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 543 HRegionInfo region, String serverName) 544 throws KeeperException { 545 return transitionNodeOpening(zkw, region, serverName, 546 EventType.M_ZK_REGION_OFFLINE); 547 } 548 549 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 550 HRegionInfo region, String serverName, final EventType beginState) 551 throws KeeperException { 552 return transitionNode(zkw, region, serverName, beginState, 553 EventType.RS_ZK_REGION_OPENING, -1); 554 } 555 556 /** 557 * Retransitions an existing unassigned node for the specified region which is 558 * currently in the OPENING state to be in the OPENING state. 559 * 560 * <p>Does not transition nodes from other states. If for some reason the 561 * node could not be transitioned, the method returns -1. If the transition 562 * is successful, the version of the node rewritten as OPENING is returned. 563 * 564 * <p>This method can fail and return -1 for three different reasons: 565 * <ul><li>Unassigned node for this region does not exist</li> 566 * <li>Unassigned node for this region is not in OPENING state</li> 567 * <li>After verifying OPENING state, update fails because of wrong version 568 * (someone else already transitioned the node)</li> 569 * </ul> 570 * 571 * <p>Does not set any watches. 572 * 573 * <p>This method should only be used by a RegionServer when initiating an 574 * open of a region after receiving an OPEN RPC from the Master. 575 * 576 * @param zkw zk reference 577 * @param region region to be transitioned to opening 578 * @param serverName server event originates from 579 * @return version of node after transition, -1 if unsuccessful transition 580 * @throws KeeperException if unexpected zookeeper exception 581 */ 582 public static int retransitionNodeOpening(ZooKeeperWatcher zkw, 583 HRegionInfo region, String serverName, int expectedVersion) 584 throws KeeperException { 585 return transitionNode(zkw, region, serverName, 586 EventType.RS_ZK_REGION_OPENING, 587 EventType.RS_ZK_REGION_OPENING, expectedVersion); 588 } 589 590 /** 591 * Transitions an existing unassigned node for the specified region which is 592 * currently in the OPENING state to be in the OPENED state. 593 * 594 * <p>Does not transition nodes from other states. If for some reason the 595 * node could not be transitioned, the method returns -1. If the transition 596 * is successful, the version of the node after transition is returned. 597 * 598 * <p>This method can fail and return false for three different reasons: 599 * <ul><li>Unassigned node for this region does not exist</li> 600 * <li>Unassigned node for this region is not in OPENING state</li> 601 * <li>After verifying OPENING state, update fails because of wrong version 602 * (this should never actually happen since an RS only does this transition 603 * following a transition to OPENING. if two RS are conflicting, one would 604 * fail the original transition to OPENING and not this transition)</li> 605 * </ul> 606 * 607 * <p>Does not set any watches. 608 * 609 * <p>This method should only be used by a RegionServer when completing the 610 * open of a region. 611 * 612 * @param zkw zk reference 613 * @param region region to be transitioned to opened 614 * @param serverName server event originates from 615 * @return version of node after transition, -1 if unsuccessful transition 616 * @throws KeeperException if unexpected zookeeper exception 617 */ 618 public static int transitionNodeOpened(ZooKeeperWatcher zkw, 619 HRegionInfo region, String serverName, int expectedVersion) 620 throws KeeperException { 621 return transitionNode(zkw, region, serverName, 622 EventType.RS_ZK_REGION_OPENING, 623 EventType.RS_ZK_REGION_OPENED, expectedVersion); 624 } 625 626 /** 627 * Private method that actually performs unassigned node transitions. 628 * 629 * <p>Attempts to transition the unassigned node for the specified region 630 * from the expected state to the state in the specified transition data. 631 * 632 * <p>Method first reads existing data and verifies it is in the expected 633 * state. If the node does not exist or the node is not in the expected 634 * state, the method returns -1. If the transition is successful, the 635 * version number of the node following the transition is returned. 636 * 637 * <p>If the read state is what is expected, it attempts to write the new 638 * state and data into the node. When doing this, it includes the expected 639 * version (determined when the existing state was verified) to ensure that 640 * only one transition is successful. If there is a version mismatch, the 641 * method returns -1. 642 * 643 * <p>If the write is successful, no watch is set and the method returns true. 644 * 645 * @param zkw zk reference 646 * @param region region to be transitioned to opened 647 * @param serverName server event originates from 648 * @param endState state to transition node to if all checks pass 649 * @param beginState state the node must currently be in to do transition 650 * @param expectedVersion expected version of data before modification, or -1 651 * @return version of node after transition, -1 if unsuccessful transition 652 * @throws KeeperException if unexpected zookeeper exception 653 */ 654 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 655 String serverName, EventType beginState, EventType endState, 656 int expectedVersion) 657 throws KeeperException { 658 String encoded = region.getEncodedName(); 659 if(LOG.isDebugEnabled()) { 660 LOG.debug(zkw.prefix("Attempting to transition node " + 661 HRegionInfo.prettyPrint(encoded) + 662 " from " + beginState.toString() + " to " + endState.toString())); 663 } 664 665 String node = getNodeName(zkw, encoded); 666 zkw.sync(node); 667 668 // Read existing data of the node 669 Stat stat = new Stat(); 670 byte [] existingBytes = 671 ZKUtil.getDataNoWatch(zkw, node, stat); 672 RegionTransitionData existingData = 673 RegionTransitionData.fromBytes(existingBytes); 674 675 // Verify it is the expected version 676 if(expectedVersion != -1 && stat.getVersion() != expectedVersion) { 677 LOG.warn(zkw.prefix("Attempt to transition the " + 678 "unassigned node for " + encoded + 679 " from " + beginState + " to " + endState + " failed, " + 680 "the node existed but was version " + stat.getVersion() + 681 " not the expected version " + expectedVersion)); 682 return -1; 683 } 684 685 // Verify it is in expected state 686 if(!existingData.getEventType().equals(beginState)) { 687 LOG.warn(zkw.prefix("Attempt to transition the " + 688 "unassigned node for " + encoded + 689 " from " + beginState + " to " + endState + " failed, " + 690 "the node existed but was in the state " + existingData.getEventType())); 691 return -1; 692 } 693 694 // Write new data, ensuring data has not changed since we last read it 695 try { 696 RegionTransitionData data = new RegionTransitionData(endState, 697 region.getRegionName(), serverName); 698 if(!ZKUtil.setData(zkw, node, data.getBytes(), stat.getVersion())) { 699 LOG.warn(zkw.prefix("Attempt to transition the " + 700 "unassigned node for " + encoded + 701 " from " + beginState + " to " + endState + " failed, " + 702 "the node existed and was in the expected state but then when " + 703 "setting data we got a version mismatch")); 704 return -1; 705 } 706 if(LOG.isDebugEnabled()) { 707 LOG.debug(zkw.prefix("Successfully transitioned node " + encoded + 708 " from " + beginState + " to " + endState)); 709 } 710 return stat.getVersion() + 1; 711 } catch (KeeperException.NoNodeException nne) { 712 LOG.warn(zkw.prefix("Attempt to transition the " + 713 "unassigned node for " + encoded + 714 " from " + beginState + " to " + endState + " failed, " + 715 "the node existed and was in the expected state but then when " + 716 "setting data it no longer existed")); 717 return -1; 718 } 719 } 720 721 /** 722 * Gets the current data in the unassigned node for the specified region name 723 * or fully-qualified path. 724 * 725 * <p>Returns null if the region does not currently have a node. 726 * 727 * <p>Sets a watch on the node if the node exists. 728 * 729 * @param zkw zk reference 730 * @param pathOrRegionName fully-specified path or region name 731 * @return data for the unassigned node 732 * @throws KeeperException if unexpected zookeeper exception 733 */ 734 public static RegionTransitionData getData(ZooKeeperWatcher zkw, 735 String pathOrRegionName) 736 throws KeeperException { 737 String node = pathOrRegionName.startsWith("/") ? 738 pathOrRegionName : getNodeName(zkw, pathOrRegionName); 739 byte [] data = ZKUtil.getDataAndWatch(zkw, node); 740 if(data == null) { 741 return null; 742 } 743 return RegionTransitionData.fromBytes(data); 744 } 745 746 /** 747 * Gets the current data in the unassigned node for the specified region name 748 * or fully-qualified path. 749 * 750 * <p>Returns null if the region does not currently have a node. 751 * 752 * <p>Does not set a watch. 753 * 754 * @param zkw zk reference 755 * @param pathOrRegionName fully-specified path or region name 756 * @param stat object to store node info into on getData call 757 * @return data for the unassigned node 758 * @throws KeeperException if unexpected zookeeper exception 759 */ 760 public static RegionTransitionData getDataNoWatch(ZooKeeperWatcher zkw, 761 String pathOrRegionName, Stat stat) 762 throws KeeperException { 763 String node = pathOrRegionName.startsWith("/") ? 764 pathOrRegionName : getNodeName(zkw, pathOrRegionName); 765 byte [] data = ZKUtil.getDataNoWatch(zkw, node, stat); 766 if(data == null) { 767 return null; 768 } 769 return RegionTransitionData.fromBytes(data); 770 } 771 772 /** 773 * Delete the assignment node regardless of its current state. 774 * <p> 775 * Fail silent even if the node does not exist at all. 776 * @param watcher 777 * @param regionInfo 778 * @throws KeeperException 779 */ 780 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher, 781 HRegionInfo regionInfo) 782 throws KeeperException { 783 String node = getNodeName(watcher, regionInfo.getEncodedName()); 784 ZKUtil.deleteNodeFailSilent(watcher, node); 785 } 786 787 /** 788 * Blocks until there are no node in regions in transition. 789 * <p> 790 * Used in testing only. 791 * @param zkw zk reference 792 * @throws KeeperException 793 * @throws InterruptedException 794 */ 795 public static void blockUntilNoRIT(ZooKeeperWatcher zkw) 796 throws KeeperException, InterruptedException { 797 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 798 List<String> znodes = 799 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 800 if (znodes != null && !znodes.isEmpty()) { 801 for (String znode : znodes) { 802 LOG.debug("ZK RIT -> " + znode); 803 } 804 } 805 Thread.sleep(100); 806 } 807 } 808 809 /** 810 * Blocks until there is at least one node in regions in transition. 811 * <p> 812 * Used in testing only. 813 * @param zkw zk reference 814 * @throws KeeperException 815 * @throws InterruptedException 816 */ 817 public static void blockUntilRIT(ZooKeeperWatcher zkw) 818 throws KeeperException, InterruptedException { 819 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 820 List<String> znodes = 821 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 822 if (znodes == null || znodes.isEmpty()) { 823 LOG.debug("No RIT in ZK"); 824 } 825 Thread.sleep(100); 826 } 827 } 828 829 /** 830 * Verifies that the specified region is in the specified state in ZooKeeper. 831 * <p> 832 * Returns true if region is in transition and in the specified state in 833 * ZooKeeper. Returns false if the region does not exist in ZK or is in 834 * a different state. 835 * <p> 836 * Method synchronizes() with ZK so will yield an up-to-date result but is 837 * a slow read. 838 * @param zkw 839 * @param region 840 * @param expectedState 841 * @return true if region exists and is in expected state 842 */ 843 public static boolean verifyRegionState(ZooKeeperWatcher zkw, 844 HRegionInfo region, EventType expectedState) 845 throws KeeperException { 846 String encoded = region.getEncodedName(); 847 848 String node = getNodeName(zkw, encoded); 849 zkw.sync(node); 850 851 // Read existing data of the node 852 byte [] existingBytes = null; 853 try { 854 existingBytes = ZKUtil.getDataAndWatch(zkw, node); 855 } catch (KeeperException.NoNodeException nne) { 856 return false; 857 } catch (KeeperException e) { 858 throw e; 859 } 860 if (existingBytes == null) return false; 861 RegionTransitionData existingData = 862 RegionTransitionData.fromBytes(existingBytes); 863 if (existingData.getEventType() == expectedState){ 864 return true; 865 } 866 return false; 867 } 868 }