1 /** 2 * Copyright 2010 The Apache Software Foundation 3 * 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 package org.apache.hadoop.hbase.zookeeper; 21 22 import java.util.List; 23 24 import org.apache.commons.logging.Log; 25 import org.apache.commons.logging.LogFactory; 26 import org.apache.hadoop.hbase.HRegionInfo; 27 import org.apache.hadoop.hbase.ServerName; 28 import org.apache.hadoop.hbase.executor.RegionTransitionData; 29 import org.apache.hadoop.hbase.executor.EventHandler.EventType; 30 import org.apache.zookeeper.AsyncCallback; 31 import org.apache.zookeeper.KeeperException; 32 import org.apache.zookeeper.KeeperException.Code; 33 import org.apache.zookeeper.KeeperException.NoNodeException; 34 import org.apache.zookeeper.KeeperException.NodeExistsException; 35 import org.apache.zookeeper.data.Stat; 36 37 /** 38 * Utility class for doing region assignment in ZooKeeper. This class extends 39 * stuff done in {@link ZKUtil} to cover specific assignment operations. 40 * <p> 41 * Contains only static methods and constants. 42 * <p> 43 * Used by both the Master and RegionServer. 44 * <p> 45 * All valid transitions outlined below: 46 * <p> 47 * <b>MASTER</b> 48 * <ol> 49 * <li> 50 * Master creates an unassigned node as OFFLINE. 51 * - Cluster startup and table enabling. 52 * </li> 53 * <li> 54 * Master forces an existing unassigned node to OFFLINE. 55 * - RegionServer failure. 56 * - Allows transitions from all states to OFFLINE. 57 * </li> 58 * <li> 59 * Master deletes an unassigned node that was in a OPENED state. 60 * - Normal region transitions. Besides cluster startup, no other deletions 61 * of unassigned nodes is allowed. 62 * </li> 63 * <li> 64 * Master deletes all unassigned nodes regardless of state. 65 * - Cluster startup before any assignment happens. 66 * </li> 67 * </ol> 68 * <p> 69 * <b>REGIONSERVER</b> 70 * <ol> 71 * <li> 72 * RegionServer creates an unassigned node as CLOSING. 73 * - All region closes will do this in response to a CLOSE RPC from Master. 74 * - A node can never be transitioned to CLOSING, only created. 75 * </li> 76 * <li> 77 * RegionServer transitions an unassigned node from CLOSING to CLOSED. 78 * - Normal region closes. CAS operation. 79 * </li> 80 * <li> 81 * RegionServer transitions an unassigned node from OFFLINE to OPENING. 82 * - All region opens will do this in response to an OPEN RPC from the Master. 83 * - Normal region opens. CAS operation. 84 * </li> 85 * <li> 86 * RegionServer transitions an unassigned node from OPENING to OPENED. 87 * - Normal region opens. CAS operation. 88 * </li> 89 * </ol> 90 */ 91 public class ZKAssign { 92 private static final Log LOG = LogFactory.getLog(ZKAssign.class); 93 94 /** 95 * Gets the full path node name for the unassigned node for the specified 96 * region. 97 * @param zkw zk reference 98 * @param regionName region name 99 * @return full path node name 100 */ 101 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) { 102 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName); 103 } 104 105 /** 106 * Gets the region name from the full path node name of an unassigned node. 107 * @param path full zk path 108 * @return region name 109 */ 110 public static String getRegionName(ZooKeeperWatcher zkw, String path) { 111 return path.substring(zkw.assignmentZNode.length()+1); 112 } 113 114 // Master methods 115 116 /** 117 * Creates a new unassigned node in the OFFLINE state for the specified region. 118 * 119 * <p>Does not transition nodes from other states. If a node already exists 120 * for this region, a {@link NodeExistsException} will be thrown. 121 * 122 * <p>Sets a watcher on the unassigned region node if the method is successful. 123 * 124 * <p>This method should only be used during cluster startup and the enabling 125 * of a table. 126 * 127 * @param zkw zk reference 128 * @param region region to be created as offline 129 * @param serverName server event originates from 130 * @throws KeeperException if unexpected zookeeper exception 131 * @throws KeeperException.NodeExistsException if node already exists 132 */ 133 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 134 ServerName serverName) 135 throws KeeperException, KeeperException.NodeExistsException { 136 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE); 137 } 138 139 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 140 ServerName serverName, final EventType event) 141 throws KeeperException, KeeperException.NodeExistsException { 142 LOG.debug(zkw.prefix("Creating unassigned node for " + 143 region.getEncodedName() + " in OFFLINE state")); 144 RegionTransitionData data = new RegionTransitionData(event, 145 region.getRegionName(), serverName); 146 String node = getNodeName(zkw, region.getEncodedName()); 147 ZKUtil.createAndWatch(zkw, node, data.getBytes()); 148 } 149 150 /** 151 * Creates an unassigned node in the OFFLINE state for the specified region. 152 * <p> 153 * Runs asynchronously. Depends on no pre-existing znode. 154 * 155 * <p>Sets a watcher on the unassigned region node. 156 * 157 * @param zkw zk reference 158 * @param region region to be created as offline 159 * @param serverName server event originates from 160 * @param cb 161 * @param ctx 162 * @throws KeeperException if unexpected zookeeper exception 163 * @throws KeeperException.NodeExistsException if node already exists 164 */ 165 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw, 166 HRegionInfo region, ServerName serverName, 167 final AsyncCallback.StringCallback cb, final Object ctx) 168 throws KeeperException { 169 LOG.debug(zkw.prefix("Async create of unassigned node for " + 170 region.getEncodedName() + " with OFFLINE state")); 171 RegionTransitionData data = new RegionTransitionData( 172 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 173 String node = getNodeName(zkw, region.getEncodedName()); 174 ZKUtil.asyncCreate(zkw, node, data.getBytes(), cb, ctx); 175 } 176 177 /** 178 * Forces an existing unassigned node to the OFFLINE state for the specified 179 * region. 180 * 181 * <p>Does not create a new node. If a node does not already exist for this 182 * region, a {@link NoNodeException} will be thrown. 183 * 184 * <p>Sets a watcher on the unassigned region node if the method is 185 * successful. 186 * 187 * <p>This method should only be used during recovery of regionserver failure. 188 * 189 * @param zkw zk reference 190 * @param region region to be forced as offline 191 * @param serverName server event originates from 192 * @throws KeeperException if unexpected zookeeper exception 193 * @throws KeeperException.NoNodeException if node does not exist 194 */ 195 public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 196 ServerName serverName) 197 throws KeeperException, KeeperException.NoNodeException { 198 LOG.debug(zkw.prefix("Forcing existing unassigned node for " + 199 region.getEncodedName() + " to OFFLINE state")); 200 RegionTransitionData data = new RegionTransitionData( 201 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 202 String node = getNodeName(zkw, region.getEncodedName()); 203 ZKUtil.setData(zkw, node, data.getBytes()); 204 } 205 206 /** 207 * Creates or force updates an unassigned node to the OFFLINE state for the 208 * specified region. 209 * <p> 210 * Attempts to create the node but if it exists will force it to transition to 211 * and OFFLINE state. 212 * 213 * <p>Sets a watcher on the unassigned region node if the method is 214 * successful. 215 * 216 * <p>This method should be used when assigning a region. 217 * 218 * @param zkw zk reference 219 * @param region region to be created as offline 220 * @param serverName server event originates from 221 * @return the version of the znode created in OFFLINE state, -1 if 222 * unsuccessful. 223 * @throws KeeperException if unexpected zookeeper exception 224 * @throws KeeperException.NodeExistsException if node already exists 225 */ 226 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw, 227 HRegionInfo region, ServerName serverName) throws KeeperException { 228 return createOrForceNodeOffline(zkw, region, serverName, false, true); 229 } 230 231 /** 232 * Creates or force updates an unassigned node to the OFFLINE state for the 233 * specified region. 234 * <p> 235 * Attempts to create the node but if it exists will force it to transition to 236 * and OFFLINE state. 237 * <p> 238 * Sets a watcher on the unassigned region node if the method is successful. 239 * 240 * <p> 241 * This method should be used when assigning a region. 242 * 243 * @param zkw 244 * zk reference 245 * @param region 246 * region to be created as offline 247 * @param serverName 248 * server event originates from 249 * @param hijack 250 * - true if to be hijacked and reassigned, false otherwise 251 * @param allowCreation 252 * - true if the node has to be created newly, false otherwise 253 * @throws KeeperException 254 * if unexpected zookeeper exception 255 * @return the version of the znode created in OFFLINE state, -1 if 256 * unsuccessful. 257 * @throws KeeperException.NodeExistsException 258 * if node already exists 259 */ 260 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw, 261 HRegionInfo region, ServerName serverName, 262 boolean hijack, boolean allowCreation) 263 throws KeeperException { 264 LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " + 265 region.getEncodedName() + " with OFFLINE state")); 266 RegionTransitionData data = new RegionTransitionData( 267 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 268 String node = getNodeName(zkw, region.getEncodedName()); 269 Stat stat = new Stat(); 270 zkw.sync(node); 271 int version = ZKUtil.checkExists(zkw, node); 272 if (version == -1) { 273 // While trying to transit a node to OFFLINE that was in previously in 274 // OPENING state but before it could transit to OFFLINE state if RS had 275 // opened the region then the Master deletes the assigned region znode. 276 // In that case the znode will not exist. So we should not 277 // create the znode again which will lead to double assignment. 278 if (hijack && !allowCreation) { 279 return -1; 280 } 281 return ZKUtil.createAndWatch(zkw, node, data.getBytes()); 282 } else { 283 RegionTransitionData curDataInZNode = ZKAssign.getDataNoWatch(zkw, region 284 .getEncodedName(), stat); 285 // Do not move the node to OFFLINE if znode is in any of the following 286 // state. 287 // Because these are already executed states. 288 if (hijack && null != curDataInZNode) { 289 EventType eventType = curDataInZNode.getEventType(); 290 if (eventType.equals(EventType.M_ZK_REGION_CLOSING) 291 || eventType.equals(EventType.RS_ZK_REGION_CLOSED) 292 || eventType.equals(EventType.RS_ZK_REGION_OPENED)) { 293 return -1; 294 } 295 } 296 297 boolean setData = false; 298 try { 299 setData = ZKUtil.setData(zkw, node, data.getBytes(), version); 300 // Setdata throws KeeperException which aborts the Master. So we are 301 // catching it here. 302 // If just before setting the znode to OFFLINE if the RS has made any 303 // change to the 304 // znode state then we need to return -1. 305 } catch (KeeperException kpe) { 306 LOG.info("Version mismatch while setting the node to OFFLINE state."); 307 return -1; 308 } 309 if (!setData) { 310 return -1; 311 } else { 312 // We successfully forced to OFFLINE, reset watch and handle if 313 // the state changed in between our set and the watch 314 RegionTransitionData curData = 315 ZKAssign.getData(zkw, region.getEncodedName()); 316 if (curData.getEventType() != data.getEventType()) { 317 // state changed, need to process 318 return -1; 319 } 320 } 321 } 322 return stat.getVersion() + 1; 323 } 324 325 /** 326 * Deletes an existing unassigned node that is in the OPENED state for the 327 * specified region. 328 * 329 * <p>If a node does not already exist for this region, a 330 * {@link NoNodeException} will be thrown. 331 * 332 * <p>No watcher is set whether this succeeds or not. 333 * 334 * <p>Returns false if the node was not in the proper state but did exist. 335 * 336 * <p>This method is used during normal region transitions when a region 337 * finishes successfully opening. This is the Master acknowledging completion 338 * of the specified regions transition. 339 * 340 * @param zkw zk reference 341 * @param regionName opened region to be deleted from zk 342 * @throws KeeperException if unexpected zookeeper exception 343 * @throws KeeperException.NoNodeException if node does not exist 344 */ 345 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw, 346 String regionName) 347 throws KeeperException, KeeperException.NoNodeException { 348 return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_OPENED); 349 } 350 351 /** 352 * Deletes an existing unassigned node that is in the OFFLINE state for the 353 * specified region. 354 * 355 * <p>If a node does not already exist for this region, a 356 * {@link NoNodeException} will be thrown. 357 * 358 * <p>No watcher is set whether this succeeds or not. 359 * 360 * <p>Returns false if the node was not in the proper state but did exist. 361 * 362 * <p>This method is used during master failover when the regions on an RS 363 * that has died are all set to OFFLINE before being processed. 364 * 365 * @param zkw zk reference 366 * @param regionName closed region to be deleted from zk 367 * @throws KeeperException if unexpected zookeeper exception 368 * @throws KeeperException.NoNodeException if node does not exist 369 */ 370 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw, 371 String regionName) 372 throws KeeperException, KeeperException.NoNodeException { 373 return deleteNode(zkw, regionName, EventType.M_ZK_REGION_OFFLINE); 374 } 375 376 /** 377 * Deletes an existing unassigned node that is in the CLOSED state for the 378 * specified region. 379 * 380 * <p>If a node does not already exist for this region, a 381 * {@link NoNodeException} will be thrown. 382 * 383 * <p>No watcher is set whether this succeeds or not. 384 * 385 * <p>Returns false if the node was not in the proper state but did exist. 386 * 387 * <p>This method is used during table disables when a region finishes 388 * successfully closing. This is the Master acknowledging completion 389 * of the specified regions transition to being closed. 390 * 391 * @param zkw zk reference 392 * @param regionName closed region to be deleted from zk 393 * @throws KeeperException if unexpected zookeeper exception 394 * @throws KeeperException.NoNodeException if node does not exist 395 */ 396 public static boolean deleteClosedNode(ZooKeeperWatcher zkw, 397 String regionName) 398 throws KeeperException, KeeperException.NoNodeException { 399 return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSED); 400 } 401 402 /** 403 * Deletes an existing unassigned node that is in the CLOSING state for the 404 * specified region. 405 * 406 * <p>If a node does not already exist for this region, a 407 * {@link NoNodeException} will be thrown. 408 * 409 * <p>No watcher is set whether this succeeds or not. 410 * 411 * <p>Returns false if the node was not in the proper state but did exist. 412 * 413 * <p>This method is used during table disables when a region finishes 414 * successfully closing. This is the Master acknowledging completion 415 * of the specified regions transition to being closed. 416 * 417 * @param zkw zk reference 418 * @param region closing region to be deleted from zk 419 * @throws KeeperException if unexpected zookeeper exception 420 * @throws KeeperException.NoNodeException if node does not exist 421 */ 422 public static boolean deleteClosingNode(ZooKeeperWatcher zkw, 423 HRegionInfo region) 424 throws KeeperException, KeeperException.NoNodeException { 425 String regionName = region.getEncodedName(); 426 return deleteNode(zkw, regionName, EventType.M_ZK_REGION_CLOSING); 427 } 428 429 /** 430 * Deletes an existing unassigned node that is in the specified state for the 431 * specified region. 432 * 433 * <p>If a node does not already exist for this region, a 434 * {@link NoNodeException} will be thrown. 435 * 436 * <p>No watcher is set whether this succeeds or not. 437 * 438 * <p>Returns false if the node was not in the proper state but did exist. 439 * 440 * <p>This method is used when a region finishes opening/closing. 441 * The Master acknowledges completion 442 * of the specified regions transition to being closed/opened. 443 * 444 * @param zkw zk reference 445 * @param regionName region to be deleted from zk 446 * @param expectedState state region must be in for delete to complete 447 * @throws KeeperException if unexpected zookeeper exception 448 * @throws KeeperException.NoNodeException if node does not exist 449 */ 450 public static boolean deleteNode(ZooKeeperWatcher zkw, String regionName, 451 EventType expectedState) 452 throws KeeperException, KeeperException.NoNodeException { 453 return deleteNode(zkw, regionName, expectedState, -1); 454 } 455 456 /** 457 * Deletes an existing unassigned node that is in the specified state for the 458 * specified region. 459 * 460 * <p>If a node does not already exist for this region, a 461 * {@link NoNodeException} will be thrown. 462 * 463 * <p>No watcher is set whether this succeeds or not. 464 * 465 * <p>Returns false if the node was not in the proper state but did exist. 466 * 467 * <p>This method is used when a region finishes opening/closing. 468 * The Master acknowledges completion 469 * of the specified regions transition to being closed/opened. 470 * 471 * @param zkw zk reference 472 * @param regionName region to be deleted from zk 473 * @param expectedState state region must be in for delete to complete 474 * @param expectedVersion of the znode that is to be deleted. 475 * If expectedVersion need not be compared while deleting the znode 476 * pass -1 477 * @throws KeeperException if unexpected zookeeper exception 478 * @throws KeeperException.NoNodeException if node does not exist 479 */ 480 public static boolean deleteNode(ZooKeeperWatcher zkw, String regionName, 481 EventType expectedState, int expectedVersion) 482 throws KeeperException, KeeperException.NoNodeException { 483 LOG.debug(zkw.prefix("Deleting existing unassigned " + 484 "node for " + regionName + " that is in expected state " + expectedState)); 485 String node = getNodeName(zkw, regionName); 486 zkw.sync(node); 487 Stat stat = new Stat(); 488 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat); 489 if (bytes == null) { 490 // If it came back null, node does not exist. 491 throw KeeperException.create(Code.NONODE); 492 } 493 RegionTransitionData data = RegionTransitionData.fromBytes(bytes); 494 if (!data.getEventType().equals(expectedState)) { 495 LOG.warn(zkw.prefix("Attempting to delete unassigned " + 496 "node " + regionName + " in " + expectedState + 497 " state but node is in " + data.getEventType() + " state")); 498 return false; 499 } 500 if (expectedVersion != -1 501 && stat.getVersion() != expectedVersion) { 502 LOG.warn("The node " + regionName + " we are trying to delete is not" + 503 " the expected one. Got a version mismatch"); 504 return false; 505 } 506 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) { 507 LOG.warn(zkw.prefix("Attempting to delete " + 508 "unassigned node " + regionName + " in " + expectedState + 509 " state but after verifying state, we got a version mismatch")); 510 return false; 511 } 512 LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " + 513 regionName + " in expected state " + expectedState)); 514 return true; 515 } 516 517 /** 518 * Deletes all unassigned nodes regardless of their state. 519 * 520 * <p>No watchers are set. 521 * 522 * <p>This method is used by the Master during cluster startup to clear out 523 * any existing state from other cluster runs. 524 * 525 * @param zkw zk reference 526 * @throws KeeperException if unexpected zookeeper exception 527 */ 528 public static void deleteAllNodes(ZooKeeperWatcher zkw) 529 throws KeeperException { 530 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes")); 531 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode); 532 } 533 534 // RegionServer methods 535 536 /** 537 * Creates a new unassigned node in the CLOSING state for the specified 538 * region. 539 * 540 * <p>Does not transition nodes from any states. If a node already exists 541 * for this region, a {@link NodeExistsException} will be thrown. 542 * 543 * <p>If creation is successful, returns the version number of the CLOSING 544 * node created. 545 * 546 * <p>Does not set any watches. 547 * 548 * <p>This method should only be used by a RegionServer when initiating a 549 * close of a region after receiving a CLOSE RPC from the Master. 550 * 551 * @param zkw zk reference 552 * @param region region to be created as closing 553 * @param serverName server event originates from 554 * @return version of node after transition, -1 if unsuccessful transition 555 * @throws KeeperException if unexpected zookeeper exception 556 * @throws KeeperException.NodeExistsException if node already exists 557 */ 558 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region, 559 ServerName serverName) 560 throws KeeperException, KeeperException.NodeExistsException { 561 LOG.debug(zkw.prefix("Creating unassigned node for " + 562 region.getEncodedName() + " in a CLOSING state")); 563 564 RegionTransitionData data = new RegionTransitionData( 565 EventType.M_ZK_REGION_CLOSING, region.getRegionName(), serverName); 566 567 String node = getNodeName(zkw, region.getEncodedName()); 568 return ZKUtil.createAndWatch(zkw, node, data.getBytes()); 569 } 570 571 /** 572 * Transitions an existing unassigned node for the specified region which is 573 * currently in the CLOSING state to be in the CLOSED state. 574 * 575 * <p>Does not transition nodes from other states. If for some reason the 576 * node could not be transitioned, the method returns -1. If the transition 577 * is successful, the version of the node after transition is returned. 578 * 579 * <p>This method can fail and return false for three different reasons: 580 * <ul><li>Unassigned node for this region does not exist</li> 581 * <li>Unassigned node for this region is not in CLOSING state</li> 582 * <li>After verifying CLOSING state, update fails because of wrong version 583 * (someone else already transitioned the node)</li> 584 * </ul> 585 * 586 * <p>Does not set any watches. 587 * 588 * <p>This method should only be used by a RegionServer when initiating a 589 * close of a region after receiving a CLOSE RPC from the Master. 590 * 591 * @param zkw zk reference 592 * @param region region to be transitioned to closed 593 * @param serverName server event originates from 594 * @return version of node after transition, -1 if unsuccessful transition 595 * @throws KeeperException if unexpected zookeeper exception 596 */ 597 public static int transitionNodeClosed(ZooKeeperWatcher zkw, 598 HRegionInfo region, ServerName serverName, int expectedVersion) 599 throws KeeperException { 600 return transitionNode(zkw, region, serverName, 601 EventType.M_ZK_REGION_CLOSING, 602 EventType.RS_ZK_REGION_CLOSED, expectedVersion); 603 } 604 605 /** 606 * Transitions an existing unassigned node for the specified region which is 607 * currently in the OFFLINE state to be in the OPENING state. 608 * 609 * <p>Does not transition nodes from other states. If for some reason the 610 * node could not be transitioned, the method returns -1. If the transition 611 * is successful, the version of the node written as OPENING is returned. 612 * 613 * <p>This method can fail and return -1 for three different reasons: 614 * <ul><li>Unassigned node for this region does not exist</li> 615 * <li>Unassigned node for this region is not in OFFLINE state</li> 616 * <li>After verifying OFFLINE state, update fails because of wrong version 617 * (someone else already transitioned the node)</li> 618 * </ul> 619 * 620 * <p>Does not set any watches. 621 * 622 * <p>This method should only be used by a RegionServer when initiating an 623 * open of a region after receiving an OPEN RPC from the Master. 624 * 625 * @param zkw zk reference 626 * @param region region to be transitioned to opening 627 * @param serverName server event originates from 628 * @return version of node after transition, -1 if unsuccessful transition 629 * @throws KeeperException if unexpected zookeeper exception 630 */ 631 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 632 HRegionInfo region, ServerName serverName) 633 throws KeeperException { 634 return transitionNodeOpening(zkw, region, serverName, 635 EventType.M_ZK_REGION_OFFLINE); 636 } 637 638 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 639 HRegionInfo region, ServerName serverName, final EventType beginState) 640 throws KeeperException { 641 return transitionNode(zkw, region, serverName, beginState, 642 EventType.RS_ZK_REGION_OPENING, -1); 643 } 644 645 /** 646 * Retransitions an existing unassigned node for the specified region which is 647 * currently in the OPENING state to be in the OPENING state. 648 * 649 * <p>Does not transition nodes from other states. If for some reason the 650 * node could not be transitioned, the method returns -1. If the transition 651 * is successful, the version of the node rewritten as OPENING is returned. 652 * 653 * <p>This method can fail and return -1 for three different reasons: 654 * <ul><li>Unassigned node for this region does not exist</li> 655 * <li>Unassigned node for this region is not in OPENING state</li> 656 * <li>After verifying OPENING state, update fails because of wrong version 657 * (someone else already transitioned the node)</li> 658 * </ul> 659 * 660 * <p>Does not set any watches. 661 * 662 * <p>This method should only be used by a RegionServer when initiating an 663 * open of a region after receiving an OPEN RPC from the Master. 664 * 665 * @param zkw zk reference 666 * @param region region to be transitioned to opening 667 * @param serverName server event originates from 668 * @return version of node after transition, -1 if unsuccessful transition 669 * @throws KeeperException if unexpected zookeeper exception 670 */ 671 public static int retransitionNodeOpening(ZooKeeperWatcher zkw, 672 HRegionInfo region, ServerName serverName, int expectedVersion) 673 throws KeeperException { 674 return transitionNode(zkw, region, serverName, 675 EventType.RS_ZK_REGION_OPENING, 676 EventType.RS_ZK_REGION_OPENING, expectedVersion); 677 } 678 679 /** 680 * Transitions an existing unassigned node for the specified region which is 681 * currently in the OPENING state to be in the OPENED state. 682 * 683 * <p>Does not transition nodes from other states. If for some reason the 684 * node could not be transitioned, the method returns -1. If the transition 685 * is successful, the version of the node after transition is returned. 686 * 687 * <p>This method can fail and return false for three different reasons: 688 * <ul><li>Unassigned node for this region does not exist</li> 689 * <li>Unassigned node for this region is not in OPENING state</li> 690 * <li>After verifying OPENING state, update fails because of wrong version 691 * (this should never actually happen since an RS only does this transition 692 * following a transition to OPENING. if two RS are conflicting, one would 693 * fail the original transition to OPENING and not this transition)</li> 694 * </ul> 695 * 696 * <p>Does not set any watches. 697 * 698 * <p>This method should only be used by a RegionServer when completing the 699 * open of a region. 700 * 701 * @param zkw zk reference 702 * @param region region to be transitioned to opened 703 * @param serverName server event originates from 704 * @return version of node after transition, -1 if unsuccessful transition 705 * @throws KeeperException if unexpected zookeeper exception 706 */ 707 public static int transitionNodeOpened(ZooKeeperWatcher zkw, 708 HRegionInfo region, ServerName serverName, int expectedVersion) 709 throws KeeperException { 710 return transitionNode(zkw, region, serverName, 711 EventType.RS_ZK_REGION_OPENING, 712 EventType.RS_ZK_REGION_OPENED, expectedVersion); 713 } 714 715 /** 716 * Method that actually performs unassigned node transitions. 717 * 718 * <p>Attempts to transition the unassigned node for the specified region 719 * from the expected state to the state in the specified transition data. 720 * 721 * <p>Method first reads existing data and verifies it is in the expected 722 * state. If the node does not exist or the node is not in the expected 723 * state, the method returns -1. If the transition is successful, the 724 * version number of the node following the transition is returned. 725 * 726 * <p>If the read state is what is expected, it attempts to write the new 727 * state and data into the node. When doing this, it includes the expected 728 * version (determined when the existing state was verified) to ensure that 729 * only one transition is successful. If there is a version mismatch, the 730 * method returns -1. 731 * 732 * <p>If the write is successful, no watch is set and the method returns true. 733 * 734 * @param zkw zk reference 735 * @param region region to be transitioned to opened 736 * @param serverName server event originates from 737 * @param endState state to transition node to if all checks pass 738 * @param beginState state the node must currently be in to do transition 739 * @param expectedVersion expected version of data before modification, or -1 740 * @return version of node after transition, -1 if unsuccessful transition 741 * @throws KeeperException if unexpected zookeeper exception 742 */ 743 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 744 ServerName serverName, EventType beginState, EventType endState, 745 int expectedVersion) 746 throws KeeperException { 747 return transitionNode(zkw, region, serverName, beginState, endState, 748 expectedVersion, null); 749 } 750 751 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 752 ServerName serverName, EventType beginState, EventType endState, 753 int expectedVersion, final byte [] payload) 754 throws KeeperException { 755 String encoded = region.getEncodedName(); 756 if(LOG.isDebugEnabled()) { 757 LOG.debug(zkw.prefix("Attempting to transition node " + 758 HRegionInfo.prettyPrint(encoded) + 759 " from " + beginState.toString() + " to " + endState.toString())); 760 } 761 762 String node = getNodeName(zkw, encoded); 763 zkw.sync(node); 764 765 // Read existing data of the node 766 Stat stat = new Stat(); 767 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat); 768 if (existingBytes == null) { 769 // Node no longer exists. Return -1. It means unsuccessful transition. 770 return -1; 771 } 772 RegionTransitionData existingData = 773 RegionTransitionData.fromBytes(existingBytes); 774 775 // Verify it is the expected version 776 if(expectedVersion != -1 && stat.getVersion() != expectedVersion) { 777 LOG.warn(zkw.prefix("Attempt to transition the " + 778 "unassigned node for " + encoded + 779 " from " + beginState + " to " + endState + " failed, " + 780 "the node existed but was version " + stat.getVersion() + 781 " not the expected version " + expectedVersion)); 782 return -1; 783 } else if (beginState.equals(EventType.M_ZK_REGION_OFFLINE) 784 && endState.equals(EventType.RS_ZK_REGION_OPENING) 785 && expectedVersion == -1 && stat.getVersion() != 0) { 786 // the below check ensures that double assignment doesnot happen. 787 // When the node is created for the first time then the expected version 788 // that is passed will be -1 and the version in znode will be 0. 789 // In all other cases the version in znode will be > 0. 790 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for " 791 + encoded + " from " + beginState + " to " + endState + " failed, " 792 + "the node existed but was version " + stat.getVersion() 793 + " not the expected version " + expectedVersion)); 794 return -1; 795 } 796 797 // Verify it is in expected state 798 if(!existingData.getEventType().equals(beginState)) { 799 LOG.warn(zkw.prefix("Attempt to transition the " + 800 "unassigned node for " + encoded + 801 " from " + beginState + " to " + endState + " failed, " + 802 "the node existed but was in the state " + existingData.getEventType() + 803 " set by the server " + serverName)); 804 return -1; 805 } 806 807 // Write new data, ensuring data has not changed since we last read it 808 try { 809 RegionTransitionData data = new RegionTransitionData(endState, 810 region.getRegionName(), serverName, payload); 811 if(!ZKUtil.setData(zkw, node, data.getBytes(), stat.getVersion())) { 812 LOG.warn(zkw.prefix("Attempt to transition the " + 813 "unassigned node for " + encoded + 814 " from " + beginState + " to " + endState + " failed, " + 815 "the node existed and was in the expected state but then when " + 816 "setting data we got a version mismatch")); 817 return -1; 818 } 819 if(LOG.isDebugEnabled()) { 820 LOG.debug(zkw.prefix("Successfully transitioned node " + encoded + 821 " from " + beginState + " to " + endState)); 822 } 823 return stat.getVersion() + 1; 824 } catch (KeeperException.NoNodeException nne) { 825 LOG.warn(zkw.prefix("Attempt to transition the " + 826 "unassigned node for " + encoded + 827 " from " + beginState + " to " + endState + " failed, " + 828 "the node existed and was in the expected state but then when " + 829 "setting data it no longer existed")); 830 return -1; 831 } 832 } 833 834 /** 835 * Gets the current data in the unassigned node for the specified region name 836 * or fully-qualified path. 837 * 838 * <p>Returns null if the region does not currently have a node. 839 * 840 * <p>Sets a watch on the node if the node exists. 841 * 842 * @param zkw zk reference 843 * @param pathOrRegionName fully-specified path or region name 844 * @return data for the unassigned node 845 * @throws KeeperException if unexpected zookeeper exception 846 */ 847 public static RegionTransitionData getData(ZooKeeperWatcher zkw, 848 String pathOrRegionName) 849 throws KeeperException { 850 String node = pathOrRegionName.startsWith("/") ? 851 pathOrRegionName : getNodeName(zkw, pathOrRegionName); 852 byte [] data = ZKUtil.getDataAndWatch(zkw, node); 853 if(data == null) { 854 return null; 855 } 856 return RegionTransitionData.fromBytes(data); 857 } 858 859 /** 860 * Gets the current data in the unassigned node for the specified region name 861 * or fully-qualified path. 862 * 863 * <p>Returns null if the region does not currently have a node. 864 * 865 * <p>Sets a watch on the node if the node exists. 866 * 867 * @param zkw zk reference 868 * @param pathOrRegionName fully-specified path or region name 869 * @param stat object to populate the version. 870 * @return data for the unassigned node 871 * @throws KeeperException if unexpected zookeeper exception 872 */ 873 public static RegionTransitionData getDataAndWatch(ZooKeeperWatcher zkw, 874 String pathOrRegionName, Stat stat) 875 throws KeeperException { 876 String node = pathOrRegionName.startsWith("/") ? 877 pathOrRegionName : getNodeName(zkw, pathOrRegionName); 878 byte [] data = ZKUtil.getDataAndWatch(zkw, node, stat); 879 if(data == null) { 880 return null; 881 } 882 return RegionTransitionData.fromBytes(data); 883 } 884 885 /** 886 * Gets the current data in the unassigned node for the specified region name 887 * or fully-qualified path. 888 * 889 * <p>Returns null if the region does not currently have a node. 890 * 891 * <p>Does not set a watch. 892 * 893 * @param zkw zk reference 894 * @param pathOrRegionName fully-specified path or region name 895 * @param stat object to store node info into on getData call 896 * @return data for the unassigned node or null if node does not exist 897 * @throws KeeperException if unexpected zookeeper exception 898 */ 899 public static RegionTransitionData getDataNoWatch(ZooKeeperWatcher zkw, 900 String pathOrRegionName, Stat stat) 901 throws KeeperException { 902 String node = pathOrRegionName.startsWith("/") ? 903 pathOrRegionName : getNodeName(zkw, pathOrRegionName); 904 byte [] data = ZKUtil.getDataNoWatch(zkw, node, stat); 905 if (data == null) { 906 return null; 907 } 908 return RegionTransitionData.fromBytes(data); 909 } 910 911 /** 912 * Get the version of the specified znode 913 * @param zkw zk reference 914 * @param region region's info 915 * @return the version of the znode, -1 if it doesn't exist 916 * @throws KeeperException 917 */ 918 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region) 919 throws KeeperException { 920 String znode = getNodeName(zkw, region.getEncodedName()); 921 return ZKUtil.checkExists(zkw, znode); 922 } 923 924 /** 925 * Delete the assignment node regardless of its current state. 926 * <p> 927 * Fail silent even if the node does not exist at all. 928 * @param watcher 929 * @param regionInfo 930 * @throws KeeperException 931 */ 932 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher, 933 HRegionInfo regionInfo) 934 throws KeeperException { 935 String node = getNodeName(watcher, regionInfo.getEncodedName()); 936 ZKUtil.deleteNodeFailSilent(watcher, node); 937 } 938 939 /** 940 * Blocks until there are no node in regions in transition. 941 * <p> 942 * Used in testing only. 943 * @param zkw zk reference 944 * @throws KeeperException 945 * @throws InterruptedException 946 */ 947 public static void blockUntilNoRIT(ZooKeeperWatcher zkw) 948 throws KeeperException, InterruptedException { 949 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 950 List<String> znodes = 951 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 952 if (znodes != null && !znodes.isEmpty()) { 953 for (String znode : znodes) { 954 LOG.debug("ZK RIT -> " + znode); 955 } 956 } 957 Thread.sleep(100); 958 } 959 } 960 961 /** 962 * Blocks until there is at least one node in regions in transition. 963 * <p> 964 * Used in testing only. 965 * @param zkw zk reference 966 * @throws KeeperException 967 * @throws InterruptedException 968 */ 969 public static void blockUntilRIT(ZooKeeperWatcher zkw) 970 throws KeeperException, InterruptedException { 971 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 972 List<String> znodes = 973 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 974 if (znodes == null || znodes.isEmpty()) { 975 LOG.debug("No RIT in ZK"); 976 } 977 Thread.sleep(100); 978 } 979 } 980 981 /** 982 * Verifies that the specified region is in the specified state in ZooKeeper. 983 * <p> 984 * Returns true if region is in transition and in the specified state in 985 * ZooKeeper. Returns false if the region does not exist in ZK or is in 986 * a different state. 987 * <p> 988 * Method synchronizes() with ZK so will yield an up-to-date result but is 989 * a slow read. 990 * @param zkw 991 * @param region 992 * @param expectedState 993 * @return true if region exists and is in expected state 994 */ 995 public static boolean verifyRegionState(ZooKeeperWatcher zkw, 996 HRegionInfo region, EventType expectedState) 997 throws KeeperException { 998 String encoded = region.getEncodedName(); 999 1000 String node = getNodeName(zkw, encoded); 1001 zkw.sync(node); 1002 1003 // Read existing data of the node 1004 byte [] existingBytes = null; 1005 try { 1006 existingBytes = ZKUtil.getDataAndWatch(zkw, node); 1007 } catch (KeeperException.NoNodeException nne) { 1008 return false; 1009 } catch (KeeperException e) { 1010 throw e; 1011 } 1012 if (existingBytes == null) return false; 1013 RegionTransitionData existingData = 1014 RegionTransitionData.fromBytes(existingBytes); 1015 if (existingData.getEventType() == expectedState){ 1016 return true; 1017 } 1018 return false; 1019 } 1020 }