1 /** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 package org.apache.hadoop.hbase.zookeeper; 20 21 import org.apache.commons.logging.Log; 22 import org.apache.commons.logging.LogFactory; 23 import org.apache.hadoop.classification.InterfaceAudience; 24 import org.apache.hadoop.classification.InterfaceStability; 25 import org.apache.hadoop.hbase.HConstants; 26 import org.apache.hadoop.hbase.HRegionInfo; 27 import org.apache.hadoop.hbase.RegionTransition; 28 import org.apache.hadoop.hbase.ServerName; 29 import org.apache.hadoop.hbase.exceptions.DeserializationException; 30 import org.apache.hadoop.hbase.executor.EventType; 31 import org.apache.zookeeper.AsyncCallback; 32 import org.apache.zookeeper.KeeperException; 33 import org.apache.zookeeper.KeeperException.Code; 34 import org.apache.zookeeper.KeeperException.NoNodeException; 35 import org.apache.zookeeper.KeeperException.NodeExistsException; 36 import org.apache.zookeeper.data.Stat; 37 38 import java.util.List; 39 40 // We should not be importing this Type here, nor a RegionTransition, etc. This class should be 41 // about zk and bytes only. 42 43 /** 44 * Utility class for doing region assignment in ZooKeeper. This class extends 45 * stuff done in {@link ZKUtil} to cover specific assignment operations. 46 * <p> 47 * Contains only static methods and constants. 48 * <p> 49 * Used by both the Master and RegionServer. 50 * <p> 51 * All valid transitions outlined below: 52 * <p> 53 * <b>MASTER</b> 54 * <ol> 55 * <li> 56 * Master creates an unassigned node as OFFLINE. 57 * - Cluster startup and table enabling. 58 * </li> 59 * <li> 60 * Master forces an existing unassigned node to OFFLINE. 61 * - RegionServer failure. 62 * - Allows transitions from all states to OFFLINE. 63 * </li> 64 * <li> 65 * Master deletes an unassigned node that was in a OPENED state. 66 * - Normal region transitions. Besides cluster startup, no other deletions 67 * of unassigned nodes is allowed. 68 * </li> 69 * <li> 70 * Master deletes all unassigned nodes regardless of state. 71 * - Cluster startup before any assignment happens. 72 * </li> 73 * </ol> 74 * <p> 75 * <b>REGIONSERVER</b> 76 * <ol> 77 * <li> 78 * RegionServer creates an unassigned node as CLOSING. 79 * - All region closes will do this in response to a CLOSE RPC from Master. 80 * - A node can never be transitioned to CLOSING, only created. 81 * </li> 82 * <li> 83 * RegionServer transitions an unassigned node from CLOSING to CLOSED. 84 * - Normal region closes. CAS operation. 85 * </li> 86 * <li> 87 * RegionServer transitions an unassigned node from OFFLINE to OPENING. 88 * - All region opens will do this in response to an OPEN RPC from the Master. 89 * - Normal region opens. CAS operation. 90 * </li> 91 * <li> 92 * RegionServer transitions an unassigned node from OPENING to OPENED. 93 * - Normal region opens. CAS operation. 94 * </li> 95 * </ol> 96 */ 97 @InterfaceAudience.Public 98 @InterfaceStability.Evolving 99 public class ZKAssign { 100 private static final Log LOG = LogFactory.getLog(ZKAssign.class); 101 102 /** 103 * Gets the full path node name for the unassigned node for the specified 104 * region. 105 * @param zkw zk reference 106 * @param regionName region name 107 * @return full path node name 108 */ 109 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) { 110 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName); 111 } 112 113 /** 114 * Gets the region name from the full path node name of an unassigned node. 115 * @param path full zk path 116 * @return region name 117 */ 118 public static String getRegionName(ZooKeeperWatcher zkw, String path) { 119 return path.substring(zkw.assignmentZNode.length()+1); 120 } 121 122 // Master methods 123 124 /** 125 * Creates a new unassigned node in the OFFLINE state for the specified region. 126 * 127 * <p>Does not transition nodes from other states. If a node already exists 128 * for this region, a {@link NodeExistsException} will be thrown. 129 * 130 * <p>Sets a watcher on the unassigned region node if the method is successful. 131 * 132 * <p>This method should only be used during cluster startup and the enabling 133 * of a table. 134 * 135 * @param zkw zk reference 136 * @param region region to be created as offline 137 * @param serverName server transition will happen on 138 * @throws KeeperException if unexpected zookeeper exception 139 * @throws KeeperException.NodeExistsException if node already exists 140 */ 141 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 142 ServerName serverName) 143 throws KeeperException, KeeperException.NodeExistsException { 144 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE); 145 } 146 147 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 148 ServerName serverName, final EventType event) 149 throws KeeperException, KeeperException.NodeExistsException { 150 LOG.debug(zkw.prefix("Creating unassigned node for " + 151 region.getEncodedName() + " in OFFLINE state")); 152 RegionTransition rt = 153 RegionTransition.createRegionTransition(event, region.getRegionName(), serverName); 154 String node = getNodeName(zkw, region.getEncodedName()); 155 ZKUtil.createAndWatch(zkw, node, rt.toByteArray()); 156 } 157 158 /** 159 * Creates an unassigned node in the OFFLINE state for the specified region. 160 * <p> 161 * Runs asynchronously. Depends on no pre-existing znode. 162 * 163 * <p>Sets a watcher on the unassigned region node. 164 * 165 * @param zkw zk reference 166 * @param region region to be created as offline 167 * @param serverName server transition will happen on 168 * @param cb 169 * @param ctx 170 * @throws KeeperException if unexpected zookeeper exception 171 * @throws KeeperException.NodeExistsException if node already exists 172 */ 173 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw, 174 HRegionInfo region, ServerName serverName, 175 final AsyncCallback.StringCallback cb, final Object ctx) 176 throws KeeperException { 177 LOG.debug(zkw.prefix("Async create of unassigned node for " + 178 region.getEncodedName() + " with OFFLINE state")); 179 RegionTransition rt = 180 RegionTransition.createRegionTransition( 181 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 182 String node = getNodeName(zkw, region.getEncodedName()); 183 ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx); 184 } 185 186 /** 187 * Creates or force updates an unassigned node to the OFFLINE state for the 188 * specified region. 189 * <p> 190 * Attempts to create the node but if it exists will force it to transition to 191 * and OFFLINE state. 192 * 193 * <p>Sets a watcher on the unassigned region node if the method is 194 * successful. 195 * 196 * <p>This method should be used when assigning a region. 197 * 198 * @param zkw zk reference 199 * @param region region to be created as offline 200 * @param serverName server transition will happen on 201 * @return the version of the znode created in OFFLINE state, -1 if 202 * unsuccessful. 203 * @throws KeeperException if unexpected zookeeper exception 204 * @throws KeeperException.NodeExistsException if node already exists 205 */ 206 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw, 207 HRegionInfo region, ServerName serverName) throws KeeperException { 208 LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " + 209 region.getEncodedName() + " with OFFLINE state")); 210 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE, 211 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY); 212 byte [] data = rt.toByteArray(); 213 String node = getNodeName(zkw, region.getEncodedName()); 214 zkw.sync(node); 215 int version = ZKUtil.checkExists(zkw, node); 216 if (version == -1) { 217 return ZKUtil.createAndWatch(zkw, node, data); 218 } else { 219 boolean setData = false; 220 try { 221 setData = ZKUtil.setData(zkw, node, data, version); 222 // Setdata throws KeeperException which aborts the Master. So we are 223 // catching it here. 224 // If just before setting the znode to OFFLINE if the RS has made any 225 // change to the 226 // znode state then we need to return -1. 227 } catch (KeeperException kpe) { 228 LOG.info("Version mismatch while setting the node to OFFLINE state."); 229 return -1; 230 } 231 if (!setData) { 232 return -1; 233 } else { 234 // We successfully forced to OFFLINE, reset watch and handle if 235 // the state changed in between our set and the watch 236 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName()); 237 rt = getRegionTransition(bytes); 238 if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) { 239 // state changed, need to process 240 return -1; 241 } 242 } 243 } 244 return version + 1; 245 } 246 247 /** 248 * Deletes an existing unassigned node that is in the OPENED state for the 249 * specified region. 250 * 251 * <p>If a node does not already exist for this region, a 252 * {@link NoNodeException} will be thrown. 253 * 254 * <p>No watcher is set whether this succeeds or not. 255 * 256 * <p>Returns false if the node was not in the proper state but did exist. 257 * 258 * <p>This method is used during normal region transitions when a region 259 * finishes successfully opening. This is the Master acknowledging completion 260 * of the specified regions transition. 261 * 262 * @param zkw zk reference 263 * @param encodedRegionName opened region to be deleted from zk 264 * @throws KeeperException if unexpected zookeeper exception 265 * @throws KeeperException.NoNodeException if node does not exist 266 */ 267 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw, 268 String encodedRegionName) 269 throws KeeperException, KeeperException.NoNodeException { 270 return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_OPENED); 271 } 272 273 /** 274 * Deletes an existing unassigned node that is in the OFFLINE state for the 275 * specified region. 276 * 277 * <p>If a node does not already exist for this region, a 278 * {@link NoNodeException} will be thrown. 279 * 280 * <p>No watcher is set whether this succeeds or not. 281 * 282 * <p>Returns false if the node was not in the proper state but did exist. 283 * 284 * <p>This method is used during master failover when the regions on an RS 285 * that has died are all set to OFFLINE before being processed. 286 * 287 * @param zkw zk reference 288 * @param encodedRegionName closed region to be deleted from zk 289 * @throws KeeperException if unexpected zookeeper exception 290 * @throws KeeperException.NoNodeException if node does not exist 291 */ 292 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw, 293 String encodedRegionName) 294 throws KeeperException, KeeperException.NoNodeException { 295 return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_OFFLINE); 296 } 297 298 /** 299 * Deletes an existing unassigned node that is in the CLOSED state for the 300 * specified region. 301 * 302 * <p>If a node does not already exist for this region, a 303 * {@link NoNodeException} will be thrown. 304 * 305 * <p>No watcher is set whether this succeeds or not. 306 * 307 * <p>Returns false if the node was not in the proper state but did exist. 308 * 309 * <p>This method is used during table disables when a region finishes 310 * successfully closing. This is the Master acknowledging completion 311 * of the specified regions transition to being closed. 312 * 313 * @param zkw zk reference 314 * @param encodedRegionName closed region to be deleted from zk 315 * @throws KeeperException if unexpected zookeeper exception 316 * @throws KeeperException.NoNodeException if node does not exist 317 */ 318 public static boolean deleteClosedNode(ZooKeeperWatcher zkw, 319 String encodedRegionName) 320 throws KeeperException, KeeperException.NoNodeException { 321 return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_CLOSED); 322 } 323 324 /** 325 * Deletes an existing unassigned node that is in the CLOSING state for the 326 * specified region. 327 * 328 * <p>If a node does not already exist for this region, a 329 * {@link NoNodeException} will be thrown. 330 * 331 * <p>No watcher is set whether this succeeds or not. 332 * 333 * <p>Returns false if the node was not in the proper state but did exist. 334 * 335 * <p>This method is used during table disables when a region finishes 336 * successfully closing. This is the Master acknowledging completion 337 * of the specified regions transition to being closed. 338 * 339 * @param zkw zk reference 340 * @param region closing region to be deleted from zk 341 * @throws KeeperException if unexpected zookeeper exception 342 * @throws KeeperException.NoNodeException if node does not exist 343 */ 344 public static boolean deleteClosingNode(ZooKeeperWatcher zkw, 345 HRegionInfo region) 346 throws KeeperException, KeeperException.NoNodeException { 347 String encodedRegionName = region.getEncodedName(); 348 return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_CLOSING); 349 } 350 351 /** 352 * Deletes an existing unassigned node that is in the specified state for the 353 * specified region. 354 * 355 * <p>If a node does not already exist for this region, a 356 * {@link NoNodeException} will be thrown. 357 * 358 * <p>No watcher is set whether this succeeds or not. 359 * 360 * <p>Returns false if the node was not in the proper state but did exist. 361 * 362 * <p>This method is used when a region finishes opening/closing. 363 * The Master acknowledges completion 364 * of the specified regions transition to being closed/opened. 365 * 366 * @param zkw zk reference 367 * @param encodedRegionName region to be deleted from zk 368 * @param expectedState state region must be in for delete to complete 369 * @throws KeeperException if unexpected zookeeper exception 370 * @throws KeeperException.NoNodeException if node does not exist 371 */ 372 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 373 EventType expectedState) 374 throws KeeperException, KeeperException.NoNodeException { 375 return deleteNode(zkw, encodedRegionName, expectedState, -1); 376 } 377 378 /** 379 * Deletes an existing unassigned node that is in the specified state for the 380 * specified region. 381 * 382 * <p>If a node does not already exist for this region, a 383 * {@link NoNodeException} will be thrown. 384 * 385 * <p>No watcher is set whether this succeeds or not. 386 * 387 * <p>Returns false if the node was not in the proper state but did exist. 388 * 389 * <p>This method is used when a region finishes opening/closing. 390 * The Master acknowledges completion 391 * of the specified regions transition to being closed/opened. 392 * 393 * @param zkw zk reference 394 * @param encodedRegionName region to be deleted from zk 395 * @param expectedState state region must be in for delete to complete 396 * @param expectedVersion of the znode that is to be deleted. 397 * If expectedVersion need not be compared while deleting the znode 398 * pass -1 399 * @throws KeeperException if unexpected zookeeper exception 400 * @throws KeeperException.NoNodeException if node does not exist 401 */ 402 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 403 EventType expectedState, int expectedVersion) 404 throws KeeperException, KeeperException.NoNodeException { 405 LOG.debug(zkw.prefix("Deleting existing unassigned " + 406 "node for " + encodedRegionName + " that is in expected state " + expectedState)); 407 String node = getNodeName(zkw, encodedRegionName); 408 zkw.sync(node); 409 Stat stat = new Stat(); 410 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat); 411 if (bytes == null) { 412 // If it came back null, node does not exist. 413 throw KeeperException.create(Code.NONODE); 414 } 415 RegionTransition rt = getRegionTransition(bytes); 416 EventType et = rt.getEventType(); 417 if (!et.equals(expectedState)) { 418 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " + 419 expectedState + " state but node is in " + et + " state")); 420 return false; 421 } 422 if (expectedVersion != -1 423 && stat.getVersion() != expectedVersion) { 424 LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" + 425 " the expected one. Got a version mismatch"); 426 return false; 427 } 428 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) { 429 LOG.warn(zkw.prefix("Attempting to delete " + 430 "unassigned node " + encodedRegionName + " in " + expectedState + 431 " state but after verifying state, we got a version mismatch")); 432 return false; 433 } 434 LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " + 435 encodedRegionName + " in expected state " + expectedState)); 436 return true; 437 } 438 439 /** 440 * Deletes all unassigned nodes regardless of their state. 441 * 442 * <p>No watchers are set. 443 * 444 * <p>This method is used by the Master during cluster startup to clear out 445 * any existing state from other cluster runs. 446 * 447 * @param zkw zk reference 448 * @throws KeeperException if unexpected zookeeper exception 449 */ 450 public static void deleteAllNodes(ZooKeeperWatcher zkw) 451 throws KeeperException { 452 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes")); 453 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode); 454 } 455 456 /** 457 * Creates a new unassigned node in the CLOSING state for the specified 458 * region. 459 * 460 * <p>Does not transition nodes from any states. If a node already exists 461 * for this region, a {@link NodeExistsException} will be thrown. 462 * 463 * <p>If creation is successful, returns the version number of the CLOSING 464 * node created. 465 * 466 * <p>Set a watch. 467 * 468 * <p>This method should only be used by a Master when initiating a 469 * close of a region before sending a close request to the region server. 470 * 471 * @param zkw zk reference 472 * @param region region to be created as closing 473 * @param serverName server transition will happen on 474 * @return version of node after transition, -1 if unsuccessful transition 475 * @throws KeeperException if unexpected zookeeper exception 476 * @throws KeeperException.NodeExistsException if node already exists 477 */ 478 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region, 479 ServerName serverName) 480 throws KeeperException, KeeperException.NodeExistsException { 481 LOG.debug(zkw.prefix("Creating unassigned node for " + 482 region.getEncodedName() + " in a CLOSING state")); 483 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING, 484 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY); 485 String node = getNodeName(zkw, region.getEncodedName()); 486 return ZKUtil.createAndWatch(zkw, node, rt.toByteArray()); 487 } 488 489 // RegionServer methods 490 491 /** 492 * Transitions an existing unassigned node for the specified region which is 493 * currently in the CLOSING state to be in the CLOSED state. 494 * 495 * <p>Does not transition nodes from other states. If for some reason the 496 * node could not be transitioned, the method returns -1. If the transition 497 * is successful, the version of the node after transition is returned. 498 * 499 * <p>This method can fail and return false for three different reasons: 500 * <ul><li>Unassigned node for this region does not exist</li> 501 * <li>Unassigned node for this region is not in CLOSING state</li> 502 * <li>After verifying CLOSING state, update fails because of wrong version 503 * (someone else already transitioned the node)</li> 504 * </ul> 505 * 506 * <p>Does not set any watches. 507 * 508 * <p>This method should only be used by a RegionServer when initiating a 509 * close of a region after receiving a CLOSE RPC from the Master. 510 * 511 * @param zkw zk reference 512 * @param region region to be transitioned to closed 513 * @param serverName server transition happens on 514 * @return version of node after transition, -1 if unsuccessful transition 515 * @throws KeeperException if unexpected zookeeper exception 516 */ 517 public static int transitionNodeClosed(ZooKeeperWatcher zkw, 518 HRegionInfo region, ServerName serverName, int expectedVersion) 519 throws KeeperException { 520 return transitionNode(zkw, region, serverName, 521 EventType.M_ZK_REGION_CLOSING, 522 EventType.RS_ZK_REGION_CLOSED, expectedVersion); 523 } 524 525 /** 526 * Transitions an existing unassigned node for the specified region which is 527 * currently in the OFFLINE state to be in the OPENING state. 528 * 529 * <p>Does not transition nodes from other states. If for some reason the 530 * node could not be transitioned, the method returns -1. If the transition 531 * is successful, the version of the node written as OPENING is returned. 532 * 533 * <p>This method can fail and return -1 for three different reasons: 534 * <ul><li>Unassigned node for this region does not exist</li> 535 * <li>Unassigned node for this region is not in OFFLINE state</li> 536 * <li>After verifying OFFLINE state, update fails because of wrong version 537 * (someone else already transitioned the node)</li> 538 * </ul> 539 * 540 * <p>Does not set any watches. 541 * 542 * <p>This method should only be used by a RegionServer when initiating an 543 * open of a region after receiving an OPEN RPC from the Master. 544 * 545 * @param zkw zk reference 546 * @param region region to be transitioned to opening 547 * @param serverName server transition happens on 548 * @return version of node after transition, -1 if unsuccessful transition 549 * @throws KeeperException if unexpected zookeeper exception 550 */ 551 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 552 HRegionInfo region, ServerName serverName) 553 throws KeeperException { 554 return transitionNodeOpening(zkw, region, serverName, 555 EventType.M_ZK_REGION_OFFLINE); 556 } 557 558 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 559 HRegionInfo region, ServerName serverName, final EventType beginState) 560 throws KeeperException { 561 return transitionNode(zkw, region, serverName, beginState, 562 EventType.RS_ZK_REGION_OPENING, -1); 563 } 564 565 /** 566 * Retransitions an existing unassigned node for the specified region which is 567 * currently in the OPENING state to be in the OPENING state. 568 * 569 * <p>Does not transition nodes from other states. If for some reason the 570 * node could not be transitioned, the method returns -1. If the transition 571 * is successful, the version of the node rewritten as OPENING is returned. 572 * 573 * <p>This method can fail and return -1 for three different reasons: 574 * <ul><li>Unassigned node for this region does not exist</li> 575 * <li>Unassigned node for this region is not in OPENING state</li> 576 * <li>After verifying OPENING state, update fails because of wrong version 577 * (someone else already transitioned the node)</li> 578 * </ul> 579 * 580 * <p>Does not set any watches. 581 * 582 * <p>This method should only be used by a RegionServer when initiating an 583 * open of a region after receiving an OPEN RPC from the Master. 584 * 585 * @param zkw zk reference 586 * @param region region to be transitioned to opening 587 * @param serverName server transition happens on 588 * @return version of node after transition, -1 if unsuccessful transition 589 * @throws KeeperException if unexpected zookeeper exception 590 */ 591 public static int retransitionNodeOpening(ZooKeeperWatcher zkw, 592 HRegionInfo region, ServerName serverName, int expectedVersion) 593 throws KeeperException { 594 return transitionNode(zkw, region, serverName, 595 EventType.RS_ZK_REGION_OPENING, 596 EventType.RS_ZK_REGION_OPENING, expectedVersion); 597 } 598 599 /** 600 * Transitions an existing unassigned node for the specified region which is 601 * currently in the OPENING state to be in the OPENED state. 602 * 603 * <p>Does not transition nodes from other states. If for some reason the 604 * node could not be transitioned, the method returns -1. If the transition 605 * is successful, the version of the node after transition is returned. 606 * 607 * <p>This method can fail and return false for three different reasons: 608 * <ul><li>Unassigned node for this region does not exist</li> 609 * <li>Unassigned node for this region is not in OPENING state</li> 610 * <li>After verifying OPENING state, update fails because of wrong version 611 * (this should never actually happen since an RS only does this transition 612 * following a transition to OPENING. if two RS are conflicting, one would 613 * fail the original transition to OPENING and not this transition)</li> 614 * </ul> 615 * 616 * <p>Does not set any watches. 617 * 618 * <p>This method should only be used by a RegionServer when completing the 619 * open of a region. 620 * 621 * @param zkw zk reference 622 * @param region region to be transitioned to opened 623 * @param serverName server transition happens on 624 * @return version of node after transition, -1 if unsuccessful transition 625 * @throws KeeperException if unexpected zookeeper exception 626 */ 627 public static int transitionNodeOpened(ZooKeeperWatcher zkw, 628 HRegionInfo region, ServerName serverName, int expectedVersion) 629 throws KeeperException { 630 return transitionNode(zkw, region, serverName, 631 EventType.RS_ZK_REGION_OPENING, 632 EventType.RS_ZK_REGION_OPENED, expectedVersion); 633 } 634 635 /** 636 * 637 * @param zkw zk reference 638 * @param region region to be closed 639 * @param expectedVersion expected version of the znode 640 * @return true if the znode exists, has the right version and the right state. False otherwise. 641 * @throws KeeperException 642 */ 643 public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region, 644 int expectedVersion) throws KeeperException { 645 646 final String encoded = getNodeName(zkw, region.getEncodedName()); 647 zkw.sync(encoded); 648 649 // Read existing data of the node 650 Stat stat = new Stat(); 651 byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat); 652 653 if (existingBytes == null) { 654 LOG.warn(zkw.prefix("Attempt to check the " + 655 "closing node for " + encoded + 656 ". The node does not exist")); 657 return false; 658 } 659 660 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 661 LOG.warn(zkw.prefix("Attempt to check the " + 662 "closing node for " + encoded + 663 ". The node existed but was version " + stat.getVersion() + 664 " not the expected version " + expectedVersion)); 665 return false; 666 } 667 668 RegionTransition rt = getRegionTransition(existingBytes); 669 670 if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) { 671 LOG.warn(zkw.prefix("Attempt to check the " + 672 "closing node for " + encoded + 673 ". The node existed but was in an unexpected state: " + rt.getEventType())); 674 return false; 675 } 676 677 return true; 678 } 679 680 /** 681 * Method that actually performs unassigned node transitions. 682 * 683 * <p>Attempts to transition the unassigned node for the specified region 684 * from the expected state to the state in the specified transition data. 685 * 686 * <p>Method first reads existing data and verifies it is in the expected 687 * state. If the node does not exist or the node is not in the expected 688 * state, the method returns -1. If the transition is successful, the 689 * version number of the node following the transition is returned. 690 * 691 * <p>If the read state is what is expected, it attempts to write the new 692 * state and data into the node. When doing this, it includes the expected 693 * version (determined when the existing state was verified) to ensure that 694 * only one transition is successful. If there is a version mismatch, the 695 * method returns -1. 696 * 697 * <p>If the write is successful, no watch is set and the method returns true. 698 * 699 * @param zkw zk reference 700 * @param region region to be transitioned to opened 701 * @param serverName server transition happens on 702 * @param endState state to transition node to if all checks pass 703 * @param beginState state the node must currently be in to do transition 704 * @param expectedVersion expected version of data before modification, or -1 705 * @return version of node after transition, -1 if unsuccessful transition 706 * @throws KeeperException if unexpected zookeeper exception 707 */ 708 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 709 ServerName serverName, EventType beginState, EventType endState, 710 int expectedVersion) 711 throws KeeperException { 712 return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null); 713 } 714 715 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 716 ServerName serverName, EventType beginState, EventType endState, 717 int expectedVersion, final byte [] payload) 718 throws KeeperException { 719 String encoded = region.getEncodedName(); 720 if(LOG.isDebugEnabled()) { 721 LOG.debug(zkw.prefix("Attempting to transition node " + 722 HRegionInfo.prettyPrint(encoded) + 723 " from " + beginState.toString() + " to " + endState.toString())); 724 } 725 726 String node = getNodeName(zkw, encoded); 727 zkw.sync(node); 728 729 // Read existing data of the node 730 Stat stat = new Stat(); 731 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat); 732 if (existingBytes == null) { 733 // Node no longer exists. Return -1. It means unsuccessful transition. 734 return -1; 735 } 736 RegionTransition rt = getRegionTransition(existingBytes); 737 738 // Verify it is the expected version 739 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 740 LOG.warn(zkw.prefix("Attempt to transition the " + 741 "unassigned node for " + encoded + 742 " from " + beginState + " to " + endState + " failed, " + 743 "the node existed but was version " + stat.getVersion() + 744 " not the expected version " + expectedVersion)); 745 return -1; 746 } else if (beginState.equals(EventType.M_ZK_REGION_OFFLINE) 747 && endState.equals(EventType.RS_ZK_REGION_OPENING) 748 && expectedVersion == -1 && stat.getVersion() != 0) { 749 // the below check ensures that double assignment doesnot happen. 750 // When the node is created for the first time then the expected version 751 // that is passed will be -1 and the version in znode will be 0. 752 // In all other cases the version in znode will be > 0. 753 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for " 754 + encoded + " from " + beginState + " to " + endState + " failed, " 755 + "the node existed but was version " + stat.getVersion() 756 + " not the expected version " + expectedVersion)); 757 return -1; 758 } 759 760 // Verify it is in expected state 761 EventType et = rt.getEventType(); 762 if (!et.equals(beginState)) { 763 String existingServer = (rt.getServerName() == null) 764 ? "<unknown>" : rt.getServerName().toString(); 765 LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded 766 + " from " + beginState + " to " + endState + " failed, the node existed but" 767 + " was in the state " + et + " set by the server " + existingServer)); 768 return -1; 769 } 770 771 // Write new data, ensuring data has not changed since we last read it 772 try { 773 rt = RegionTransition.createRegionTransition( 774 endState, region.getRegionName(), serverName, payload); 775 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) { 776 LOG.warn(zkw.prefix("Attempt to transition the " + 777 "unassigned node for " + encoded + 778 " from " + beginState + " to " + endState + " failed, " + 779 "the node existed and was in the expected state but then when " + 780 "setting data we got a version mismatch")); 781 return -1; 782 } 783 if(LOG.isDebugEnabled()) { 784 LOG.debug(zkw.prefix("Successfully transitioned node " + encoded + 785 " from " + beginState + " to " + endState)); 786 } 787 return stat.getVersion() + 1; 788 } catch (KeeperException.NoNodeException nne) { 789 LOG.warn(zkw.prefix("Attempt to transition the " + 790 "unassigned node for " + encoded + 791 " from " + beginState + " to " + endState + " failed, " + 792 "the node existed and was in the expected state but then when " + 793 "setting data it no longer existed")); 794 return -1; 795 } 796 } 797 798 private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException { 799 try { 800 return RegionTransition.parseFrom(bytes); 801 } catch (DeserializationException e) { 802 // Convert to a zk exception for now. Otherwise have to change API 803 throw ZKUtil.convert(e); 804 } 805 } 806 807 /** 808 * Gets the current data in the unassigned node for the specified region name 809 * or fully-qualified path. 810 * 811 * <p>Returns null if the region does not currently have a node. 812 * 813 * <p>Sets a watch on the node if the node exists. 814 * 815 * @param zkw zk reference 816 * @param pathOrRegionName fully-specified path or region name 817 * @return znode content 818 * @throws KeeperException if unexpected zookeeper exception 819 */ 820 public static byte [] getData(ZooKeeperWatcher zkw, 821 String pathOrRegionName) 822 throws KeeperException { 823 String node = getPath(zkw, pathOrRegionName); 824 return ZKUtil.getDataAndWatch(zkw, node); 825 } 826 827 /** 828 * Gets the current data in the unassigned node for the specified region name 829 * or fully-qualified path. 830 * 831 * <p>Returns null if the region does not currently have a node. 832 * 833 * <p>Sets a watch on the node if the node exists. 834 * 835 * @param zkw zk reference 836 * @param pathOrRegionName fully-specified path or region name 837 * @param stat object to populate the version. 838 * @return znode content 839 * @throws KeeperException if unexpected zookeeper exception 840 */ 841 public static byte [] getDataAndWatch(ZooKeeperWatcher zkw, 842 String pathOrRegionName, Stat stat) 843 throws KeeperException { 844 String node = getPath(zkw, pathOrRegionName); 845 return ZKUtil.getDataAndWatch(zkw, node, stat); 846 } 847 848 /** 849 * Gets the current data in the unassigned node for the specified region name 850 * or fully-qualified path. 851 * 852 * <p>Returns null if the region does not currently have a node. 853 * 854 * <p>Does not set a watch. 855 * 856 * @param zkw zk reference 857 * @param pathOrRegionName fully-specified path or region name 858 * @param stat object to store node info into on getData call 859 * @return znode content 860 * @throws KeeperException if unexpected zookeeper exception 861 */ 862 public static byte [] getDataNoWatch(ZooKeeperWatcher zkw, 863 String pathOrRegionName, Stat stat) 864 throws KeeperException { 865 String node = getPath(zkw, pathOrRegionName); 866 return ZKUtil.getDataNoWatch(zkw, node, stat); 867 } 868 869 /** 870 * @param zkw 871 * @param pathOrRegionName 872 * @return Path to znode 873 */ 874 public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) { 875 return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName); 876 } 877 878 /** 879 * Get the version of the specified znode 880 * @param zkw zk reference 881 * @param region region's info 882 * @return the version of the znode, -1 if it doesn't exist 883 * @throws KeeperException 884 */ 885 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region) 886 throws KeeperException { 887 String znode = getNodeName(zkw, region.getEncodedName()); 888 return ZKUtil.checkExists(zkw, znode); 889 } 890 891 /** 892 * Delete the assignment node regardless of its current state. 893 * <p> 894 * Fail silent even if the node does not exist at all. 895 * @param watcher 896 * @param regionInfo 897 * @throws KeeperException 898 */ 899 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher, 900 HRegionInfo regionInfo) 901 throws KeeperException { 902 String node = getNodeName(watcher, regionInfo.getEncodedName()); 903 ZKUtil.deleteNodeFailSilent(watcher, node); 904 } 905 906 /** 907 * Blocks until there are no node in regions in transition. 908 * <p> 909 * Used in testing only. 910 * @param zkw zk reference 911 * @throws KeeperException 912 * @throws InterruptedException 913 */ 914 public static void blockUntilNoRIT(ZooKeeperWatcher zkw) 915 throws KeeperException, InterruptedException { 916 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 917 List<String> znodes = 918 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 919 if (znodes != null && !znodes.isEmpty()) { 920 for (String znode : znodes) { 921 LOG.debug("ZK RIT -> " + znode); 922 } 923 } 924 Thread.sleep(100); 925 } 926 } 927 928 /** 929 * Blocks until there is at least one node in regions in transition. 930 * <p> 931 * Used in testing only. 932 * @param zkw zk reference 933 * @throws KeeperException 934 * @throws InterruptedException 935 */ 936 public static void blockUntilRIT(ZooKeeperWatcher zkw) 937 throws KeeperException, InterruptedException { 938 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 939 List<String> znodes = 940 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 941 if (znodes == null || znodes.isEmpty()) { 942 LOG.debug("No RIT in ZK"); 943 } 944 Thread.sleep(100); 945 } 946 } 947 948 /** 949 * Presume bytes are serialized unassigned data structure 950 * @param znodeBytes 951 * @return String of the deserialized znode bytes. 952 */ 953 static String toString(final byte[] znodeBytes) { 954 // This method should not exist. Used by ZKUtil stringifying RegionTransition. Have the 955 // method in here so RegionTransition does not leak into ZKUtil. 956 try { 957 RegionTransition rt = RegionTransition.parseFrom(znodeBytes); 958 return rt.toString(); 959 } catch (DeserializationException e) { 960 return ""; 961 } 962 } 963 }