1 /** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 package org.apache.hadoop.hbase.zookeeper; 20 21 import java.util.List; 22 23 import org.apache.commons.logging.Log; 24 import org.apache.commons.logging.LogFactory; 25 import org.apache.hadoop.classification.InterfaceAudience; 26 import org.apache.hadoop.hbase.HConstants; 27 import org.apache.hadoop.hbase.HRegionInfo; 28 import org.apache.hadoop.hbase.RegionTransition; 29 import org.apache.hadoop.hbase.ServerName; 30 import org.apache.hadoop.hbase.exceptions.DeserializationException; 31 import org.apache.hadoop.hbase.executor.EventType; 32 import org.apache.zookeeper.AsyncCallback; 33 import org.apache.zookeeper.KeeperException; 34 import org.apache.zookeeper.KeeperException.Code; 35 import org.apache.zookeeper.KeeperException.NoNodeException; 36 import org.apache.zookeeper.KeeperException.NodeExistsException; 37 import org.apache.zookeeper.data.Stat; 38 39 // We should not be importing this Type here, nor a RegionTransition, etc. This class should be 40 // about zk and bytes only. 41 42 /** 43 * Utility class for doing region assignment in ZooKeeper. This class extends 44 * stuff done in {@link ZKUtil} to cover specific assignment operations. 45 * <p> 46 * Contains only static methods and constants. 47 * <p> 48 * Used by both the Master and RegionServer. 49 * <p> 50 * All valid transitions outlined below: 51 * <p> 52 * <b>MASTER</b> 53 * <ol> 54 * <li> 55 * Master creates an unassigned node as OFFLINE. 56 * - Cluster startup and table enabling. 57 * </li> 58 * <li> 59 * Master forces an existing unassigned node to OFFLINE. 60 * - RegionServer failure. 61 * - Allows transitions from all states to OFFLINE. 62 * </li> 63 * <li> 64 * Master deletes an unassigned node that was in a OPENED state. 65 * - Normal region transitions. Besides cluster startup, no other deletions 66 * of unassigned nodes is allowed. 67 * </li> 68 * <li> 69 * Master deletes all unassigned nodes regardless of state. 70 * - Cluster startup before any assignment happens. 71 * </li> 72 * </ol> 73 * <p> 74 * <b>REGIONSERVER</b> 75 * <ol> 76 * <li> 77 * RegionServer creates an unassigned node as CLOSING. 78 * - All region closes will do this in response to a CLOSE RPC from Master. 79 * - A node can never be transitioned to CLOSING, only created. 80 * </li> 81 * <li> 82 * RegionServer transitions an unassigned node from CLOSING to CLOSED. 83 * - Normal region closes. CAS operation. 84 * </li> 85 * <li> 86 * RegionServer transitions an unassigned node from OFFLINE to OPENING. 87 * - All region opens will do this in response to an OPEN RPC from the Master. 88 * - Normal region opens. CAS operation. 89 * </li> 90 * <li> 91 * RegionServer transitions an unassigned node from OPENING to OPENED. 92 * - Normal region opens. CAS operation. 93 * </li> 94 * </ol> 95 */ 96 @InterfaceAudience.Private 97 public class ZKAssign { 98 private static final Log LOG = LogFactory.getLog(ZKAssign.class); 99 100 /** 101 * Gets the full path node name for the unassigned node for the specified 102 * region. 103 * @param zkw zk reference 104 * @param regionName region name 105 * @return full path node name 106 */ 107 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) { 108 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName); 109 } 110 111 /** 112 * Gets the region name from the full path node name of an unassigned node. 113 * @param path full zk path 114 * @return region name 115 */ 116 public static String getRegionName(ZooKeeperWatcher zkw, String path) { 117 return path.substring(zkw.assignmentZNode.length()+1); 118 } 119 120 // Master methods 121 122 /** 123 * Creates a new unassigned node in the OFFLINE state for the specified region. 124 * 125 * <p>Does not transition nodes from other states. If a node already exists 126 * for this region, a {@link NodeExistsException} will be thrown. 127 * 128 * <p>Sets a watcher on the unassigned region node if the method is successful. 129 * 130 * <p>This method should only be used during cluster startup and the enabling 131 * of a table. 132 * 133 * @param zkw zk reference 134 * @param region region to be created as offline 135 * @param serverName server transition will happen on 136 * @throws KeeperException if unexpected zookeeper exception 137 * @throws KeeperException.NodeExistsException if node already exists 138 */ 139 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 140 ServerName serverName) 141 throws KeeperException, KeeperException.NodeExistsException { 142 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE); 143 } 144 145 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, 146 ServerName serverName, final EventType event) 147 throws KeeperException, KeeperException.NodeExistsException { 148 LOG.debug(zkw.prefix("Creating unassigned node " + 149 region.getEncodedName() + " in OFFLINE state")); 150 RegionTransition rt = 151 RegionTransition.createRegionTransition(event, region.getRegionName(), serverName); 152 String node = getNodeName(zkw, region.getEncodedName()); 153 ZKUtil.createAndWatch(zkw, node, rt.toByteArray()); 154 } 155 156 /** 157 * Creates an unassigned node in the OFFLINE state for the specified region. 158 * <p> 159 * Runs asynchronously. Depends on no pre-existing znode. 160 * 161 * <p>Sets a watcher on the unassigned region node. 162 * 163 * @param zkw zk reference 164 * @param region region to be created as offline 165 * @param serverName server transition will happen on 166 * @param cb 167 * @param ctx 168 * @throws KeeperException if unexpected zookeeper exception 169 * @throws KeeperException.NodeExistsException if node already exists 170 */ 171 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw, 172 HRegionInfo region, ServerName serverName, 173 final AsyncCallback.StringCallback cb, final Object ctx) 174 throws KeeperException { 175 LOG.debug(zkw.prefix("Async create of unassigned node " + 176 region.getEncodedName() + " with OFFLINE state")); 177 RegionTransition rt = 178 RegionTransition.createRegionTransition( 179 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName); 180 String node = getNodeName(zkw, region.getEncodedName()); 181 ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx); 182 } 183 184 /** 185 * Creates or force updates an unassigned node to the OFFLINE state for the 186 * specified region. 187 * <p> 188 * Attempts to create the node but if it exists will force it to transition to 189 * and OFFLINE state. 190 * 191 * <p>Sets a watcher on the unassigned region node if the method is 192 * successful. 193 * 194 * <p>This method should be used when assigning a region. 195 * 196 * @param zkw zk reference 197 * @param region region to be created as offline 198 * @param serverName server transition will happen on 199 * @return the version of the znode created in OFFLINE state, -1 if 200 * unsuccessful. 201 * @throws KeeperException if unexpected zookeeper exception 202 * @throws KeeperException.NodeExistsException if node already exists 203 */ 204 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw, 205 HRegionInfo region, ServerName serverName) throws KeeperException { 206 LOG.debug(zkw.prefix("Creating (or updating) unassigned node " + 207 region.getEncodedName() + " with OFFLINE state")); 208 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE, 209 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY); 210 byte [] data = rt.toByteArray(); 211 String node = getNodeName(zkw, region.getEncodedName()); 212 zkw.sync(node); 213 int version = ZKUtil.checkExists(zkw, node); 214 if (version == -1) { 215 return ZKUtil.createAndWatch(zkw, node, data); 216 } else { 217 boolean setData = false; 218 try { 219 setData = ZKUtil.setData(zkw, node, data, version); 220 // Setdata throws KeeperException which aborts the Master. So we are 221 // catching it here. 222 // If just before setting the znode to OFFLINE if the RS has made any 223 // change to the 224 // znode state then we need to return -1. 225 } catch (KeeperException kpe) { 226 LOG.info("Version mismatch while setting the node to OFFLINE state."); 227 return -1; 228 } 229 if (!setData) { 230 return -1; 231 } else { 232 // We successfully forced to OFFLINE, reset watch and handle if 233 // the state changed in between our set and the watch 234 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName()); 235 rt = getRegionTransition(bytes); 236 if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) { 237 // state changed, need to process 238 return -1; 239 } 240 } 241 } 242 return version + 1; 243 } 244 245 /** 246 * Deletes an existing unassigned node that is in the OPENED state for the 247 * specified region. 248 * 249 * <p>If a node does not already exist for this region, a 250 * {@link NoNodeException} will be thrown. 251 * 252 * <p>No watcher is set whether this succeeds or not. 253 * 254 * <p>Returns false if the node was not in the proper state but did exist. 255 * 256 * <p>This method is used during normal region transitions when a region 257 * finishes successfully opening. This is the Master acknowledging completion 258 * of the specified regions transition. 259 * 260 * @param zkw zk reference 261 * @param encodedRegionName opened region to be deleted from zk 262 * @param sn the expected region transition target server name 263 * @throws KeeperException if unexpected zookeeper exception 264 * @throws KeeperException.NoNodeException if node does not exist 265 */ 266 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw, 267 String encodedRegionName, ServerName sn) 268 throws KeeperException, KeeperException.NoNodeException { 269 return deleteNode(zkw, encodedRegionName, 270 EventType.RS_ZK_REGION_OPENED, sn); 271 } 272 273 /** 274 * Deletes an existing unassigned node that is in the OFFLINE state for the 275 * specified region. 276 * 277 * <p>If a node does not already exist for this region, a 278 * {@link NoNodeException} will be thrown. 279 * 280 * <p>No watcher is set whether this succeeds or not. 281 * 282 * <p>Returns false if the node was not in the proper state but did exist. 283 * 284 * <p>This method is used during master failover when the regions on an RS 285 * that has died are all set to OFFLINE before being processed. 286 * 287 * @param zkw zk reference 288 * @param encodedRegionName closed region to be deleted from zk 289 * @param sn the expected region transition target server name 290 * @throws KeeperException if unexpected zookeeper exception 291 * @throws KeeperException.NoNodeException if node does not exist 292 */ 293 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw, 294 String encodedRegionName, ServerName sn) 295 throws KeeperException, KeeperException.NoNodeException { 296 return deleteNode(zkw, encodedRegionName, 297 EventType.M_ZK_REGION_OFFLINE, sn); 298 } 299 300 /** 301 * Deletes an existing unassigned node that is in the CLOSED state for the 302 * specified region. 303 * 304 * <p>If a node does not already exist for this region, a 305 * {@link NoNodeException} will be thrown. 306 * 307 * <p>No watcher is set whether this succeeds or not. 308 * 309 * <p>Returns false if the node was not in the proper state but did exist. 310 * 311 * <p>This method is used during table disables when a region finishes 312 * successfully closing. This is the Master acknowledging completion 313 * of the specified regions transition to being closed. 314 * 315 * @param zkw zk reference 316 * @param encodedRegionName closed region to be deleted from zk 317 * @param sn the expected region transition target server name 318 * @throws KeeperException if unexpected zookeeper exception 319 * @throws KeeperException.NoNodeException if node does not exist 320 */ 321 public static boolean deleteClosedNode(ZooKeeperWatcher zkw, 322 String encodedRegionName, ServerName sn) 323 throws KeeperException, KeeperException.NoNodeException { 324 return deleteNode(zkw, encodedRegionName, 325 EventType.RS_ZK_REGION_CLOSED, sn); 326 } 327 328 /** 329 * Deletes an existing unassigned node that is in the CLOSING state for the 330 * specified region. 331 * 332 * <p>If a node does not already exist for this region, a 333 * {@link NoNodeException} will be thrown. 334 * 335 * <p>No watcher is set whether this succeeds or not. 336 * 337 * <p>Returns false if the node was not in the proper state but did exist. 338 * 339 * <p>This method is used during table disables when a region finishes 340 * successfully closing. This is the Master acknowledging completion 341 * of the specified regions transition to being closed. 342 * 343 * @param zkw zk reference 344 * @param region closing region to be deleted from zk 345 * @param sn the expected region transition target server name 346 * @throws KeeperException if unexpected zookeeper exception 347 * @throws KeeperException.NoNodeException if node does not exist 348 */ 349 public static boolean deleteClosingNode(ZooKeeperWatcher zkw, 350 HRegionInfo region, ServerName sn) 351 throws KeeperException, KeeperException.NoNodeException { 352 String encodedRegionName = region.getEncodedName(); 353 return deleteNode(zkw, encodedRegionName, 354 EventType.M_ZK_REGION_CLOSING, sn); 355 } 356 357 /** 358 * Deletes an existing unassigned node that is in the specified state for the 359 * specified region. 360 * 361 * <p>If a node does not already exist for this region, a 362 * {@link NoNodeException} will be thrown. 363 * 364 * <p>No watcher is set whether this succeeds or not. 365 * 366 * <p>Returns false if the node was not in the proper state but did exist. 367 * 368 * <p>This method is used when a region finishes opening/closing. 369 * The Master acknowledges completion 370 * of the specified regions transition to being closed/opened. 371 * 372 * @param zkw zk reference 373 * @param encodedRegionName region to be deleted from zk 374 * @param expectedState state region must be in for delete to complete 375 * @param sn the expected region transition target server name 376 * @throws KeeperException if unexpected zookeeper exception 377 * @throws KeeperException.NoNodeException if node does not exist 378 */ 379 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 380 EventType expectedState, ServerName sn) 381 throws KeeperException, KeeperException.NoNodeException { 382 return deleteNode(zkw, encodedRegionName, expectedState, sn, -1); 383 } 384 385 /** 386 * Deletes an existing unassigned node that is in the specified state for the 387 * specified region. 388 * 389 * <p>If a node does not already exist for this region, a 390 * {@link NoNodeException} will be thrown. 391 * 392 * <p>No watcher is set whether this succeeds or not. 393 * 394 * <p>Returns false if the node was not in the proper state but did exist. 395 * 396 * <p>This method is used when a region finishes opening/closing. 397 * The Master acknowledges completion 398 * of the specified regions transition to being closed/opened. 399 * 400 * @param zkw zk reference 401 * @param encodedRegionName region to be deleted from zk 402 * @param expectedState state region must be in for delete to complete 403 * @param expectedVersion of the znode that is to be deleted. 404 * If expectedVersion need not be compared while deleting the znode 405 * pass -1 406 * @throws KeeperException if unexpected zookeeper exception 407 * @throws KeeperException.NoNodeException if node does not exist 408 */ 409 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 410 EventType expectedState, int expectedVersion) 411 throws KeeperException, KeeperException.NoNodeException { 412 return deleteNode(zkw, encodedRegionName, expectedState, null, expectedVersion); 413 } 414 415 /** 416 * Deletes an existing unassigned node that is in the specified state for the 417 * specified region. 418 * 419 * <p>If a node does not already exist for this region, a 420 * {@link NoNodeException} will be thrown. 421 * 422 * <p>No watcher is set whether this succeeds or not. 423 * 424 * <p>Returns false if the node was not in the proper state but did exist. 425 * 426 * <p>This method is used when a region finishes opening/closing. 427 * The Master acknowledges completion 428 * of the specified regions transition to being closed/opened. 429 * 430 * @param zkw zk reference 431 * @param encodedRegionName region to be deleted from zk 432 * @param expectedState state region must be in for delete to complete 433 * @param serverName the expected region transition target server name 434 * @param expectedVersion of the znode that is to be deleted. 435 * If expectedVersion need not be compared while deleting the znode 436 * pass -1 437 * @throws KeeperException if unexpected zookeeper exception 438 * @throws KeeperException.NoNodeException if node does not exist 439 */ 440 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName, 441 EventType expectedState, ServerName serverName, int expectedVersion) 442 throws KeeperException, KeeperException.NoNodeException { 443 if (LOG.isTraceEnabled()) { 444 LOG.trace(zkw.prefix("Deleting existing unassigned " + 445 "node " + encodedRegionName + " in expected state " + expectedState)); 446 } 447 String node = getNodeName(zkw, encodedRegionName); 448 zkw.sync(node); 449 Stat stat = new Stat(); 450 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat); 451 if (bytes == null) { 452 // If it came back null, node does not exist. 453 throw KeeperException.create(Code.NONODE); 454 } 455 RegionTransition rt = getRegionTransition(bytes); 456 EventType et = rt.getEventType(); 457 if (!et.equals(expectedState)) { 458 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " + 459 expectedState + " state but node is in " + et + " state")); 460 return false; 461 } 462 // Verify the server transition happens on is not changed 463 if (serverName != null && !rt.getServerName().equals(serverName)) { 464 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName 465 + " with target " + serverName + " but node has " + rt.getServerName())); 466 return false; 467 } 468 if (expectedVersion != -1 469 && stat.getVersion() != expectedVersion) { 470 LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" + 471 " the expected one. Got a version mismatch"); 472 return false; 473 } 474 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) { 475 LOG.warn(zkw.prefix("Attempting to delete " + 476 "unassigned node " + encodedRegionName + " in " + expectedState + 477 " state but after verifying state, we got a version mismatch")); 478 return false; 479 } 480 LOG.debug(zkw.prefix("Deleted unassigned node " + 481 encodedRegionName + " in expected state " + expectedState)); 482 return true; 483 } 484 485 /** 486 * Deletes all unassigned nodes regardless of their state. 487 * 488 * <p>No watchers are set. 489 * 490 * <p>This method is used by the Master during cluster startup to clear out 491 * any existing state from other cluster runs. 492 * 493 * @param zkw zk reference 494 * @throws KeeperException if unexpected zookeeper exception 495 */ 496 public static void deleteAllNodes(ZooKeeperWatcher zkw) 497 throws KeeperException { 498 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes")); 499 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode); 500 } 501 502 /** 503 * Creates a new unassigned node in the CLOSING state for the specified 504 * region. 505 * 506 * <p>Does not transition nodes from any states. If a node already exists 507 * for this region, a {@link NodeExistsException} will be thrown. 508 * 509 * <p>If creation is successful, returns the version number of the CLOSING 510 * node created. 511 * 512 * <p>Set a watch. 513 * 514 * <p>This method should only be used by a Master when initiating a 515 * close of a region before sending a close request to the region server. 516 * 517 * @param zkw zk reference 518 * @param region region to be created as closing 519 * @param serverName server transition will happen on 520 * @return version of node after transition, -1 if unsuccessful transition 521 * @throws KeeperException if unexpected zookeeper exception 522 * @throws KeeperException.NodeExistsException if node already exists 523 */ 524 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region, 525 ServerName serverName) 526 throws KeeperException, KeeperException.NodeExistsException { 527 LOG.debug(zkw.prefix("Creating unassigned node " + 528 region.getEncodedName() + " in a CLOSING state")); 529 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING, 530 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY); 531 String node = getNodeName(zkw, region.getEncodedName()); 532 return ZKUtil.createAndWatch(zkw, node, rt.toByteArray()); 533 } 534 535 // RegionServer methods 536 537 /** 538 * Transitions an existing unassigned node for the specified region which is 539 * currently in the CLOSING state to be in the CLOSED state. 540 * 541 * <p>Does not transition nodes from other states. If for some reason the 542 * node could not be transitioned, the method returns -1. If the transition 543 * is successful, the version of the node after transition is returned. 544 * 545 * <p>This method can fail and return false for three different reasons: 546 * <ul><li>Unassigned node for this region does not exist</li> 547 * <li>Unassigned node for this region is not in CLOSING state</li> 548 * <li>After verifying CLOSING state, update fails because of wrong version 549 * (someone else already transitioned the node)</li> 550 * </ul> 551 * 552 * <p>Does not set any watches. 553 * 554 * <p>This method should only be used by a RegionServer when initiating a 555 * close of a region after receiving a CLOSE RPC from the Master. 556 * 557 * @param zkw zk reference 558 * @param region region to be transitioned to closed 559 * @param serverName server transition happens on 560 * @return version of node after transition, -1 if unsuccessful transition 561 * @throws KeeperException if unexpected zookeeper exception 562 */ 563 public static int transitionNodeClosed(ZooKeeperWatcher zkw, 564 HRegionInfo region, ServerName serverName, int expectedVersion) 565 throws KeeperException { 566 return transitionNode(zkw, region, serverName, 567 EventType.M_ZK_REGION_CLOSING, 568 EventType.RS_ZK_REGION_CLOSED, expectedVersion); 569 } 570 571 /** 572 * Transitions an existing unassigned node for the specified region which is 573 * currently in the OFFLINE state to be in the OPENING state. 574 * 575 * <p>Does not transition nodes from other states. If for some reason the 576 * node could not be transitioned, the method returns -1. If the transition 577 * is successful, the version of the node written as OPENING is returned. 578 * 579 * <p>This method can fail and return -1 for three different reasons: 580 * <ul><li>Unassigned node for this region does not exist</li> 581 * <li>Unassigned node for this region is not in OFFLINE state</li> 582 * <li>After verifying OFFLINE state, update fails because of wrong version 583 * (someone else already transitioned the node)</li> 584 * </ul> 585 * 586 * <p>Does not set any watches. 587 * 588 * <p>This method should only be used by a RegionServer when initiating an 589 * open of a region after receiving an OPEN RPC from the Master. 590 * 591 * @param zkw zk reference 592 * @param region region to be transitioned to opening 593 * @param serverName server transition happens on 594 * @return version of node after transition, -1 if unsuccessful transition 595 * @throws KeeperException if unexpected zookeeper exception 596 */ 597 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 598 HRegionInfo region, ServerName serverName) 599 throws KeeperException { 600 return transitionNodeOpening(zkw, region, serverName, 601 EventType.M_ZK_REGION_OFFLINE); 602 } 603 604 public static int transitionNodeOpening(ZooKeeperWatcher zkw, 605 HRegionInfo region, ServerName serverName, final EventType beginState) 606 throws KeeperException { 607 return transitionNode(zkw, region, serverName, beginState, 608 EventType.RS_ZK_REGION_OPENING, -1); 609 } 610 611 /** 612 * Retransitions an existing unassigned node for the specified region which is 613 * currently in the OPENING state to be in the OPENING state. 614 * 615 * <p>Does not transition nodes from other states. If for some reason the 616 * node could not be transitioned, the method returns -1. If the transition 617 * is successful, the version of the node rewritten as OPENING is returned. 618 * 619 * <p>This method can fail and return -1 for three different reasons: 620 * <ul><li>Unassigned node for this region does not exist</li> 621 * <li>Unassigned node for this region is not in OPENING state</li> 622 * <li>After verifying OPENING state, update fails because of wrong version 623 * (someone else already transitioned the node)</li> 624 * </ul> 625 * 626 * <p>Does not set any watches. 627 * 628 * <p>This method should only be used by a RegionServer when initiating an 629 * open of a region after receiving an OPEN RPC from the Master. 630 * 631 * @param zkw zk reference 632 * @param region region to be transitioned to opening 633 * @param serverName server transition happens on 634 * @param updateZNode write the znode. If false, we only check. 635 * @return version of node after transition, -1 if unsuccessful transition 636 * @throws KeeperException if unexpected zookeeper exception 637 */ 638 public static int retransitionNodeOpening(ZooKeeperWatcher zkw, 639 HRegionInfo region, ServerName serverName, int expectedVersion, boolean updateZNode) 640 throws KeeperException { 641 642 String encoded = region.getEncodedName(); 643 if(LOG.isDebugEnabled()) { 644 LOG.debug(zkw.prefix("Attempting to retransition opening state of node " + 645 HRegionInfo.prettyPrint(encoded))); 646 } 647 648 String node = getNodeName(zkw, encoded); 649 zkw.sync(node); 650 651 // Read existing data of the node 652 Stat stat = new Stat(); 653 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat); 654 if (existingBytes == null) { 655 // Node no longer exists. Return -1. It means unsuccessful transition. 656 return -1; 657 } 658 RegionTransition rt = getRegionTransition(existingBytes); 659 660 // Verify it is the expected version 661 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 662 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " + 663 "unassigned node for " + encoded + " failed, " + 664 "the node existed but was version " + stat.getVersion() + 665 " not the expected version " + expectedVersion)); 666 return -1; 667 } 668 669 // Verify it is in expected state 670 EventType et = rt.getEventType(); 671 if (!et.equals(EventType.RS_ZK_REGION_OPENING)) { 672 String existingServer = (rt.getServerName() == null) 673 ? "<unknown>" : rt.getServerName().toString(); 674 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for " 675 + encoded + " failed, the node existed but was in the state " + et + 676 " set by the server " + existingServer)); 677 return -1; 678 } 679 680 // We don't have to write the new state: the check is complete. 681 if (!updateZNode){ 682 return expectedVersion; 683 } 684 685 // Write new data, ensuring data has not changed since we last read it 686 try { 687 rt = RegionTransition.createRegionTransition( 688 EventType.RS_ZK_REGION_OPENING, region.getRegionName(), serverName, null); 689 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) { 690 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " + 691 "unassigned node for " + encoded + " failed, " + 692 "the node existed and was in the expected state but then when " + 693 "setting data we got a version mismatch")); 694 return -1; 695 } 696 if(LOG.isDebugEnabled()) { 697 LOG.debug(zkw.prefix("Retransition opening state of node " + encoded)); 698 } 699 return stat.getVersion() + 1; 700 } catch (KeeperException.NoNodeException nne) { 701 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " + 702 "unassigned node for " + encoded + " failed, " + 703 "the node existed and was in the expected state but then when " + 704 "setting data it no longer existed")); 705 return -1; 706 } 707 } 708 709 /** 710 * Transitions an existing unassigned node for the specified region which is 711 * currently in the OPENING state to be in the OPENED state. 712 * 713 * <p>Does not transition nodes from other states. If for some reason the 714 * node could not be transitioned, the method returns -1. If the transition 715 * is successful, the version of the node after transition is returned. 716 * 717 * <p>This method can fail and return false for three different reasons: 718 * <ul><li>Unassigned node for this region does not exist</li> 719 * <li>Unassigned node for this region is not in OPENING state</li> 720 * <li>After verifying OPENING state, update fails because of wrong version 721 * (this should never actually happen since an RS only does this transition 722 * following a transition to OPENING. if two RS are conflicting, one would 723 * fail the original transition to OPENING and not this transition)</li> 724 * </ul> 725 * 726 * <p>Does not set any watches. 727 * 728 * <p>This method should only be used by a RegionServer when completing the 729 * open of a region. 730 * 731 * @param zkw zk reference 732 * @param region region to be transitioned to opened 733 * @param serverName server transition happens on 734 * @return version of node after transition, -1 if unsuccessful transition 735 * @throws KeeperException if unexpected zookeeper exception 736 */ 737 public static int transitionNodeOpened(ZooKeeperWatcher zkw, 738 HRegionInfo region, ServerName serverName, int expectedVersion) 739 throws KeeperException { 740 return transitionNode(zkw, region, serverName, 741 EventType.RS_ZK_REGION_OPENING, 742 EventType.RS_ZK_REGION_OPENED, expectedVersion); 743 } 744 745 /** 746 * 747 * @param zkw zk reference 748 * @param region region to be closed 749 * @param expectedVersion expected version of the znode 750 * @return true if the znode exists, has the right version and the right state. False otherwise. 751 * @throws KeeperException 752 */ 753 public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region, 754 int expectedVersion) throws KeeperException { 755 756 final String encoded = getNodeName(zkw, region.getEncodedName()); 757 zkw.sync(encoded); 758 759 // Read existing data of the node 760 Stat stat = new Stat(); 761 byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat); 762 763 if (existingBytes == null) { 764 LOG.warn(zkw.prefix("Attempt to check the " + 765 "closing node for " + encoded + 766 ". The node does not exist")); 767 return false; 768 } 769 770 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 771 LOG.warn(zkw.prefix("Attempt to check the " + 772 "closing node for " + encoded + 773 ". The node existed but was version " + stat.getVersion() + 774 " not the expected version " + expectedVersion)); 775 return false; 776 } 777 778 RegionTransition rt = getRegionTransition(existingBytes); 779 780 if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) { 781 LOG.warn(zkw.prefix("Attempt to check the " + 782 "closing node for " + encoded + 783 ". The node existed but was in an unexpected state: " + rt.getEventType())); 784 return false; 785 } 786 787 return true; 788 } 789 790 /** 791 * Method that actually performs unassigned node transitions. 792 * 793 * <p>Attempts to transition the unassigned node for the specified region 794 * from the expected state to the state in the specified transition data. 795 * 796 * <p>Method first reads existing data and verifies it is in the expected 797 * state. If the node does not exist or the node is not in the expected 798 * state, the method returns -1. If the transition is successful, the 799 * version number of the node following the transition is returned. 800 * 801 * <p>If the read state is what is expected, it attempts to write the new 802 * state and data into the node. When doing this, it includes the expected 803 * version (determined when the existing state was verified) to ensure that 804 * only one transition is successful. If there is a version mismatch, the 805 * method returns -1. 806 * 807 * <p>If the write is successful, no watch is set and the method returns true. 808 * 809 * @param zkw zk reference 810 * @param region region to be transitioned to opened 811 * @param serverName server transition happens on 812 * @param endState state to transition node to if all checks pass 813 * @param beginState state the node must currently be in to do transition 814 * @param expectedVersion expected version of data before modification, or -1 815 * @return version of node after transition, -1 if unsuccessful transition 816 * @throws KeeperException if unexpected zookeeper exception 817 */ 818 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 819 ServerName serverName, EventType beginState, EventType endState, 820 int expectedVersion) 821 throws KeeperException { 822 return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null); 823 } 824 825 826 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, 827 ServerName serverName, EventType beginState, EventType endState, 828 int expectedVersion, final byte [] payload) 829 throws KeeperException { 830 String encoded = region.getEncodedName(); 831 if(LOG.isDebugEnabled()) { 832 LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) + 833 " from " + beginState.toString() + " to " + endState.toString())); 834 } 835 836 String node = getNodeName(zkw, encoded); 837 zkw.sync(node); 838 839 // Read existing data of the node 840 Stat stat = new Stat(); 841 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat); 842 if (existingBytes == null) { 843 // Node no longer exists. Return -1. It means unsuccessful transition. 844 return -1; 845 } 846 847 // Verify it is the expected version 848 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) { 849 LOG.warn(zkw.prefix("Attempt to transition the " + 850 "unassigned node for " + encoded + 851 " from " + beginState + " to " + endState + " failed, " + 852 "the node existed but was version " + stat.getVersion() + 853 " not the expected version " + expectedVersion)); 854 return -1; 855 } 856 857 if (beginState.equals(EventType.M_ZK_REGION_OFFLINE) 858 && endState.equals(EventType.RS_ZK_REGION_OPENING) 859 && expectedVersion == -1 && stat.getVersion() != 0) { 860 // the below check ensures that double assignment doesnot happen. 861 // When the node is created for the first time then the expected version 862 // that is passed will be -1 and the version in znode will be 0. 863 // In all other cases the version in znode will be > 0. 864 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for " 865 + encoded + " from " + beginState + " to " + endState + " failed, " 866 + "the node existed but was version " + stat.getVersion() 867 + " not the expected version " + expectedVersion)); 868 return -1; 869 } 870 871 RegionTransition rt = getRegionTransition(existingBytes); 872 873 // Verify the server transition happens on is not changed 874 if (!rt.getServerName().equals(serverName)) { 875 LOG.warn(zkw.prefix("Attempt to transition the " + 876 "unassigned node for " + encoded + 877 " from " + beginState + " to " + endState + " failed, " + 878 "the server that tried to transition was " + serverName + 879 " not the expected " + rt.getServerName())); 880 return -1; 881 } 882 883 // Verify it is in expected state 884 EventType et = rt.getEventType(); 885 if (!et.equals(beginState)) { 886 String existingServer = (rt.getServerName() == null) 887 ? "<unknown>" : rt.getServerName().toString(); 888 LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded 889 + " from " + beginState + " to " + endState + " failed, the node existed but" 890 + " was in the state " + et + " set by the server " + existingServer)); 891 return -1; 892 } 893 894 // Write new data, ensuring data has not changed since we last read it 895 try { 896 rt = RegionTransition.createRegionTransition( 897 endState, region.getRegionName(), serverName, payload); 898 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) { 899 LOG.warn(zkw.prefix("Attempt to transition the " + 900 "unassigned node for " + encoded + 901 " from " + beginState + " to " + endState + " failed, " + 902 "the node existed and was in the expected state but then when " + 903 "setting data we got a version mismatch")); 904 return -1; 905 } 906 if(LOG.isDebugEnabled()) { 907 LOG.debug(zkw.prefix("Transitioned node " + encoded + 908 " from " + beginState + " to " + endState)); 909 } 910 return stat.getVersion() + 1; 911 } catch (KeeperException.NoNodeException nne) { 912 LOG.warn(zkw.prefix("Attempt to transition the " + 913 "unassigned node for " + encoded + 914 " from " + beginState + " to " + endState + " failed, " + 915 "the node existed and was in the expected state but then when " + 916 "setting data it no longer existed")); 917 return -1; 918 } 919 } 920 921 private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException { 922 try { 923 return RegionTransition.parseFrom(bytes); 924 } catch (DeserializationException e) { 925 // Convert to a zk exception for now. Otherwise have to change API 926 throw ZKUtil.convert(e); 927 } 928 } 929 930 /** 931 * Gets the current data in the unassigned node for the specified region name 932 * or fully-qualified path. 933 * 934 * <p>Returns null if the region does not currently have a node. 935 * 936 * <p>Sets a watch on the node if the node exists. 937 * 938 * @param zkw zk reference 939 * @param pathOrRegionName fully-specified path or region name 940 * @return znode content 941 * @throws KeeperException if unexpected zookeeper exception 942 */ 943 public static byte [] getData(ZooKeeperWatcher zkw, 944 String pathOrRegionName) 945 throws KeeperException { 946 String node = getPath(zkw, pathOrRegionName); 947 return ZKUtil.getDataAndWatch(zkw, node); 948 } 949 950 /** 951 * Gets the current data in the unassigned node for the specified region name 952 * or fully-qualified path. 953 * 954 * <p>Returns null if the region does not currently have a node. 955 * 956 * <p>Sets a watch on the node if the node exists. 957 * 958 * @param zkw zk reference 959 * @param pathOrRegionName fully-specified path or region name 960 * @param stat object to populate the version. 961 * @return znode content 962 * @throws KeeperException if unexpected zookeeper exception 963 */ 964 public static byte [] getDataAndWatch(ZooKeeperWatcher zkw, 965 String pathOrRegionName, Stat stat) 966 throws KeeperException { 967 String node = getPath(zkw, pathOrRegionName); 968 return ZKUtil.getDataAndWatch(zkw, node, stat); 969 } 970 971 /** 972 * Gets the current data in the unassigned node for the specified region name 973 * or fully-qualified path. 974 * 975 * <p>Returns null if the region does not currently have a node. 976 * 977 * <p>Does not set a watch. 978 * 979 * @param zkw zk reference 980 * @param pathOrRegionName fully-specified path or region name 981 * @param stat object to store node info into on getData call 982 * @return znode content 983 * @throws KeeperException if unexpected zookeeper exception 984 */ 985 public static byte [] getDataNoWatch(ZooKeeperWatcher zkw, 986 String pathOrRegionName, Stat stat) 987 throws KeeperException { 988 String node = getPath(zkw, pathOrRegionName); 989 return ZKUtil.getDataNoWatch(zkw, node, stat); 990 } 991 992 /** 993 * @param zkw 994 * @param pathOrRegionName 995 * @return Path to znode 996 */ 997 public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) { 998 return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName); 999 } 1000 1001 /** 1002 * Get the version of the specified znode 1003 * @param zkw zk reference 1004 * @param region region's info 1005 * @return the version of the znode, -1 if it doesn't exist 1006 * @throws KeeperException 1007 */ 1008 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region) 1009 throws KeeperException { 1010 String znode = getNodeName(zkw, region.getEncodedName()); 1011 return ZKUtil.checkExists(zkw, znode); 1012 } 1013 1014 /** 1015 * Delete the assignment node regardless of its current state. 1016 * <p> 1017 * Fail silent even if the node does not exist at all. 1018 * @param watcher 1019 * @param regionInfo 1020 * @throws KeeperException 1021 */ 1022 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher, 1023 HRegionInfo regionInfo) 1024 throws KeeperException { 1025 String node = getNodeName(watcher, regionInfo.getEncodedName()); 1026 ZKUtil.deleteNodeFailSilent(watcher, node); 1027 } 1028 1029 /** 1030 * Blocks until there are no node in regions in transition. 1031 * <p> 1032 * Used in testing only. 1033 * @param zkw zk reference 1034 * @throws KeeperException 1035 * @throws InterruptedException 1036 */ 1037 public static void blockUntilNoRIT(ZooKeeperWatcher zkw) 1038 throws KeeperException, InterruptedException { 1039 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 1040 List<String> znodes = 1041 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 1042 if (znodes != null && !znodes.isEmpty()) { 1043 LOG.debug("Waiting on RIT: " + znodes); 1044 } 1045 Thread.sleep(100); 1046 } 1047 } 1048 1049 /** 1050 * Blocks until there is at least one node in regions in transition. 1051 * <p> 1052 * Used in testing only. 1053 * @param zkw zk reference 1054 * @throws KeeperException 1055 * @throws InterruptedException 1056 */ 1057 public static void blockUntilRIT(ZooKeeperWatcher zkw) 1058 throws KeeperException, InterruptedException { 1059 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) { 1060 List<String> znodes = 1061 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode); 1062 if (znodes == null || znodes.isEmpty()) { 1063 LOG.debug("No RIT in ZK"); 1064 } 1065 Thread.sleep(100); 1066 } 1067 } 1068 1069 /** 1070 * Presume bytes are serialized unassigned data structure 1071 * @param znodeBytes 1072 * @return String of the deserialized znode bytes. 1073 */ 1074 static String toString(final byte[] znodeBytes) { 1075 // This method should not exist. Used by ZKUtil stringifying RegionTransition. Have the 1076 // method in here so RegionTransition does not leak into ZKUtil. 1077 try { 1078 RegionTransition rt = RegionTransition.parseFrom(znodeBytes); 1079 return rt.toString(); 1080 } catch (DeserializationException e) { 1081 return ""; 1082 } 1083 } 1084 }