1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.zookeeper;
20
21 import org.apache.commons.logging.Log;
22 import org.apache.commons.logging.LogFactory;
23 import org.apache.hadoop.classification.InterfaceAudience;
24 import org.apache.hadoop.classification.InterfaceStability;
25 import org.apache.hadoop.hbase.HConstants;
26 import org.apache.hadoop.hbase.HRegionInfo;
27 import org.apache.hadoop.hbase.RegionTransition;
28 import org.apache.hadoop.hbase.ServerName;
29 import org.apache.hadoop.hbase.exceptions.DeserializationException;
30 import org.apache.hadoop.hbase.executor.EventType;
31 import org.apache.zookeeper.AsyncCallback;
32 import org.apache.zookeeper.KeeperException;
33 import org.apache.zookeeper.KeeperException.Code;
34 import org.apache.zookeeper.KeeperException.NoNodeException;
35 import org.apache.zookeeper.KeeperException.NodeExistsException;
36 import org.apache.zookeeper.data.Stat;
37
38 import java.util.List;
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97 @InterfaceAudience.Public
98 @InterfaceStability.Evolving
99 public class ZKAssign {
100 private static final Log LOG = LogFactory.getLog(ZKAssign.class);
101
102
103
104
105
106
107
108
109 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
110 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
111 }
112
113
114
115
116
117
118 public static String getRegionName(ZooKeeperWatcher zkw, String path) {
119 return path.substring(zkw.assignmentZNode.length()+1);
120 }
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
142 ServerName serverName)
143 throws KeeperException, KeeperException.NodeExistsException {
144 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
145 }
146
147 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
148 ServerName serverName, final EventType event)
149 throws KeeperException, KeeperException.NodeExistsException {
150 LOG.debug(zkw.prefix("Creating unassigned node for " +
151 region.getEncodedName() + " in OFFLINE state"));
152 RegionTransition rt =
153 RegionTransition.createRegionTransition(event, region.getRegionName(), serverName);
154 String node = getNodeName(zkw, region.getEncodedName());
155 ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
156 }
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
174 HRegionInfo region, ServerName serverName,
175 final AsyncCallback.StringCallback cb, final Object ctx)
176 throws KeeperException {
177 LOG.debug(zkw.prefix("Async create of unassigned node for " +
178 region.getEncodedName() + " with OFFLINE state"));
179 RegionTransition rt =
180 RegionTransition.createRegionTransition(
181 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
182 String node = getNodeName(zkw, region.getEncodedName());
183 ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx);
184 }
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
207 HRegionInfo region, ServerName serverName) throws KeeperException {
208 LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
209 region.getEncodedName() + " with OFFLINE state"));
210 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE,
211 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
212 byte [] data = rt.toByteArray();
213 String node = getNodeName(zkw, region.getEncodedName());
214 zkw.sync(node);
215 int version = ZKUtil.checkExists(zkw, node);
216 if (version == -1) {
217 return ZKUtil.createAndWatch(zkw, node, data);
218 } else {
219 boolean setData = false;
220 try {
221 setData = ZKUtil.setData(zkw, node, data, version);
222
223
224
225
226
227 } catch (KeeperException kpe) {
228 LOG.info("Version mismatch while setting the node to OFFLINE state.");
229 return -1;
230 }
231 if (!setData) {
232 return -1;
233 } else {
234
235
236 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
237 rt = getRegionTransition(bytes);
238 if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) {
239
240 return -1;
241 }
242 }
243 }
244 return version + 1;
245 }
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
268 String encodedRegionName)
269 throws KeeperException, KeeperException.NoNodeException {
270 return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_OPENED);
271 }
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
293 String encodedRegionName)
294 throws KeeperException, KeeperException.NoNodeException {
295 return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_OFFLINE);
296 }
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318 public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
319 String encodedRegionName)
320 throws KeeperException, KeeperException.NoNodeException {
321 return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_CLOSED);
322 }
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344 public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
345 HRegionInfo region)
346 throws KeeperException, KeeperException.NoNodeException {
347 String encodedRegionName = region.getEncodedName();
348 return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_CLOSING);
349 }
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
373 EventType expectedState)
374 throws KeeperException, KeeperException.NoNodeException {
375 return deleteNode(zkw, encodedRegionName, expectedState, -1);
376 }
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
403 EventType expectedState, int expectedVersion)
404 throws KeeperException, KeeperException.NoNodeException {
405 LOG.debug(zkw.prefix("Deleting existing unassigned " +
406 "node for " + encodedRegionName + " that is in expected state " + expectedState));
407 String node = getNodeName(zkw, encodedRegionName);
408 zkw.sync(node);
409 Stat stat = new Stat();
410 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
411 if (bytes == null) {
412
413 throw KeeperException.create(Code.NONODE);
414 }
415 RegionTransition rt = getRegionTransition(bytes);
416 EventType et = rt.getEventType();
417 if (!et.equals(expectedState)) {
418 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " +
419 expectedState + " state but node is in " + et + " state"));
420 return false;
421 }
422 if (expectedVersion != -1
423 && stat.getVersion() != expectedVersion) {
424 LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" +
425 " the expected one. Got a version mismatch");
426 return false;
427 }
428 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
429 LOG.warn(zkw.prefix("Attempting to delete " +
430 "unassigned node " + encodedRegionName + " in " + expectedState +
431 " state but after verifying state, we got a version mismatch"));
432 return false;
433 }
434 LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
435 encodedRegionName + " in expected state " + expectedState));
436 return true;
437 }
438
439
440
441
442
443
444
445
446
447
448
449
450 public static void deleteAllNodes(ZooKeeperWatcher zkw)
451 throws KeeperException {
452 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
453 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
454 }
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
479 ServerName serverName)
480 throws KeeperException, KeeperException.NodeExistsException {
481 LOG.debug(zkw.prefix("Creating unassigned node for " +
482 region.getEncodedName() + " in a CLOSING state"));
483 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
484 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
485 String node = getNodeName(zkw, region.getEncodedName());
486 return ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
487 }
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517 public static int transitionNodeClosed(ZooKeeperWatcher zkw,
518 HRegionInfo region, ServerName serverName, int expectedVersion)
519 throws KeeperException {
520 return transitionNode(zkw, region, serverName,
521 EventType.M_ZK_REGION_CLOSING,
522 EventType.RS_ZK_REGION_CLOSED, expectedVersion);
523 }
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
552 HRegionInfo region, ServerName serverName)
553 throws KeeperException {
554 return transitionNodeOpening(zkw, region, serverName,
555 EventType.M_ZK_REGION_OFFLINE);
556 }
557
558 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
559 HRegionInfo region, ServerName serverName, final EventType beginState)
560 throws KeeperException {
561 return transitionNode(zkw, region, serverName, beginState,
562 EventType.RS_ZK_REGION_OPENING, -1);
563 }
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592 public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
593 HRegionInfo region, ServerName serverName, int expectedVersion, boolean updateZNode)
594 throws KeeperException {
595
596 String encoded = region.getEncodedName();
597 if(LOG.isDebugEnabled()) {
598 LOG.debug(zkw.prefix("Attempting to retransition the opening state of node " +
599 HRegionInfo.prettyPrint(encoded)));
600 }
601
602 String node = getNodeName(zkw, encoded);
603 zkw.sync(node);
604
605
606 Stat stat = new Stat();
607 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
608 if (existingBytes == null) {
609
610 return -1;
611 }
612 RegionTransition rt = getRegionTransition(existingBytes);
613
614
615 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
616 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
617 "unassigned node for " + encoded + " failed, " +
618 "the node existed but was version " + stat.getVersion() +
619 " not the expected version " + expectedVersion));
620 return -1;
621 }
622
623
624 EventType et = rt.getEventType();
625 if (!et.equals(EventType.RS_ZK_REGION_OPENING)) {
626 String existingServer = (rt.getServerName() == null)
627 ? "<unknown>" : rt.getServerName().toString();
628 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for "
629 + encoded + " failed, the node existed but was in the state " + et +
630 " set by the server " + existingServer));
631 return -1;
632 }
633
634
635 if (!updateZNode){
636 return expectedVersion;
637 }
638
639
640 try {
641 rt = RegionTransition.createRegionTransition(
642 EventType.RS_ZK_REGION_OPENING, region.getRegionName(), serverName, null);
643 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
644 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
645 "unassigned node for " + encoded + " failed, " +
646 "the node existed and was in the expected state but then when " +
647 "setting data we got a version mismatch"));
648 return -1;
649 }
650 if(LOG.isDebugEnabled()) {
651 LOG.debug(zkw.prefix("Successfully retransition the opening state of node " + encoded));
652 }
653 return stat.getVersion() + 1;
654 } catch (KeeperException.NoNodeException nne) {
655 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
656 "unassigned node for " + encoded + " failed, " +
657 "the node existed and was in the expected state but then when " +
658 "setting data it no longer existed"));
659 return -1;
660 }
661 }
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691 public static int transitionNodeOpened(ZooKeeperWatcher zkw,
692 HRegionInfo region, ServerName serverName, int expectedVersion)
693 throws KeeperException {
694 return transitionNode(zkw, region, serverName,
695 EventType.RS_ZK_REGION_OPENING,
696 EventType.RS_ZK_REGION_OPENED, expectedVersion);
697 }
698
699
700
701
702
703
704
705
706
707 public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region,
708 int expectedVersion) throws KeeperException {
709
710 final String encoded = getNodeName(zkw, region.getEncodedName());
711 zkw.sync(encoded);
712
713
714 Stat stat = new Stat();
715 byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat);
716
717 if (existingBytes == null) {
718 LOG.warn(zkw.prefix("Attempt to check the " +
719 "closing node for " + encoded +
720 ". The node does not exist"));
721 return false;
722 }
723
724 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
725 LOG.warn(zkw.prefix("Attempt to check the " +
726 "closing node for " + encoded +
727 ". The node existed but was version " + stat.getVersion() +
728 " not the expected version " + expectedVersion));
729 return false;
730 }
731
732 RegionTransition rt = getRegionTransition(existingBytes);
733
734 if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) {
735 LOG.warn(zkw.prefix("Attempt to check the " +
736 "closing node for " + encoded +
737 ". The node existed but was in an unexpected state: " + rt.getEventType()));
738 return false;
739 }
740
741 return true;
742 }
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
773 ServerName serverName, EventType beginState, EventType endState,
774 int expectedVersion)
775 throws KeeperException {
776 return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null);
777 }
778
779
780 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
781 ServerName serverName, EventType beginState, EventType endState,
782 int expectedVersion, final byte [] payload)
783 throws KeeperException {
784 String encoded = region.getEncodedName();
785 if(LOG.isDebugEnabled()) {
786 LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) +
787 " from " + beginState.toString() + " to " + endState.toString()));
788 }
789
790 String node = getNodeName(zkw, encoded);
791 zkw.sync(node);
792
793
794 Stat stat = new Stat();
795 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
796 if (existingBytes == null) {
797
798 return -1;
799 }
800
801
802 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
803 LOG.warn(zkw.prefix("Attempt to transition the " +
804 "unassigned node for " + encoded +
805 " from " + beginState + " to " + endState + " failed, " +
806 "the node existed but was version " + stat.getVersion() +
807 " not the expected version " + expectedVersion));
808 return -1;
809 }
810
811 if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
812 && endState.equals(EventType.RS_ZK_REGION_OPENING)
813 && expectedVersion == -1 && stat.getVersion() != 0) {
814
815
816
817
818 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
819 + encoded + " from " + beginState + " to " + endState + " failed, "
820 + "the node existed but was version " + stat.getVersion()
821 + " not the expected version " + expectedVersion));
822 return -1;
823 }
824
825 RegionTransition rt = getRegionTransition(existingBytes);
826
827
828 if (!rt.getServerName().equals(serverName)) {
829 LOG.warn(zkw.prefix("Attempt to transition the " +
830 "unassigned node for " + encoded +
831 " from " + beginState + " to " + endState + " failed, " +
832 "the server that tried to transition was " + serverName +
833 " not the expected " + rt.getServerName()));
834 return -1;
835 }
836
837
838 EventType et = rt.getEventType();
839 if (!et.equals(beginState)) {
840 String existingServer = (rt.getServerName() == null)
841 ? "<unknown>" : rt.getServerName().toString();
842 LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded
843 + " from " + beginState + " to " + endState + " failed, the node existed but"
844 + " was in the state " + et + " set by the server " + existingServer));
845 return -1;
846 }
847
848
849 try {
850 rt = RegionTransition.createRegionTransition(
851 endState, region.getRegionName(), serverName, payload);
852 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
853 LOG.warn(zkw.prefix("Attempt to transition the " +
854 "unassigned node for " + encoded +
855 " from " + beginState + " to " + endState + " failed, " +
856 "the node existed and was in the expected state but then when " +
857 "setting data we got a version mismatch"));
858 return -1;
859 }
860 if(LOG.isDebugEnabled()) {
861 LOG.debug(zkw.prefix("Successfully transitioned node " + encoded +
862 " from " + beginState + " to " + endState));
863 }
864 return stat.getVersion() + 1;
865 } catch (KeeperException.NoNodeException nne) {
866 LOG.warn(zkw.prefix("Attempt to transition the " +
867 "unassigned node for " + encoded +
868 " from " + beginState + " to " + endState + " failed, " +
869 "the node existed and was in the expected state but then when " +
870 "setting data it no longer existed"));
871 return -1;
872 }
873 }
874
875 private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException {
876 try {
877 return RegionTransition.parseFrom(bytes);
878 } catch (DeserializationException e) {
879
880 throw ZKUtil.convert(e);
881 }
882 }
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897 public static byte [] getData(ZooKeeperWatcher zkw,
898 String pathOrRegionName)
899 throws KeeperException {
900 String node = getPath(zkw, pathOrRegionName);
901 return ZKUtil.getDataAndWatch(zkw, node);
902 }
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918 public static byte [] getDataAndWatch(ZooKeeperWatcher zkw,
919 String pathOrRegionName, Stat stat)
920 throws KeeperException {
921 String node = getPath(zkw, pathOrRegionName);
922 return ZKUtil.getDataAndWatch(zkw, node, stat);
923 }
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939 public static byte [] getDataNoWatch(ZooKeeperWatcher zkw,
940 String pathOrRegionName, Stat stat)
941 throws KeeperException {
942 String node = getPath(zkw, pathOrRegionName);
943 return ZKUtil.getDataNoWatch(zkw, node, stat);
944 }
945
946
947
948
949
950
951 public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) {
952 return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName);
953 }
954
955
956
957
958
959
960
961
962 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
963 throws KeeperException {
964 String znode = getNodeName(zkw, region.getEncodedName());
965 return ZKUtil.checkExists(zkw, znode);
966 }
967
968
969
970
971
972
973
974
975
976 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
977 HRegionInfo regionInfo)
978 throws KeeperException {
979 String node = getNodeName(watcher, regionInfo.getEncodedName());
980 ZKUtil.deleteNodeFailSilent(watcher, node);
981 }
982
983
984
985
986
987
988
989
990
991 public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
992 throws KeeperException, InterruptedException {
993 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
994 List<String> znodes =
995 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
996 if (znodes != null && !znodes.isEmpty()) {
997 LOG.debug("Waiting on RIT: " + znodes);
998 }
999 Thread.sleep(100);
1000 }
1001 }
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011 public static void blockUntilRIT(ZooKeeperWatcher zkw)
1012 throws KeeperException, InterruptedException {
1013 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1014 List<String> znodes =
1015 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1016 if (znodes == null || znodes.isEmpty()) {
1017 LOG.debug("No RIT in ZK");
1018 }
1019 Thread.sleep(100);
1020 }
1021 }
1022
1023
1024
1025
1026
1027
1028 static String toString(final byte[] znodeBytes) {
1029
1030
1031 try {
1032 RegionTransition rt = RegionTransition.parseFrom(znodeBytes);
1033 return rt.toString();
1034 } catch (DeserializationException e) {
1035 return "";
1036 }
1037 }
1038 }