1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertFalse;
24 import static org.junit.Assert.assertTrue;
25
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.Set;
29 import java.util.TreeSet;
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.hadoop.conf.Configuration;
34 import org.apache.hadoop.hbase.Abortable;
35 import org.apache.hadoop.hbase.HBaseConfiguration;
36 import org.apache.hadoop.hbase.HBaseTestingUtility;
37 import org.apache.hadoop.hbase.HColumnDescriptor;
38 import org.apache.hadoop.hbase.HRegionInfo;
39 import org.apache.hadoop.hbase.HServerInfo;
40 import org.apache.hadoop.hbase.HTableDescriptor;
41 import org.apache.hadoop.hbase.MiniHBaseCluster;
42 import org.apache.hadoop.hbase.executor.RegionTransitionData;
43 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
44 import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
45 import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
46 import org.apache.hadoop.hbase.regionserver.HRegionServer;
47 import org.apache.hadoop.hbase.util.Bytes;
48 import org.apache.hadoop.hbase.util.JVMClusterUtil;
49 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
50 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
51 import org.apache.hadoop.hbase.zookeeper.ZKTable;
52 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
53 import org.junit.Test;
54
55 public class TestMasterFailover {
56 private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
57
58
59
60
61
62
63
64
65
66 @Test (timeout=180000)
67 public void testSimpleMasterFailover() throws Exception {
68
69 final int NUM_MASTERS = 3;
70 final int NUM_RS = 3;
71
72
73 Configuration conf = HBaseConfiguration.create();
74 conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
75 conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
76
77
78 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
79 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
80 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
81
82
83 List<MasterThread> masterThreads = cluster.getMasterThreads();
84
85
86 for (MasterThread mt : masterThreads) {
87 assertTrue(mt.isAlive());
88 }
89
90
91 int numActive = 0;
92 int activeIndex = -1;
93 String activeName = null;
94 for (int i = 0; i < masterThreads.size(); i++) {
95 if (masterThreads.get(i).getMaster().isActiveMaster()) {
96 numActive++;
97 activeIndex = i;
98 activeName = masterThreads.get(i).getMaster().getServerName();
99 }
100 }
101 assertEquals(1, numActive);
102 assertEquals(NUM_MASTERS, masterThreads.size());
103
104
105 LOG.debug("\n\nStopping a backup master\n");
106 int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
107 cluster.stopMaster(backupIndex, false);
108 cluster.waitOnMaster(backupIndex);
109
110
111 for (int i = 0; i < masterThreads.size(); i++) {
112 if (masterThreads.get(i).getMaster().isActiveMaster()) {
113 assertTrue(activeName.equals(
114 masterThreads.get(i).getMaster().getServerName()));
115 activeIndex = i;
116 }
117 }
118 assertEquals(1, numActive);
119 assertEquals(2, masterThreads.size());
120
121
122 LOG.debug("\n\nStopping the active master\n");
123 cluster.stopMaster(activeIndex, false);
124 cluster.waitOnMaster(activeIndex);
125
126
127 assertTrue(cluster.waitForActiveAndReadyMaster());
128
129 LOG.debug("\n\nVerifying backup master is now active\n");
130
131 assertEquals(1, masterThreads.size());
132
133 assertTrue(masterThreads.get(0).getMaster().isActiveMaster());
134
135
136 TEST_UTIL.shutdownMiniCluster();
137 }
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218 @Test (timeout=180000)
219 public void testMasterFailoverWithMockedRIT() throws Exception {
220
221 final int NUM_MASTERS = 1;
222 final int NUM_RS = 3;
223
224
225 Configuration conf = HBaseConfiguration.create();
226
227 conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
228 conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
229 conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
230 conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
231
232
233 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
234 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
235 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
236 log("Cluster started");
237
238
239 ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
240 "unittest", new Abortable() {
241 @Override
242 public void abort(String why, Throwable e) {
243 throw new RuntimeException("Fatal ZK error, why=" + why, e);
244 }
245 });
246
247
248 List<MasterThread> masterThreads = cluster.getMasterThreads();
249 assertEquals(1, masterThreads.size());
250
251
252 assertTrue(cluster.waitForActiveAndReadyMaster());
253 HMaster master = masterThreads.get(0).getMaster();
254 assertTrue(master.isActiveMaster());
255 assertTrue(master.isInitialized());
256
257
258 master.balanceSwitch(false);
259
260
261 byte [] FAMILY = Bytes.toBytes("family");
262 byte [][] SPLIT_KEYS = new byte [][] {
263 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
264 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
265 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
266 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
267 };
268
269 byte [] enabledTable = Bytes.toBytes("enabledTable");
270 HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
271 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
272 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
273 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
274
275 byte [] disabledTable = Bytes.toBytes("disabledTable");
276 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
277 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
278 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
279 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
280
281 log("Regions in META have been created");
282
283
284 assertEquals(2, cluster.countServedRegions());
285
286
287 HRegionServer hrs = cluster.getRegionServer(0);
288 String serverName = hrs.getServerName();
289 HServerInfo hsiAlive = hrs.getServerInfo();
290
291
292 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
293 enabledAndAssignedRegions.add(enabledRegions.remove(0));
294 enabledAndAssignedRegions.add(enabledRegions.remove(0));
295 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
296 disabledAndAssignedRegions.add(disabledRegions.remove(0));
297 disabledAndAssignedRegions.add(disabledRegions.remove(0));
298
299
300 for (HRegionInfo hri : enabledAndAssignedRegions) {
301 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
302 new RegionPlan(hri, null, hsiAlive));
303 master.assignRegion(hri);
304 }
305 for (HRegionInfo hri : disabledAndAssignedRegions) {
306 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
307 new RegionPlan(hri, null, hsiAlive));
308 master.assignRegion(hri);
309 }
310
311
312 log("Waiting for assignment to finish");
313 ZKAssign.blockUntilNoRIT(zkw);
314 log("Assignment completed");
315
316
317 log("Aborting master");
318 cluster.abortMaster(0);
319 cluster.waitOnMaster(0);
320 log("Master has aborted");
321
322
323
324
325
326
327 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
328 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
329
330 log("Beginning to mock scenarios");
331
332
333 ZKTable zktable = new ZKTable(zkw);
334 zktable.setDisabledTable(Bytes.toString(disabledTable));
335
336
337
338
339
340
341 HRegionInfo region = enabledRegions.remove(0);
342 regionsThatShouldBeOnline.add(region);
343 ZKAssign.createNodeOffline(zkw, region, serverName);
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371 region = enabledRegions.remove(0);
372 regionsThatShouldBeOnline.add(region);
373 int version = ZKAssign.createNodeClosing(zkw, region, serverName);
374 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
375
376
377 region = disabledRegions.remove(0);
378 regionsThatShouldBeOffline.add(region);
379 version = ZKAssign.createNodeClosing(zkw, region, serverName);
380 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
381
382
383
384
385
386
387 region = enabledRegions.remove(0);
388 regionsThatShouldBeOnline.add(region);
389 ZKAssign.createNodeOffline(zkw, region, serverName);
390 ZKAssign.transitionNodeOpening(zkw, region, serverName);
391
392
393
394
395
396
397 region = enabledRegions.remove(0);
398 regionsThatShouldBeOnline.add(region);
399 ZKAssign.createNodeOffline(zkw, region, serverName);
400 hrs.openRegion(region);
401 while (true) {
402 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
403 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
404 break;
405 }
406 Thread.sleep(100);
407 }
408
409
410 region = disabledRegions.remove(0);
411 regionsThatShouldBeOffline.add(region);
412 ZKAssign.createNodeOffline(zkw, region, serverName);
413 hrs.openRegion(region);
414 while (true) {
415 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
416 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
417 break;
418 }
419 Thread.sleep(100);
420 }
421
422
423
424
425
426
427
428
429
430 log("Done mocking data up in ZK");
431
432
433 log("Starting up a new master");
434 master = cluster.startMaster().getMaster();
435 log("Waiting for master to be ready");
436 cluster.waitForActiveAndReadyMaster();
437 log("Master is ready");
438
439
440 log("Waiting for no more RIT");
441 ZKAssign.blockUntilNoRIT(zkw);
442 log("No more RIT in ZK, now doing final test verification");
443
444
445 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
446 for (JVMClusterUtil.RegionServerThread rst :
447 cluster.getRegionServerThreads()) {
448 onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
449 }
450
451
452 for (HRegionInfo hri : regionsThatShouldBeOnline) {
453 assertTrue(onlineRegions.contains(hri));
454 }
455
456
457 for (HRegionInfo hri : regionsThatShouldBeOffline) {
458 assertFalse(onlineRegions.contains(hri));
459 }
460
461 log("Done with verification, all passed, shutting down cluster");
462
463
464 TEST_UTIL.shutdownMiniCluster();
465 }
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524 @Test (timeout=180000)
525 public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
526
527 final int NUM_MASTERS = 1;
528 final int NUM_RS = 2;
529
530
531 Configuration conf = HBaseConfiguration.create();
532
533 conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
534 conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
535 conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1);
536 conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 2);
537
538
539 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
540 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
541 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
542 log("Cluster started");
543
544
545 ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
546 "unittest", new Abortable() {
547 @Override
548 public void abort(String why, Throwable e) {
549 LOG.error("Fatal ZK Error: " + why, e);
550 org.junit.Assert.assertFalse("Fatal ZK error", true);
551 }
552 });
553
554
555 List<MasterThread> masterThreads = cluster.getMasterThreads();
556 assertEquals(1, masterThreads.size());
557
558
559 assertTrue(cluster.waitForActiveAndReadyMaster());
560 HMaster master = masterThreads.get(0).getMaster();
561 assertTrue(master.isActiveMaster());
562 assertTrue(master.isInitialized());
563
564
565 master.balanceSwitch(false);
566
567
568 byte [] FAMILY = Bytes.toBytes("family");
569 byte [][] SPLIT_KEYS = new byte [][] {
570 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
571 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
572 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
573 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
574 };
575
576 byte [] enabledTable = Bytes.toBytes("enabledTable");
577 HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
578 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
579 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
580 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
581
582 byte [] disabledTable = Bytes.toBytes("disabledTable");
583 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
584 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
585 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
586 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
587
588 log("Regions in META have been created");
589
590
591 assertEquals(2, cluster.countServedRegions());
592
593
594 HRegionServer hrs = cluster.getRegionServer(0);
595 HServerInfo hsiAlive = hrs.getServerInfo();
596
597
598 HRegionServer hrsDead = cluster.getRegionServer(1);
599 String deadServerName = hrsDead.getServerName();
600 HServerInfo hsiDead = hrsDead.getServerInfo();
601
602
603 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
604 enabledAndAssignedRegions.add(enabledRegions.remove(0));
605 enabledAndAssignedRegions.add(enabledRegions.remove(0));
606 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
607 disabledAndAssignedRegions.add(disabledRegions.remove(0));
608 disabledAndAssignedRegions.add(disabledRegions.remove(0));
609
610
611 for (HRegionInfo hri : enabledAndAssignedRegions) {
612 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
613 new RegionPlan(hri, null, hsiAlive));
614 master.assignRegion(hri);
615 }
616 for (HRegionInfo hri : disabledAndAssignedRegions) {
617 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
618 new RegionPlan(hri, null, hsiAlive));
619 master.assignRegion(hri);
620 }
621
622
623 List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
624 enabledAndOnDeadRegions.add(enabledRegions.remove(0));
625 enabledAndOnDeadRegions.add(enabledRegions.remove(0));
626 List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
627 disabledAndOnDeadRegions.add(disabledRegions.remove(0));
628 disabledAndOnDeadRegions.add(disabledRegions.remove(0));
629
630
631 for (HRegionInfo hri : enabledAndOnDeadRegions) {
632 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
633 new RegionPlan(hri, null, hsiDead));
634 master.assignRegion(hri);
635 }
636 for (HRegionInfo hri : disabledAndOnDeadRegions) {
637 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
638 new RegionPlan(hri, null, hsiDead));
639 master.assignRegion(hri);
640 }
641
642
643 log("Waiting for assignment to finish");
644 ZKAssign.blockUntilNoRIT(zkw);
645 log("Assignment completed");
646
647
648 log("Aborting master");
649 cluster.abortMaster(0);
650 cluster.waitOnMaster(0);
651 log("Master has aborted");
652
653
654
655
656
657
658 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
659 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
660
661 log("Beginning to mock scenarios");
662
663
664 ZKTable zktable = new ZKTable(zkw);
665 zktable.setDisabledTable(Bytes.toString(disabledTable));
666
667
668
669
670
671
672 HRegionInfo region = enabledAndOnDeadRegions.remove(0);
673 regionsThatShouldBeOnline.add(region);
674 ZKAssign.createNodeClosing(zkw, region, deadServerName);
675 LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
676 region + "\n\n");
677
678
679 region = disabledAndOnDeadRegions.remove(0);
680 regionsThatShouldBeOffline.add(region);
681 ZKAssign.createNodeClosing(zkw, region, deadServerName);
682 LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
683 region + "\n\n");
684
685
686
687
688
689
690 region = enabledAndOnDeadRegions.remove(0);
691 regionsThatShouldBeOnline.add(region);
692 int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
693 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
694 LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
695 region + "\n\n");
696
697
698 region = disabledAndOnDeadRegions.remove(0);
699 regionsThatShouldBeOffline.add(region);
700 version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
701 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
702 LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
703 region + "\n\n");
704
705
706
707
708
709
710 region = enabledRegions.remove(0);
711 regionsThatShouldBeOnline.add(region);
712 ZKAssign.createNodeOffline(zkw, region, deadServerName);
713 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
714 LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
715 region + "\n\n");
716
717
718 region = disabledRegions.remove(0);
719 regionsThatShouldBeOffline.add(region);
720 ZKAssign.createNodeOffline(zkw, region, deadServerName);
721 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
722 LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
723 region + "\n\n");
724
725
726
727
728
729
730 region = enabledRegions.remove(0);
731 regionsThatShouldBeOnline.add(region);
732 ZKAssign.createNodeOffline(zkw, region, deadServerName);
733 hrsDead.openRegion(region);
734 while (true) {
735 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
736 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
737 break;
738 }
739 Thread.sleep(100);
740 }
741 LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
742 region + "\n\n");
743
744
745 region = disabledRegions.remove(0);
746 regionsThatShouldBeOffline.add(region);
747 ZKAssign.createNodeOffline(zkw, region, deadServerName);
748 hrsDead.openRegion(region);
749 while (true) {
750 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
751 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
752 break;
753 }
754 Thread.sleep(100);
755 }
756 LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
757 region + "\n\n");
758
759
760
761
762
763
764 region = enabledRegions.remove(0);
765 regionsThatShouldBeOnline.add(region);
766 ZKAssign.createNodeOffline(zkw, region, deadServerName);
767 hrsDead.openRegion(region);
768 while (true) {
769 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
770 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
771 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
772 break;
773 }
774 Thread.sleep(100);
775 }
776 LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
777 + "\n" + region + "\n\n");
778
779
780 region = disabledRegions.remove(0);
781 regionsThatShouldBeOffline.add(region);
782 ZKAssign.createNodeOffline(zkw, region, deadServerName);
783 hrsDead.openRegion(region);
784 while (true) {
785 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
786 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
787 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
788 break;
789 }
790 Thread.sleep(100);
791 }
792 LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
793 + "\n" + region + "\n\n");
794
795
796
797
798
799 log("Done mocking data up in ZK");
800
801
802 log("Killing RS " + deadServerName);
803 hrsDead.abort("Killing for unit test");
804 log("RS " + deadServerName + " killed");
805
806
807 log("Starting up a new master");
808 master = cluster.startMaster().getMaster();
809 log("Waiting for master to be ready");
810 cluster.waitForActiveAndReadyMaster();
811 log("Master is ready");
812
813
814
815
816
817
818
819
820 region = enabledRegions.remove(0);
821 regionsThatShouldBeOnline.add(region);
822 master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
823 new RegionState(region, RegionState.State.PENDING_OPEN, 0));
824 ZKAssign.createNodeOffline(zkw, region, master.getServerName());
825
826 region = disabledRegions.remove(0);
827 regionsThatShouldBeOffline.add(region);
828 master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
829 new RegionState(region, RegionState.State.PENDING_OPEN, 0));
830 ZKAssign.createNodeOffline(zkw, region, master.getServerName());
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853 log("Waiting for no more RIT");
854 ZKAssign.blockUntilNoRIT(zkw);
855 log("No more RIT in ZK");
856 long now = System.currentTimeMillis();
857 final long maxTime = 120000;
858 boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
859 if (!done) {
860 LOG.info("rit=" + master.assignmentManager.getRegionsInTransition());
861 }
862 long elapsed = System.currentTimeMillis() - now;
863 assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
864 elapsed < maxTime);
865 log("No more RIT in RIT map, doing final test verification");
866
867
868 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
869 for (JVMClusterUtil.RegionServerThread rst :
870 cluster.getRegionServerThreads()) {
871 onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
872 }
873
874
875 for (HRegionInfo hri : regionsThatShouldBeOnline) {
876 assertTrue("region=" + hri.getRegionNameAsString(), onlineRegions.contains(hri));
877 }
878
879
880 for (HRegionInfo hri : regionsThatShouldBeOffline) {
881 assertFalse(onlineRegions.contains(hri));
882 }
883
884 log("Done with verification, all passed, shutting down cluster");
885
886
887 TEST_UTIL.shutdownMiniCluster();
888 }
889
890
891
892
893 private void log(String string) {
894 LOG.info("\n\n" + string + " \n\n");
895 }
896 }