1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertTrue;
25
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Set;
31 import java.util.TreeSet;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FileSystem;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.Abortable;
39 import org.apache.hadoop.hbase.ClusterStatus;
40 import org.apache.hadoop.hbase.HBaseConfiguration;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.HConstants;
44 import org.apache.hadoop.hbase.HRegionInfo;
45 import org.apache.hadoop.hbase.HTableDescriptor;
46 import org.apache.hadoop.hbase.testclassification.LargeTests;
47 import org.apache.hadoop.hbase.MetaTableAccessor;
48 import org.apache.hadoop.hbase.MiniHBaseCluster;
49 import org.apache.hadoop.hbase.RegionTransition;
50 import org.apache.hadoop.hbase.ServerName;
51 import org.apache.hadoop.hbase.TableName;
52 import org.apache.hadoop.hbase.TableStateManager;
53 import org.apache.hadoop.hbase.client.RegionLocator;
54 import org.apache.hadoop.hbase.client.Table;
55 import org.apache.hadoop.hbase.executor.EventType;
56 import org.apache.hadoop.hbase.master.RegionState.State;
57 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
58 import org.apache.hadoop.hbase.protobuf.RequestConverter;
59 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
60 import org.apache.hadoop.hbase.regionserver.HRegion;
61 import org.apache.hadoop.hbase.regionserver.HRegionServer;
62 import org.apache.hadoop.hbase.regionserver.RegionMergeTransaction;
63 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
64 import org.apache.hadoop.hbase.util.Bytes;
65 import org.apache.hadoop.hbase.util.FSTableDescriptors;
66 import org.apache.hadoop.hbase.util.FSUtils;
67 import org.apache.hadoop.hbase.util.JVMClusterUtil;
68 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
69 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
70 import org.apache.hadoop.hbase.util.Threads;
71 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
72 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
73 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
74 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
75 import org.apache.zookeeper.data.Stat;
76 import org.junit.Test;
77 import org.junit.experimental.categories.Category;
78
79 @Category(LargeTests.class)
80 public class TestMasterFailover {
81 private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162 @Test (timeout=240000)
163 public void testMasterFailoverWithMockedRIT() throws Exception {
164
165 final int NUM_MASTERS = 1;
166 final int NUM_RS = 3;
167
168
169 Configuration conf = HBaseConfiguration.create();
170 conf.setBoolean("hbase.assignment.usezk", true);
171
172
173 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
174 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
175 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
176 log("Cluster started");
177
178
179 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
180
181
182 List<MasterThread> masterThreads = cluster.getMasterThreads();
183 assertEquals(1, masterThreads.size());
184
185
186 assertTrue(cluster.waitForActiveAndReadyMaster());
187 HMaster master = masterThreads.get(0).getMaster();
188 assertTrue(master.isActiveMaster());
189 assertTrue(master.isInitialized());
190
191
192 master.balanceSwitch(false);
193
194
195 byte [] FAMILY = Bytes.toBytes("family");
196 byte [][] SPLIT_KEYS = new byte [][] {
197 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
198 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
199 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
200 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
201 };
202
203 byte [] enabledTable = Bytes.toBytes("enabledTable");
204 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
205 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
206
207 FileSystem filesystem = FileSystem.get(conf);
208 Path rootdir = FSUtils.getRootDir(conf);
209 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
210
211 fstd.createTableDescriptor(htdEnabled);
212
213 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
214 createRegion(hriEnabled, rootdir, conf, htdEnabled);
215
216 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
217 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
218
219 TableName disabledTable = TableName.valueOf("disabledTable");
220 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
221 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
222
223 fstd.createTableDescriptor(htdDisabled);
224 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
225 createRegion(hriDisabled, rootdir, conf, htdDisabled);
226 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
227 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
228
229 TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
230 TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
231
232 log("Regions in hbase:meta and namespace have been created");
233
234
235
236 assertEquals(4, cluster.countServedRegions());
237
238
239 AssignmentManager am = master.getAssignmentManager();
240 RegionStates regionStates = am.getRegionStates();
241 List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
242 assertEquals(2, mergingRegions.size());
243 HRegionInfo a = mergingRegions.get(0);
244 HRegionInfo b = mergingRegions.get(1);
245 HRegionInfo newRegion = RegionMergeTransaction.getMergedRegionInfo(a, b);
246 ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
247 ServerName serverB = regionStates.getRegionServerOfRegion(b);
248 if (!serverB.equals(mergingServer)) {
249 RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
250 am.balance(plan);
251 assertTrue(am.waitForAssignment(b));
252 }
253
254
255 HRegionServer hrs = cluster.getRegionServer(0);
256 ServerName serverName = hrs.getServerName();
257 HRegionInfo closingRegion = enabledRegions.remove(0);
258
259 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
260 enabledAndAssignedRegions.add(enabledRegions.remove(0));
261 enabledAndAssignedRegions.add(enabledRegions.remove(0));
262 enabledAndAssignedRegions.add(closingRegion);
263
264 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
265 disabledAndAssignedRegions.add(disabledRegions.remove(0));
266 disabledAndAssignedRegions.add(disabledRegions.remove(0));
267
268
269 for (HRegionInfo hri : enabledAndAssignedRegions) {
270 master.assignmentManager.addPlan(hri.getEncodedName(),
271 new RegionPlan(hri, null, serverName));
272 master.assignRegion(hri);
273 }
274
275 for (HRegionInfo hri : disabledAndAssignedRegions) {
276 master.assignmentManager.addPlan(hri.getEncodedName(),
277 new RegionPlan(hri, null, serverName));
278 master.assignRegion(hri);
279 }
280
281
282 log("Waiting for assignment to finish");
283 ZKAssign.blockUntilNoRIT(zkw);
284 log("Assignment completed");
285
286
287 log("Aborting master");
288 cluster.abortMaster(0);
289 cluster.waitOnMaster(0);
290 log("Master has aborted");
291
292
293
294
295
296
297 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
298 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
299
300 log("Beginning to mock scenarios");
301
302
303 TableStateManager zktable = new ZKTableStateManager(zkw);
304 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
305
306
307
308
309
310
311
312
313 HRegionInfo region = enabledRegions.remove(0);
314 regionsThatShouldBeOnline.add(region);
315 ZKAssign.createNodeOffline(zkw, region, serverName);
316
317
318
319
320
321 regionsThatShouldBeOnline.add(closingRegion);
322 ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
323
324
325
326
327
328
329
330 region = enabledRegions.remove(0);
331 regionsThatShouldBeOnline.add(region);
332 int version = ZKAssign.createNodeClosing(zkw, region, serverName);
333 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
334
335
336 region = disabledRegions.remove(0);
337 regionsThatShouldBeOffline.add(region);
338 version = ZKAssign.createNodeClosing(zkw, region, serverName);
339 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
340
341
342
343
344
345
346
347 region = enabledRegions.remove(0);
348 regionsThatShouldBeOnline.add(region);
349 ZKAssign.createNodeOffline(zkw, region, serverName);
350 ProtobufUtil.openRegion(hrs.getRSRpcServices(), hrs.getServerName(), region);
351 while (true) {
352 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
353 RegionTransition rt = RegionTransition.parseFrom(bytes);
354 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
355 break;
356 }
357 Thread.sleep(100);
358 }
359
360
361
362 region = disabledRegions.remove(0);
363 regionsThatShouldBeOffline.add(region);
364 ZKAssign.createNodeOffline(zkw, region, serverName);
365 ProtobufUtil.openRegion(hrs.getRSRpcServices(), hrs.getServerName(), region);
366 while (true) {
367 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
368 RegionTransition rt = RegionTransition.parseFrom(bytes);
369 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
370 break;
371 }
372 Thread.sleep(100);
373 }
374
375
376
377
378
379
380
381 hrs.getCoordinatedStateManager().
382 getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);
383
384
385
386
387
388
389
390
391
392 log("Done mocking data up in ZK");
393
394
395 log("Starting up a new master");
396 master = cluster.startMaster().getMaster();
397 log("Waiting for master to be ready");
398 cluster.waitForActiveAndReadyMaster();
399 log("Master is ready");
400
401
402 regionStates = master.getAssignmentManager().getRegionStates();
403
404 assertTrue(regionStates.isRegionInState(a, State.MERGING));
405 assertTrue(regionStates.isRegionInState(b, State.MERGING));
406 assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
407
408
409 ZKAssign.deleteNodeFailSilent(zkw, newRegion);
410
411
412 log("Waiting for no more RIT");
413 ZKAssign.blockUntilNoRIT(zkw);
414 log("No more RIT in ZK, now doing final test verification");
415
416
417 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
418 for (JVMClusterUtil.RegionServerThread rst :
419 cluster.getRegionServerThreads()) {
420 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
421 rst.getRegionServer().getRSRpcServices()));
422 }
423
424
425 for (HRegionInfo hri : regionsThatShouldBeOnline) {
426 assertTrue(onlineRegions.contains(hri));
427 }
428
429
430 for (HRegionInfo hri : regionsThatShouldBeOffline) {
431 if (onlineRegions.contains(hri)) {
432 LOG.debug(hri);
433 }
434 assertFalse(onlineRegions.contains(hri));
435 }
436
437 log("Done with verification, all passed, shutting down cluster");
438
439
440 TEST_UTIL.shutdownMiniCluster();
441 }
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499 @Test (timeout=180000)
500 public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
501
502 final int NUM_MASTERS = 1;
503 final int NUM_RS = 2;
504
505
506 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
507 Configuration conf = TEST_UTIL.getConfiguration();
508 conf.setBoolean("hbase.assignment.usezk", true);
509
510 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
511 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
512 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
513 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
514 log("Cluster started");
515
516
517 ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
518 "unittest", new Abortable() {
519
520 @Override
521 public void abort(String why, Throwable e) {
522 LOG.error("Fatal ZK Error: " + why, e);
523 org.junit.Assert.assertFalse("Fatal ZK error", true);
524 }
525
526 @Override
527 public boolean isAborted() {
528 return false;
529 }
530
531 });
532
533
534 List<MasterThread> masterThreads = cluster.getMasterThreads();
535 assertEquals(1, masterThreads.size());
536
537
538 assertTrue(cluster.waitForActiveAndReadyMaster());
539 HMaster master = masterThreads.get(0).getMaster();
540 assertTrue(master.isActiveMaster());
541 assertTrue(master.isInitialized());
542
543
544 master.balanceSwitch(false);
545
546
547 byte [] FAMILY = Bytes.toBytes("family");
548 byte[][] SPLIT_KEYS =
549 TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
550
551 byte [] enabledTable = Bytes.toBytes("enabledTable");
552 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
553 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
554 FileSystem filesystem = FileSystem.get(conf);
555 Path rootdir = FSUtils.getRootDir(conf);
556 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
557
558 fstd.createTableDescriptor(htdEnabled);
559 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
560 null, null);
561 createRegion(hriEnabled, rootdir, conf, htdEnabled);
562
563 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
564 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
565
566 TableName disabledTable =
567 TableName.valueOf("disabledTable");
568 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
569 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
570
571 fstd.createTableDescriptor(htdDisabled);
572 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
573 createRegion(hriDisabled, rootdir, conf, htdDisabled);
574
575 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
576 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
577
578 log("Regions in hbase:meta and Namespace have been created");
579
580
581 assertEquals(2, cluster.countServedRegions());
582
583
584 List<RegionServerThread> regionservers =
585 cluster.getRegionServerThreads();
586 HRegionServer hrs = regionservers.get(0).getRegionServer();
587
588
589 RegionServerThread hrsDeadThread = regionservers.get(1);
590 HRegionServer hrsDead = hrsDeadThread.getRegionServer();
591 ServerName deadServerName = hrsDead.getServerName();
592
593
594 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
595 enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
596 enabledRegions.removeAll(enabledAndAssignedRegions);
597 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
598 disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
599 disabledRegions.removeAll(disabledAndAssignedRegions);
600
601
602 for (HRegionInfo hri : enabledAndAssignedRegions) {
603 master.assignmentManager.addPlan(hri.getEncodedName(),
604 new RegionPlan(hri, null, hrs.getServerName()));
605 master.assignRegion(hri);
606 }
607 for (HRegionInfo hri : disabledAndAssignedRegions) {
608 master.assignmentManager.addPlan(hri.getEncodedName(),
609 new RegionPlan(hri, null, hrs.getServerName()));
610 master.assignRegion(hri);
611 }
612
613 log("Waiting for assignment to finish");
614 ZKAssign.blockUntilNoRIT(zkw);
615 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
616 log("Assignment completed");
617
618 assertTrue(" Table must be enabled.", master.getAssignmentManager()
619 .getTableStateManager().isTableState(TableName.valueOf("enabledTable"),
620 ZooKeeperProtos.Table.State.ENABLED));
621
622 List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
623 enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
624 enabledRegions.removeAll(enabledAndOnDeadRegions);
625 List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
626 disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
627 disabledRegions.removeAll(disabledAndOnDeadRegions);
628
629
630 for (HRegionInfo hri : enabledAndOnDeadRegions) {
631 master.assignmentManager.addPlan(hri.getEncodedName(),
632 new RegionPlan(hri, null, deadServerName));
633 master.assignRegion(hri);
634 }
635 for (HRegionInfo hri : disabledAndOnDeadRegions) {
636 master.assignmentManager.addPlan(hri.getEncodedName(),
637 new RegionPlan(hri, null, deadServerName));
638 master.assignRegion(hri);
639 }
640
641
642 log("Waiting for assignment to finish");
643 ZKAssign.blockUntilNoRIT(zkw);
644 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
645 log("Assignment completed");
646
647
648
649 verifyRegionLocation(hrs, enabledAndAssignedRegions);
650 verifyRegionLocation(hrs, disabledAndAssignedRegions);
651 verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
652 verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
653
654 assertTrue(" Didn't get enough regions of enabledTalbe on live rs.",
655 enabledAndAssignedRegions.size() >= 2);
656 assertTrue(" Didn't get enough regions of disalbedTable on live rs.",
657 disabledAndAssignedRegions.size() >= 2);
658 assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.",
659 enabledAndOnDeadRegions.size() >= 2);
660 assertTrue(" Didn't get enough regions of disalbedTable on dead rs.",
661 disabledAndOnDeadRegions.size() >= 2);
662
663
664 log("Aborting master");
665 cluster.abortMaster(0);
666 cluster.waitOnMaster(0);
667 log("Master has aborted");
668
669
670
671
672
673
674 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
675 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
676
677 log("Beginning to mock scenarios");
678
679
680 TableStateManager zktable = new ZKTableStateManager(zkw);
681 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
682
683 assertTrue(" The enabled table should be identified on master fail over.",
684 zktable.isTableState(TableName.valueOf("enabledTable"),
685 ZooKeeperProtos.Table.State.ENABLED));
686
687
688
689
690
691
692 HRegionInfo region = enabledAndOnDeadRegions.remove(0);
693 regionsThatShouldBeOnline.add(region);
694 ZKAssign.createNodeClosing(zkw, region, deadServerName);
695 LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
696 region + "\n\n");
697
698
699 region = disabledAndOnDeadRegions.remove(0);
700 regionsThatShouldBeOffline.add(region);
701 ZKAssign.createNodeClosing(zkw, region, deadServerName);
702 LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
703 region + "\n\n");
704
705
706
707
708
709
710 region = enabledAndOnDeadRegions.remove(0);
711 regionsThatShouldBeOnline.add(region);
712 int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
713 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
714 LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
715 region + "\n\n");
716
717
718 region = disabledAndOnDeadRegions.remove(0);
719 regionsThatShouldBeOffline.add(region);
720 version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
721 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
722 LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
723 region + "\n\n");
724
725
726
727
728
729
730 region = enabledRegions.remove(0);
731 regionsThatShouldBeOnline.add(region);
732 ZKAssign.createNodeOffline(zkw, region, deadServerName);
733 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
734 LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
735 region + "\n\n");
736
737
738 region = disabledRegions.remove(0);
739 regionsThatShouldBeOffline.add(region);
740 ZKAssign.createNodeOffline(zkw, region, deadServerName);
741 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
742 LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
743 region + "\n\n");
744
745
746
747
748
749
750 region = enabledRegions.remove(0);
751 regionsThatShouldBeOnline.add(region);
752 ZKAssign.createNodeOffline(zkw, region, deadServerName);
753 ProtobufUtil.openRegion(hrsDead.getRSRpcServices(),
754 hrsDead.getServerName(), region);
755 while (true) {
756 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
757 RegionTransition rt = RegionTransition.parseFrom(bytes);
758 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
759 break;
760 }
761 Thread.sleep(100);
762 }
763 LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
764 region + "\n\n");
765
766
767 region = disabledRegions.remove(0);
768 regionsThatShouldBeOffline.add(region);
769 ZKAssign.createNodeOffline(zkw, region, deadServerName);
770 ProtobufUtil.openRegion(hrsDead.getRSRpcServices(),
771 hrsDead.getServerName(), region);
772 while (true) {
773 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
774 RegionTransition rt = RegionTransition.parseFrom(bytes);
775 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
776 break;
777 }
778 Thread.sleep(100);
779 }
780 LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
781 region + "\n\n");
782
783
784
785
786
787
788 region = enabledRegions.remove(0);
789 regionsThatShouldBeOnline.add(region);
790 ZKAssign.createNodeOffline(zkw, region, deadServerName);
791 ProtobufUtil.openRegion(hrsDead.getRSRpcServices(),
792 hrsDead.getServerName(), region);
793 while (true) {
794 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
795 RegionTransition rt = RegionTransition.parseFrom(bytes);
796 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
797 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
798 LOG.debug("DELETED " + rt);
799 break;
800 }
801 Thread.sleep(100);
802 }
803 LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
804 + "\n" + region + "\n\n");
805
806
807 region = disabledRegions.remove(0);
808 regionsThatShouldBeOffline.add(region);
809 ZKAssign.createNodeOffline(zkw, region, deadServerName);
810 ProtobufUtil.openRegion(hrsDead.getRSRpcServices(),
811 hrsDead.getServerName(), region);
812 while (true) {
813 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
814 RegionTransition rt = RegionTransition.parseFrom(bytes);
815 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
816 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
817 break;
818 }
819 Thread.sleep(100);
820 }
821 LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
822 + "\n" + region + "\n\n");
823
824
825
826
827
828 log("Done mocking data up in ZK");
829
830
831 log("Killing RS " + deadServerName);
832 hrsDead.abort("Killing for unit test");
833 log("RS " + deadServerName + " killed");
834
835
836
837 while (hrsDeadThread.isAlive()) {
838 Threads.sleep(10);
839 }
840 log("Starting up a new master");
841 master = cluster.startMaster().getMaster();
842 log("Waiting for master to be ready");
843 assertTrue(cluster.waitForActiveAndReadyMaster());
844 log("Master is ready");
845
846
847 while (master.getServerManager().areDeadServersInProgress()) {
848 Thread.sleep(10);
849 }
850
851
852 log("Waiting for no more RIT");
853 ZKAssign.blockUntilNoRIT(zkw);
854 log("No more RIT in ZK");
855 long now = System.currentTimeMillis();
856 long maxTime = 120000;
857 boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
858 if (!done) {
859 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
860 LOG.info("rit=" + regionStates.getRegionsInTransition());
861 }
862 long elapsed = System.currentTimeMillis() - now;
863 assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
864 elapsed < maxTime);
865 log("No more RIT in RIT map, doing final test verification");
866
867
868 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
869 now = System.currentTimeMillis();
870 maxTime = 30000;
871 for (JVMClusterUtil.RegionServerThread rst :
872 cluster.getRegionServerThreads()) {
873 try {
874 HRegionServer rs = rst.getRegionServer();
875 while (!rs.getRegionsInTransitionInRS().isEmpty()) {
876 elapsed = System.currentTimeMillis() - now;
877 assertTrue("Test timed out in getting online regions", elapsed < maxTime);
878 if (rs.isAborted() || rs.isStopped()) {
879
880 break;
881 }
882 Thread.sleep(100);
883 }
884 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
885 } catch (RegionServerStoppedException e) {
886 LOG.info("Got RegionServerStoppedException", e);
887 }
888 }
889
890
891 for (HRegionInfo hri : regionsThatShouldBeOnline) {
892 assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
893 onlineRegions.contains(hri));
894 }
895
896
897 for (HRegionInfo hri : regionsThatShouldBeOffline) {
898 assertFalse(onlineRegions.contains(hri));
899 }
900
901 log("Done with verification, all passed, shutting down cluster");
902
903
904 TEST_UTIL.shutdownMiniCluster();
905 }
906
907
908
909
910 private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
911 throws IOException {
912 List<HRegionInfo> tmpOnlineRegions =
913 ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
914 Iterator<HRegionInfo> itr = regions.iterator();
915 while (itr.hasNext()) {
916 HRegionInfo tmp = itr.next();
917 if (!tmpOnlineRegions.contains(tmp)) {
918 itr.remove();
919 }
920 }
921 }
922
923 HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
924 final HTableDescriptor htd)
925 throws IOException {
926 HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
927
928
929
930
931
932 HRegion.closeHRegion(r);
933 return r;
934 }
935
936
937
938
939 private void log(String string) {
940 LOG.info("\n\n" + string + " \n\n");
941 }
942
943 @Test (timeout=180000)
944 public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
945 throws Exception {
946 LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
947 final int NUM_MASTERS = 1;
948 final int NUM_RS = 2;
949
950
951 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
952 Configuration conf = TEST_UTIL.getConfiguration();
953 conf.setInt("hbase.master.info.port", -1);
954 conf.setBoolean("hbase.assignment.usezk", true);
955
956 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
957 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
958
959
960 List<RegionServerThread> regionServerThreads =
961 cluster.getRegionServerThreads();
962 HRegion metaRegion = null;
963 HRegionServer metaRegionServer = null;
964 for (RegionServerThread regionServerThread : regionServerThreads) {
965 HRegionServer regionServer = regionServerThread.getRegionServer();
966 metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
967 regionServer.abort("");
968 if (null != metaRegion) {
969 metaRegionServer = regionServer;
970 break;
971 }
972 }
973
974 TEST_UTIL.shutdownMiniHBaseCluster();
975
976
977 ZooKeeperWatcher zkw =
978 HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
979 metaRegion, metaRegionServer.getServerName());
980
981 LOG.info("Staring cluster for second time");
982 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
983
984 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
985 while (!master.isInitialized()) {
986 Thread.sleep(100);
987 }
988
989 log("Waiting for no more RIT");
990 ZKAssign.blockUntilNoRIT(zkw);
991
992 zkw.close();
993
994 TEST_UTIL.shutdownMiniCluster();
995 }
996
997
998
999
1000 @Test(timeout=240000)
1001 public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
1002 final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
1003 final int NUM_MASTERS = 1;
1004 final int NUM_RS = 2;
1005
1006
1007 Configuration conf = HBaseConfiguration.create();
1008 conf.setBoolean("hbase.assignment.usezk", true);
1009
1010
1011 final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1012 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1013 log("Cluster started");
1014
1015 TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1016 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1017 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1018 HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1019 ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1020 TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1021
1022 ServerName dstName = null;
1023 for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1024 if (!tmpServer.equals(serverName)) {
1025 dstName = tmpServer;
1026 break;
1027 }
1028 }
1029
1030 assertTrue(dstName != null);
1031
1032 TEST_UTIL.shutdownMiniHBaseCluster();
1033
1034 ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1035 ZKAssign.createNodeOffline(zkw, hri, dstName);
1036 Stat stat = new Stat();
1037 byte[] data =
1038 ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1039 assertTrue(data != null);
1040 RegionTransition rt = RegionTransition.parseFrom(data);
1041 assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1042
1043 LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1044 + " and dst server=" + dstName);
1045
1046
1047 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1048
1049 while (true) {
1050 master = TEST_UTIL.getHBaseCluster().getMaster();
1051 if (master != null && master.isInitialized()) {
1052 ServerManager serverManager = master.getServerManager();
1053 if (!serverManager.areDeadServersInProgress()) {
1054 break;
1055 }
1056 }
1057 Thread.sleep(200);
1058 }
1059
1060
1061 master = TEST_UTIL.getHBaseCluster().getMaster();
1062 master.getAssignmentManager().waitForAssignment(hri);
1063 regionStates = master.getAssignmentManager().getRegionStates();
1064 RegionState newState = regionStates.getRegionState(hri);
1065 assertTrue(newState.isOpened());
1066 }
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076 @Test (timeout=240000)
1077 public void testSimpleMasterFailover() throws Exception {
1078
1079 final int NUM_MASTERS = 3;
1080 final int NUM_RS = 3;
1081
1082
1083 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1084
1085 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1086 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1087
1088
1089 List<MasterThread> masterThreads = cluster.getMasterThreads();
1090
1091
1092 for (MasterThread mt : masterThreads) {
1093 assertTrue(mt.isAlive());
1094 }
1095
1096
1097 int numActive = 0;
1098 int activeIndex = -1;
1099 ServerName activeName = null;
1100 HMaster active = null;
1101 for (int i = 0; i < masterThreads.size(); i++) {
1102 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1103 numActive++;
1104 activeIndex = i;
1105 active = masterThreads.get(activeIndex).getMaster();
1106 activeName = active.getServerName();
1107 }
1108 }
1109 assertEquals(1, numActive);
1110 assertEquals(NUM_MASTERS, masterThreads.size());
1111 LOG.info("Active master " + activeName);
1112
1113
1114 assertNotNull(active);
1115 ClusterStatus status = active.getClusterStatus();
1116 assertTrue(status.getMaster().equals(activeName));
1117 assertEquals(2, status.getBackupMastersSize());
1118 assertEquals(2, status.getBackupMasters().size());
1119
1120
1121 int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1122 HMaster master = cluster.getMaster(backupIndex);
1123 LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1124 cluster.stopMaster(backupIndex, false);
1125 cluster.waitOnMaster(backupIndex);
1126
1127
1128 for (int i = 0; i < masterThreads.size(); i++) {
1129 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1130 assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
1131 activeIndex = i;
1132 active = masterThreads.get(activeIndex).getMaster();
1133 }
1134 }
1135 assertEquals(1, numActive);
1136 assertEquals(2, masterThreads.size());
1137 int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1138 LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
1139 assertEquals(3, rsCount);
1140
1141
1142 assertNotNull(active);
1143 status = active.getClusterStatus();
1144 assertTrue(status.getMaster().equals(activeName));
1145 assertEquals(1, status.getBackupMastersSize());
1146 assertEquals(1, status.getBackupMasters().size());
1147
1148
1149 LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1150 cluster.stopMaster(activeIndex, false);
1151 cluster.waitOnMaster(activeIndex);
1152
1153
1154 assertTrue(cluster.waitForActiveAndReadyMaster());
1155
1156 LOG.debug("\n\nVerifying backup master is now active\n");
1157
1158 assertEquals(1, masterThreads.size());
1159
1160
1161 active = masterThreads.get(0).getMaster();
1162 assertNotNull(active);
1163 status = active.getClusterStatus();
1164 ServerName mastername = status.getMaster();
1165 assertTrue(mastername.equals(active.getServerName()));
1166 assertTrue(active.isActiveMaster());
1167 assertEquals(0, status.getBackupMastersSize());
1168 assertEquals(0, status.getBackupMasters().size());
1169 int rss = status.getServersSize();
1170 LOG.info("Active master " + mastername.getServerName() + " managing " +
1171 rss + " region servers");
1172 assertEquals(3, rss);
1173
1174
1175 TEST_UTIL.shutdownMiniCluster();
1176 }
1177
1178
1179
1180
1181 @Test (timeout=180000)
1182 @SuppressWarnings("deprecation")
1183 public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1184 final int NUM_MASTERS = 1;
1185 final int NUM_RS = 1;
1186
1187
1188 Configuration conf = HBaseConfiguration.create();
1189 conf.setBoolean("hbase.assignment.usezk", false);
1190
1191
1192 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1193 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1194 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1195 log("Cluster started");
1196
1197
1198 List<MasterThread> masterThreads = cluster.getMasterThreads();
1199 assertEquals(1, masterThreads.size());
1200
1201
1202 assertTrue(cluster.waitForActiveAndReadyMaster());
1203 HMaster master = masterThreads.get(0).getMaster();
1204 assertTrue(master.isActiveMaster());
1205 assertTrue(master.isInitialized());
1206
1207
1208 Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
1209 onlineTable.close();
1210
1211 HTableDescriptor offlineTable = new HTableDescriptor(
1212 TableName.valueOf(Bytes.toBytes("offlineTable")));
1213 offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1214
1215 FileSystem filesystem = FileSystem.get(conf);
1216 Path rootdir = FSUtils.getRootDir(conf);
1217 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1218 fstd.createTableDescriptor(offlineTable);
1219
1220 HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1221 createRegion(hriOffline, rootdir, conf, offlineTable);
1222 MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
1223
1224 log("Regions in hbase:meta and namespace have been created");
1225
1226
1227
1228 assertEquals(3, cluster.countServedRegions());
1229 HRegionInfo hriOnline = null;
1230 try (RegionLocator locator =
1231 TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
1232 hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
1233 }
1234 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1235 RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
1236
1237
1238
1239 RegionState oldState = regionStates.getRegionState(hriOnline);
1240 RegionState newState = new RegionState(
1241 hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1242 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1243
1244
1245
1246 oldState = new RegionState(hriOffline, State.OFFLINE);
1247 newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1248 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1249
1250 HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1251 createRegion(failedClose, rootdir, conf, offlineTable);
1252 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
1253
1254 oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1255 newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1256 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1257
1258
1259 HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1260 createRegion(failedOpen, rootdir, conf, offlineTable);
1261 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
1262
1263
1264
1265 oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1266 newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1267 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1268
1269 HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1270 createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1271 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
1272
1273
1274
1275 oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1276 newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1277 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1278
1279
1280
1281
1282 log("Aborting master");
1283 cluster.abortMaster(0);
1284 cluster.waitOnMaster(0);
1285 log("Master has aborted");
1286
1287
1288 log("Starting up a new master");
1289 master = cluster.startMaster().getMaster();
1290 log("Waiting for master to be ready");
1291 cluster.waitForActiveAndReadyMaster();
1292 log("Master is ready");
1293
1294
1295 master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1296
1297
1298 regionStates = master.getAssignmentManager().getRegionStates();
1299
1300
1301 assertTrue(regionStates.isRegionOnline(hriOffline));
1302 assertTrue(regionStates.isRegionOnline(hriOnline));
1303 assertTrue(regionStates.isRegionOnline(failedClose));
1304 assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1305 assertTrue(regionStates.isRegionOnline(failedOpen));
1306
1307 log("Done with verification, shutting down cluster");
1308
1309
1310 TEST_UTIL.shutdownMiniCluster();
1311 }
1312
1313
1314
1315
1316 @Test(timeout = 180000)
1317 public void testMetaInTransitionWhenMasterFailover() throws Exception {
1318 final int NUM_MASTERS = 1;
1319 final int NUM_RS = 1;
1320
1321
1322 Configuration conf = HBaseConfiguration.create();
1323 conf.setBoolean("hbase.assignment.usezk", false);
1324 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1325 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1326 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1327 log("Cluster started");
1328
1329 log("Moving meta off the master");
1330 HMaster activeMaster = cluster.getMaster();
1331 HRegionServer rs = cluster.getRegionServer(0);
1332 ServerName metaServerName = cluster.getLiveRegionServerThreads()
1333 .get(0).getRegionServer().getServerName();
1334 activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1335 Bytes.toBytes(metaServerName.getServerName()));
1336 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1337 assertEquals("Meta should be assigned on expected regionserver",
1338 metaServerName, activeMaster.getMetaTableLocator()
1339 .getMetaRegionLocation(activeMaster.getZooKeeper()));
1340
1341
1342 log("Aborting master");
1343 activeMaster.abort("test-kill");
1344 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1345 log("Master has aborted");
1346
1347
1348 RegionState metaState =
1349 MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
1350 assertEquals("hbase:meta should be onlined on RS",
1351 metaState.getServerName(), rs.getServerName());
1352 assertEquals("hbase:meta should be onlined on RS",
1353 metaState.getState(), State.OPEN);
1354
1355
1356 log("Starting up a new master");
1357 activeMaster = cluster.startMaster().getMaster();
1358 log("Waiting for master to be ready");
1359 cluster.waitForActiveAndReadyMaster();
1360 log("Master is ready");
1361
1362
1363 metaState =
1364 MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1365 assertEquals("hbase:meta should be onlined on RS",
1366 metaState.getServerName(), rs.getServerName());
1367 assertEquals("hbase:meta should be onlined on RS",
1368 metaState.getState(), State.OPEN);
1369
1370
1371
1372
1373
1374 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1375 rs.getServerName(), State.PENDING_OPEN);
1376 HRegion meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1377 rs.removeFromOnlineRegions(meta, null);
1378 meta.close();
1379
1380 log("Aborting master");
1381 activeMaster.abort("test-kill");
1382 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1383 log("Master has aborted");
1384
1385
1386 log("Starting up a new master");
1387 activeMaster = cluster.startMaster().getMaster();
1388 log("Waiting for master to be ready");
1389 cluster.waitForActiveAndReadyMaster();
1390 log("Master is ready");
1391
1392 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1393 log("Meta was assigned");
1394
1395 metaState =
1396 MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1397 assertEquals("hbase:meta should be onlined on RS",
1398 metaState.getServerName(), rs.getServerName());
1399 assertEquals("hbase:meta should be onlined on RS",
1400 metaState.getState(), State.OPEN);
1401
1402
1403
1404
1405
1406 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1407 rs.getServerName(), State.PENDING_CLOSE);
1408
1409 log("Aborting master");
1410 activeMaster.abort("test-kill");
1411 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1412 log("Master has aborted");
1413
1414 rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
1415 rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1416
1417
1418 log("Starting up a new master");
1419 activeMaster = cluster.startMaster().getMaster();
1420 log("Waiting for master to be ready");
1421 cluster.waitForActiveAndReadyMaster();
1422 log("Master is ready");
1423
1424 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1425 log("Meta was assigned");
1426
1427 rs.getRSRpcServices().closeRegion(
1428 null,
1429 RequestConverter.buildCloseRegionRequest(rs.getServerName(),
1430 HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1431
1432
1433 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1434 ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
1435
1436 log("Aborting master");
1437 activeMaster.stop("test-kill");
1438
1439 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1440 log("Master has aborted");
1441
1442
1443 log("Starting up a new master");
1444 activeMaster = cluster.startMaster().getMaster();
1445 log("Waiting for master to be ready");
1446 cluster.waitForActiveAndReadyMaster();
1447 log("Master is ready");
1448
1449 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1450 log("Meta was assigned");
1451
1452
1453 TEST_UTIL.shutdownMiniCluster();
1454 }
1455 }
1456