1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertNotSame;
25 import static org.junit.Assert.assertNull;
26 import static org.junit.Assert.assertTrue;
27 import static org.junit.Assert.fail;
28
29 import java.io.IOException;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.concurrent.CountDownLatch;
33
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.fs.FileSystem;
38 import org.apache.hadoop.fs.Path;
39 import org.apache.hadoop.hbase.Abortable;
40 import org.apache.hadoop.hbase.TableName;
41 import org.apache.hadoop.hbase.HBaseIOException;
42 import org.apache.hadoop.hbase.HBaseTestingUtility;
43 import org.apache.hadoop.hbase.HColumnDescriptor;
44 import org.apache.hadoop.hbase.HConstants;
45 import org.apache.hadoop.hbase.HRegionInfo;
46 import org.apache.hadoop.hbase.HTableDescriptor;
47 import org.apache.hadoop.hbase.LargeTests;
48 import org.apache.hadoop.hbase.MasterNotRunningException;
49 import org.apache.hadoop.hbase.MiniHBaseCluster;
50 import org.apache.hadoop.hbase.RegionTransition;
51 import org.apache.hadoop.hbase.Server;
52 import org.apache.hadoop.hbase.ServerName;
53 import org.apache.hadoop.hbase.UnknownRegionException;
54 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
55 import org.apache.hadoop.hbase.catalog.MetaReader;
56 import org.apache.hadoop.hbase.client.Delete;
57 import org.apache.hadoop.hbase.client.HBaseAdmin;
58 import org.apache.hadoop.hbase.client.HTable;
59 import org.apache.hadoop.hbase.client.Put;
60 import org.apache.hadoop.hbase.client.Result;
61 import org.apache.hadoop.hbase.client.ResultScanner;
62 import org.apache.hadoop.hbase.client.Scan;
63 import org.apache.hadoop.hbase.exceptions.DeserializationException;
64 import org.apache.hadoop.hbase.executor.EventType;
65 import org.apache.hadoop.hbase.master.AssignmentManager;
66 import org.apache.hadoop.hbase.master.HMaster;
67 import org.apache.hadoop.hbase.master.RegionState;
68 import org.apache.hadoop.hbase.master.RegionState.State;
69 import org.apache.hadoop.hbase.master.RegionStates;
70 import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
71 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
72 import org.apache.hadoop.hbase.util.Bytes;
73 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
74 import org.apache.hadoop.hbase.util.FSUtils;
75 import org.apache.hadoop.hbase.util.HBaseFsck;
76 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
77 import org.apache.hadoop.hbase.util.Threads;
78 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
79 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
80 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
81 import org.apache.zookeeper.KeeperException;
82 import org.apache.zookeeper.KeeperException.NodeExistsException;
83 import org.apache.zookeeper.data.Stat;
84 import org.junit.After;
85 import org.junit.AfterClass;
86 import org.junit.Assert;
87 import org.junit.Before;
88 import org.junit.BeforeClass;
89 import org.junit.Test;
90 import org.junit.experimental.categories.Category;
91
92 import com.google.protobuf.ServiceException;
93
94
95
96
97
98
99 @Category(LargeTests.class)
100 public class TestSplitTransactionOnCluster {
101 private static final Log LOG =
102 LogFactory.getLog(TestSplitTransactionOnCluster.class);
103 private HBaseAdmin admin = null;
104 private MiniHBaseCluster cluster = null;
105 private static final int NB_SERVERS = 3;
106 private static CountDownLatch latch = new CountDownLatch(1);
107 private static volatile boolean secondSplit = false;
108 private static volatile boolean callRollBack = false;
109 private static volatile boolean firstSplitCompleted = false;
110 private static byte [] CF = Bytes.toBytes("cf");
111
112 private static final HBaseTestingUtility TESTING_UTIL =
113 new HBaseTestingUtility();
114
115 @BeforeClass public static void before() throws Exception {
116 TESTING_UTIL.getConfiguration().setInt("hbase.balancer.period", 60000);
117
118
119 TESTING_UTIL.getConfiguration().setInt(
120 "hbase.master.assignment.timeoutmonitor.timeout", 4000);
121 TESTING_UTIL.startMiniCluster(NB_SERVERS);
122 }
123
124 @AfterClass public static void after() throws Exception {
125 TESTING_UTIL.shutdownMiniCluster();
126 }
127
128 @Before public void setup() throws IOException {
129 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
130 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
131 this.cluster = TESTING_UTIL.getMiniHBaseCluster();
132 }
133
134 @After
135 public void tearDown() throws Exception {
136 this.admin.close();
137 }
138
139 private HRegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) {
140 assertEquals(1, regions.size());
141 HRegionInfo hri = regions.get(0).getRegionInfo();
142 return waitOnRIT(hri);
143 }
144
145
146
147
148
149
150
151
152 private HRegionInfo waitOnRIT(final HRegionInfo hri) {
153
154
155 while (TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
156 getRegionStates().isRegionInTransition(hri)) {
157 LOG.info("Waiting on region in transition: " +
158 TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().
159 getRegionTransitionState(hri));
160 Threads.sleep(10);
161 }
162 return hri;
163 }
164
165 @Test(timeout = 60000)
166 public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
167 final TableName tableName =
168 TableName.valueOf("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
169 try {
170
171 HTable t = createTableAndWait(tableName.getName(), CF);
172 final List<HRegion> regions = cluster.getRegions(tableName);
173 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
174 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
175 final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
176 insertData(tableName.getName(), admin, t);
177 t.close();
178
179
180 this.admin.setBalancerRunning(false, true);
181
182 cluster.getMaster().setCatalogJanitorEnabled(false);
183
184
185 final HRegion region = findSplittableRegion(regions);
186 assertTrue("not able to find a splittable region", region != null);
187
188 new Thread() {
189 public void run() {
190 SplitTransaction st = null;
191 st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
192 try {
193 st.prepare();
194 st.execute(regionServer, regionServer);
195 } catch (IOException e) {
196
197 }
198 }
199 }.start();
200 for (int i = 0; !callRollBack && i < 100; i++) {
201 Thread.sleep(100);
202 }
203 assertTrue("Waited too long for rollback", callRollBack);
204 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row3"));
205 try {
206 secondSplit = true;
207 st.prepare();
208 st.execute(regionServer, regionServer);
209 } catch (IOException e) {
210 LOG.debug("Rollback started :"+ e.getMessage());
211 st.rollback(regionServer, regionServer);
212 }
213 for (int i=0; !firstSplitCompleted && i<100; i++) {
214 Thread.sleep(100);
215 }
216 assertTrue("fist split did not complete", firstSplitCompleted);
217
218 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
219 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
220
221 for (int i=0; rit.containsKey(hri.getTableName()) && i<100; i++) {
222 Thread.sleep(100);
223 }
224 assertFalse("region still in transition", rit.containsKey(
225 rit.containsKey(hri.getTableName())));
226
227 List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
228
229 assertEquals("The parent region should be splitted", 2, onlineRegions.size());
230
231 List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager()
232 .getRegionStates().getRegionsOfTable(tableName);
233
234 assertEquals("No of regions in master", 2, regionsOfTable.size());
235 } finally {
236 admin.setBalancerRunning(true, false);
237 secondSplit = false;
238 firstSplitCompleted = false;
239 callRollBack = false;
240 cluster.getMaster().setCatalogJanitorEnabled(true);
241 TESTING_UTIL.deleteTable(tableName);
242 }
243 }
244
245
246
247
248
249
250
251
252
253
254
255
256 @Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling()
257 throws IOException, InterruptedException, NodeExistsException, KeeperException,
258 DeserializationException, ServiceException {
259 final byte [] tableName =
260 Bytes.toBytes("testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling");
261
262
263 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
264 List<HRegion> regions = cluster.getRegions(tableName);
265 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
266
267 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
268
269
270 this.admin.setBalancerRunning(false, true);
271
272 cluster.getMaster().setCatalogJanitorEnabled(false);
273 try {
274
275 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
276
277 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
278 printOutRegions(server, "Initial regions: ");
279 int regionCount = ProtobufUtil.getOnlineRegions(server).size();
280
281
282 SplitRegionHandler.TEST_SKIP = true;
283
284 split(hri, server, regionCount);
285
286 List<HRegion> daughters = checkAndGetDaughters(tableName);
287
288 String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
289 hri.getEncodedName());
290 Stat stats =
291 TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
292 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
293 RegionTransition rt =
294 RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
295 hri.getEncodedName()));
296
297 assertTrue(rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT) ||
298 rt.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
299
300 cluster.abortRegionServer(tableRegionIndex);
301 waitUntilRegionServerDead();
302 awaitDaughters(tableName, daughters.size());
303
304
305 regions = cluster.getRegions(tableName);
306 for (HRegion r: regions) {
307 assertTrue(daughters.contains(r));
308 }
309
310 for (int i=0; i<100; i++) {
311
312 stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
313 if (stats == null) break;
314 Thread.sleep(100);
315 }
316 LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
317 assertTrue(stats == null);
318 } finally {
319
320 SplitRegionHandler.TEST_SKIP = false;
321 admin.setBalancerRunning(true, false);
322 cluster.getMaster().setCatalogJanitorEnabled(true);
323 t.close();
324 }
325 }
326
327 @Test (timeout = 300000) public void testExistingZnodeBlocksSplitAndWeRollback()
328 throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException {
329 final byte [] tableName =
330 Bytes.toBytes("testExistingZnodeBlocksSplitAndWeRollback");
331
332
333 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
334 List<HRegion> regions = cluster.getRegions(tableName);
335 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
336
337 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
338
339
340 this.admin.setBalancerRunning(false, true);
341
342 cluster.getMaster().setCatalogJanitorEnabled(false);
343 try {
344
345 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
346
347 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
348 printOutRegions(server, "Initial regions: ");
349 int regionCount = ProtobufUtil.getOnlineRegions(server).size();
350
351
352 ZKAssign.createNodeClosing(TESTING_UTIL.getZooKeeperWatcher(),
353 hri, new ServerName("any.old.server", 1234, -1));
354
355
356 this.admin.split(hri.getRegionNameAsString());
357 this.admin.split(hri.getRegionNameAsString());
358 this.admin.split(hri.getRegionNameAsString());
359
360 for (int i = 0; i < 10; i++) {
361 Thread.sleep(100);
362 assertEquals(regionCount, ProtobufUtil.getOnlineRegions(server).size());
363 }
364
365 ZKAssign.deleteClosingNode(TESTING_UTIL.getZooKeeperWatcher(), hri);
366
367 split(hri, server, regionCount);
368
369 checkAndGetDaughters(tableName);
370
371 } finally {
372 admin.setBalancerRunning(true, false);
373 cluster.getMaster().setCatalogJanitorEnabled(true);
374 t.close();
375 }
376 }
377
378
379
380
381
382
383
384 @Test (timeout=300000) public void testShutdownFixupWhenDaughterHasSplit()
385 throws IOException, InterruptedException, ServiceException {
386 final byte [] tableName =
387 Bytes.toBytes("testShutdownFixupWhenDaughterHasSplit");
388
389
390 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
391 List<HRegion> regions = cluster.getRegions(tableName);
392 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
393
394 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
395
396
397 this.admin.setBalancerRunning(false, true);
398
399 cluster.getMaster().setCatalogJanitorEnabled(false);
400 try {
401
402 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
403
404 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
405 printOutRegions(server, "Initial regions: ");
406 int regionCount = ProtobufUtil.getOnlineRegions(server).size();
407
408 split(hri, server, regionCount);
409
410 List<HRegion> daughters = checkAndGetDaughters(tableName);
411
412 regionCount = ProtobufUtil.getOnlineRegions(server).size();
413 HRegionInfo daughter = daughters.get(0).getRegionInfo();
414 LOG.info("Daughter we are going to split: " + daughter);
415
416
417 this.admin.compact(daughter.getRegionName());
418 daughters = cluster.getRegions(tableName);
419 HRegion daughterRegion = null;
420 for (HRegion r: daughters) {
421 if (r.getRegionInfo().equals(daughter)) {
422 daughterRegion = r;
423 LOG.info("Found matching HRI: " + daughterRegion);
424 break;
425 }
426 }
427 assertTrue(daughterRegion != null);
428 for (int i=0; i<100; i++) {
429 if (!daughterRegion.hasReferences()) break;
430 Threads.sleep(100);
431 }
432 assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
433 LOG.info("Daughter hri before split (has been compacted): " + daughter);
434 split(daughter, server, regionCount);
435
436 daughters = cluster.getRegions(tableName);
437 for (HRegion d: daughters) {
438 LOG.info("Regions before crash: " + d);
439 }
440
441 cluster.abortRegionServer(tableRegionIndex);
442 waitUntilRegionServerDead();
443 awaitDaughters(tableName, daughters.size());
444
445
446 regions = cluster.getRegions(tableName);
447 for (HRegion d: daughters) {
448 LOG.info("Regions after crash: " + d);
449 }
450 assertEquals(daughters.size(), regions.size());
451 for (HRegion r: regions) {
452 LOG.info("Regions post crash " + r);
453 assertTrue("Missing region post crash " + r, daughters.contains(r));
454 }
455 } finally {
456 admin.setBalancerRunning(true, false);
457 cluster.getMaster().setCatalogJanitorEnabled(true);
458 t.close();
459 }
460 }
461
462 @Test(timeout = 180000)
463 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
464 Configuration conf = TESTING_UTIL.getConfiguration();
465 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
466 TableName userTableName =
467 TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
468 HTableDescriptor htd = new HTableDescriptor(userTableName);
469 HColumnDescriptor hcd = new HColumnDescriptor("col");
470 htd.addFamily(hcd);
471 admin.createTable(htd);
472 ZKAssign.blockUntilNoRIT(zkw);
473 HTable table = new HTable(conf, userTableName);
474 try {
475 for (int i = 0; i <= 5; i++) {
476 String row = "row" + i;
477 Put p = new Put(row.getBytes());
478 String val = "Val" + i;
479 p.add("col".getBytes(), "ql".getBytes(), val.getBytes());
480 table.put(p);
481 admin.flush(userTableName.getName());
482 Delete d = new Delete(row.getBytes());
483
484 table.delete(d);
485 admin.flush(userTableName.getName());
486 }
487 admin.majorCompact(userTableName.getName());
488 List<HRegionInfo> regionsOfTable = TESTING_UTIL.getMiniHBaseCluster()
489 .getMaster().getAssignmentManager().getRegionStates()
490 .getRegionsOfTable(userTableName);
491 HRegionInfo hRegionInfo = regionsOfTable.get(0);
492 Put p = new Put("row6".getBytes());
493 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
494 table.put(p);
495 p = new Put("row7".getBytes());
496 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
497 table.put(p);
498 p = new Put("row8".getBytes());
499 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
500 table.put(p);
501 admin.flush(userTableName.getName());
502 admin.split(hRegionInfo.getRegionName(), "row7".getBytes());
503 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
504 .getAssignmentManager().getRegionStates()
505 .getRegionsOfTable(userTableName);
506
507 while (regionsOfTable.size() != 2) {
508 Thread.sleep(2000);
509 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
510 .getAssignmentManager().getRegionStates()
511 .getRegionsOfTable(userTableName);
512 }
513 Assert.assertEquals(2, regionsOfTable.size());
514 Scan s = new Scan();
515 ResultScanner scanner = table.getScanner(s);
516 int mainTableCount = 0;
517 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
518 mainTableCount++;
519 }
520 Assert.assertEquals(3, mainTableCount);
521 } finally {
522 table.close();
523 }
524 }
525
526
527
528
529 static class UselessTestAbortable implements Abortable {
530 boolean aborted = false;
531 @Override
532 public void abort(String why, Throwable e) {
533 LOG.warn("ABORTED (But nothing to abort): why=" + why, e);
534 aborted = true;
535 }
536
537 @Override
538 public boolean isAborted() {
539 return this.aborted;
540 }
541 }
542
543
544
545
546
547
548
549
550
551
552
553 @Test(timeout = 300000)
554 public void testMasterRestartWhenSplittingIsPartial()
555 throws IOException, InterruptedException, NodeExistsException,
556 KeeperException, DeserializationException, ServiceException {
557 final byte[] tableName = Bytes.toBytes("testMasterRestartWhenSplittingIsPartial");
558
559
560 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
561 List<HRegion> regions = cluster.getRegions(tableName);
562 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
563
564 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
565
566
567 this.admin.setBalancerRunning(false, true);
568
569 cluster.getMaster().setCatalogJanitorEnabled(false);
570 ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
571 "testMasterRestartWhenSplittingIsPartial", new UselessTestAbortable());
572 try {
573
574 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
575
576 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
577 printOutRegions(server, "Initial regions: ");
578
579
580 SplitRegionHandler.TEST_SKIP = true;
581
582
583 this.admin.split(hri.getRegionNameAsString());
584 checkAndGetDaughters(tableName);
585
586 String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
587 Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
588 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
589 + stats);
590 byte[] bytes = ZKAssign.getData(zkw, hri.getEncodedName());
591 RegionTransition rtd = RegionTransition.parseFrom(bytes);
592
593 assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
594 || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
595
596
597 MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
598
599 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
600
601
602 hri.setOffline(true);
603 hri.setSplit(true);
604 ServerName regionServerOfRegion = master.getAssignmentManager()
605 .getRegionStates().getRegionServerOfRegion(hri);
606 assertTrue(regionServerOfRegion != null);
607
608 } finally {
609
610 SplitRegionHandler.TEST_SKIP = false;
611 admin.setBalancerRunning(true, false);
612 cluster.getMaster().setCatalogJanitorEnabled(true);
613 t.close();
614 zkw.close();
615 }
616 }
617
618
619
620
621
622
623
624
625
626 @Test (timeout = 300000)
627 public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
628 throws IOException, InterruptedException, NodeExistsException,
629 KeeperException, ServiceException {
630 final byte[] tableName = Bytes.toBytes("testMasterRestartAtRegionSplitPendingCatalogJanitor");
631
632
633 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
634 List<HRegion> regions = cluster.getRegions(tableName);
635 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
636
637 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
638
639
640 this.admin.setBalancerRunning(false, true);
641
642 cluster.getMaster().setCatalogJanitorEnabled(false);
643 ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
644 "testMasterRestartAtRegionSplitPendingCatalogJanitor", new UselessTestAbortable());
645 try {
646
647 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
648
649 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
650 printOutRegions(server, "Initial regions: ");
651
652 this.admin.split(hri.getRegionNameAsString());
653 checkAndGetDaughters(tableName);
654
655 String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
656 Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
657 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
658 + stats);
659 String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
660 Stat stat = new Stat();
661 byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
662
663 for (int i=0; data != null && i<60; i++) {
664 Thread.sleep(1000);
665 data = ZKUtil.getDataNoWatch(zkw, node, stat);
666
667 }
668 assertNull("Waited too long for ZK node to be removed: "+node, data);
669
670 MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
671
672 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
673
674 hri.setOffline(true);
675 hri.setSplit(true);
676 ServerName regionServerOfRegion = master.getAssignmentManager()
677 .getRegionStates().getRegionServerOfRegion(hri);
678 assertTrue(regionServerOfRegion == null);
679 } finally {
680
681 SplitRegionHandler.TEST_SKIP = false;
682 this.admin.setBalancerRunning(true, false);
683 cluster.getMaster().setCatalogJanitorEnabled(true);
684 t.close();
685 zkw.close();
686 }
687 }
688
689
690
691
692
693
694
695
696
697
698
699
700 @Test
701 public void testSplitBeforeSettingSplittingInZK() throws Exception,
702 InterruptedException, KeeperException {
703 testSplitBeforeSettingSplittingInZKInternals();
704 }
705
706 @Test(timeout = 60000)
707 public void testTableExistsIfTheSpecifiedTableRegionIsSplitParent() throws Exception {
708 final TableName tableName =
709 TableName.valueOf("testTableExistsIfTheSpecifiedTableRegionIsSplitParent");
710
711 HTable t = createTableAndWait(tableName.getName(), CF);
712 try {
713 List<HRegion> regions = cluster.getRegions(tableName);
714 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
715 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
716 insertData(tableName.getName(), admin, t);
717
718 admin.setBalancerRunning(false, true);
719
720 cluster.getMaster().setCatalogJanitorEnabled(false);
721 boolean tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
722 tableName);
723 assertEquals("The specified table should present.", true, tableExists);
724 final HRegion region = findSplittableRegion(regions);
725 assertTrue("not able to find a splittable region", region != null);
726 SplitTransaction st = new SplitTransaction(region, Bytes.toBytes("row2"));
727 try {
728 st.prepare();
729 st.createDaughters(regionServer, regionServer);
730 } catch (IOException e) {
731
732 }
733 tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
734 tableName);
735 assertEquals("The specified table should present.", true, tableExists);
736 } finally {
737 admin.setBalancerRunning(true, false);
738 cluster.getMaster().setCatalogJanitorEnabled(true);
739 t.close();
740 }
741 }
742
743 private void insertData(final byte[] tableName, HBaseAdmin admin, HTable t) throws IOException,
744 InterruptedException {
745 Put p = new Put(Bytes.toBytes("row1"));
746 p.add(CF, Bytes.toBytes("q1"), Bytes.toBytes("1"));
747 t.put(p);
748 p = new Put(Bytes.toBytes("row2"));
749 p.add(CF, Bytes.toBytes("q1"), Bytes.toBytes("2"));
750 t.put(p);
751 p = new Put(Bytes.toBytes("row3"));
752 p.add(CF, Bytes.toBytes("q1"), Bytes.toBytes("3"));
753 t.put(p);
754 p = new Put(Bytes.toBytes("row4"));
755 p.add(CF, Bytes.toBytes("q1"), Bytes.toBytes("4"));
756 t.put(p);
757 admin.flush(tableName);
758 }
759
760
761
762
763 @Test
764 public void testSplitRegionNotAssignable() throws Exception {
765 final TableName tableName =
766 TableName.valueOf("testSplitRegionWithNoStoreFiles");
767
768 HTable t = createTableAndWait(tableName.getName(), CF);
769 try {
770 List<HRegion> regions = cluster.getRegions(tableName);
771 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
772 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
773 insertData(tableName.getName(), admin, t);
774
775 admin.setBalancerRunning(false, true);
776
777 cluster.getMaster().setCatalogJanitorEnabled(false);
778 final HRegion region = findSplittableRegion(regions);
779 assertTrue("not able to find a splittable region", region != null);
780
781
782 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
783 try {
784 st.prepare();
785 st.execute(regionServer, regionServer);
786 } catch (IOException e) {
787 fail("Split execution should have succeeded with no exceptions thrown");
788 }
789
790 List<HRegion> daughters = cluster.getRegions(tableName);
791 assertTrue(daughters.size() == regions.size() + 1);
792
793 HRegionInfo hri = region.getRegionInfo();
794 AssignmentManager am = cluster.getMaster().getAssignmentManager();
795 RegionStates regionStates = am.getRegionStates();
796 long start = EnvironmentEdgeManager.currentTimeMillis();
797 while (!regionStates.isRegionInState(hri, State.SPLIT)) {
798 assertFalse("Timed out in waiting split parent to be in state SPLIT",
799 EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
800 Thread.sleep(500);
801 }
802
803
804 am.assign(hri, true, true);
805 assertFalse("Split region should not be in transition again",
806 regionStates.isRegionInTransition(hri)
807 && regionStates.isRegionInState(hri, State.SPLIT));
808 } finally {
809 admin.setBalancerRunning(true, false);
810 cluster.getMaster().setCatalogJanitorEnabled(true);
811 }
812 }
813
814 private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
815 final byte[] tableName = Bytes.toBytes("testSplitBeforeSettingSplittingInZK");
816 try {
817
818 HTable t = createTableAndWait(tableName, CF);
819
820 List<HRegion> regions = awaitTableRegions(tableName);
821 assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
822
823 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
824 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
825 final HRegion region = findSplittableRegion(regions);
826 assertTrue("not able to find a splittable region", region != null);
827 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
828 @Override
829 int createNodeSplitting(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName)
830 throws KeeperException, IOException {
831 throw new SplittingNodeCreationFailedException ();
832 }
833 };
834 String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
835 region.getRegionInfo().getEncodedName());
836 regionServer.getZooKeeper().sync(node);
837 for (int i = 0; i < 100; i++) {
838
839
840
841 if (ZKUtil.checkExists(regionServer.getZooKeeper(), node) != -1) {
842 Thread.sleep(100);
843 }
844 }
845 try {
846 st.prepare();
847 st.execute(regionServer, regionServer);
848 } catch (IOException e) {
849
850
851
852 assertTrue("Should be instance of CreateSplittingNodeFailedException",
853 e instanceof SplittingNodeCreationFailedException );
854 node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
855 region.getRegionInfo().getEncodedName());
856 {
857 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
858 }
859 assertTrue(st.rollback(regionServer, regionServer));
860 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
861 }
862 } finally {
863 TESTING_UTIL.deleteTable(tableName);
864 }
865 }
866
867 public static class MockedSplitTransaction extends SplitTransaction {
868
869 private HRegion currentRegion;
870 public MockedSplitTransaction(HRegion r, byte[] splitrow) {
871 super(r, splitrow);
872 this.currentRegion = r;
873 }
874
875 @Override
876 void transitionZKNode(Server server, RegionServerServices services, HRegion a, HRegion b)
877 throws IOException {
878 if (this.currentRegion.getRegionInfo().getTableName().getNameAsString()
879 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
880 try {
881 if (!secondSplit){
882 callRollBack = true;
883 latch.await();
884 }
885 } catch (InterruptedException e) {
886 }
887
888 }
889 super.transitionZKNode(server, services, a, b);
890 if (this.currentRegion.getRegionInfo().getTableName().getNameAsString()
891 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
892 firstSplitCompleted = true;
893 }
894 }
895 @Override
896 public boolean rollback(Server server, RegionServerServices services) throws IOException {
897 if (this.currentRegion.getRegionInfo().getTableName().getNameAsString()
898 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
899 if(secondSplit){
900 super.rollback(server, services);
901 latch.countDown();
902 return true;
903 }
904 }
905 return super.rollback(server, services);
906 }
907
908 }
909
910 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
911 HRegion region = null;
912 for (int i = 0; i < 5; ++i) {
913 for (HRegion r: regions) {
914 if (r.isSplittable()) {
915 return(r);
916 }
917 }
918 Thread.sleep(100);
919 }
920 return(null);
921 }
922
923 private List<HRegion> checkAndGetDaughters(byte[] tableName)
924 throws InterruptedException {
925 List<HRegion> daughters = null;
926
927 for (int i=0; i<100; i++) {
928 daughters = cluster.getRegions(tableName);
929 if (daughters.size() >= 2) break;
930 Thread.sleep(100);
931 }
932 assertTrue(daughters.size() >= 2);
933 return daughters;
934 }
935
936 private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
937 throws IOException, InterruptedException {
938 cluster.abortMaster(0);
939 cluster.waitOnMaster(0);
940 cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
941 MockMasterWithoutCatalogJanitor.class, HMaster.class);
942 MockMasterWithoutCatalogJanitor master = null;
943 master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
944 cluster.waitForActiveAndReadyMaster();
945 return master;
946 }
947
948 private void split(final HRegionInfo hri, final HRegionServer server, final int regionCount)
949 throws IOException, InterruptedException {
950 this.admin.split(hri.getRegionNameAsString());
951 for (int i = 0; ProtobufUtil.getOnlineRegions(server).size() <= regionCount && i < 100; i++) {
952 LOG.debug("Waiting on region to split");
953 Thread.sleep(100);
954 }
955
956 assertFalse("Waited too long for split",
957 ProtobufUtil.getOnlineRegions(server).size() <= regionCount);
958 }
959
960 private void removeDaughterFromMeta(final byte [] regionName) throws IOException {
961 HTable metaTable = new HTable(TESTING_UTIL.getConfiguration(), TableName.META_TABLE_NAME);
962 try {
963 Delete d = new Delete(regionName);
964 LOG.info("Deleted " + Bytes.toString(regionName));
965 metaTable.delete(d);
966 } finally {
967 metaTable.close();
968 }
969 }
970
971
972
973
974
975
976
977
978
979
980
981
982 private int ensureTableRegionNotOnSameServerAsMeta(final HBaseAdmin admin,
983 final HRegionInfo hri)
984 throws HBaseIOException, MasterNotRunningException,
985 ZooKeeperConnectionException, InterruptedException {
986
987
988
989 int metaServerIndex = cluster.getServerWithMeta();
990 assertTrue(metaServerIndex != -1);
991 HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
992 int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
993 assertTrue(tableRegionIndex != -1);
994 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
995 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
996 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
997 assertNotNull(hrs);
998 assertNotNull(hri);
999 LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
1000 metaRegionServer.getServerName() + " to " +
1001 hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1002 admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
1003 }
1004
1005 for (int i = 0; i < 100; i++) {
1006 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1007 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1008 LOG.debug("Waiting on region move off the .META. server; current index " +
1009 tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1010 Thread.sleep(100);
1011 }
1012 assertTrue("Region not moved off .META. server", tableRegionIndex != -1
1013 && tableRegionIndex != metaServerIndex);
1014
1015 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1016 assertTrue(tableRegionIndex != -1);
1017 assertNotSame(metaServerIndex, tableRegionIndex);
1018 return tableRegionIndex;
1019 }
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030 private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1031 final HRegionServer notThisOne) {
1032 for (RegionServerThread rst: cluster.getRegionServerThreads()) {
1033 HRegionServer hrs = rst.getRegionServer();
1034 if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1035 if (hrs.isStopping() || hrs.isStopped()) continue;
1036 return hrs;
1037 }
1038 return null;
1039 }
1040
1041 private void printOutRegions(final HRegionServer hrs, final String prefix)
1042 throws IOException {
1043 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
1044 for (HRegionInfo region: regions) {
1045 LOG.info(prefix + region.getRegionNameAsString());
1046 }
1047 }
1048
1049 private void waitUntilRegionServerDead() throws InterruptedException {
1050
1051 for (int i=0; cluster.getMaster().getClusterStatus().
1052 getServers().size() == NB_SERVERS && i<100; i++) {
1053 LOG.info("Waiting on server to go down");
1054 Thread.sleep(100);
1055 }
1056 assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
1057 getServers().size() == NB_SERVERS);
1058 }
1059
1060 private void awaitDaughters(byte[] tableName, int numDaughters) throws InterruptedException {
1061
1062 for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
1063 LOG.info("Waiting for repair to happen");
1064 Thread.sleep(1000);
1065 }
1066 if (cluster.getRegions(tableName).size() < numDaughters) {
1067 fail("Waiting too long for daughter regions");
1068 }
1069 }
1070
1071 private List<HRegion> awaitTableRegions(final byte[] tableName) throws InterruptedException {
1072 List<HRegion> regions = null;
1073 for (int i = 0; i < 100; i++) {
1074 regions = cluster.getRegions(tableName);
1075 if (regions.size() > 0) break;
1076 Thread.sleep(100);
1077 }
1078 return regions;
1079 }
1080
1081 private HTable createTableAndWait(byte[] tableName, byte[] cf) throws IOException,
1082 InterruptedException {
1083 HTable t = TESTING_UTIL.createTable(tableName, cf);
1084 awaitTableRegions(tableName);
1085 assertTrue("Table not online: " + Bytes.toString(tableName),
1086 cluster.getRegions(tableName).size() != 0);
1087 return t;
1088 }
1089
1090 public static class MockMasterWithoutCatalogJanitor extends HMaster {
1091
1092 public MockMasterWithoutCatalogJanitor(Configuration conf) throws IOException, KeeperException,
1093 InterruptedException {
1094 super(conf);
1095 }
1096
1097 protected void startCatalogJanitorChore() {
1098 LOG.debug("Customised master executed.");
1099 }
1100 }
1101
1102 private static class SplittingNodeCreationFailedException extends IOException {
1103 public SplittingNodeCreationFailedException () {
1104 super();
1105 }
1106 }
1107
1108 }
1109