1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertNotSame;
25 import static org.junit.Assert.assertNull;
26 import static org.junit.Assert.assertTrue;
27 import static org.junit.Assert.fail;
28
29 import java.io.IOException;
30 import java.util.Collection;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.concurrent.CountDownLatch;
34
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.hbase.Abortable;
41 import org.apache.hadoop.hbase.Coprocessor;
42 import org.apache.hadoop.hbase.CoprocessorEnvironment;
43 import org.apache.hadoop.hbase.HBaseIOException;
44 import org.apache.hadoop.hbase.HBaseTestingUtility;
45 import org.apache.hadoop.hbase.HColumnDescriptor;
46 import org.apache.hadoop.hbase.HConstants;
47 import org.apache.hadoop.hbase.HRegionInfo;
48 import org.apache.hadoop.hbase.HTableDescriptor;
49 import org.apache.hadoop.hbase.MasterNotRunningException;
50 import org.apache.hadoop.hbase.MiniHBaseCluster;
51 import org.apache.hadoop.hbase.RegionTransition;
52 import org.apache.hadoop.hbase.Server;
53 import org.apache.hadoop.hbase.ServerName;
54 import org.apache.hadoop.hbase.TableName;
55 import org.apache.hadoop.hbase.UnknownRegionException;
56 import org.apache.hadoop.hbase.Waiter;
57 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
58 import org.apache.hadoop.hbase.catalog.MetaEditor;
59 import org.apache.hadoop.hbase.catalog.MetaReader;
60 import org.apache.hadoop.hbase.client.Delete;
61 import org.apache.hadoop.hbase.client.HBaseAdmin;
62 import org.apache.hadoop.hbase.client.HTable;
63 import org.apache.hadoop.hbase.client.Mutation;
64 import org.apache.hadoop.hbase.client.Put;
65 import org.apache.hadoop.hbase.client.Result;
66 import org.apache.hadoop.hbase.client.ResultScanner;
67 import org.apache.hadoop.hbase.client.Scan;
68 import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
69 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
70 import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
71 import org.apache.hadoop.hbase.exceptions.DeserializationException;
72 import org.apache.hadoop.hbase.executor.EventType;
73 import org.apache.hadoop.hbase.master.AssignmentManager;
74 import org.apache.hadoop.hbase.master.HMaster;
75 import org.apache.hadoop.hbase.master.RegionState;
76 import org.apache.hadoop.hbase.master.RegionState.State;
77 import org.apache.hadoop.hbase.master.RegionStates;
78 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
79 import org.apache.hadoop.hbase.security.User;
80 import org.apache.hadoop.hbase.testclassification.LargeTests;
81 import org.apache.hadoop.hbase.util.Bytes;
82 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
83 import org.apache.hadoop.hbase.util.FSUtils;
84 import org.apache.hadoop.hbase.util.HBaseFsck;
85 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
86 import org.apache.hadoop.hbase.util.PairOfSameType;
87 import org.apache.hadoop.hbase.util.Threads;
88 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
89 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
90 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
91 import org.apache.zookeeper.KeeperException;
92 import org.apache.zookeeper.KeeperException.NodeExistsException;
93 import org.apache.zookeeper.data.Stat;
94 import org.junit.After;
95 import org.junit.AfterClass;
96 import org.junit.Assert;
97 import org.junit.Before;
98 import org.junit.BeforeClass;
99 import org.junit.Test;
100 import org.junit.experimental.categories.Category;
101
102 import com.google.protobuf.ServiceException;
103
104
105
106
107
108
109 @Category(LargeTests.class)
110 public class TestSplitTransactionOnCluster {
111 private static final Log LOG =
112 LogFactory.getLog(TestSplitTransactionOnCluster.class);
113 private HBaseAdmin admin = null;
114 private MiniHBaseCluster cluster = null;
115 private static final int NB_SERVERS = 3;
116 private static CountDownLatch latch = new CountDownLatch(1);
117 private static volatile boolean secondSplit = false;
118 private static volatile boolean callRollBack = false;
119 private static volatile boolean firstSplitCompleted = false;
120 private static boolean useZKForAssignment = true;
121
122 static final HBaseTestingUtility TESTING_UTIL =
123 new HBaseTestingUtility();
124
125 static void setupOnce() throws Exception {
126 TESTING_UTIL.getConfiguration().setInt("hbase.balancer.period", 60000);
127 useZKForAssignment =
128 TESTING_UTIL.getConfiguration().getBoolean("hbase.assignment.usezk", false);
129 TESTING_UTIL.startMiniCluster(NB_SERVERS);
130 }
131
132 @BeforeClass public static void before() throws Exception {
133
134 TESTING_UTIL.getConfiguration().setBoolean("hbase.assignment.usezk", true);
135 setupOnce();
136 }
137
138 @AfterClass public static void after() throws Exception {
139 TESTING_UTIL.shutdownMiniCluster();
140 }
141
142 @Before public void setup() throws IOException {
143 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
144 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
145 this.cluster = TESTING_UTIL.getMiniHBaseCluster();
146 }
147
148 @After
149 public void tearDown() throws Exception {
150 this.admin.close();
151 }
152
153 private HRegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) {
154 assertEquals(1, regions.size());
155 HRegionInfo hri = regions.get(0).getRegionInfo();
156 return waitOnRIT(hri);
157 }
158
159
160
161
162
163
164
165
166 private HRegionInfo waitOnRIT(final HRegionInfo hri) {
167
168
169 while (TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
170 getRegionStates().isRegionInTransition(hri)) {
171 LOG.info("Waiting on region in transition: " +
172 TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().
173 getRegionTransitionState(hri));
174 Threads.sleep(10);
175 }
176 return hri;
177 }
178
179 @SuppressWarnings("deprecation")
180 @Test(timeout = 60000)
181 public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
182 final TableName tableName =
183 TableName.valueOf("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
184
185 if (!useZKForAssignment) {
186
187 return;
188 }
189
190 try {
191
192 HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
193 final List<HRegion> regions = cluster.getRegions(tableName);
194 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
195 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
196 final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
197 insertData(tableName.getName(), admin, t);
198 t.close();
199
200
201 this.admin.setBalancerRunning(false, true);
202
203 cluster.getMaster().setCatalogJanitorEnabled(false);
204
205
206 final HRegion region = findSplittableRegion(regions);
207 assertTrue("not able to find a splittable region", region != null);
208
209 new Thread() {
210 @Override
211 public void run() {
212 SplitTransaction st = null;
213 st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
214 try {
215 st.prepare();
216 st.execute(regionServer, regionServer);
217 } catch (IOException e) {
218
219 }
220 }
221 }.start();
222 for (int i = 0; !callRollBack && i < 100; i++) {
223 Thread.sleep(100);
224 }
225 assertTrue("Waited too long for rollback", callRollBack);
226 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row3"));
227 try {
228 secondSplit = true;
229
230 region.initialize();
231 st.prepare();
232 st.execute(regionServer, regionServer);
233 } catch (IOException e) {
234 LOG.debug("Rollback started :"+ e.getMessage());
235 st.rollback(regionServer, regionServer);
236 }
237 for (int i=0; !firstSplitCompleted && i<100; i++) {
238 Thread.sleep(100);
239 }
240 assertTrue("fist split did not complete", firstSplitCompleted);
241
242 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
243 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
244
245 for (int i=0; rit.containsKey(hri.getTable()) && i<100; i++) {
246 Thread.sleep(100);
247 }
248 assertFalse("region still in transition", rit.containsKey(
249 rit.containsKey(hri.getTable())));
250
251 List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
252
253 assertEquals("The parent region should be splitted", 2, onlineRegions.size());
254
255 List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager()
256 .getRegionStates().getRegionsOfTable(tableName);
257
258 assertEquals("No of regions in master", 2, regionsOfTable.size());
259 } finally {
260 admin.setBalancerRunning(true, false);
261 secondSplit = false;
262 firstSplitCompleted = false;
263 callRollBack = false;
264 cluster.getMaster().setCatalogJanitorEnabled(true);
265 TESTING_UTIL.deleteTable(tableName);
266 }
267 }
268
269 @Test(timeout = 60000)
270 public void testRITStateForRollback() throws Exception {
271 final TableName tableName =
272 TableName.valueOf("testRITStateForRollback");
273 try {
274
275 HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
276 final List<HRegion> regions = cluster.getRegions(tableName);
277 final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
278 insertData(tableName.getName(), admin, t);
279 t.close();
280
281
282 this.admin.setBalancerRunning(false, true);
283
284 cluster.getMaster().setCatalogJanitorEnabled(false);
285
286
287 final HRegion region = findSplittableRegion(regions);
288 assertTrue("not able to find a splittable region", region != null);
289
290
291 region.getCoprocessorHost().load(FailingSplitRegionObserver.class,
292 Coprocessor.PRIORITY_USER, region.getBaseConf());
293
294
295 this.admin.split(region.getRegionName(), new byte[] {42});
296
297
298 FailingSplitRegionObserver observer = (FailingSplitRegionObserver) region
299 .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
300 assertNotNull(observer);
301 observer.latch.await();
302
303 LOG.info("Waiting for region to come out of RIT");
304 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
305 @Override
306 public boolean evaluate() throws Exception {
307 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
308 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
309 return !rit.containsKey(hri.getEncodedName());
310 }
311 });
312 } finally {
313 admin.setBalancerRunning(true, false);
314 cluster.getMaster().setCatalogJanitorEnabled(true);
315 TESTING_UTIL.deleteTable(tableName);
316 }
317 }
318
319 public static class FailingSplitRegionObserver extends BaseRegionObserver {
320 volatile CountDownLatch latch;
321 volatile CountDownLatch postSplit;
322 @Override
323 public void start(CoprocessorEnvironment e) throws IOException {
324 latch = new CountDownLatch(1);
325 postSplit = new CountDownLatch(1);
326 }
327 @Override
328 public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
329 byte[] splitKey, List<Mutation> metaEntries) throws IOException {
330 latch.countDown();
331 LOG.info("Causing rollback of region split");
332 throw new IOException("Causing rollback of region split");
333 }
334 @Override
335 public void postCompleteSplit(ObserverContext<RegionCoprocessorEnvironment> ctx)
336 throws IOException {
337 postSplit.countDown();
338 LOG.info("postCompleteSplit called");
339 }
340 }
341
342
343
344
345
346
347
348
349
350
351
352
353 @Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling()
354 throws IOException, InterruptedException, NodeExistsException, KeeperException,
355 DeserializationException, ServiceException {
356 final byte [] tableName =
357 Bytes.toBytes("testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling");
358
359
360 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
361 List<HRegion> regions = cluster.getRegions(tableName);
362 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
363
364 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
365
366
367 this.admin.setBalancerRunning(false, true);
368
369 cluster.getMaster().setCatalogJanitorEnabled(false);
370 try {
371
372 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
373
374 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
375 printOutRegions(server, "Initial regions: ");
376 int regionCount = ProtobufUtil.getOnlineRegions(server).size();
377
378
379 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
380
381 split(hri, server, regionCount);
382
383 String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
384 hri.getEncodedName());
385 RegionTransition rt = null;
386 Stat stats = null;
387 List<HRegion> daughters = null;
388 if (useZKForAssignment) {
389 daughters = checkAndGetDaughters(tableName);
390
391
392 for (int i=0; i<100; i++) {
393 stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
394 rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
395 hri.getEncodedName()));
396 if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) break;
397 Thread.sleep(100);
398 }
399 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
400 assertTrue(rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
401
402 cluster.abortRegionServer(tableRegionIndex);
403 }
404 waitUntilRegionServerDead();
405 awaitDaughters(tableName, 2);
406 if (useZKForAssignment) {
407 regions = cluster.getRegions(tableName);
408 for (HRegion r: regions) {
409 assertTrue(daughters.contains(r));
410 }
411
412
413 for (int i=0; i<100; i++) {
414
415 stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
416 if (stats == null) break;
417 Thread.sleep(100);
418 }
419 LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
420 assertTrue(stats == null);
421 }
422 } finally {
423
424 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
425 admin.setBalancerRunning(true, false);
426 cluster.getMaster().setCatalogJanitorEnabled(true);
427 cluster.startRegionServer();
428 t.close();
429 }
430 }
431
432 @Test (timeout = 300000) public void testExistingZnodeBlocksSplitAndWeRollback()
433 throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException {
434 final byte [] tableName =
435 Bytes.toBytes("testExistingZnodeBlocksSplitAndWeRollback");
436
437
438 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
439 List<HRegion> regions = cluster.getRegions(tableName);
440 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
441
442 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
443
444 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
445
446
447 this.admin.setBalancerRunning(false, true);
448
449 cluster.getMaster().setCatalogJanitorEnabled(false);
450 try {
451
452 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
453
454 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
455 printOutRegions(server, "Initial regions: ");
456 int regionCount = ProtobufUtil.getOnlineRegions(server).size();
457
458
459 ServerName fakedServer = ServerName.valueOf("any.old.server", 1234, -1);
460 if (useZKForAssignment) {
461 ZKAssign.createNodeClosing(TESTING_UTIL.getZooKeeperWatcher(),
462 hri, fakedServer);
463 } else {
464 regionStates.updateRegionState(hri, RegionState.State.CLOSING);
465 }
466
467
468 this.admin.split(hri.getRegionNameAsString());
469 this.admin.split(hri.getRegionNameAsString());
470 this.admin.split(hri.getRegionNameAsString());
471
472 for (int i = 0; i < 10; i++) {
473 Thread.sleep(100);
474 assertEquals(regionCount, ProtobufUtil.getOnlineRegions(server).size());
475 }
476 if (useZKForAssignment) {
477
478 ZKAssign.deleteClosingNode(TESTING_UTIL.getZooKeeperWatcher(),
479 hri, fakedServer);
480 } else {
481 regionStates.regionOnline(hri, server.getServerName());
482 }
483
484 split(hri, server, regionCount);
485
486 checkAndGetDaughters(tableName);
487
488 } finally {
489 admin.setBalancerRunning(true, false);
490 cluster.getMaster().setCatalogJanitorEnabled(true);
491 t.close();
492 }
493 }
494
495
496
497
498
499
500
501 @Test (timeout=300000) public void testShutdownFixupWhenDaughterHasSplit()
502 throws IOException, InterruptedException, ServiceException {
503 final byte [] tableName =
504 Bytes.toBytes("testShutdownFixupWhenDaughterHasSplit");
505
506
507 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
508 List<HRegion> regions = cluster.getRegions(tableName);
509 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
510
511 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
512
513
514 this.admin.setBalancerRunning(false, true);
515
516 cluster.getMaster().setCatalogJanitorEnabled(false);
517 try {
518
519 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
520
521 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
522 printOutRegions(server, "Initial regions: ");
523 int regionCount = ProtobufUtil.getOnlineRegions(server).size();
524
525 split(hri, server, regionCount);
526
527 List<HRegion> daughters = checkAndGetDaughters(tableName);
528
529 regionCount = ProtobufUtil.getOnlineRegions(server).size();
530 HRegionInfo daughter = daughters.get(0).getRegionInfo();
531 LOG.info("Daughter we are going to split: " + daughter);
532
533
534 this.admin.compact(daughter.getRegionName());
535 daughters = cluster.getRegions(tableName);
536 HRegion daughterRegion = null;
537 for (HRegion r: daughters) {
538 if (r.getRegionInfo().equals(daughter)) {
539 daughterRegion = r;
540 LOG.info("Found matching HRI: " + daughterRegion);
541 break;
542 }
543 }
544 assertTrue(daughterRegion != null);
545 for (int i=0; i<100; i++) {
546 if (!daughterRegion.hasReferences()) break;
547 Threads.sleep(100);
548 }
549 assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
550 LOG.info("Daughter hri before split (has been compacted): " + daughter);
551 split(daughter, server, regionCount);
552
553 daughters = cluster.getRegions(tableName);
554 for (HRegion d: daughters) {
555 LOG.info("Regions before crash: " + d);
556 }
557
558 cluster.abortRegionServer(tableRegionIndex);
559 waitUntilRegionServerDead();
560 awaitDaughters(tableName, daughters.size());
561
562
563 regions = cluster.getRegions(tableName);
564 for (HRegion d: daughters) {
565 LOG.info("Regions after crash: " + d);
566 }
567 assertEquals(daughters.size(), regions.size());
568 for (HRegion r: regions) {
569 LOG.info("Regions post crash " + r);
570 assertTrue("Missing region post crash " + r, daughters.contains(r));
571 }
572 } finally {
573 admin.setBalancerRunning(true, false);
574 cluster.getMaster().setCatalogJanitorEnabled(true);
575 t.close();
576 }
577 }
578
579 @Test(timeout = 180000)
580 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
581 Configuration conf = TESTING_UTIL.getConfiguration();
582 TableName userTableName =
583 TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
584 HTableDescriptor htd = new HTableDescriptor(userTableName);
585 HColumnDescriptor hcd = new HColumnDescriptor("col");
586 htd.addFamily(hcd);
587 admin.createTable(htd);
588 HTable table = new HTable(conf, userTableName);
589 try {
590 for (int i = 0; i <= 5; i++) {
591 String row = "row" + i;
592 Put p = new Put(row.getBytes());
593 String val = "Val" + i;
594 p.add("col".getBytes(), "ql".getBytes(), val.getBytes());
595 table.put(p);
596 admin.flush(userTableName.getName());
597 Delete d = new Delete(row.getBytes());
598
599 table.delete(d);
600 admin.flush(userTableName.getName());
601 }
602 admin.majorCompact(userTableName.getName());
603 List<HRegionInfo> regionsOfTable = TESTING_UTIL.getMiniHBaseCluster()
604 .getMaster().getAssignmentManager().getRegionStates()
605 .getRegionsOfTable(userTableName);
606 HRegionInfo hRegionInfo = regionsOfTable.get(0);
607 Put p = new Put("row6".getBytes());
608 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
609 table.put(p);
610 p = new Put("row7".getBytes());
611 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
612 table.put(p);
613 p = new Put("row8".getBytes());
614 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
615 table.put(p);
616 admin.flush(userTableName.getName());
617 admin.split(hRegionInfo.getRegionName(), "row7".getBytes());
618 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
619 .getAssignmentManager().getRegionStates()
620 .getRegionsOfTable(userTableName);
621
622 while (regionsOfTable.size() != 2) {
623 Thread.sleep(2000);
624 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
625 .getAssignmentManager().getRegionStates()
626 .getRegionsOfTable(userTableName);
627 }
628 Assert.assertEquals(2, regionsOfTable.size());
629 Scan s = new Scan();
630 ResultScanner scanner = table.getScanner(s);
631 int mainTableCount = 0;
632 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
633 mainTableCount++;
634 }
635 Assert.assertEquals(3, mainTableCount);
636 } finally {
637 table.close();
638 }
639 }
640
641
642
643
644 static class UselessTestAbortable implements Abortable {
645 boolean aborted = false;
646 @Override
647 public void abort(String why, Throwable e) {
648 LOG.warn("ABORTED (But nothing to abort): why=" + why, e);
649 aborted = true;
650 }
651
652 @Override
653 public boolean isAborted() {
654 return this.aborted;
655 }
656 }
657
658
659
660
661
662
663
664
665
666
667
668 @Test(timeout = 400000)
669 public void testMasterRestartWhenSplittingIsPartial()
670 throws IOException, InterruptedException, NodeExistsException,
671 KeeperException, DeserializationException, ServiceException {
672 final byte[] tableName = Bytes.toBytes("testMasterRestartWhenSplittingIsPartial");
673
674 if (!useZKForAssignment) {
675
676 return;
677 }
678
679
680 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
681 List<HRegion> regions = cluster.getRegions(tableName);
682 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
683
684 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
685
686
687 this.admin.setBalancerRunning(false, true);
688
689 cluster.getMaster().setCatalogJanitorEnabled(false);
690 ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
691 "testMasterRestartWhenSplittingIsPartial", new UselessTestAbortable());
692 try {
693
694 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
695
696 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
697 printOutRegions(server, "Initial regions: ");
698
699
700 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
701
702
703 this.admin.split(hri.getRegionNameAsString());
704 checkAndGetDaughters(tableName);
705
706 String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
707 Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
708 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
709 + stats);
710 byte[] bytes = ZKAssign.getData(zkw, hri.getEncodedName());
711 RegionTransition rtd = RegionTransition.parseFrom(bytes);
712
713 assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
714 || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
715
716
717 MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
718
719 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
720
721
722
723 hri.setOffline(true);
724 hri.setSplit(true);
725 ServerName regionServerOfRegion = master.getAssignmentManager()
726 .getRegionStates().getRegionServerOfRegion(hri);
727 assertTrue(regionServerOfRegion != null);
728
729
730 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
731 String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
732 Stat stat = new Stat();
733 byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
734
735 for (int i=0; data != null && i<60; i++) {
736 Thread.sleep(1000);
737 data = ZKUtil.getDataNoWatch(zkw, node, stat);
738 }
739 assertNull("Waited too long for ZK node to be removed: "+node, data);
740 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
741 assertTrue("Split parent should be in SPLIT state",
742 regionStates.isRegionInState(hri, State.SPLIT));
743 regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
744 assertTrue(regionServerOfRegion == null);
745 } finally {
746
747 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
748 admin.setBalancerRunning(true, false);
749 cluster.getMaster().setCatalogJanitorEnabled(true);
750 t.close();
751 zkw.close();
752 }
753 }
754
755
756
757
758
759
760
761
762
763 @Test (timeout = 300000)
764 public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
765 throws IOException, InterruptedException, NodeExistsException,
766 KeeperException, ServiceException {
767 final byte[] tableName = Bytes.toBytes("testMasterRestartAtRegionSplitPendingCatalogJanitor");
768
769
770 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
771 List<HRegion> regions = cluster.getRegions(tableName);
772 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
773
774 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
775
776
777 this.admin.setBalancerRunning(false, true);
778
779 cluster.getMaster().setCatalogJanitorEnabled(false);
780 ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
781 "testMasterRestartAtRegionSplitPendingCatalogJanitor", new UselessTestAbortable());
782 try {
783
784 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
785
786 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
787 printOutRegions(server, "Initial regions: ");
788
789 this.admin.split(hri.getRegionNameAsString());
790 checkAndGetDaughters(tableName);
791
792 String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
793 Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
794 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
795 + stats);
796 String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
797 Stat stat = new Stat();
798 byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
799
800 for (int i=0; data != null && i<60; i++) {
801 Thread.sleep(1000);
802 data = ZKUtil.getDataNoWatch(zkw, node, stat);
803 }
804 assertNull("Waited too long for ZK node to be removed: "+node, data);
805
806 MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
807
808 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
809
810
811
812 hri.setOffline(true);
813 hri.setSplit(true);
814 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
815 assertTrue("Split parent should be in SPLIT state",
816 regionStates.isRegionInState(hri, State.SPLIT));
817 ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
818 assertTrue(regionServerOfRegion == null);
819 } finally {
820 this.admin.setBalancerRunning(true, false);
821 cluster.getMaster().setCatalogJanitorEnabled(true);
822 t.close();
823 zkw.close();
824 }
825 }
826
827
828
829
830
831
832
833
834
835
836
837
838 @Test(timeout = 60000)
839 public void testSplitBeforeSettingSplittingInZK() throws Exception,
840 InterruptedException, KeeperException {
841 testSplitBeforeSettingSplittingInZKInternals();
842 }
843
844 @Test(timeout = 60000)
845 public void testTableExistsIfTheSpecifiedTableRegionIsSplitParent() throws Exception {
846 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
847 final TableName tableName =
848 TableName.valueOf("testTableExistsIfTheSpecifiedTableRegionIsSplitParent");
849
850 HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
851 List<HRegion> regions = null;
852 try {
853 regions = cluster.getRegions(tableName);
854 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
855 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
856 insertData(tableName.getName(), admin, t);
857
858 admin.setBalancerRunning(false, true);
859
860 cluster.getMaster().setCatalogJanitorEnabled(false);
861 boolean tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
862 tableName);
863 assertEquals("The specified table should present.", true, tableExists);
864 final HRegion region = findSplittableRegion(regions);
865 assertTrue("not able to find a splittable region", region != null);
866 SplitTransaction st = new SplitTransaction(region, Bytes.toBytes("row2"));
867 try {
868 st.prepare();
869 st.createDaughters(regionServer, regionServer, null);
870 } catch (IOException e) {
871
872 }
873 tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
874 tableName);
875 assertEquals("The specified table should present.", true, tableExists);
876 Map<String, RegionState> rit = cluster.getMaster().getAssignmentManager().getRegionStates()
877 .getRegionsInTransition();
878 assertTrue(rit.size() == 3);
879 cluster.getMaster().getAssignmentManager().regionOffline(st.getFirstDaughter());
880 cluster.getMaster().getAssignmentManager().regionOffline(st.getSecondDaughter());
881 cluster.getMaster().getAssignmentManager().regionOffline(region.getRegionInfo());
882 rit = cluster.getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition();
883 assertTrue(rit.size() == 0);
884 } finally {
885 if (regions != null) {
886 String node = ZKAssign.getNodeName(zkw, regions.get(0).getRegionInfo()
887 .getEncodedName());
888 ZKUtil.deleteNodeFailSilent(zkw, node);
889 }
890 admin.setBalancerRunning(true, false);
891 cluster.getMaster().setCatalogJanitorEnabled(true);
892 t.close();
893 TESTING_UTIL.deleteTable(tableName);
894 }
895 }
896
897 private void insertData(final byte[] tableName, HBaseAdmin admin, HTable t) throws IOException,
898 InterruptedException {
899 Put p = new Put(Bytes.toBytes("row1"));
900 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
901 t.put(p);
902 p = new Put(Bytes.toBytes("row2"));
903 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
904 t.put(p);
905 p = new Put(Bytes.toBytes("row3"));
906 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
907 t.put(p);
908 p = new Put(Bytes.toBytes("row4"));
909 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
910 t.put(p);
911 admin.flush(tableName);
912 }
913
914
915
916
917
918 @Test(timeout = 60000)
919 public void testSplitRegionWithNoStoreFiles()
920 throws Exception {
921 final TableName tableName =
922 TableName.valueOf("testSplitRegionWithNoStoreFiles");
923
924 createTableAndWait(tableName.getName(), HConstants.CATALOG_FAMILY);
925 List<HRegion> regions = cluster.getRegions(tableName);
926 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
927 ensureTableRegionNotOnSameServerAsMeta(admin, hri);
928 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
929 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
930
931 this.admin.setBalancerRunning(false, true);
932
933 cluster.getMaster().setCatalogJanitorEnabled(false);
934 try {
935
936 printOutRegions(regionServer, "Initial regions: ");
937 Configuration conf = cluster.getConfiguration();
938 HBaseFsck.debugLsr(conf, new Path("/"));
939 Path rootDir = FSUtils.getRootDir(conf);
940 FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
941 Map<String, Path> storefiles =
942 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
943 assertEquals("Expected nothing but found " + storefiles.toString(), storefiles.size(), 0);
944
945
946 regions = cluster.getRegions(tableName);
947 final HRegion region = findSplittableRegion(regions);
948 assertTrue("not able to find a splittable region", region != null);
949
950
951 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
952 try {
953 st.prepare();
954 st.execute(regionServer, regionServer);
955 } catch (IOException e) {
956 fail("Split execution should have succeeded with no exceptions thrown");
957 }
958
959
960
961 List<HRegion> daughters = cluster.getRegions(tableName);
962 assertTrue(daughters.size() == 2);
963
964
965 HBaseFsck.debugLsr(conf, new Path("/"));
966 Map<String, Path> storefilesAfter =
967 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
968 assertEquals("Expected nothing but found " + storefilesAfter.toString(),
969 storefilesAfter.size(), 0);
970
971 hri = region.getRegionInfo();
972 AssignmentManager am = cluster.getMaster().getAssignmentManager();
973 RegionStates regionStates = am.getRegionStates();
974 long start = EnvironmentEdgeManager.currentTimeMillis();
975 while (!regionStates.isRegionInState(hri, State.SPLIT)) {
976 assertFalse("Timed out in waiting split parent to be in state SPLIT",
977 EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
978 Thread.sleep(500);
979 }
980
981
982 am.assign(hri, true, true);
983 assertFalse("Split region can't be assigned",
984 regionStates.isRegionInTransition(hri));
985 assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
986
987
988 am.unassign(hri, true, null);
989 assertFalse("Split region can't be unassigned",
990 regionStates.isRegionInTransition(hri));
991 assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
992 } finally {
993 admin.setBalancerRunning(true, false);
994 cluster.getMaster().setCatalogJanitorEnabled(true);
995 }
996 }
997
998 @Test(timeout = 180000)
999 public void testSplitHooksBeforeAndAfterPONR() throws Exception {
1000 TableName firstTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_1");
1001 TableName secondTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2");
1002 HColumnDescriptor hcd = new HColumnDescriptor("cf");
1003
1004 HTableDescriptor desc = new HTableDescriptor(firstTable);
1005 desc.addCoprocessor(MockedRegionObserver.class.getName());
1006 desc.addFamily(hcd);
1007 admin.createTable(desc);
1008 TESTING_UTIL.waitUntilAllRegionsAssigned(firstTable);
1009
1010 desc = new HTableDescriptor(secondTable);
1011 desc.addFamily(hcd);
1012 admin.createTable(desc);
1013 TESTING_UTIL.waitUntilAllRegionsAssigned(secondTable);
1014
1015 List<HRegion> firstTableRegions = cluster.getRegions(firstTable);
1016 List<HRegion> secondTableRegions = cluster.getRegions(secondTable);
1017
1018
1019 if (firstTableRegions.size() == 0 || secondTableRegions.size() == 0) {
1020 fail("Each table should have at least one region.");
1021 }
1022 ServerName serverName =
1023 cluster.getServerHoldingRegion(firstTableRegions.get(0).getRegionName());
1024 admin.move(secondTableRegions.get(0).getRegionInfo().getEncodedNameAsBytes(),
1025 Bytes.toBytes(serverName.getServerName()));
1026 HTable table1 = null;
1027 HTable table2 = null;
1028 try {
1029 table1 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1030 table2 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1031 insertData(firstTable.getName(), admin, table1);
1032 insertData(secondTable.getName(), admin, table2);
1033 admin.split(firstTable.getName(), "row2".getBytes());
1034 firstTableRegions = cluster.getRegions(firstTable.getName());
1035 while (firstTableRegions.size() != 2) {
1036 Thread.sleep(1000);
1037 firstTableRegions = cluster.getRegions(firstTable.getName());
1038 }
1039 assertEquals("Number of regions after split should be 2.", 2, firstTableRegions.size());
1040 secondTableRegions = cluster.getRegions(secondTable.getName());
1041 assertEquals("Number of regions after split should be 2.", 2, secondTableRegions.size());
1042 } finally {
1043 if (table1 != null) {
1044 table1.close();
1045 }
1046 if (table2 != null) {
1047 table2.close();
1048 }
1049 TESTING_UTIL.deleteTable(firstTable);
1050 TESTING_UTIL.deleteTable(secondTable);
1051 }
1052 }
1053
1054 private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
1055 final byte[] tableName = Bytes.toBytes("testSplitBeforeSettingSplittingInZK");
1056 try {
1057
1058 createTableAndWait(tableName, Bytes.toBytes("cf"));
1059
1060 List<HRegion> regions = awaitTableRegions(tableName);
1061 assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
1062
1063 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
1064 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1065 final HRegion region = findSplittableRegion(regions);
1066 assertTrue("not able to find a splittable region", region != null);
1067 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
1068 @Override
1069 public PairOfSameType<HRegion> stepsBeforePONR(final Server server,
1070 final RegionServerServices services, boolean testing) throws IOException {
1071 throw new SplittingNodeCreationFailedException ();
1072 }
1073 };
1074 String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1075 region.getRegionInfo().getEncodedName());
1076 regionServer.getZooKeeper().sync(node);
1077 for (int i = 0; i < 100; i++) {
1078
1079
1080
1081 if (ZKUtil.checkExists(regionServer.getZooKeeper(), node) != -1) {
1082 Thread.sleep(100);
1083 }
1084 }
1085 try {
1086 st.prepare();
1087 st.execute(regionServer, regionServer);
1088 } catch (IOException e) {
1089
1090
1091
1092 assertTrue("Should be instance of CreateSplittingNodeFailedException",
1093 e instanceof SplittingNodeCreationFailedException );
1094 node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1095 region.getRegionInfo().getEncodedName());
1096 {
1097 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1098 }
1099 assertTrue(st.rollback(regionServer, regionServer));
1100 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1101 }
1102 } finally {
1103 TESTING_UTIL.deleteTable(tableName);
1104 }
1105 }
1106
1107 @Test
1108 public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck()
1109 throws Exception {
1110 final TableName tableName =
1111 TableName.valueOf("testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck");
1112 try {
1113 HTableDescriptor htd = new HTableDescriptor(tableName);
1114 htd.addFamily(new HColumnDescriptor("f"));
1115 htd.addFamily(new HColumnDescriptor("i_f"));
1116 htd.setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName());
1117 admin.createTable(htd);
1118 List<HRegion> regions = awaitTableRegions(tableName.toBytes());
1119 HRegion region = regions.get(0);
1120 for(int i = 3;i<9;i++) {
1121 Put p = new Put(Bytes.toBytes("row"+i));
1122 p.add(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1123 p.add(Bytes.toBytes("i_f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1124 region.put(p);
1125 }
1126 region.flushcache();
1127 Store store = region.getStore(Bytes.toBytes("f"));
1128 Collection<StoreFile> storefiles = store.getStorefiles();
1129 assertEquals(storefiles.size(), 1);
1130 assertFalse(region.hasReferences());
1131 Path referencePath =
1132 region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
1133 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1134 assertNull(referencePath);
1135 referencePath =
1136 region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f",
1137 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1138 assertNotNull(referencePath);
1139 } finally {
1140 TESTING_UTIL.deleteTable(tableName);
1141 }
1142 }
1143
1144 @Test (timeout=300000)
1145 public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
1146 TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
1147 try {
1148 HTableDescriptor desc = new HTableDescriptor(table);
1149 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
1150 admin.createTable(desc);
1151 HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName());
1152 for(int i = 1; i < 5; i++) {
1153 Put p1 = new Put(("r"+i).getBytes());
1154 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
1155 hTable.put(p1);
1156 }
1157 admin.flush(desc.getTableName().toString());
1158 List<HRegion> regions = cluster.getRegions(desc.getTableName());
1159 int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
1160 HRegionServer regionServer = cluster.getRegionServer(serverWith);
1161 cluster.getServerWith(regions.get(0).getRegionName());
1162 SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
1163 st.prepare();
1164 st.stepsBeforePONR(regionServer, regionServer, false);
1165 Path tableDir =
1166 FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
1167 desc.getTableName());
1168 tableDir.getFileSystem(cluster.getConfiguration());
1169 List<Path> regionDirs =
1170 FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1171 assertEquals(3,regionDirs.size());
1172 cluster.startRegionServer();
1173 regionServer.kill();
1174 cluster.getRegionServerThreads().get(serverWith).join();
1175
1176 while (cluster.getMaster().getServerManager().areDeadServersInProgress()) {
1177 Thread.sleep(10);
1178 }
1179 AssignmentManager am = cluster.getMaster().getAssignmentManager();
1180 while(am.getRegionStates().isRegionsInTransition()){
1181 Thread.sleep(10);
1182 }
1183 assertEquals(am.getRegionStates().getRegionsInTransition().toString(), am.getRegionStates()
1184 .getRegionsInTransition().size(), 0);
1185 regionDirs =
1186 FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1187 assertEquals(1,regionDirs.size());
1188 } finally {
1189 TESTING_UTIL.deleteTable(table);
1190 }
1191 }
1192
1193 public static class MockedSplitTransaction extends SplitTransaction {
1194
1195 private HRegion currentRegion;
1196 public MockedSplitTransaction(HRegion r, byte[] splitrow) {
1197 super(r, splitrow);
1198 this.currentRegion = r;
1199 }
1200
1201 @Override
1202 void transitionZKNode(Server server, RegionServerServices services, HRegion a, HRegion b)
1203 throws IOException {
1204 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1205 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1206 try {
1207 if (!secondSplit){
1208 callRollBack = true;
1209 latch.await();
1210 }
1211 } catch (InterruptedException e) {
1212 }
1213
1214 }
1215 super.transitionZKNode(server, services, a, b);
1216 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1217 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1218 firstSplitCompleted = true;
1219 }
1220 }
1221 @Override
1222 public boolean rollback(Server server, RegionServerServices services) throws IOException {
1223 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1224 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1225 if(secondSplit){
1226 super.rollback(server, services);
1227 latch.countDown();
1228 return true;
1229 }
1230 }
1231 return super.rollback(server, services);
1232 }
1233
1234 }
1235
1236 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
1237 for (int i = 0; i < 5; ++i) {
1238 for (HRegion r: regions) {
1239 if (r.isSplittable()) {
1240 return(r);
1241 }
1242 }
1243 Thread.sleep(100);
1244 }
1245 return(null);
1246 }
1247
1248 @Test(timeout = 120000)
1249 public void testFailedSplit() throws Exception {
1250 TableName tableName = TableName.valueOf("testFailedSplit");
1251 byte[] colFamily = Bytes.toBytes("info");
1252 TESTING_UTIL.createTable(tableName, colFamily);
1253 HTable table = new HTable(TESTING_UTIL.getConfiguration(), tableName);
1254 try {
1255 TESTING_UTIL.loadTable(table, colFamily);
1256 List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1257 assertTrue(regions.size() == 1);
1258 final HRegion actualRegion = cluster.getRegions(tableName).get(0);
1259 actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
1260 Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
1261
1262
1263 admin.split(tableName.getNameAsString());
1264 FailingSplitRegionObserver observer = (FailingSplitRegionObserver) actualRegion
1265 .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
1266 assertNotNull(observer);
1267 observer.latch.await();
1268 observer.postSplit.await();
1269 LOG.info("Waiting for region to come out of RIT");
1270 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
1271 @Override
1272 public boolean evaluate() throws Exception {
1273 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
1274 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
1275 return (rit.size() == 0);
1276 }
1277 });
1278 regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1279 assertTrue(regions.size() == 1);
1280 assertTrue(admin.balancer());
1281 } finally {
1282 table.close();
1283 TESTING_UTIL.deleteTable(tableName);
1284 }
1285 }
1286
1287 private List<HRegion> checkAndGetDaughters(byte[] tableName)
1288 throws InterruptedException {
1289 List<HRegion> daughters = null;
1290
1291 for (int i=0; i<100; i++) {
1292 daughters = cluster.getRegions(tableName);
1293 if (daughters.size() >= 2) break;
1294 Thread.sleep(100);
1295 }
1296 assertTrue(daughters.size() >= 2);
1297 return daughters;
1298 }
1299
1300 private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
1301 throws IOException, InterruptedException {
1302 cluster.abortMaster(0);
1303 cluster.waitOnMaster(0);
1304 cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
1305 MockMasterWithoutCatalogJanitor.class, HMaster.class);
1306 MockMasterWithoutCatalogJanitor master = null;
1307 master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
1308 cluster.waitForActiveAndReadyMaster();
1309 return master;
1310 }
1311
1312 private void split(final HRegionInfo hri, final HRegionServer server, final int regionCount)
1313 throws IOException, InterruptedException {
1314 this.admin.split(hri.getRegionNameAsString());
1315 try {
1316 for (int i = 0; ProtobufUtil.getOnlineRegions(server).size() <= regionCount && i < 300; i++) {
1317 LOG.debug("Waiting on region to split");
1318 Thread.sleep(100);
1319 }
1320
1321 assertFalse("Waited too long for split",
1322 ProtobufUtil.getOnlineRegions(server).size() <= regionCount);
1323 } catch (RegionServerStoppedException e) {
1324 if (useZKForAssignment) {
1325
1326 LOG.error(e);
1327 throw e;
1328 }
1329 }
1330 }
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343 private int ensureTableRegionNotOnSameServerAsMeta(final HBaseAdmin admin,
1344 final HRegionInfo hri)
1345 throws HBaseIOException, MasterNotRunningException,
1346 ZooKeeperConnectionException, InterruptedException {
1347
1348
1349
1350 int metaServerIndex = cluster.getServerWithMeta();
1351 assertTrue(metaServerIndex != -1);
1352 HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1353 int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1354 assertTrue(tableRegionIndex != -1);
1355 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1356 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1357 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1358 assertNotNull(hrs);
1359 assertNotNull(hri);
1360 LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
1361 metaRegionServer.getServerName() + " to " +
1362 hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1363 admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
1364 }
1365
1366 for (int i = 0; i < 100; i++) {
1367 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1368 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1369 LOG.debug("Waiting on region move off the hbase:meta server; current index " +
1370 tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1371 Thread.sleep(100);
1372 }
1373 assertTrue("Region not moved off hbase:meta server", tableRegionIndex != -1
1374 && tableRegionIndex != metaServerIndex);
1375
1376 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1377 assertTrue(tableRegionIndex != -1);
1378 assertNotSame(metaServerIndex, tableRegionIndex);
1379 return tableRegionIndex;
1380 }
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391 private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1392 final HRegionServer notThisOne) {
1393 for (RegionServerThread rst: cluster.getRegionServerThreads()) {
1394 HRegionServer hrs = rst.getRegionServer();
1395 if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1396 if (hrs.isStopping() || hrs.isStopped()) continue;
1397 return hrs;
1398 }
1399 return null;
1400 }
1401
1402 private void printOutRegions(final HRegionServer hrs, final String prefix)
1403 throws IOException {
1404 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
1405 for (HRegionInfo region: regions) {
1406 LOG.info(prefix + region.getRegionNameAsString());
1407 }
1408 }
1409
1410 private void waitUntilRegionServerDead() throws InterruptedException {
1411
1412 for (int i=0; cluster.getMaster().getClusterStatus().
1413 getServers().size() == NB_SERVERS && i<100; i++) {
1414 LOG.info("Waiting on server to go down");
1415 Thread.sleep(100);
1416 }
1417 assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
1418 getServers().size() == NB_SERVERS);
1419 }
1420
1421 private void awaitDaughters(byte[] tableName, int numDaughters) throws InterruptedException {
1422
1423 for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
1424 LOG.info("Waiting for repair to happen");
1425 Thread.sleep(1000);
1426 }
1427 if (cluster.getRegions(tableName).size() < numDaughters) {
1428 fail("Waiting too long for daughter regions");
1429 }
1430 }
1431
1432 private List<HRegion> awaitTableRegions(final byte[] tableName) throws InterruptedException {
1433 List<HRegion> regions = null;
1434 for (int i = 0; i < 100; i++) {
1435 regions = cluster.getRegions(tableName);
1436 if (regions.size() > 0) break;
1437 Thread.sleep(100);
1438 }
1439 return regions;
1440 }
1441
1442 private HTable createTableAndWait(byte[] tableName, byte[] cf) throws IOException,
1443 InterruptedException {
1444 HTable t = TESTING_UTIL.createTable(tableName, cf);
1445 awaitTableRegions(tableName);
1446 assertTrue("Table not online: " + Bytes.toString(tableName),
1447 cluster.getRegions(tableName).size() != 0);
1448 return t;
1449 }
1450
1451 public static class MockMasterWithoutCatalogJanitor extends HMaster {
1452
1453 public MockMasterWithoutCatalogJanitor(Configuration conf) throws IOException, KeeperException,
1454 InterruptedException {
1455 super(conf);
1456 }
1457 }
1458
1459 private static class SplittingNodeCreationFailedException extends IOException {
1460 private static final long serialVersionUID = 1652404976265623004L;
1461
1462 public SplittingNodeCreationFailedException () {
1463 super();
1464 }
1465 }
1466
1467 public static class MockedRegionObserver extends BaseRegionObserver {
1468 private SplitTransaction st = null;
1469 private PairOfSameType<HRegion> daughterRegions = null;
1470
1471 @Override
1472 public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
1473 byte[] splitKey, List<Mutation> metaEntries) throws IOException {
1474 RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1475 HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1476 List<HRegion> onlineRegions =
1477 rs.getOnlineRegions(TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2"));
1478 HRegion region = onlineRegions.get(0);
1479 for (HRegion r : onlineRegions) {
1480 if (r.getRegionInfo().containsRow(splitKey)) {
1481 region = r;
1482 break;
1483 }
1484 }
1485 st = new SplitTransaction(region, splitKey);
1486 if (!st.prepare()) {
1487 LOG.error("Prepare for the table " + region.getTableDesc().getNameAsString()
1488 + " failed. So returning null. ");
1489 ctx.bypass();
1490 return;
1491 }
1492 region.forceSplit(splitKey);
1493 daughterRegions = st.stepsBeforePONR(rs, rs, false);
1494 HRegionInfo copyOfParent = new HRegionInfo(region.getRegionInfo());
1495 copyOfParent.setOffline(true);
1496 copyOfParent.setSplit(true);
1497
1498 Put putParent = MetaEditor.makePutFromRegionInfo(copyOfParent);
1499 MetaEditor.addDaughtersToPut(putParent, daughterRegions.getFirst().getRegionInfo(),
1500 daughterRegions.getSecond().getRegionInfo());
1501 metaEntries.add(putParent);
1502
1503 Put putA = MetaEditor.makePutFromRegionInfo(daughterRegions.getFirst().getRegionInfo());
1504 Put putB = MetaEditor.makePutFromRegionInfo(daughterRegions.getSecond().getRegionInfo());
1505 st.addLocation(putA, rs.getServerName(), 1);
1506 st.addLocation(putB, rs.getServerName(), 1);
1507 metaEntries.add(putA);
1508 metaEntries.add(putB);
1509 }
1510
1511 @Override
1512 public void preSplitAfterPONR(ObserverContext<RegionCoprocessorEnvironment> ctx)
1513 throws IOException {
1514 RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1515 HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1516 st.stepsAfterPONR(rs, rs, daughterRegions, null);
1517 }
1518
1519 }
1520
1521 static class CustomSplitPolicy extends RegionSplitPolicy {
1522
1523 @Override
1524 protected boolean shouldSplit() {
1525 return true;
1526 }
1527
1528 @Override
1529 public boolean skipStoreFileRangeCheck(String familyName) {
1530 if(familyName.startsWith("i_")) {
1531 return true;
1532 } else {
1533 return false;
1534 }
1535 }
1536 }
1537 }
1538