1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertNotSame;
25 import static org.junit.Assert.assertNull;
26 import static org.junit.Assert.assertTrue;
27 import static org.junit.Assert.fail;
28
29 import java.io.IOException;
30 import java.util.Collection;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.concurrent.CountDownLatch;
34
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.hbase.Abortable;
41 import org.apache.hadoop.hbase.Coprocessor;
42 import org.apache.hadoop.hbase.CoprocessorEnvironment;
43 import org.apache.hadoop.hbase.HBaseIOException;
44 import org.apache.hadoop.hbase.HBaseTestingUtility;
45 import org.apache.hadoop.hbase.HColumnDescriptor;
46 import org.apache.hadoop.hbase.HConstants;
47 import org.apache.hadoop.hbase.HRegionInfo;
48 import org.apache.hadoop.hbase.HTableDescriptor;
49 import org.apache.hadoop.hbase.MasterNotRunningException;
50 import org.apache.hadoop.hbase.MiniHBaseCluster;
51 import org.apache.hadoop.hbase.RegionTransition;
52 import org.apache.hadoop.hbase.Server;
53 import org.apache.hadoop.hbase.ServerName;
54 import org.apache.hadoop.hbase.TableName;
55 import org.apache.hadoop.hbase.UnknownRegionException;
56 import org.apache.hadoop.hbase.Waiter;
57 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
58 import org.apache.hadoop.hbase.catalog.MetaEditor;
59 import org.apache.hadoop.hbase.catalog.MetaReader;
60 import org.apache.hadoop.hbase.client.Delete;
61 import org.apache.hadoop.hbase.client.HBaseAdmin;
62 import org.apache.hadoop.hbase.client.HTable;
63 import org.apache.hadoop.hbase.client.Mutation;
64 import org.apache.hadoop.hbase.client.Put;
65 import org.apache.hadoop.hbase.client.Result;
66 import org.apache.hadoop.hbase.client.ResultScanner;
67 import org.apache.hadoop.hbase.client.Scan;
68 import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
69 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
70 import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
71 import org.apache.hadoop.hbase.exceptions.DeserializationException;
72 import org.apache.hadoop.hbase.executor.EventType;
73 import org.apache.hadoop.hbase.master.AssignmentManager;
74 import org.apache.hadoop.hbase.master.HMaster;
75 import org.apache.hadoop.hbase.master.RegionState;
76 import org.apache.hadoop.hbase.master.RegionState.State;
77 import org.apache.hadoop.hbase.master.RegionStates;
78 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
79 import org.apache.hadoop.hbase.testclassification.LargeTests;
80 import org.apache.hadoop.hbase.util.Bytes;
81 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
82 import org.apache.hadoop.hbase.util.FSUtils;
83 import org.apache.hadoop.hbase.util.HBaseFsck;
84 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
85 import org.apache.hadoop.hbase.util.PairOfSameType;
86 import org.apache.hadoop.hbase.util.Threads;
87 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
88 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
89 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
90 import org.apache.zookeeper.KeeperException;
91 import org.apache.zookeeper.KeeperException.NodeExistsException;
92 import org.apache.zookeeper.data.Stat;
93 import org.junit.After;
94 import org.junit.AfterClass;
95 import org.junit.Assert;
96 import org.junit.Before;
97 import org.junit.BeforeClass;
98 import org.junit.Test;
99 import org.junit.experimental.categories.Category;
100
101 import com.google.protobuf.ServiceException;
102
103
104
105
106
107
108 @Category(LargeTests.class)
109 public class TestSplitTransactionOnCluster {
// Logger shared by the tests and nested helper classes in this file.
110 private static final Log LOG =
111 LogFactory.getLog(TestSplitTransactionOnCluster.class);
// Admin client; created in setup() and closed in tearDown().
112 private HBaseAdmin admin = null;
// Handle on the shared mini cluster; assigned in setup().
113 private MiniHBaseCluster cluster = null;
// Number of region servers the mini cluster is started with (see setupOnce()).
114 private static final int NB_SERVERS = 3;
// NOTE(review): not referenced in the visible part of this file; presumably used by a
// helper defined further down -- confirm before removing.
115 private static CountDownLatch latch = new CountDownLatch(1);
// Coordination flags for testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack: the test
// sets secondSplit, polls callRollBack and firstSplitCompleted, and resets all three in its
// finally block. Presumably the other side is MockedSplitTransaction (not visible here) --
// TODO confirm.
116 private static volatile boolean secondSplit = false;
117 private static volatile boolean callRollBack = false;
118 private static volatile boolean firstSplitCompleted = false;
// Mirrors the "hbase.assignment.usezk" setting; refreshed from configuration in setupOnce().
119 private static boolean useZKForAssignment = true;
120
// Testing utility that starts and stops the mini cluster used by every test in this class.
121 static final HBaseTestingUtility TESTING_UTIL =
122 new HBaseTestingUtility();
123
124 static void setupOnce() throws Exception {
125 TESTING_UTIL.getConfiguration().setInt("hbase.balancer.period", 60000);
126 useZKForAssignment =
127 TESTING_UTIL.getConfiguration().getBoolean("hbase.assignment.usezk", false);
128 TESTING_UTIL.startMiniCluster(NB_SERVERS);
129 }
130
131 @BeforeClass public static void before() throws Exception {
132
133 TESTING_UTIL.getConfiguration().setBoolean("hbase.assignment.usezk", true);
134 setupOnce();
135 }
136
137 @AfterClass public static void after() throws Exception {
138 TESTING_UTIL.shutdownMiniCluster();
139 }
140
141 @Before public void setup() throws IOException {
142 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
143 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
144 this.cluster = TESTING_UTIL.getMiniHBaseCluster();
145 }
146
147 @After
148 public void tearDown() throws Exception {
149 this.admin.close();
150 }
151
152 private HRegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) {
153 assertEquals(1, regions.size());
154 HRegionInfo hri = regions.get(0).getRegionInfo();
155 return waitOnRIT(hri);
156 }
157
158
159
160
161
162
163
164
165 private HRegionInfo waitOnRIT(final HRegionInfo hri) {
166
167
168 while (TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
169 getRegionStates().isRegionInTransition(hri)) {
170 LOG.info("Waiting on region in transition: " +
171 TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().
172 getRegionTransitionState(hri));
173 Threads.sleep(10);
174 }
175 return hri;
176 }
177
178 @SuppressWarnings("deprecation")
179 @Test(timeout = 60000)
180 public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
181 final TableName tableName =
182 TableName.valueOf("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
183
184 if (!useZKForAssignment) {
185
186 return;
187 }
188
189 try {
190
191 HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
192 final List<HRegion> regions = cluster.getRegions(tableName);
193 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
194 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
195 final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
196 insertData(tableName.getName(), admin, t);
197 t.close();
198
199
200 this.admin.setBalancerRunning(false, true);
201
202 cluster.getMaster().setCatalogJanitorEnabled(false);
203
204
205 final HRegion region = findSplittableRegion(regions);
206 assertTrue("not able to find a splittable region", region != null);
207
208 new Thread() {
209 @Override
210 public void run() {
211 SplitTransaction st = null;
212 st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
213 try {
214 st.prepare();
215 st.execute(regionServer, regionServer);
216 } catch (IOException e) {
217
218 }
219 }
220 }.start();
221 for (int i = 0; !callRollBack && i < 100; i++) {
222 Thread.sleep(100);
223 }
224 assertTrue("Waited too long for rollback", callRollBack);
225 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row3"));
226 try {
227 secondSplit = true;
228
229 region.initialize();
230 st.prepare();
231 st.execute(regionServer, regionServer);
232 } catch (IOException e) {
233 LOG.debug("Rollback started :"+ e.getMessage());
234 st.rollback(regionServer, regionServer);
235 }
236 for (int i=0; !firstSplitCompleted && i<100; i++) {
237 Thread.sleep(100);
238 }
239 assertTrue("fist split did not complete", firstSplitCompleted);
240
241 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
242 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
243
244 for (int i=0; rit.containsKey(hri.getTable()) && i<100; i++) {
245 Thread.sleep(100);
246 }
247 assertFalse("region still in transition", rit.containsKey(
248 rit.containsKey(hri.getTable())));
249
250 List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
251
252 assertEquals("The parent region should be splitted", 2, onlineRegions.size());
253
254 List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager()
255 .getRegionStates().getRegionsOfTable(tableName);
256
257 assertEquals("No of regions in master", 2, regionsOfTable.size());
258 } finally {
259 admin.setBalancerRunning(true, false);
260 secondSplit = false;
261 firstSplitCompleted = false;
262 callRollBack = false;
263 cluster.getMaster().setCatalogJanitorEnabled(true);
264 TESTING_UTIL.deleteTable(tableName);
265 }
266 }
267
268 @Test(timeout = 60000)
269 public void testRITStateForRollback() throws Exception {
270 final TableName tableName =
271 TableName.valueOf("testRITStateForRollback");
272 try {
273
274 HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
275 final List<HRegion> regions = cluster.getRegions(tableName);
276 final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
277 insertData(tableName.getName(), admin, t);
278 t.close();
279
280
281 this.admin.setBalancerRunning(false, true);
282
283 cluster.getMaster().setCatalogJanitorEnabled(false);
284
285
286 final HRegion region = findSplittableRegion(regions);
287 assertTrue("not able to find a splittable region", region != null);
288
289
290 region.getCoprocessorHost().load(FailingSplitRegionObserver.class,
291 Coprocessor.PRIORITY_USER, region.getBaseConf());
292
293
294 this.admin.split(region.getRegionName(), new byte[] {42});
295
296
297 FailingSplitRegionObserver observer = (FailingSplitRegionObserver) region
298 .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
299 assertNotNull(observer);
300 observer.latch.await();
301
302 LOG.info("Waiting for region to come out of RIT");
303 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
304 @Override
305 public boolean evaluate() throws Exception {
306 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
307 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
308 return !rit.containsKey(hri.getEncodedName());
309 }
310 });
311 } finally {
312 admin.setBalancerRunning(true, false);
313 cluster.getMaster().setCatalogJanitorEnabled(true);
314 TESTING_UTIL.deleteTable(tableName);
315 }
316 }
317
318 public static class FailingSplitRegionObserver extends BaseRegionObserver {
319 volatile CountDownLatch latch;
320 volatile CountDownLatch postSplit;
321 @Override
322 public void start(CoprocessorEnvironment e) throws IOException {
323 latch = new CountDownLatch(1);
324 postSplit = new CountDownLatch(1);
325 }
326 @Override
327 public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
328 byte[] splitKey, List<Mutation> metaEntries) throws IOException {
329 latch.countDown();
330 LOG.info("Causing rollback of region split");
331 throw new IOException("Causing rollback of region split");
332 }
333 @Override
334 public void postCompleteSplit(ObserverContext<RegionCoprocessorEnvironment> ctx)
335 throws IOException {
336 postSplit.countDown();
337 LOG.info("postCompleteSplit called");
338 }
339 }
340
341
342
343
344
345
346
347
348
349
350
351
352 @Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling()
353 throws IOException, InterruptedException, NodeExistsException, KeeperException,
354 DeserializationException, ServiceException {
355 final byte [] tableName =
356 Bytes.toBytes("testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling");
357
358
359 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
360 List<HRegion> regions = cluster.getRegions(tableName);
361 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
362
363 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
364
365
366 this.admin.setBalancerRunning(false, true);
367
368 cluster.getMaster().setCatalogJanitorEnabled(false);
369 try {
370
371 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
372
373 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
374 printOutRegions(server, "Initial regions: ");
375 int regionCount = ProtobufUtil.getOnlineRegions(server).size();
376
377
378 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
379
380 split(hri, server, regionCount);
381
382 String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
383 hri.getEncodedName());
384 RegionTransition rt = null;
385 Stat stats = null;
386 List<HRegion> daughters = null;
387 if (useZKForAssignment) {
388 daughters = checkAndGetDaughters(tableName);
389
390
391 for (int i=0; i<100; i++) {
392 stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
393 rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
394 hri.getEncodedName()));
395 if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) break;
396 Thread.sleep(100);
397 }
398 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
399 assertTrue(rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
400
401 cluster.abortRegionServer(tableRegionIndex);
402 }
403 waitUntilRegionServerDead();
404 awaitDaughters(tableName, 2);
405 if (useZKForAssignment) {
406 regions = cluster.getRegions(tableName);
407 for (HRegion r: regions) {
408 assertTrue(daughters.contains(r));
409 }
410
411
412 for (int i=0; i<100; i++) {
413
414 stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
415 if (stats == null) break;
416 Thread.sleep(100);
417 }
418 LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
419 assertTrue(stats == null);
420 }
421 } finally {
422
423 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
424 admin.setBalancerRunning(true, false);
425 cluster.getMaster().setCatalogJanitorEnabled(true);
426 cluster.startRegionServer();
427 t.close();
428 }
429 }
430
431 @Test (timeout = 300000) public void testExistingZnodeBlocksSplitAndWeRollback()
432 throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException {
433 final byte [] tableName =
434 Bytes.toBytes("testExistingZnodeBlocksSplitAndWeRollback");
435
436
437 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
438 List<HRegion> regions = cluster.getRegions(tableName);
439 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
440
441 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
442
443 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
444
445
446 this.admin.setBalancerRunning(false, true);
447
448 cluster.getMaster().setCatalogJanitorEnabled(false);
449 try {
450
451 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
452
453 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
454 printOutRegions(server, "Initial regions: ");
455 int regionCount = ProtobufUtil.getOnlineRegions(server).size();
456
457
458 ServerName fakedServer = ServerName.valueOf("any.old.server", 1234, -1);
459 if (useZKForAssignment) {
460 ZKAssign.createNodeClosing(TESTING_UTIL.getZooKeeperWatcher(),
461 hri, fakedServer);
462 } else {
463 regionStates.updateRegionState(hri, RegionState.State.CLOSING);
464 }
465
466
467 this.admin.split(hri.getRegionNameAsString());
468 this.admin.split(hri.getRegionNameAsString());
469 this.admin.split(hri.getRegionNameAsString());
470
471 for (int i = 0; i < 10; i++) {
472 Thread.sleep(100);
473 assertEquals(regionCount, ProtobufUtil.getOnlineRegions(server).size());
474 }
475 if (useZKForAssignment) {
476
477 ZKAssign.deleteClosingNode(TESTING_UTIL.getZooKeeperWatcher(),
478 hri, fakedServer);
479 } else {
480 regionStates.regionOnline(hri, server.getServerName());
481 }
482
483 split(hri, server, regionCount);
484
485 checkAndGetDaughters(tableName);
486
487 } finally {
488 admin.setBalancerRunning(true, false);
489 cluster.getMaster().setCatalogJanitorEnabled(true);
490 t.close();
491 }
492 }
493
494
495
496
497
498
499
500 @Test (timeout=300000) public void testShutdownFixupWhenDaughterHasSplit()
501 throws IOException, InterruptedException, ServiceException {
502 final byte [] tableName =
503 Bytes.toBytes("testShutdownFixupWhenDaughterHasSplit");
504
505
506 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
507 List<HRegion> regions = cluster.getRegions(tableName);
508 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
509
510 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
511
512
513 this.admin.setBalancerRunning(false, true);
514
515 cluster.getMaster().setCatalogJanitorEnabled(false);
516 try {
517
518 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
519
520 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
521 printOutRegions(server, "Initial regions: ");
522 int regionCount = ProtobufUtil.getOnlineRegions(server).size();
523
524 split(hri, server, regionCount);
525
526 List<HRegion> daughters = checkAndGetDaughters(tableName);
527
528 regionCount = ProtobufUtil.getOnlineRegions(server).size();
529 HRegionInfo daughter = daughters.get(0).getRegionInfo();
530 LOG.info("Daughter we are going to split: " + daughter);
531
532
533 this.admin.compact(daughter.getRegionName());
534 daughters = cluster.getRegions(tableName);
535 HRegion daughterRegion = null;
536 for (HRegion r: daughters) {
537 if (r.getRegionInfo().equals(daughter)) {
538 daughterRegion = r;
539 LOG.info("Found matching HRI: " + daughterRegion);
540 break;
541 }
542 }
543 assertTrue(daughterRegion != null);
544 for (int i=0; i<100; i++) {
545 if (!daughterRegion.hasReferences()) break;
546 Threads.sleep(100);
547 }
548 assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
549 LOG.info("Daughter hri before split (has been compacted): " + daughter);
550 split(daughter, server, regionCount);
551
552 daughters = cluster.getRegions(tableName);
553 for (HRegion d: daughters) {
554 LOG.info("Regions before crash: " + d);
555 }
556
557 cluster.abortRegionServer(tableRegionIndex);
558 waitUntilRegionServerDead();
559 awaitDaughters(tableName, daughters.size());
560
561
562 regions = cluster.getRegions(tableName);
563 for (HRegion d: daughters) {
564 LOG.info("Regions after crash: " + d);
565 }
566 assertEquals(daughters.size(), regions.size());
567 for (HRegion r: regions) {
568 LOG.info("Regions post crash " + r);
569 assertTrue("Missing region post crash " + r, daughters.contains(r));
570 }
571 } finally {
572 admin.setBalancerRunning(true, false);
573 cluster.getMaster().setCatalogJanitorEnabled(true);
574 t.close();
575 }
576 }
577
578 @Test(timeout = 180000)
579 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
580 Configuration conf = TESTING_UTIL.getConfiguration();
581 TableName userTableName =
582 TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
583 HTableDescriptor htd = new HTableDescriptor(userTableName);
584 HColumnDescriptor hcd = new HColumnDescriptor("col");
585 htd.addFamily(hcd);
586 admin.createTable(htd);
587 HTable table = new HTable(conf, userTableName);
588 try {
589 for (int i = 0; i <= 5; i++) {
590 String row = "row" + i;
591 Put p = new Put(row.getBytes());
592 String val = "Val" + i;
593 p.add("col".getBytes(), "ql".getBytes(), val.getBytes());
594 table.put(p);
595 admin.flush(userTableName.getName());
596 Delete d = new Delete(row.getBytes());
597
598 table.delete(d);
599 admin.flush(userTableName.getName());
600 }
601 admin.majorCompact(userTableName.getName());
602 List<HRegionInfo> regionsOfTable = TESTING_UTIL.getMiniHBaseCluster()
603 .getMaster().getAssignmentManager().getRegionStates()
604 .getRegionsOfTable(userTableName);
605 HRegionInfo hRegionInfo = regionsOfTable.get(0);
606 Put p = new Put("row6".getBytes());
607 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
608 table.put(p);
609 p = new Put("row7".getBytes());
610 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
611 table.put(p);
612 p = new Put("row8".getBytes());
613 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
614 table.put(p);
615 admin.flush(userTableName.getName());
616 admin.split(hRegionInfo.getRegionName(), "row7".getBytes());
617 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
618 .getAssignmentManager().getRegionStates()
619 .getRegionsOfTable(userTableName);
620
621 while (regionsOfTable.size() != 2) {
622 Thread.sleep(2000);
623 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
624 .getAssignmentManager().getRegionStates()
625 .getRegionsOfTable(userTableName);
626 }
627 Assert.assertEquals(2, regionsOfTable.size());
628 Scan s = new Scan();
629 ResultScanner scanner = table.getScanner(s);
630 int mainTableCount = 0;
631 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
632 mainTableCount++;
633 }
634 Assert.assertEquals(3, mainTableCount);
635 } finally {
636 table.close();
637 }
638 }
639
640
641
642
643 static class UselessTestAbortable implements Abortable {
644 boolean aborted = false;
645 @Override
646 public void abort(String why, Throwable e) {
647 LOG.warn("ABORTED (But nothing to abort): why=" + why, e);
648 aborted = true;
649 }
650
651 @Override
652 public boolean isAborted() {
653 return this.aborted;
654 }
655 }
656
657
658
659
660
661
662
663
664
665
666
667 @Test(timeout = 400000)
668 public void testMasterRestartWhenSplittingIsPartial()
669 throws IOException, InterruptedException, NodeExistsException,
670 KeeperException, DeserializationException, ServiceException {
671 final byte[] tableName = Bytes.toBytes("testMasterRestartWhenSplittingIsPartial");
672
673 if (!useZKForAssignment) {
674
675 return;
676 }
677
678
679 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
680 List<HRegion> regions = cluster.getRegions(tableName);
681 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
682
683 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
684
685
686 this.admin.setBalancerRunning(false, true);
687
688 cluster.getMaster().setCatalogJanitorEnabled(false);
689 ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
690 "testMasterRestartWhenSplittingIsPartial", new UselessTestAbortable());
691 try {
692
693 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
694
695 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
696 printOutRegions(server, "Initial regions: ");
697
698
699 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
700
701
702 this.admin.split(hri.getRegionNameAsString());
703 checkAndGetDaughters(tableName);
704
705 String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
706 Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
707 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
708 + stats);
709 byte[] bytes = ZKAssign.getData(zkw, hri.getEncodedName());
710 RegionTransition rtd = RegionTransition.parseFrom(bytes);
711
712 assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
713 || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
714
715
716 MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
717
718 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
719
720
721
722 hri.setOffline(true);
723 hri.setSplit(true);
724 ServerName regionServerOfRegion = master.getAssignmentManager()
725 .getRegionStates().getRegionServerOfRegion(hri);
726 assertTrue(regionServerOfRegion != null);
727
728
729 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
730 String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
731 Stat stat = new Stat();
732 byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
733
734 for (int i=0; data != null && i<60; i++) {
735 Thread.sleep(1000);
736 data = ZKUtil.getDataNoWatch(zkw, node, stat);
737 }
738 assertNull("Waited too long for ZK node to be removed: "+node, data);
739 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
740 assertTrue("Split parent should be in SPLIT state",
741 regionStates.isRegionInState(hri, State.SPLIT));
742 regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
743 assertTrue(regionServerOfRegion == null);
744 } finally {
745
746 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
747 admin.setBalancerRunning(true, false);
748 cluster.getMaster().setCatalogJanitorEnabled(true);
749 t.close();
750 zkw.close();
751 }
752 }
753
754
755
756
757
758
759
760
761
762 @Test (timeout = 300000)
763 public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
764 throws IOException, InterruptedException, NodeExistsException,
765 KeeperException, ServiceException {
766 final byte[] tableName = Bytes.toBytes("testMasterRestartAtRegionSplitPendingCatalogJanitor");
767
768
769 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
770 List<HRegion> regions = cluster.getRegions(tableName);
771 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
772
773 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
774
775
776 this.admin.setBalancerRunning(false, true);
777
778 cluster.getMaster().setCatalogJanitorEnabled(false);
779 ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
780 "testMasterRestartAtRegionSplitPendingCatalogJanitor", new UselessTestAbortable());
781 try {
782
783 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
784
785 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
786 printOutRegions(server, "Initial regions: ");
787
788 this.admin.split(hri.getRegionNameAsString());
789 checkAndGetDaughters(tableName);
790
791 String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
792 Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
793 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
794 + stats);
795 String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
796 Stat stat = new Stat();
797 byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
798
799 for (int i=0; data != null && i<60; i++) {
800 Thread.sleep(1000);
801 data = ZKUtil.getDataNoWatch(zkw, node, stat);
802 }
803 assertNull("Waited too long for ZK node to be removed: "+node, data);
804
805 MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
806
807 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
808
809
810
811 hri.setOffline(true);
812 hri.setSplit(true);
813 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
814 assertTrue("Split parent should be in SPLIT state",
815 regionStates.isRegionInState(hri, State.SPLIT));
816 ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
817 assertTrue(regionServerOfRegion == null);
818 } finally {
819 this.admin.setBalancerRunning(true, false);
820 cluster.getMaster().setCatalogJanitorEnabled(true);
821 t.close();
822 zkw.close();
823 }
824 }
825
826
827
828
829
830
831
832
833
834
835
836
837 @Test(timeout = 60000)
838 public void testSplitBeforeSettingSplittingInZK() throws Exception,
839 InterruptedException, KeeperException {
840 testSplitBeforeSettingSplittingInZKInternals();
841 }
842
843 @Test(timeout = 60000)
844 public void testTableExistsIfTheSpecifiedTableRegionIsSplitParent() throws Exception {
845 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
846 final TableName tableName =
847 TableName.valueOf("testTableExistsIfTheSpecifiedTableRegionIsSplitParent");
848
849 HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
850 List<HRegion> regions = null;
851 try {
852 regions = cluster.getRegions(tableName);
853 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
854 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
855 insertData(tableName.getName(), admin, t);
856
857 admin.setBalancerRunning(false, true);
858
859 cluster.getMaster().setCatalogJanitorEnabled(false);
860 boolean tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
861 tableName);
862 assertEquals("The specified table should present.", true, tableExists);
863 final HRegion region = findSplittableRegion(regions);
864 assertTrue("not able to find a splittable region", region != null);
865 SplitTransaction st = new SplitTransaction(region, Bytes.toBytes("row2"));
866 try {
867 st.prepare();
868 st.createDaughters(regionServer, regionServer);
869 } catch (IOException e) {
870
871 }
872 tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
873 tableName);
874 assertEquals("The specified table should present.", true, tableExists);
875 Map<String, RegionState> rit = cluster.getMaster().getAssignmentManager().getRegionStates()
876 .getRegionsInTransition();
877 assertTrue(rit.size() == 3);
878 cluster.getMaster().getAssignmentManager().regionOffline(st.getFirstDaughter());
879 cluster.getMaster().getAssignmentManager().regionOffline(st.getSecondDaughter());
880 cluster.getMaster().getAssignmentManager().regionOffline(region.getRegionInfo());
881 rit = cluster.getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition();
882 assertTrue(rit.size() == 0);
883 } finally {
884 if (regions != null) {
885 String node = ZKAssign.getNodeName(zkw, regions.get(0).getRegionInfo()
886 .getEncodedName());
887 ZKUtil.deleteNodeFailSilent(zkw, node);
888 }
889 admin.setBalancerRunning(true, false);
890 cluster.getMaster().setCatalogJanitorEnabled(true);
891 t.close();
892 TESTING_UTIL.deleteTable(tableName);
893 }
894 }
895
896 private void insertData(final byte[] tableName, HBaseAdmin admin, HTable t) throws IOException,
897 InterruptedException {
898 Put p = new Put(Bytes.toBytes("row1"));
899 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
900 t.put(p);
901 p = new Put(Bytes.toBytes("row2"));
902 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
903 t.put(p);
904 p = new Put(Bytes.toBytes("row3"));
905 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
906 t.put(p);
907 p = new Put(Bytes.toBytes("row4"));
908 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
909 t.put(p);
910 admin.flush(tableName);
911 }
912
913
914
915
916
917 @Test(timeout = 60000)
918 public void testSplitRegionWithNoStoreFiles()
919 throws Exception {
920 final TableName tableName =
921 TableName.valueOf("testSplitRegionWithNoStoreFiles");
922
923 createTableAndWait(tableName.getName(), HConstants.CATALOG_FAMILY);
924 List<HRegion> regions = cluster.getRegions(tableName);
925 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
926 ensureTableRegionNotOnSameServerAsMeta(admin, hri);
927 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
928 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
929
930 this.admin.setBalancerRunning(false, true);
931
932 cluster.getMaster().setCatalogJanitorEnabled(false);
933 try {
934
935 printOutRegions(regionServer, "Initial regions: ");
936 Configuration conf = cluster.getConfiguration();
937 HBaseFsck.debugLsr(conf, new Path("/"));
938 Path rootDir = FSUtils.getRootDir(conf);
939 FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
940 Map<String, Path> storefiles =
941 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
942 assertEquals("Expected nothing but found " + storefiles.toString(), storefiles.size(), 0);
943
944
945 regions = cluster.getRegions(tableName);
946 final HRegion region = findSplittableRegion(regions);
947 assertTrue("not able to find a splittable region", region != null);
948
949
950 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
951 try {
952 st.prepare();
953 st.execute(regionServer, regionServer);
954 } catch (IOException e) {
955 fail("Split execution should have succeeded with no exceptions thrown");
956 }
957
958
959
960 List<HRegion> daughters = cluster.getRegions(tableName);
961 assertTrue(daughters.size() == 2);
962
963
964 HBaseFsck.debugLsr(conf, new Path("/"));
965 Map<String, Path> storefilesAfter =
966 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
967 assertEquals("Expected nothing but found " + storefilesAfter.toString(),
968 storefilesAfter.size(), 0);
969
970 hri = region.getRegionInfo();
971 AssignmentManager am = cluster.getMaster().getAssignmentManager();
972 RegionStates regionStates = am.getRegionStates();
973 long start = EnvironmentEdgeManager.currentTimeMillis();
974 while (!regionStates.isRegionInState(hri, State.SPLIT)) {
975 assertFalse("Timed out in waiting split parent to be in state SPLIT",
976 EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
977 Thread.sleep(500);
978 }
979
980
981 am.assign(hri, true, true);
982 assertFalse("Split region can't be assigned",
983 regionStates.isRegionInTransition(hri));
984 assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
985
986
987 am.unassign(hri, true, null);
988 assertFalse("Split region can't be unassigned",
989 regionStates.isRegionInTransition(hri));
990 assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
991 } finally {
992 admin.setBalancerRunning(true, false);
993 cluster.getMaster().setCatalogJanitorEnabled(true);
994 }
995 }
996
997 @Test(timeout = 180000)
998 public void testSplitHooksBeforeAndAfterPONR() throws Exception {
999 TableName firstTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_1");
1000 TableName secondTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2");
1001 HColumnDescriptor hcd = new HColumnDescriptor("cf");
1002
1003 HTableDescriptor desc = new HTableDescriptor(firstTable);
1004 desc.addCoprocessor(MockedRegionObserver.class.getName());
1005 desc.addFamily(hcd);
1006 admin.createTable(desc);
1007 TESTING_UTIL.waitUntilAllRegionsAssigned(firstTable);
1008
1009 desc = new HTableDescriptor(secondTable);
1010 desc.addFamily(hcd);
1011 admin.createTable(desc);
1012 TESTING_UTIL.waitUntilAllRegionsAssigned(secondTable);
1013
1014 List<HRegion> firstTableRegions = cluster.getRegions(firstTable);
1015 List<HRegion> secondTableRegions = cluster.getRegions(secondTable);
1016
1017
1018 if (firstTableRegions.size() == 0 || secondTableRegions.size() == 0) {
1019 fail("Each table should have at least one region.");
1020 }
1021 ServerName serverName =
1022 cluster.getServerHoldingRegion(firstTableRegions.get(0).getRegionName());
1023 admin.move(secondTableRegions.get(0).getRegionInfo().getEncodedNameAsBytes(),
1024 Bytes.toBytes(serverName.getServerName()));
1025 HTable table1 = null;
1026 HTable table2 = null;
1027 try {
1028 table1 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1029 table2 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1030 insertData(firstTable.getName(), admin, table1);
1031 insertData(secondTable.getName(), admin, table2);
1032 admin.split(firstTable.getName(), "row2".getBytes());
1033 firstTableRegions = cluster.getRegions(firstTable.getName());
1034 while (firstTableRegions.size() != 2) {
1035 Thread.sleep(1000);
1036 firstTableRegions = cluster.getRegions(firstTable.getName());
1037 }
1038 assertEquals("Number of regions after split should be 2.", 2, firstTableRegions.size());
1039 secondTableRegions = cluster.getRegions(secondTable.getName());
1040 assertEquals("Number of regions after split should be 2.", 2, secondTableRegions.size());
1041 } finally {
1042 if (table1 != null) {
1043 table1.close();
1044 }
1045 if (table2 != null) {
1046 table2.close();
1047 }
1048 TESTING_UTIL.deleteTable(firstTable);
1049 TESTING_UTIL.deleteTable(secondTable);
1050 }
1051 }
1052
1053 private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
1054 final byte[] tableName = Bytes.toBytes("testSplitBeforeSettingSplittingInZK");
1055 try {
1056
1057 createTableAndWait(tableName, Bytes.toBytes("cf"));
1058
1059 List<HRegion> regions = awaitTableRegions(tableName);
1060 assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
1061
1062 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
1063 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1064 final HRegion region = findSplittableRegion(regions);
1065 assertTrue("not able to find a splittable region", region != null);
1066 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
1067 @Override
1068 public PairOfSameType<HRegion> stepsBeforePONR(final Server server,
1069 final RegionServerServices services, boolean testing) throws IOException {
1070 throw new SplittingNodeCreationFailedException ();
1071 }
1072 };
1073 String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1074 region.getRegionInfo().getEncodedName());
1075 regionServer.getZooKeeper().sync(node);
1076 for (int i = 0; i < 100; i++) {
1077
1078
1079
1080 if (ZKUtil.checkExists(regionServer.getZooKeeper(), node) != -1) {
1081 Thread.sleep(100);
1082 }
1083 }
1084 try {
1085 st.prepare();
1086 st.execute(regionServer, regionServer);
1087 } catch (IOException e) {
1088
1089
1090
1091 assertTrue("Should be instance of CreateSplittingNodeFailedException",
1092 e instanceof SplittingNodeCreationFailedException );
1093 node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1094 region.getRegionInfo().getEncodedName());
1095 {
1096 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1097 }
1098 assertTrue(st.rollback(regionServer, regionServer));
1099 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1100 }
1101 } finally {
1102 TESTING_UTIL.deleteTable(tableName);
1103 }
1104 }
1105
1106 @Test
1107 public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck()
1108 throws Exception {
1109 final TableName tableName =
1110 TableName.valueOf("testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck");
1111 try {
1112 HTableDescriptor htd = new HTableDescriptor(tableName);
1113 htd.addFamily(new HColumnDescriptor("f"));
1114 htd.addFamily(new HColumnDescriptor("i_f"));
1115 htd.setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName());
1116 admin.createTable(htd);
1117 List<HRegion> regions = awaitTableRegions(tableName.toBytes());
1118 HRegion region = regions.get(0);
1119 for(int i = 3;i<9;i++) {
1120 Put p = new Put(Bytes.toBytes("row"+i));
1121 p.add(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1122 p.add(Bytes.toBytes("i_f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1123 region.put(p);
1124 }
1125 region.flushcache();
1126 Store store = region.getStore(Bytes.toBytes("f"));
1127 Collection<StoreFile> storefiles = store.getStorefiles();
1128 assertEquals(storefiles.size(), 1);
1129 assertFalse(region.hasReferences());
1130 Path referencePath =
1131 region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
1132 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1133 assertNull(referencePath);
1134 referencePath =
1135 region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f",
1136 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1137 assertNotNull(referencePath);
1138 } finally {
1139 TESTING_UTIL.deleteTable(tableName);
1140 }
1141 }
1142
1143 @Test (timeout=300000)
1144 public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
1145 TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
1146 try {
1147 HTableDescriptor desc = new HTableDescriptor(table);
1148 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
1149 admin.createTable(desc);
1150 HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName());
1151 for(int i = 1; i < 5; i++) {
1152 Put p1 = new Put(("r"+i).getBytes());
1153 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
1154 hTable.put(p1);
1155 }
1156 admin.flush(desc.getTableName().toString());
1157 List<HRegion> regions = cluster.getRegions(desc.getTableName());
1158 int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
1159 HRegionServer regionServer = cluster.getRegionServer(serverWith);
1160 cluster.getServerWith(regions.get(0).getRegionName());
1161 SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
1162 st.prepare();
1163 st.stepsBeforePONR(regionServer, regionServer, false);
1164 Path tableDir =
1165 FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
1166 desc.getTableName());
1167 tableDir.getFileSystem(cluster.getConfiguration());
1168 List<Path> regionDirs =
1169 FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1170 assertEquals(3,regionDirs.size());
1171 cluster.startRegionServer();
1172 regionServer.kill();
1173 cluster.getRegionServerThreads().get(serverWith).join();
1174
1175 while (cluster.getMaster().getServerManager().areDeadServersInProgress()) {
1176 Thread.sleep(10);
1177 }
1178 AssignmentManager am = cluster.getMaster().getAssignmentManager();
1179 while(am.getRegionStates().isRegionsInTransition()){
1180 Thread.sleep(10);
1181 }
1182 assertEquals(am.getRegionStates().getRegionsInTransition().toString(), am.getRegionStates()
1183 .getRegionsInTransition().size(), 0);
1184 regionDirs =
1185 FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1186 assertEquals(1,regionDirs.size());
1187 } finally {
1188 TESTING_UTIL.deleteTable(table);
1189 }
1190 }
1191
1192 public static class MockedSplitTransaction extends SplitTransaction {
1193
1194 private HRegion currentRegion;
1195 public MockedSplitTransaction(HRegion r, byte[] splitrow) {
1196 super(r, splitrow);
1197 this.currentRegion = r;
1198 }
1199
1200 @Override
1201 void transitionZKNode(Server server, RegionServerServices services, HRegion a, HRegion b)
1202 throws IOException {
1203 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1204 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1205 try {
1206 if (!secondSplit){
1207 callRollBack = true;
1208 latch.await();
1209 }
1210 } catch (InterruptedException e) {
1211 }
1212
1213 }
1214 super.transitionZKNode(server, services, a, b);
1215 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1216 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1217 firstSplitCompleted = true;
1218 }
1219 }
1220 @Override
1221 public boolean rollback(Server server, RegionServerServices services) throws IOException {
1222 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1223 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1224 if(secondSplit){
1225 super.rollback(server, services);
1226 latch.countDown();
1227 return true;
1228 }
1229 }
1230 return super.rollback(server, services);
1231 }
1232
1233 }
1234
1235 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
1236 for (int i = 0; i < 5; ++i) {
1237 for (HRegion r: regions) {
1238 if (r.isSplittable()) {
1239 return(r);
1240 }
1241 }
1242 Thread.sleep(100);
1243 }
1244 return(null);
1245 }
1246
1247 @Test(timeout = 120000)
1248 public void testFailedSplit() throws Exception {
1249 TableName tableName = TableName.valueOf("testFailedSplit");
1250 byte[] colFamily = Bytes.toBytes("info");
1251 TESTING_UTIL.createTable(tableName, colFamily);
1252 HTable table = new HTable(TESTING_UTIL.getConfiguration(), tableName);
1253 try {
1254 TESTING_UTIL.loadTable(table, colFamily);
1255 List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1256 assertTrue(regions.size() == 1);
1257 final HRegion actualRegion = cluster.getRegions(tableName).get(0);
1258 actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
1259 Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
1260
1261
1262 admin.split(tableName.getNameAsString());
1263 FailingSplitRegionObserver observer = (FailingSplitRegionObserver) actualRegion
1264 .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
1265 assertNotNull(observer);
1266 observer.latch.await();
1267 observer.postSplit.await();
1268 LOG.info("Waiting for region to come out of RIT");
1269 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
1270 @Override
1271 public boolean evaluate() throws Exception {
1272 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
1273 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
1274 return (rit.size() == 0);
1275 }
1276 });
1277 regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1278 assertTrue(regions.size() == 1);
1279 assertTrue(admin.balancer());
1280 } finally {
1281 table.close();
1282 TESTING_UTIL.deleteTable(tableName);
1283 }
1284 }
1285
1286 private List<HRegion> checkAndGetDaughters(byte[] tableName)
1287 throws InterruptedException {
1288 List<HRegion> daughters = null;
1289
1290 for (int i=0; i<100; i++) {
1291 daughters = cluster.getRegions(tableName);
1292 if (daughters.size() >= 2) break;
1293 Thread.sleep(100);
1294 }
1295 assertTrue(daughters.size() >= 2);
1296 return daughters;
1297 }
1298
1299 private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
1300 throws IOException, InterruptedException {
1301 cluster.abortMaster(0);
1302 cluster.waitOnMaster(0);
1303 cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
1304 MockMasterWithoutCatalogJanitor.class, HMaster.class);
1305 MockMasterWithoutCatalogJanitor master = null;
1306 master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
1307 cluster.waitForActiveAndReadyMaster();
1308 return master;
1309 }
1310
1311 private void split(final HRegionInfo hri, final HRegionServer server, final int regionCount)
1312 throws IOException, InterruptedException {
1313 this.admin.split(hri.getRegionNameAsString());
1314 try {
1315 for (int i = 0; ProtobufUtil.getOnlineRegions(server).size() <= regionCount && i < 300; i++) {
1316 LOG.debug("Waiting on region to split");
1317 Thread.sleep(100);
1318 }
1319
1320 assertFalse("Waited too long for split",
1321 ProtobufUtil.getOnlineRegions(server).size() <= regionCount);
1322 } catch (RegionServerStoppedException e) {
1323 if (useZKForAssignment) {
1324
1325 LOG.error(e);
1326 throw e;
1327 }
1328 }
1329 }
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342 private int ensureTableRegionNotOnSameServerAsMeta(final HBaseAdmin admin,
1343 final HRegionInfo hri)
1344 throws HBaseIOException, MasterNotRunningException,
1345 ZooKeeperConnectionException, InterruptedException {
1346
1347
1348
1349 int metaServerIndex = cluster.getServerWithMeta();
1350 assertTrue(metaServerIndex != -1);
1351 HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1352 int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1353 assertTrue(tableRegionIndex != -1);
1354 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1355 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1356 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1357 assertNotNull(hrs);
1358 assertNotNull(hri);
1359 LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
1360 metaRegionServer.getServerName() + " to " +
1361 hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1362 admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
1363 }
1364
1365 for (int i = 0; i < 100; i++) {
1366 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1367 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1368 LOG.debug("Waiting on region move off the hbase:meta server; current index " +
1369 tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1370 Thread.sleep(100);
1371 }
1372 assertTrue("Region not moved off hbase:meta server", tableRegionIndex != -1
1373 && tableRegionIndex != metaServerIndex);
1374
1375 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1376 assertTrue(tableRegionIndex != -1);
1377 assertNotSame(metaServerIndex, tableRegionIndex);
1378 return tableRegionIndex;
1379 }
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390 private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1391 final HRegionServer notThisOne) {
1392 for (RegionServerThread rst: cluster.getRegionServerThreads()) {
1393 HRegionServer hrs = rst.getRegionServer();
1394 if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1395 if (hrs.isStopping() || hrs.isStopped()) continue;
1396 return hrs;
1397 }
1398 return null;
1399 }
1400
1401 private void printOutRegions(final HRegionServer hrs, final String prefix)
1402 throws IOException {
1403 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
1404 for (HRegionInfo region: regions) {
1405 LOG.info(prefix + region.getRegionNameAsString());
1406 }
1407 }
1408
1409 private void waitUntilRegionServerDead() throws InterruptedException {
1410
1411 for (int i=0; cluster.getMaster().getClusterStatus().
1412 getServers().size() == NB_SERVERS && i<100; i++) {
1413 LOG.info("Waiting on server to go down");
1414 Thread.sleep(100);
1415 }
1416 assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
1417 getServers().size() == NB_SERVERS);
1418 }
1419
1420 private void awaitDaughters(byte[] tableName, int numDaughters) throws InterruptedException {
1421
1422 for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
1423 LOG.info("Waiting for repair to happen");
1424 Thread.sleep(1000);
1425 }
1426 if (cluster.getRegions(tableName).size() < numDaughters) {
1427 fail("Waiting too long for daughter regions");
1428 }
1429 }
1430
1431 private List<HRegion> awaitTableRegions(final byte[] tableName) throws InterruptedException {
1432 List<HRegion> regions = null;
1433 for (int i = 0; i < 100; i++) {
1434 regions = cluster.getRegions(tableName);
1435 if (regions.size() > 0) break;
1436 Thread.sleep(100);
1437 }
1438 return regions;
1439 }
1440
1441 private HTable createTableAndWait(byte[] tableName, byte[] cf) throws IOException,
1442 InterruptedException {
1443 HTable t = TESTING_UTIL.createTable(tableName, cf);
1444 awaitTableRegions(tableName);
1445 assertTrue("Table not online: " + Bytes.toString(tableName),
1446 cluster.getRegions(tableName).size() != 0);
1447 return t;
1448 }
1449
1450 public static class MockMasterWithoutCatalogJanitor extends HMaster {
1451
1452 public MockMasterWithoutCatalogJanitor(Configuration conf) throws IOException, KeeperException,
1453 InterruptedException {
1454 super(conf);
1455 }
1456 }
1457
1458 private static class SplittingNodeCreationFailedException extends IOException {
1459 private static final long serialVersionUID = 1652404976265623004L;
1460
1461 public SplittingNodeCreationFailedException () {
1462 super();
1463 }
1464 }
1465
1466 public static class MockedRegionObserver extends BaseRegionObserver {
1467 private SplitTransaction st = null;
1468 private PairOfSameType<HRegion> daughterRegions = null;
1469
1470 @Override
1471 public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
1472 byte[] splitKey, List<Mutation> metaEntries) throws IOException {
1473 RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1474 HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1475 List<HRegion> onlineRegions =
1476 rs.getOnlineRegions(TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2"));
1477 HRegion region = onlineRegions.get(0);
1478 for (HRegion r : onlineRegions) {
1479 if (r.getRegionInfo().containsRow(splitKey)) {
1480 region = r;
1481 break;
1482 }
1483 }
1484 st = new SplitTransaction(region, splitKey);
1485 if (!st.prepare()) {
1486 LOG.error("Prepare for the table " + region.getTableDesc().getNameAsString()
1487 + " failed. So returning null. ");
1488 ctx.bypass();
1489 return;
1490 }
1491 region.forceSplit(splitKey);
1492 daughterRegions = st.stepsBeforePONR(rs, rs, false);
1493 HRegionInfo copyOfParent = new HRegionInfo(region.getRegionInfo());
1494 copyOfParent.setOffline(true);
1495 copyOfParent.setSplit(true);
1496
1497 Put putParent = MetaEditor.makePutFromRegionInfo(copyOfParent);
1498 MetaEditor.addDaughtersToPut(putParent, daughterRegions.getFirst().getRegionInfo(),
1499 daughterRegions.getSecond().getRegionInfo());
1500 metaEntries.add(putParent);
1501
1502 Put putA = MetaEditor.makePutFromRegionInfo(daughterRegions.getFirst().getRegionInfo());
1503 Put putB = MetaEditor.makePutFromRegionInfo(daughterRegions.getSecond().getRegionInfo());
1504 st.addLocation(putA, rs.getServerName(), 1);
1505 st.addLocation(putB, rs.getServerName(), 1);
1506 metaEntries.add(putA);
1507 metaEntries.add(putB);
1508 }
1509
1510 @Override
1511 public void preSplitAfterPONR(ObserverContext<RegionCoprocessorEnvironment> ctx)
1512 throws IOException {
1513 RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1514 HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1515 st.stepsAfterPONR(rs, rs, daughterRegions);
1516 }
1517
1518 }
1519
1520 static class CustomSplitPolicy extends RegionSplitPolicy {
1521
1522 @Override
1523 protected boolean shouldSplit() {
1524 return true;
1525 }
1526
1527 @Override
1528 public boolean skipStoreFileRangeCheck(String familyName) {
1529 if(familyName.startsWith("i_")) {
1530 return true;
1531 } else {
1532 return false;
1533 }
1534 }
1535 }
1536 }
1537