1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertNotSame;
25 import static org.junit.Assert.assertNull;
26 import static org.junit.Assert.assertTrue;
27 import static org.junit.Assert.fail;
28
29 import java.io.IOException;
30 import java.io.InterruptedIOException;
31 import java.util.Collection;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.concurrent.CountDownLatch;
35
36 import org.apache.commons.logging.Log;
37 import org.apache.commons.logging.LogFactory;
38 import org.apache.hadoop.conf.Configuration;
39 import org.apache.hadoop.fs.FileSystem;
40 import org.apache.hadoop.fs.Path;
41 import org.apache.hadoop.hbase.Abortable;
42 import org.apache.hadoop.hbase.CoordinatedStateManager;
43 import org.apache.hadoop.hbase.Coprocessor;
44 import org.apache.hadoop.hbase.CoprocessorEnvironment;
45 import org.apache.hadoop.hbase.HBaseTestingUtility;
46 import org.apache.hadoop.hbase.HColumnDescriptor;
47 import org.apache.hadoop.hbase.HConstants;
48 import org.apache.hadoop.hbase.HRegionInfo;
49 import org.apache.hadoop.hbase.HTableDescriptor;
50 import org.apache.hadoop.hbase.MasterNotRunningException;
51 import org.apache.hadoop.hbase.MetaTableAccessor;
52 import org.apache.hadoop.hbase.MiniHBaseCluster;
53 import org.apache.hadoop.hbase.RegionTransition;
54 import org.apache.hadoop.hbase.Server;
55 import org.apache.hadoop.hbase.ServerName;
56 import org.apache.hadoop.hbase.TableName;
57 import org.apache.hadoop.hbase.UnknownRegionException;
58 import org.apache.hadoop.hbase.Waiter;
59 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
60 import org.apache.hadoop.hbase.client.Admin;
61 import org.apache.hadoop.hbase.client.Connection;
62 import org.apache.hadoop.hbase.client.ConnectionFactory;
63 import org.apache.hadoop.hbase.client.Delete;
64 import org.apache.hadoop.hbase.client.HBaseAdmin;
65 import org.apache.hadoop.hbase.client.HTable;
66 import org.apache.hadoop.hbase.client.Mutation;
67 import org.apache.hadoop.hbase.client.Put;
68 import org.apache.hadoop.hbase.client.Result;
69 import org.apache.hadoop.hbase.client.ResultScanner;
70 import org.apache.hadoop.hbase.client.Scan;
71 import org.apache.hadoop.hbase.client.Table;
72 import org.apache.hadoop.hbase.coordination.ZKSplitTransactionCoordination;
73 import org.apache.hadoop.hbase.coordination.ZkCloseRegionCoordination;
74 import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
75 import org.apache.hadoop.hbase.coordination.ZkOpenRegionCoordination;
76 import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
77 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
78 import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
79 import org.apache.hadoop.hbase.exceptions.DeserializationException;
80 import org.apache.hadoop.hbase.executor.EventType;
81 import org.apache.hadoop.hbase.master.AssignmentManager;
82 import org.apache.hadoop.hbase.master.HMaster;
83 import org.apache.hadoop.hbase.master.RegionState;
84 import org.apache.hadoop.hbase.master.RegionState.State;
85 import org.apache.hadoop.hbase.master.RegionStates;
86 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
87 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
88 import org.apache.hadoop.hbase.testclassification.LargeTests;
89 import org.apache.hadoop.hbase.util.Bytes;
90 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
91 import org.apache.hadoop.hbase.util.FSUtils;
92 import org.apache.hadoop.hbase.util.HBaseFsck;
93 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
94 import org.apache.hadoop.hbase.util.PairOfSameType;
95 import org.apache.hadoop.hbase.util.Threads;
96 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
97 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
98 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
99 import org.apache.zookeeper.KeeperException;
100 import org.apache.zookeeper.KeeperException.NodeExistsException;
101 import org.apache.zookeeper.data.Stat;
102 import org.junit.After;
103 import org.junit.AfterClass;
104 import org.junit.Assert;
105 import org.junit.Before;
106 import org.junit.BeforeClass;
107 import org.junit.Test;
108 import org.junit.experimental.categories.Category;
109
110 import com.google.protobuf.ServiceException;
111
112
113
114
115
116
117 @Category(LargeTests.class)
118 @SuppressWarnings("deprecation")
119 public class TestSplitTransactionOnCluster {
120 private static final Log LOG =
121 LogFactory.getLog(TestSplitTransactionOnCluster.class);
122 private HBaseAdmin admin = null;
123 private MiniHBaseCluster cluster = null;
124 private static final int NB_SERVERS = 3;
125 private static CountDownLatch latch = new CountDownLatch(1);
126 private static volatile boolean secondSplit = false;
127 private static volatile boolean callRollBack = false;
128 private static volatile boolean firstSplitCompleted = false;
129 private static boolean useZKForAssignment;
130
131 static final HBaseTestingUtility TESTING_UTIL =
132 new HBaseTestingUtility();
133
134 static void setupOnce() throws Exception {
135 TESTING_UTIL.getConfiguration().setInt("hbase.balancer.period", 60000);
136 useZKForAssignment = TESTING_UTIL.getConfiguration().getBoolean(
137 "hbase.assignment.usezk", true);
138 TESTING_UTIL.startMiniCluster(NB_SERVERS);
139 }
140
141 @BeforeClass public static void before() throws Exception {
142
143 TESTING_UTIL.getConfiguration().setBoolean("hbase.assignment.usezk", true);
144 setupOnce();
145 }
146
147 @AfterClass public static void after() throws Exception {
148 TESTING_UTIL.shutdownMiniCluster();
149 }
150
151 @Before public void setup() throws IOException {
152 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
153 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
154 this.cluster = TESTING_UTIL.getMiniHBaseCluster();
155 }
156
157 @After
158 public void tearDown() throws Exception {
159 this.admin.close();
160 }
161
162 private HRegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) {
163 assertEquals(1, regions.size());
164 HRegionInfo hri = regions.get(0).getRegionInfo();
165 return waitOnRIT(hri);
166 }
167
168
169
170
171
172
173
174
175 private HRegionInfo waitOnRIT(final HRegionInfo hri) {
176
177
178 while (TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
179 getRegionStates().isRegionInTransition(hri)) {
180 LOG.info("Waiting on region in transition: " +
181 TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().
182 getRegionTransitionState(hri));
183 Threads.sleep(10);
184 }
185 return hri;
186 }
187
188 @Test(timeout = 60000)
189 public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
190 final TableName tableName =
191 TableName.valueOf("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
192
193 if (!useZKForAssignment) {
194
195 return;
196 }
197
198 try {
199
200 HTable t = createTableAndWait(tableName, Bytes.toBytes("cf"));
201 final List<HRegion> regions = cluster.getRegions(tableName);
202 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
203 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
204 final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
205 insertData(tableName, admin, t);
206 t.close();
207
208
209 this.admin.setBalancerRunning(false, true);
210
211 cluster.getMaster().setCatalogJanitorEnabled(false);
212
213
214 final HRegion region = findSplittableRegion(regions);
215 assertTrue("not able to find a splittable region", region != null);
216 MockedCoordinatedStateManager cp = new MockedCoordinatedStateManager();
217 cp.initialize(regionServer, region);
218 cp.start();
219 regionServer.csm = cp;
220
221 new Thread() {
222 @Override
223 public void run() {
224 SplitTransaction st = null;
225 st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
226 try {
227 st.prepare();
228 st.execute(regionServer, regionServer);
229 } catch (IOException e) {
230
231 }
232 }
233 }.start();
234 for (int i = 0; !callRollBack && i < 100; i++) {
235 Thread.sleep(100);
236 }
237 assertTrue("Waited too long for rollback", callRollBack);
238 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row3"));
239 try {
240 secondSplit = true;
241
242 region.initialize();
243 st.prepare();
244 st.execute(regionServer, regionServer);
245 } catch (IOException e) {
246 LOG.debug("Rollback started :"+ e.getMessage());
247 st.rollback(regionServer, regionServer);
248 }
249 for (int i=0; !firstSplitCompleted && i<100; i++) {
250 Thread.sleep(100);
251 }
252 assertTrue("fist split did not complete", firstSplitCompleted);
253
254 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
255 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
256
257 for (int i=0; rit.containsKey(hri.getTable()) && i<100; i++) {
258 Thread.sleep(100);
259 }
260 assertFalse("region still in transition", rit.containsKey(
261 rit.containsKey(hri.getTable())));
262
263 List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
264
265 assertEquals("The parent region should be splitted", 2, onlineRegions.size());
266
267 List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager()
268 .getRegionStates().getRegionsOfTable(tableName);
269
270 assertEquals("No of regions in master", 2, regionsOfTable.size());
271 } finally {
272 admin.setBalancerRunning(true, false);
273 secondSplit = false;
274 firstSplitCompleted = false;
275 callRollBack = false;
276 cluster.getMaster().setCatalogJanitorEnabled(true);
277 TESTING_UTIL.deleteTable(tableName);
278 }
279 }
280
281 @Test(timeout = 60000)
282 public void testRITStateForRollback() throws Exception {
283 final TableName tableName =
284 TableName.valueOf("testRITStateForRollback");
285 try {
286
287 Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
288 final List<HRegion> regions = cluster.getRegions(tableName);
289 final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
290 insertData(tableName, admin, t);
291 t.close();
292
293
294 this.admin.setBalancerRunning(false, true);
295
296 cluster.getMaster().setCatalogJanitorEnabled(false);
297
298
299 final HRegion region = findSplittableRegion(regions);
300 assertTrue("not able to find a splittable region", region != null);
301
302
303 region.getCoprocessorHost().load(FailingSplitRegionObserver.class,
304 Coprocessor.PRIORITY_USER, region.getBaseConf());
305
306
307 this.admin.split(region.getRegionName(), new byte[] {42});
308
309
310 FailingSplitRegionObserver observer = (FailingSplitRegionObserver) region
311 .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
312 assertNotNull(observer);
313 observer.latch.await();
314
315 LOG.info("Waiting for region to come out of RIT");
316 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
317 @Override
318 public boolean evaluate() throws Exception {
319 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
320 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
321 return (rit.size() == 0);
322 }
323 });
324 } finally {
325 admin.setBalancerRunning(true, false);
326 cluster.getMaster().setCatalogJanitorEnabled(true);
327 TESTING_UTIL.deleteTable(tableName);
328 }
329 }
330 @Test(timeout = 60000)
331 public void testSplitFailedCompactionAndSplit() throws Exception {
332 final TableName tableName = TableName.valueOf("testSplitFailedCompactionAndSplit");
333 Configuration conf = TESTING_UTIL.getConfiguration();
334 try {
335 HBaseAdmin admin = new HBaseAdmin(conf);
336
337 HTableDescriptor htd = new HTableDescriptor(tableName);
338 byte[] cf = Bytes.toBytes("cf");
339 htd.addFamily(new HColumnDescriptor(cf));
340 admin.createTable(htd);
341
342 for (int i = 0; cluster.getRegions(tableName).size() == 0 && i < 100; i++) {
343 Thread.sleep(100);
344 }
345 assertEquals(1, cluster.getRegions(tableName).size());
346
347 HRegion region = cluster.getRegions(tableName).get(0);
348 Store store = region.getStore(cf);
349 int regionServerIndex = cluster.getServerWith(region.getRegionName());
350 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
351
352 Table t = new HTable(conf, tableName);
353
354 insertData(tableName, admin, t);
355 insertData(tableName, admin, t);
356
357 int fileNum = store.getStorefiles().size();
358
359 store.triggerMajorCompaction();
360 CompactionContext cc = store.requestCompaction();
361 assertNotNull(cc);
362
363
364 assertEquals(2, region.close(false).get(cf).size());
365
366 region.initialize();
367
368
369 assertFalse(region.compact(cc, store));
370 assertTrue(fileNum > store.getStorefiles().size());
371
372
373 SplitTransaction st = new SplitTransaction(region, Bytes.toBytes("row3"));
374 assertTrue(st.prepare());
375 st.execute(regionServer, regionServer);
376 LOG.info("Waiting for region to come out of RIT");
377 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
378 @Override
379 public boolean evaluate() throws Exception {
380 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
381 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
382 return (rit.size() == 0);
383 }
384 });
385 assertEquals(2, cluster.getRegions(tableName).size());
386 } finally {
387 TESTING_UTIL.deleteTable(tableName);
388 }
389 }
390
391 public static class FailingSplitRegionObserver extends BaseRegionObserver {
392 volatile CountDownLatch latch;
393 volatile CountDownLatch postSplit;
394 @Override
395 public void start(CoprocessorEnvironment e) throws IOException {
396 latch = new CountDownLatch(1);
397 postSplit = new CountDownLatch(1);
398 }
399 @Override
400 public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
401 byte[] splitKey, List<Mutation> metaEntries) throws IOException {
402 latch.countDown();
403 LOG.info("Causing rollback of region split");
404 throw new IOException("Causing rollback of region split");
405 }
406 @Override
407 public void postCompleteSplit(ObserverContext<RegionCoprocessorEnvironment> ctx)
408 throws IOException {
409 postSplit.countDown();
410 LOG.info("postCompleteSplit called");
411 }
412 }
413
414
415
416
417
418
419
420
421
422
423
424
425 @Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling()
426 throws IOException, InterruptedException, NodeExistsException, KeeperException,
427 DeserializationException, ServiceException {
428 final TableName tableName =
429 TableName.valueOf("testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling");
430
431
432 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
433 List<HRegion> regions = cluster.getRegions(tableName);
434 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
435
436 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
437
438
439 this.admin.setBalancerRunning(false, true);
440
441 cluster.getMaster().setCatalogJanitorEnabled(false);
442 try {
443
444 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
445
446 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
447 printOutRegions(server, "Initial regions: ");
448 int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
449
450
451 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
452
453 split(hri, server, regionCount);
454
455 String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
456 hri.getEncodedName());
457 RegionTransition rt = null;
458 Stat stats = null;
459 List<HRegion> daughters = null;
460 if (useZKForAssignment) {
461 daughters = checkAndGetDaughters(tableName);
462
463
464 for (int i=0; i<100; i++) {
465 stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
466 rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
467 hri.getEncodedName()));
468 if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) break;
469 Thread.sleep(100);
470 }
471 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
472 assertTrue(rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
473
474 cluster.abortRegionServer(tableRegionIndex);
475 }
476 waitUntilRegionServerDead();
477 awaitDaughters(tableName, 2);
478 if (useZKForAssignment) {
479 regions = cluster.getRegions(tableName);
480 for (HRegion r: regions) {
481 assertTrue(daughters.contains(r));
482 }
483
484
485 for (int i=0; i<100; i++) {
486
487 stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
488 if (stats == null) break;
489 Thread.sleep(100);
490 }
491 LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
492 assertTrue(stats == null);
493 }
494 } finally {
495
496 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
497 cluster.getMaster().getAssignmentManager().regionOffline(hri);
498 admin.setBalancerRunning(true, false);
499 cluster.getMaster().setCatalogJanitorEnabled(true);
500 cluster.startRegionServer();
501 t.close();
502 TESTING_UTIL.deleteTable(tableName);
503 }
504 }
505
506 @Test (timeout = 300000) public void testExistingZnodeBlocksSplitAndWeRollback()
507 throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException {
508 final TableName tableName =
509 TableName.valueOf("testExistingZnodeBlocksSplitAndWeRollback");
510
511
512 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
513 List<HRegion> regions = cluster.getRegions(tableName);
514 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
515
516 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
517
518 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
519
520
521 this.admin.setBalancerRunning(false, true);
522
523 cluster.getMaster().setCatalogJanitorEnabled(false);
524 try {
525
526 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
527
528 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
529 printOutRegions(server, "Initial regions: ");
530 int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
531
532
533 ServerName fakedServer = ServerName.valueOf("any.old.server", 1234, -1);
534 if (useZKForAssignment) {
535 ZKAssign.createNodeClosing(TESTING_UTIL.getZooKeeperWatcher(),
536 hri, fakedServer);
537 } else {
538 regionStates.updateRegionState(hri, RegionState.State.CLOSING);
539 }
540
541
542 this.admin.split(hri.getRegionNameAsString());
543 this.admin.split(hri.getRegionNameAsString());
544 this.admin.split(hri.getRegionNameAsString());
545
546 for (int i = 0; i < 10; i++) {
547 Thread.sleep(100);
548 assertEquals(regionCount, ProtobufUtil.getOnlineRegions(
549 server.getRSRpcServices()).size());
550 }
551 if (useZKForAssignment) {
552
553 ZKAssign.deleteClosingNode(TESTING_UTIL.getZooKeeperWatcher(),
554 hri, fakedServer);
555 } else {
556 regionStates.regionOnline(hri, server.getServerName());
557 }
558
559 split(hri, server, regionCount);
560
561 checkAndGetDaughters(tableName);
562
563 } finally {
564 admin.setBalancerRunning(true, false);
565 cluster.getMaster().setCatalogJanitorEnabled(true);
566 t.close();
567 }
568 }
569
570
571
572
573
574
575
576 @Test (timeout=300000) public void testShutdownFixupWhenDaughterHasSplit()
577 throws IOException, InterruptedException, ServiceException {
578 final TableName tableName =
579 TableName.valueOf("testShutdownFixupWhenDaughterHasSplit");
580
581
582 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
583 List<HRegion> regions = cluster.getRegions(tableName);
584 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
585
586 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
587
588
589 this.admin.setBalancerRunning(false, true);
590
591 cluster.getMaster().setCatalogJanitorEnabled(false);
592 try {
593
594 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
595
596 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
597 printOutRegions(server, "Initial regions: ");
598 int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
599
600 split(hri, server, regionCount);
601
602 List<HRegion> daughters = checkAndGetDaughters(tableName);
603
604 regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
605 HRegionInfo daughter = daughters.get(0).getRegionInfo();
606 LOG.info("Daughter we are going to split: " + daughter);
607
608
609 this.admin.compact(daughter.getRegionName());
610 daughters = cluster.getRegions(tableName);
611 HRegion daughterRegion = null;
612 for (HRegion r: daughters) {
613 if (r.getRegionInfo().equals(daughter)) {
614 daughterRegion = r;
615 LOG.info("Found matching HRI: " + daughterRegion);
616 break;
617 }
618 }
619 assertTrue(daughterRegion != null);
620 for (int i=0; i<100; i++) {
621 if (!daughterRegion.hasReferences()) break;
622 Threads.sleep(100);
623 }
624 assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
625 LOG.info("Daughter hri before split (has been compacted): " + daughter);
626 split(daughter, server, regionCount);
627
628 daughters = cluster.getRegions(tableName);
629 for (HRegion d: daughters) {
630 LOG.info("Regions before crash: " + d);
631 }
632
633 cluster.abortRegionServer(tableRegionIndex);
634 waitUntilRegionServerDead();
635 awaitDaughters(tableName, daughters.size());
636
637
638 regions = cluster.getRegions(tableName);
639 for (HRegion d: daughters) {
640 LOG.info("Regions after crash: " + d);
641 }
642 assertEquals(daughters.size(), regions.size());
643 for (HRegion r: regions) {
644 LOG.info("Regions post crash " + r);
645 assertTrue("Missing region post crash " + r, daughters.contains(r));
646 }
647 } finally {
648 admin.setBalancerRunning(true, false);
649 cluster.getMaster().setCatalogJanitorEnabled(true);
650 t.close();
651 }
652 }
653
654 @Test(timeout = 180000)
655 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
656 Configuration conf = TESTING_UTIL.getConfiguration();
657 TableName userTableName =
658 TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
659 HTableDescriptor htd = new HTableDescriptor(userTableName);
660 HColumnDescriptor hcd = new HColumnDescriptor("col");
661 htd.addFamily(hcd);
662 admin.createTable(htd);
663 Table table = new HTable(conf, userTableName);
664 try {
665 for (int i = 0; i <= 5; i++) {
666 String row = "row" + i;
667 Put p = new Put(row.getBytes());
668 String val = "Val" + i;
669 p.add("col".getBytes(), "ql".getBytes(), val.getBytes());
670 table.put(p);
671 admin.flush(userTableName.getName());
672 Delete d = new Delete(row.getBytes());
673
674 table.delete(d);
675 admin.flush(userTableName.getName());
676 }
677 admin.majorCompact(userTableName.getName());
678 List<HRegionInfo> regionsOfTable = TESTING_UTIL.getMiniHBaseCluster()
679 .getMaster().getAssignmentManager().getRegionStates()
680 .getRegionsOfTable(userTableName);
681 HRegionInfo hRegionInfo = regionsOfTable.get(0);
682 Put p = new Put("row6".getBytes());
683 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
684 table.put(p);
685 p = new Put("row7".getBytes());
686 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
687 table.put(p);
688 p = new Put("row8".getBytes());
689 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
690 table.put(p);
691 admin.flush(userTableName.getName());
692 admin.split(hRegionInfo.getRegionName(), "row7".getBytes());
693 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
694 .getAssignmentManager().getRegionStates()
695 .getRegionsOfTable(userTableName);
696
697 while (regionsOfTable.size() != 2) {
698 Thread.sleep(2000);
699 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
700 .getAssignmentManager().getRegionStates()
701 .getRegionsOfTable(userTableName);
702 }
703 Assert.assertEquals(2, regionsOfTable.size());
704 Scan s = new Scan();
705 ResultScanner scanner = table.getScanner(s);
706 int mainTableCount = 0;
707 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
708 mainTableCount++;
709 }
710 Assert.assertEquals(3, mainTableCount);
711 } finally {
712 table.close();
713 }
714 }
715
716
717
718
719 static class UselessTestAbortable implements Abortable {
720 boolean aborted = false;
721 @Override
722 public void abort(String why, Throwable e) {
723 LOG.warn("ABORTED (But nothing to abort): why=" + why, e);
724 aborted = true;
725 }
726
727 @Override
728 public boolean isAborted() {
729 return this.aborted;
730 }
731 }
732
733
734
735
736
737
738
739
740
741
742
743 @Test(timeout = 400000)
744 public void testMasterRestartWhenSplittingIsPartial()
745 throws IOException, InterruptedException, NodeExistsException,
746 KeeperException, DeserializationException, ServiceException {
747 final TableName tableName = TableName.valueOf("testMasterRestartWhenSplittingIsPartial");
748
749 if (!useZKForAssignment) {
750
751 return;
752 }
753
754
755 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
756 List<HRegion> regions = cluster.getRegions(tableName);
757 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
758
759 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
760
761
762 this.admin.setBalancerRunning(false, true);
763
764 cluster.getMaster().setCatalogJanitorEnabled(false);
765 ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
766 "testMasterRestartWhenSplittingIsPartial", new UselessTestAbortable());
767 try {
768
769 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
770
771 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
772 printOutRegions(server, "Initial regions: ");
773
774
775 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
776
777
778 this.admin.split(hri.getRegionNameAsString());
779 checkAndGetDaughters(tableName);
780
781 String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
782 Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
783 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
784 + stats);
785 byte[] bytes = ZKAssign.getData(zkw, hri.getEncodedName());
786 RegionTransition rtd = RegionTransition.parseFrom(bytes);
787
788 assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
789 || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
790
791
792 MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
793
794 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
795
796
797
798 hri.setOffline(true);
799 hri.setSplit(true);
800 ServerName regionServerOfRegion = master.getAssignmentManager()
801 .getRegionStates().getRegionServerOfRegion(hri);
802 assertTrue(regionServerOfRegion != null);
803
804
805 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
806 String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
807 Stat stat = new Stat();
808 byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
809
810 for (int i=0; data != null && i<60; i++) {
811 Thread.sleep(1000);
812 data = ZKUtil.getDataNoWatch(zkw, node, stat);
813 }
814 assertNull("Waited too long for ZK node to be removed: "+node, data);
815 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
816 assertTrue("Split parent should be in SPLIT state",
817 regionStates.isRegionInState(hri, State.SPLIT));
818 regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
819 assertTrue(regionServerOfRegion == null);
820 } finally {
821
822 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
823 admin.setBalancerRunning(true, false);
824 cluster.getMaster().setCatalogJanitorEnabled(true);
825 t.close();
826 zkw.close();
827 }
828 }
829
830
831
832
833
834
835
836
837
838 @Test (timeout = 300000)
839 public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
840 throws IOException, InterruptedException, NodeExistsException,
841 KeeperException, ServiceException {
842 final TableName tableName = TableName
843 .valueOf("testMasterRestartAtRegionSplitPendingCatalogJanitor");
844
845
846 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
847 List<HRegion> regions = cluster.getRegions(tableName);
848 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
849
850 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
851
852
853 this.admin.setBalancerRunning(false, true);
854
855 cluster.getMaster().setCatalogJanitorEnabled(false);
856 ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
857 "testMasterRestartAtRegionSplitPendingCatalogJanitor", new UselessTestAbortable());
858 try {
859
860 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
861
862 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
863 printOutRegions(server, "Initial regions: ");
864
865 this.admin.split(hri.getRegionNameAsString());
866 checkAndGetDaughters(tableName);
867
868 String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
869 Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
870 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
871 + stats);
872 String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
873 Stat stat = new Stat();
874 byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
875
876 for (int i=0; data != null && i<60; i++) {
877 Thread.sleep(1000);
878 data = ZKUtil.getDataNoWatch(zkw, node, stat);
879 }
880 assertNull("Waited too long for ZK node to be removed: "+node, data);
881
882 MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
883
884 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
885
886
887
888 hri.setOffline(true);
889 hri.setSplit(true);
890 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
891 assertTrue("Split parent should be in SPLIT state",
892 regionStates.isRegionInState(hri, State.SPLIT));
893 ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
894 assertTrue(regionServerOfRegion == null);
895 } finally {
896 this.admin.setBalancerRunning(true, false);
897 cluster.getMaster().setCatalogJanitorEnabled(true);
898 t.close();
899 zkw.close();
900 }
901 }
902
903
904
905
906
907
908
909
910
911
912
913
914 @Test(timeout = 60000)
915 public void testSplitBeforeSettingSplittingInZK() throws Exception,
916 InterruptedException, KeeperException {
917 testSplitBeforeSettingSplittingInZKInternals();
918 }
919
920 @Test(timeout = 60000)
921 public void testTableExistsIfTheSpecifiedTableRegionIsSplitParent() throws Exception {
922 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
923 final TableName tableName =
924 TableName.valueOf("testTableExistsIfTheSpecifiedTableRegionIsSplitParent");
925
926 Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
927 List<HRegion> regions = null;
928 try {
929 regions = cluster.getRegions(tableName);
930 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
931 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
932 insertData(tableName, admin, t);
933
934 admin.setBalancerRunning(false, true);
935
936 cluster.getMaster().setCatalogJanitorEnabled(false);
937 boolean tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
938 tableName);
939 assertEquals("The specified table should present.", true, tableExists);
940 final HRegion region = findSplittableRegion(regions);
941 assertTrue("not able to find a splittable region", region != null);
942 SplitTransaction st = new SplitTransaction(region, Bytes.toBytes("row2"));
943 try {
944 st.prepare();
945 st.createDaughters(regionServer, regionServer);
946 } catch (IOException e) {
947
948 }
949 tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
950 tableName);
951 assertEquals("The specified table should present.", true, tableExists);
952 Map<String, RegionState> rit = cluster.getMaster().getAssignmentManager().getRegionStates()
953 .getRegionsInTransition();
954 assertTrue(rit.size() == 3);
955 cluster.getMaster().getAssignmentManager().regionOffline(st.getFirstDaughter());
956 cluster.getMaster().getAssignmentManager().regionOffline(st.getSecondDaughter());
957 cluster.getMaster().getAssignmentManager().regionOffline(region.getRegionInfo());
958 rit = cluster.getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition();
959 assertTrue(rit.size() == 0);
960 }
961 finally {
962 admin.setBalancerRunning(true, false);
963 cluster.getMaster().setCatalogJanitorEnabled(true);
964 t.close();
965 TESTING_UTIL.deleteTable(tableName);
966 }
967 }
968
969 private void insertData(final TableName tableName, HBaseAdmin admin, Table t) throws IOException,
970 InterruptedException {
971 Put p = new Put(Bytes.toBytes("row1"));
972 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
973 t.put(p);
974 p = new Put(Bytes.toBytes("row2"));
975 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
976 t.put(p);
977 p = new Put(Bytes.toBytes("row3"));
978 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
979 t.put(p);
980 p = new Put(Bytes.toBytes("row4"));
981 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
982 t.put(p);
983 admin.flush(tableName);
984 }
985
986
987
988
989
990 @Test(timeout = 60000)
991 public void testSplitRegionWithNoStoreFiles()
992 throws Exception {
993 final TableName tableName =
994 TableName.valueOf("testSplitRegionWithNoStoreFiles");
995
996 createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
997 List<HRegion> regions = cluster.getRegions(tableName);
998 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
999 ensureTableRegionNotOnSameServerAsMeta(admin, hri);
1000 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
1001 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1002
1003 this.admin.setBalancerRunning(false, true);
1004
1005 cluster.getMaster().setCatalogJanitorEnabled(false);
1006 try {
1007
1008 printOutRegions(regionServer, "Initial regions: ");
1009 Configuration conf = cluster.getConfiguration();
1010 HBaseFsck.debugLsr(conf, new Path("/"));
1011 Path rootDir = FSUtils.getRootDir(conf);
1012 FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
1013 Map<String, Path> storefiles =
1014 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
1015 assertEquals("Expected nothing but found " + storefiles.toString(), storefiles.size(), 0);
1016
1017
1018 regions = cluster.getRegions(tableName);
1019 final HRegion region = findSplittableRegion(regions);
1020 assertTrue("not able to find a splittable region", region != null);
1021
1022
1023 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
1024 try {
1025 st.prepare();
1026 st.execute(regionServer, regionServer);
1027 } catch (IOException e) {
1028 fail("Split execution should have succeeded with no exceptions thrown");
1029 }
1030
1031
1032
1033 List<HRegion> daughters = cluster.getRegions(tableName);
1034 assertTrue(daughters.size() == 2);
1035
1036
1037 HBaseFsck.debugLsr(conf, new Path("/"));
1038 Map<String, Path> storefilesAfter =
1039 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
1040 assertEquals("Expected nothing but found " + storefilesAfter.toString(),
1041 storefilesAfter.size(), 0);
1042
1043 hri = region.getRegionInfo();
1044 AssignmentManager am = cluster.getMaster().getAssignmentManager();
1045 RegionStates regionStates = am.getRegionStates();
1046 long start = EnvironmentEdgeManager.currentTime();
1047 while (!regionStates.isRegionInState(hri, State.SPLIT)) {
1048 assertFalse("Timed out in waiting split parent to be in state SPLIT",
1049 EnvironmentEdgeManager.currentTime() - start > 60000);
1050 Thread.sleep(500);
1051 }
1052
1053
1054 am.assign(hri, true, true);
1055 assertFalse("Split region can't be assigned",
1056 regionStates.isRegionInTransition(hri));
1057 assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
1058
1059
1060 am.unassign(hri, true, null);
1061 assertFalse("Split region can't be unassigned",
1062 regionStates.isRegionInTransition(hri));
1063 assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
1064 } finally {
1065 admin.setBalancerRunning(true, false);
1066 cluster.getMaster().setCatalogJanitorEnabled(true);
1067 }
1068 }
1069
1070 @Test(timeout = 180000)
1071 public void testSplitHooksBeforeAndAfterPONR() throws Exception {
1072 TableName firstTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_1");
1073 TableName secondTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2");
1074 HColumnDescriptor hcd = new HColumnDescriptor("cf");
1075
1076 HTableDescriptor desc = new HTableDescriptor(firstTable);
1077 desc.addCoprocessor(MockedRegionObserver.class.getName());
1078 desc.addFamily(hcd);
1079 admin.createTable(desc);
1080 TESTING_UTIL.waitUntilAllRegionsAssigned(firstTable);
1081
1082 desc = new HTableDescriptor(secondTable);
1083 desc.addFamily(hcd);
1084 admin.createTable(desc);
1085 TESTING_UTIL.waitUntilAllRegionsAssigned(secondTable);
1086
1087 List<HRegion> firstTableRegions = cluster.getRegions(firstTable);
1088 List<HRegion> secondTableRegions = cluster.getRegions(secondTable);
1089
1090
1091 if (firstTableRegions.size() == 0 || secondTableRegions.size() == 0) {
1092 fail("Each table should have at least one region.");
1093 }
1094 ServerName serverName =
1095 cluster.getServerHoldingRegion(firstTable, firstTableRegions.get(0).getRegionName());
1096 admin.move(secondTableRegions.get(0).getRegionInfo().getEncodedNameAsBytes(),
1097 Bytes.toBytes(serverName.getServerName()));
1098 Table table1 = null;
1099 Table table2 = null;
1100 try {
1101 table1 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1102 table2 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1103 insertData(firstTable, admin, table1);
1104 insertData(secondTable, admin, table2);
1105 admin.split(firstTable, "row2".getBytes());
1106 firstTableRegions = cluster.getRegions(firstTable);
1107 while (firstTableRegions.size() != 2) {
1108 Thread.sleep(1000);
1109 firstTableRegions = cluster.getRegions(firstTable);
1110 }
1111 assertEquals("Number of regions after split should be 2.", 2, firstTableRegions.size());
1112 secondTableRegions = cluster.getRegions(secondTable);
1113 assertEquals("Number of regions after split should be 2.", 2, secondTableRegions.size());
1114 } finally {
1115 if (table1 != null) {
1116 table1.close();
1117 }
1118 if (table2 != null) {
1119 table2.close();
1120 }
1121 TESTING_UTIL.deleteTable(firstTable);
1122 TESTING_UTIL.deleteTable(secondTable);
1123 }
1124 }
1125
1126 private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
1127 final TableName tableName = TableName.valueOf("testSplitBeforeSettingSplittingInZK");
1128 try {
1129
1130 createTableAndWait(tableName, Bytes.toBytes("cf"));
1131
1132 List<HRegion> regions = awaitTableRegions(tableName);
1133 assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
1134
1135 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
1136 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1137 final HRegion region = findSplittableRegion(regions);
1138 assertTrue("not able to find a splittable region", region != null);
1139 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
1140 @Override
1141 public PairOfSameType<HRegion> stepsBeforePONR(final Server server,
1142 final RegionServerServices services, boolean testing) throws IOException {
1143 throw new SplittingNodeCreationFailedException ();
1144 }
1145 };
1146 String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1147 region.getRegionInfo().getEncodedName());
1148 regionServer.getZooKeeper().sync(node);
1149 for (int i = 0; i < 100; i++) {
1150
1151
1152
1153 if (ZKUtil.checkExists(regionServer.getZooKeeper(), node) != -1) {
1154 Thread.sleep(100);
1155 }
1156 }
1157 try {
1158 st.prepare();
1159 st.execute(regionServer, regionServer);
1160 } catch (IOException e) {
1161
1162
1163
1164 assertTrue("Should be instance of CreateSplittingNodeFailedException",
1165 e instanceof SplittingNodeCreationFailedException );
1166 node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1167 region.getRegionInfo().getEncodedName());
1168 {
1169 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1170 }
1171 assertTrue(st.rollback(regionServer, regionServer));
1172 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1173 }
1174 } finally {
1175 TESTING_UTIL.deleteTable(tableName);
1176 }
1177 }
1178
1179 @Test
1180 public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck()
1181 throws Exception {
1182 final TableName tableName =
1183 TableName.valueOf("testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck");
1184 try {
1185 HTableDescriptor htd = new HTableDescriptor(tableName);
1186 htd.addFamily(new HColumnDescriptor("f"));
1187 htd.setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName());
1188 admin.createTable(htd);
1189 List<HRegion> regions = awaitTableRegions(tableName);
1190 HRegion region = regions.get(0);
1191 for(int i = 3;i<9;i++) {
1192 Put p = new Put(Bytes.toBytes("row"+i));
1193 p.add(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1194 region.put(p);
1195 }
1196 region.flushcache();
1197 Store store = region.getStore(Bytes.toBytes("f"));
1198 Collection<StoreFile> storefiles = store.getStorefiles();
1199 assertEquals(storefiles.size(), 1);
1200 assertFalse(region.hasReferences());
1201 Path referencePath = region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
1202 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1203 assertNotNull(referencePath);
1204 } finally {
1205 TESTING_UTIL.deleteTable(tableName);
1206 }
1207 }
1208
1209 @Test(timeout = 120000)
1210 public void testFailedSplit() throws Exception {
1211 TableName tableName = TableName.valueOf("testFailedSplit");
1212 byte[] colFamily = Bytes.toBytes("info");
1213 TESTING_UTIL.createTable(tableName, colFamily);
1214 Connection connection = ConnectionFactory.createConnection(TESTING_UTIL.getConfiguration());
1215 HTable table = (HTable) connection.getTable(tableName);
1216 try {
1217 TESTING_UTIL.loadTable(table, colFamily);
1218 List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1219 assertTrue(regions.size() == 1);
1220 final HRegion actualRegion = cluster.getRegions(tableName).get(0);
1221 actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
1222 Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
1223
1224
1225 admin.split(tableName);
1226 FailingSplitRegionObserver observer = (FailingSplitRegionObserver) actualRegion
1227 .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
1228 assertNotNull(observer);
1229 observer.latch.await();
1230 observer.postSplit.await();
1231 LOG.info("Waiting for region to come out of RIT");
1232 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
1233 @Override
1234 public boolean evaluate() throws Exception {
1235 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
1236 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
1237 return (rit.size() == 0);
1238 }
1239 });
1240 regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1241 assertTrue(regions.size() == 1);
1242 assertTrue(admin.balancer());
1243 } finally {
1244 table.close();
1245 connection.close();
1246 TESTING_UTIL.deleteTable(tableName);
1247 }
1248 }
1249
1250 @Test (timeout=300000)
1251 public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
1252 TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
1253 try {
1254 HTableDescriptor desc = new HTableDescriptor(table);
1255 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
1256 admin.createTable(desc);
1257 HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName());
1258 for(int i = 1; i < 5; i++) {
1259 Put p1 = new Put(("r"+i).getBytes());
1260 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
1261 hTable.put(p1);
1262 }
1263 admin.flush(desc.getTableName());
1264 List<HRegion> regions = cluster.getRegions(desc.getTableName());
1265 int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
1266 HRegionServer regionServer = cluster.getRegionServer(serverWith);
1267 cluster.getServerWith(regions.get(0).getRegionName());
1268 SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
1269 st.prepare();
1270 st.stepsBeforePONR(regionServer, regionServer, false);
1271 Path tableDir =
1272 FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
1273 desc.getTableName());
1274 tableDir.getFileSystem(cluster.getConfiguration());
1275 List<Path> regionDirs =
1276 FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1277 assertEquals(3,regionDirs.size());
1278 cluster.startRegionServer();
1279 regionServer.kill();
1280 cluster.getRegionServerThreads().get(serverWith).join();
1281
1282 while (cluster.getMaster().getServerManager().areDeadServersInProgress()) {
1283 Thread.sleep(10);
1284 }
1285 AssignmentManager am = cluster.getMaster().getAssignmentManager();
1286 while(am.getRegionStates().isRegionsInTransition()) {
1287 Thread.sleep(10);
1288 }
1289 assertEquals(am.getRegionStates().getRegionsInTransition().toString(), 0, am
1290 .getRegionStates().getRegionsInTransition().size());
1291 regionDirs =
1292 FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1293 assertEquals(1,regionDirs.size());
1294 } finally {
1295 TESTING_UTIL.deleteTable(table);
1296 }
1297 }
1298
1299 public static class MockedCoordinatedStateManager extends ZkCoordinatedStateManager {
1300
1301 public void initialize(Server server, HRegion region) {
1302 this.server = server;
1303 this.watcher = server.getZooKeeper();
1304 splitTransactionCoordination = new MockedSplitTransactionCoordination(this, watcher, region);
1305 closeRegionCoordination = new ZkCloseRegionCoordination(this, watcher);
1306 openRegionCoordination = new ZkOpenRegionCoordination(this, watcher);
1307 }
1308 }
1309
1310 public static class MockedSplitTransaction extends SplitTransaction {
1311
1312 private HRegion currentRegion;
1313 public MockedSplitTransaction(HRegion region, byte[] splitrow) {
1314 super(region, splitrow);
1315 this.currentRegion = region;
1316 }
1317 @Override
1318 public boolean rollback(Server server, RegionServerServices services) throws IOException {
1319 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1320 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1321 if(secondSplit){
1322 super.rollback(server, services);
1323 latch.countDown();
1324 return true;
1325 }
1326 }
1327 return super.rollback(server, services);
1328 }
1329
1330
1331 }
1332
1333 public static class MockedSplitTransactionCoordination extends ZKSplitTransactionCoordination {
1334
1335 private HRegion currentRegion;
1336
1337 public MockedSplitTransactionCoordination(CoordinatedStateManager coordinationProvider,
1338 ZooKeeperWatcher watcher, HRegion region) {
1339 super(coordinationProvider, watcher);
1340 currentRegion = region;
1341 }
1342
1343 @Override
1344 public void completeSplitTransaction(RegionServerServices services, HRegion a, HRegion b,
1345 SplitTransactionDetails std, HRegion parent) throws IOException {
1346 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1347 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1348 try {
1349 if (!secondSplit){
1350 callRollBack = true;
1351 latch.await();
1352 }
1353 } catch (InterruptedException e) {
1354 }
1355
1356 }
1357 super.completeSplitTransaction(services, a, b, std, parent);
1358 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1359 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1360 firstSplitCompleted = true;
1361 }
1362 }
1363 }
1364
1365 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
1366 for (int i = 0; i < 5; ++i) {
1367 for (HRegion r: regions) {
1368 if (r.isSplittable()) {
1369 return(r);
1370 }
1371 }
1372 Thread.sleep(100);
1373 }
1374 return(null);
1375 }
1376
1377 private List<HRegion> checkAndGetDaughters(TableName tableName)
1378 throws InterruptedException {
1379 List<HRegion> daughters = null;
1380
1381 for (int i=0; i<100; i++) {
1382 daughters = cluster.getRegions(tableName);
1383 if (daughters.size() >= 2) break;
1384 Thread.sleep(100);
1385 }
1386 assertTrue(daughters.size() >= 2);
1387 return daughters;
1388 }
1389
1390 private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
1391 throws IOException, InterruptedException {
1392 cluster.abortMaster(0);
1393 cluster.waitOnMaster(0);
1394 cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
1395 MockMasterWithoutCatalogJanitor.class, HMaster.class);
1396 MockMasterWithoutCatalogJanitor master = null;
1397 master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
1398 cluster.waitForActiveAndReadyMaster();
1399 return master;
1400 }
1401
1402 private void split(final HRegionInfo hri, final HRegionServer server, final int regionCount)
1403 throws IOException, InterruptedException {
1404 this.admin.split(hri.getRegionNameAsString());
1405 try {
1406 for (int i = 0; ProtobufUtil.getOnlineRegions(
1407 server.getRSRpcServices()).size() <= regionCount && i < 300; i++) {
1408 LOG.debug("Waiting on region to split");
1409 Thread.sleep(100);
1410 }
1411
1412 assertFalse("Waited too long for split",
1413 ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size() <= regionCount);
1414 } catch (RegionServerStoppedException e) {
1415 if (useZKForAssignment) {
1416
1417 LOG.error(e);
1418 throw e;
1419 }
1420 }
1421 }
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434 private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin,
1435 final HRegionInfo hri)
1436 throws IOException, MasterNotRunningException,
1437 ZooKeeperConnectionException, InterruptedException {
1438
1439
1440
1441 int metaServerIndex = cluster.getServerWithMeta();
1442 assertTrue(metaServerIndex != -1);
1443 HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1444 int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1445 assertTrue(tableRegionIndex != -1);
1446 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1447 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1448 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1449 assertNotNull(hrs);
1450 assertNotNull(hri);
1451 LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
1452 metaRegionServer.getServerName() + " to " +
1453 hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1454 admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
1455 }
1456
1457 for (int i = 0; i < 20; i++) {
1458 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1459 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1460 LOG.debug("Waiting on region move off the hbase:meta server; current index " +
1461 tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1462 Thread.sleep(1000);
1463 }
1464 assertTrue("Region not moved off hbase:meta server", tableRegionIndex != -1
1465 && tableRegionIndex != metaServerIndex);
1466
1467 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1468 assertTrue(tableRegionIndex != -1);
1469 assertNotSame(metaServerIndex, tableRegionIndex);
1470 return tableRegionIndex;
1471 }
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482 private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1483 final HRegionServer notThisOne) {
1484 for (RegionServerThread rst: cluster.getRegionServerThreads()) {
1485 HRegionServer hrs = rst.getRegionServer();
1486 if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1487 if (hrs.isStopping() || hrs.isStopped()) continue;
1488 return hrs;
1489 }
1490 return null;
1491 }
1492
1493 private void printOutRegions(final HRegionServer hrs, final String prefix)
1494 throws IOException {
1495 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1496 for (HRegionInfo region: regions) {
1497 LOG.info(prefix + region.getRegionNameAsString());
1498 }
1499 }
1500
1501 private void waitUntilRegionServerDead() throws InterruptedException, InterruptedIOException {
1502
1503 for (int i=0; cluster.getMaster().getClusterStatus().
1504 getServers().size() > NB_SERVERS && i<100; i++) {
1505 LOG.info("Waiting on server to go down");
1506 Thread.sleep(100);
1507 }
1508 assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
1509 getServers().size() > NB_SERVERS);
1510 }
1511
1512 private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException {
1513
1514 for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
1515 LOG.info("Waiting for repair to happen");
1516 Thread.sleep(1000);
1517 }
1518 if (cluster.getRegions(tableName).size() < numDaughters) {
1519 fail("Waiting too long for daughter regions");
1520 }
1521 }
1522
1523 private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException {
1524 List<HRegion> regions = null;
1525 for (int i = 0; i < 100; i++) {
1526 regions = cluster.getRegions(tableName);
1527 if (regions.size() > 0) break;
1528 Thread.sleep(100);
1529 }
1530 return regions;
1531 }
1532
1533 private HTable createTableAndWait(TableName tableName, byte[] cf) throws IOException,
1534 InterruptedException {
1535 HTable t = TESTING_UTIL.createTable(tableName, cf);
1536 awaitTableRegions(tableName);
1537 assertTrue("Table not online: " + tableName,
1538 cluster.getRegions(tableName).size() != 0);
1539 return t;
1540 }
1541
1542 public static class MockMasterWithoutCatalogJanitor extends HMaster {
1543
1544 public MockMasterWithoutCatalogJanitor(Configuration conf, CoordinatedStateManager cp)
1545 throws IOException, KeeperException,
1546 InterruptedException {
1547 super(conf, cp);
1548 }
1549 }
1550
1551 private static class SplittingNodeCreationFailedException extends IOException {
1552 private static final long serialVersionUID = 1652404976265623004L;
1553
1554 public SplittingNodeCreationFailedException () {
1555 super();
1556 }
1557 }
1558
1559 public static class MockedRegionObserver extends BaseRegionObserver {
1560 private SplitTransaction st = null;
1561 private PairOfSameType<HRegion> daughterRegions = null;
1562
1563 @Override
1564 public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
1565 byte[] splitKey, List<Mutation> metaEntries) throws IOException {
1566 RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1567 HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1568 List<HRegion> onlineRegions =
1569 rs.getOnlineRegions(TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2"));
1570 HRegion region = onlineRegions.get(0);
1571 for (HRegion r : onlineRegions) {
1572 if (r.getRegionInfo().containsRow(splitKey)) {
1573 region = r;
1574 break;
1575 }
1576 }
1577 st = new SplitTransaction(region, splitKey);
1578 if (!st.prepare()) {
1579 LOG.error("Prepare for the table " + region.getTableDesc().getNameAsString()
1580 + " failed. So returning null. ");
1581 ctx.bypass();
1582 return;
1583 }
1584 region.forceSplit(splitKey);
1585 daughterRegions = st.stepsBeforePONR(rs, rs, false);
1586 HRegionInfo copyOfParent = new HRegionInfo(region.getRegionInfo());
1587 copyOfParent.setOffline(true);
1588 copyOfParent.setSplit(true);
1589
1590 Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
1591 MetaTableAccessor.addDaughtersToPut(putParent, daughterRegions.getFirst().getRegionInfo(),
1592 daughterRegions.getSecond().getRegionInfo());
1593 metaEntries.add(putParent);
1594
1595 Put putA = MetaTableAccessor.makePutFromRegionInfo(
1596 daughterRegions.getFirst().getRegionInfo());
1597 Put putB = MetaTableAccessor.makePutFromRegionInfo(
1598 daughterRegions.getSecond().getRegionInfo());
1599 st.addLocation(putA, rs.getServerName(), 1);
1600 st.addLocation(putB, rs.getServerName(), 1);
1601 metaEntries.add(putA);
1602 metaEntries.add(putB);
1603 }
1604
1605 @Override
1606 public void preSplitAfterPONR(ObserverContext<RegionCoprocessorEnvironment> ctx)
1607 throws IOException {
1608 RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1609 HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1610 st.stepsAfterPONR(rs, rs, daughterRegions);
1611 }
1612
1613 }
1614
1615 static class CustomSplitPolicy extends RegionSplitPolicy {
1616
1617 @Override
1618 protected boolean shouldSplit() {
1619 return true;
1620 }
1621
1622 @Override
1623 public boolean skipStoreFileRangeCheck() {
1624 return true;
1625 }
1626 }
1627 }
1628