View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNotSame;
25  import static org.junit.Assert.assertNull;
26  import static org.junit.Assert.assertTrue;
27  import static org.junit.Assert.fail;
28  
29  import java.io.IOException;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.concurrent.CountDownLatch;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.Path;
39  import org.apache.hadoop.hbase.Abortable;
40  import org.apache.hadoop.hbase.Coprocessor;
41  import org.apache.hadoop.hbase.HBaseIOException;
42  import org.apache.hadoop.hbase.HBaseTestingUtility;
43  import org.apache.hadoop.hbase.HColumnDescriptor;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.HTableDescriptor;
47  import org.apache.hadoop.hbase.LargeTests;
48  import org.apache.hadoop.hbase.MasterNotRunningException;
49  import org.apache.hadoop.hbase.MiniHBaseCluster;
50  import org.apache.hadoop.hbase.RegionTransition;
51  import org.apache.hadoop.hbase.Server;
52  import org.apache.hadoop.hbase.ServerName;
53  import org.apache.hadoop.hbase.TableName;
54  import org.apache.hadoop.hbase.UnknownRegionException;
55  import org.apache.hadoop.hbase.Waiter;
56  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
57  import org.apache.hadoop.hbase.catalog.MetaEditor;
58  import org.apache.hadoop.hbase.catalog.MetaReader;
59  import org.apache.hadoop.hbase.client.Delete;
60  import org.apache.hadoop.hbase.client.HBaseAdmin;
61  import org.apache.hadoop.hbase.client.HTable;
62  import org.apache.hadoop.hbase.client.Mutation;
63  import org.apache.hadoop.hbase.client.Put;
64  import org.apache.hadoop.hbase.client.Result;
65  import org.apache.hadoop.hbase.client.ResultScanner;
66  import org.apache.hadoop.hbase.client.Scan;
67  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
68  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
69  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
70  import org.apache.hadoop.hbase.exceptions.DeserializationException;
71  import org.apache.hadoop.hbase.executor.EventType;
72  import org.apache.hadoop.hbase.master.AssignmentManager;
73  import org.apache.hadoop.hbase.master.HMaster;
74  import org.apache.hadoop.hbase.master.RegionState;
75  import org.apache.hadoop.hbase.master.RegionStates;
76  import org.apache.hadoop.hbase.master.RegionState.State;
77  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
78  import org.apache.hadoop.hbase.util.Bytes;
79  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
80  import org.apache.hadoop.hbase.util.FSUtils;
81  import org.apache.hadoop.hbase.util.HBaseFsck;
82  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
83  import org.apache.hadoop.hbase.util.PairOfSameType;
84  import org.apache.hadoop.hbase.util.Threads;
85  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
86  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
87  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
88  import org.apache.zookeeper.KeeperException;
89  import org.apache.zookeeper.KeeperException.NodeExistsException;
90  import org.apache.zookeeper.data.Stat;
91  import org.junit.After;
92  import org.junit.AfterClass;
93  import org.junit.Assert;
94  import org.junit.Before;
95  import org.junit.BeforeClass;
96  import org.junit.Test;
97  import org.junit.experimental.categories.Category;
98  
99  import com.google.protobuf.ServiceException;
100 
/**
 * Like {@link TestSplitTransaction} in that we're testing {@link SplitTransaction},
 * only the tests below run against a live cluster whereas {@link TestSplitTransaction}
 * tests against a bare {@link HRegion}.
 */
106 @Category(LargeTests.class)
107 public class TestSplitTransactionOnCluster {
108   private static final Log LOG =
109     LogFactory.getLog(TestSplitTransactionOnCluster.class);
110   private HBaseAdmin admin = null;
111   private MiniHBaseCluster cluster = null;
112   private static final int NB_SERVERS = 3;
113   private static CountDownLatch latch = new CountDownLatch(1);
114   private static volatile boolean secondSplit = false;
115   private static volatile boolean callRollBack = false;
116   private static volatile boolean firstSplitCompleted = false;
117 
118   private static final HBaseTestingUtility TESTING_UTIL =
119     new HBaseTestingUtility();
120 
121   @BeforeClass public static void before() throws Exception {
122     TESTING_UTIL.getConfiguration().setInt("hbase.balancer.period", 60000);
123     // Needed because some tests have splits happening on RS that are killed
124     // We don't want to wait 3min for the master to figure it out
125     TESTING_UTIL.getConfiguration().setInt(
126         "hbase.master.assignment.timeoutmonitor.timeout", 4000);
127     TESTING_UTIL.startMiniCluster(NB_SERVERS);
128   }
129 
130   @AfterClass public static void after() throws Exception {
131     TESTING_UTIL.shutdownMiniCluster();
132   }
133 
134   @Before public void setup() throws IOException {
135     TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
136     this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
137     this.cluster = TESTING_UTIL.getMiniHBaseCluster();
138   }
139 
140   @After
141   public void tearDown() throws Exception {
142     this.admin.close();
143   }
144 
145   private HRegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) {
146     assertEquals(1, regions.size());
147     HRegionInfo hri = regions.get(0).getRegionInfo();
148     return waitOnRIT(hri);
149   }
150 
151   /**
152    * Often region has not yet fully opened.  If we try to use it -- do a move for instance -- it
153    * will fail silently if the region is not yet opened.
154    * @param hri Region to check if in Regions In Transition... wait until out of transition before
155    * returning
156    * @return Passed in <code>hri</code>
157    */
158   private HRegionInfo waitOnRIT(final HRegionInfo hri) {
159     // Close worked but we are going to open the region elsewhere.  Before going on, make sure
160     // this completes.
161     while (TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
162         getRegionStates().isRegionInTransition(hri)) {
163       LOG.info("Waiting on region in transition: " +
164         TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().
165           getRegionTransitionState(hri));
166       Threads.sleep(10);
167     }
168     return hri;
169   }
170 
171   @SuppressWarnings("deprecation")
172   @Test(timeout = 60000)
173   public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
174     final TableName tableName =
175         TableName.valueOf("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
176     try {
177       // Create table then get the single region for our new table.
178       HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
179       final List<HRegion> regions = cluster.getRegions(tableName);
180       HRegionInfo hri = getAndCheckSingleTableRegion(regions);
181       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
182       final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
183       insertData(tableName.getName(), admin, t);
184       t.close();
185 
186       // Turn off balancer so it doesn't cut in and mess up our placements.
187       this.admin.setBalancerRunning(false, true);
188       // Turn off the meta scanner so it don't remove parent on us.
189       cluster.getMaster().setCatalogJanitorEnabled(false);
190 
191       // find a splittable region
192       final HRegion region = findSplittableRegion(regions);
193       assertTrue("not able to find a splittable region", region != null);
194 
195       new Thread() {
196         @Override
197         public void run() {
198           SplitTransaction st = null;
199           st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
200           try {
201             st.prepare();
202             st.execute(regionServer, regionServer);
203           } catch (IOException e) {
204 
205           }
206         }
207       }.start();
208       for (int i = 0; !callRollBack && i < 100; i++) {
209         Thread.sleep(100);
210       }
211       assertTrue("Waited too long for rollback", callRollBack);
212       SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row3"));
213       try {
214         secondSplit = true;
215         // make region splittable
216         region.initialize();
217         st.prepare();
218         st.execute(regionServer, regionServer);
219       } catch (IOException e) {
220         LOG.debug("Rollback started :"+ e.getMessage());
221         st.rollback(regionServer, regionServer);
222       }
223       for (int i=0; !firstSplitCompleted && i<100; i++) {
224         Thread.sleep(100);
225       }
226       assertTrue("fist split did not complete", firstSplitCompleted);
227 
228       RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
229       Map<String, RegionState> rit = regionStates.getRegionsInTransition();
230 
231       for (int i=0; rit.containsKey(hri.getTable()) && i<100; i++) {
232         Thread.sleep(100);
233       }
234       assertFalse("region still in transition", rit.containsKey(
235           rit.containsKey(hri.getTable())));
236 
237       List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
238       // Region server side split is successful.
239       assertEquals("The parent region should be splitted", 2, onlineRegions.size());
240       //Should be present in RIT
241       List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager()
242           .getRegionStates().getRegionsOfTable(tableName);
243       // Master side should also reflect the same
244       assertEquals("No of regions in master", 2, regionsOfTable.size());
245     } finally {
246       admin.setBalancerRunning(true, false);
247       secondSplit = false;
248       firstSplitCompleted = false;
249       callRollBack = false;
250       cluster.getMaster().setCatalogJanitorEnabled(true);
251       TESTING_UTIL.deleteTable(tableName);
252     }
253   }
254 
255   @Test(timeout = 60000)
256   public void testRITStateForRollback() throws Exception {
257     final TableName tableName =
258         TableName.valueOf("testRITStateForRollback");
259     try {
260       // Create table then get the single region for our new table.
261       HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
262       final List<HRegion> regions = cluster.getRegions(tableName);
263       final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
264       insertData(tableName.getName(), admin, t);
265       t.close();
266 
267       // Turn off balancer so it doesn't cut in and mess up our placements.
268       this.admin.setBalancerRunning(false, true);
269       // Turn off the meta scanner so it don't remove parent on us.
270       cluster.getMaster().setCatalogJanitorEnabled(false);
271 
272       // find a splittable region
273       final HRegion region = findSplittableRegion(regions);
274       assertTrue("not able to find a splittable region", region != null);
275 
276       // install region co-processor to fail splits
277       region.getCoprocessorHost().load(FailingSplitRegionObserver.class,
278         Coprocessor.PRIORITY_USER, region.getBaseConf());
279 
280       // split async
281       this.admin.split(region.getRegionName(), new byte[] {42});
282 
283       // we have to wait until the SPLITTING state is seen by the master
284       FailingSplitRegionObserver.latch.await();
285 
286       LOG.info("Waiting for region to come out of RIT");
287       TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
288         @Override
289         public boolean evaluate() throws Exception {
290           RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
291           Map<String, RegionState> rit = regionStates.getRegionsInTransition();
292           return !rit.containsKey(hri.getEncodedName());
293         }
294       });
295     } finally {
296       admin.setBalancerRunning(true, false);
297       cluster.getMaster().setCatalogJanitorEnabled(true);
298       TESTING_UTIL.deleteTable(tableName);
299     }
300   }
301 
302   public static class FailingSplitRegionObserver extends BaseRegionObserver {
303     static volatile CountDownLatch latch = new CountDownLatch(1);
304     @Override
305     public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
306         byte[] splitKey, List<Mutation> metaEntries) throws IOException {
307       latch.countDown();
308       throw new IOException("Causing rollback of region split");
309     }
310   }
311 
312  /**
313    * A test that intentionally has master fail the processing of the split message.
314    * Tests that the regionserver split ephemeral node gets cleaned up if it
315    * crashes and that after we process server shutdown, the daughters are up on
316    * line.
317    * @throws IOException
318    * @throws InterruptedException
319    * @throws NodeExistsException
320    * @throws KeeperException
321    * @throws DeserializationException
322    */
323   @Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling()
324   throws IOException, InterruptedException, NodeExistsException, KeeperException,
325       DeserializationException, ServiceException {
326     final byte [] tableName =
327       Bytes.toBytes("testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling");
328 
329     // Create table then get the single region for our new table.
330     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
331     List<HRegion> regions = cluster.getRegions(tableName);
332     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
333 
334     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
335 
336     // Turn off balancer so it doesn't cut in and mess up our placements.
337     this.admin.setBalancerRunning(false, true);
338     // Turn off the meta scanner so it don't remove parent on us.
339     cluster.getMaster().setCatalogJanitorEnabled(false);
340     try {
341       // Add a bit of load up into the table so splittable.
342       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
343       // Get region pre-split.
344       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
345       printOutRegions(server, "Initial regions: ");
346       int regionCount = ProtobufUtil.getOnlineRegions(server).size();
347       // Now, before we split, set special flag in master, a flag that has
348       // it FAIL the processing of split.
349       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
350       // Now try splitting and it should work.
351       split(hri, server, regionCount);
352       // Get daughters
353       List<HRegion> daughters = checkAndGetDaughters(tableName);
354       // Assert the ephemeral node is up in zk.
355       String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
356         hri.getEncodedName());
357       RegionTransition rt = null;
358       Stat stats = null;
359       // Wait till the znode moved to SPLIT
360       for (int i=0; i<100; i++) {
361         stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
362         rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
363           hri.getEncodedName()));
364         if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) break;
365         Thread.sleep(100);
366       }
367       LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
368       assertTrue(rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
369       // Now crash the server
370       cluster.abortRegionServer(tableRegionIndex);
371       waitUntilRegionServerDead();
372       awaitDaughters(tableName, daughters.size());
373 
374       // Assert daughters are online.
375       regions = cluster.getRegions(tableName);
376       for (HRegion r: regions) {
377         assertTrue(daughters.contains(r));
378       }
379       // Finally assert that the ephemeral SPLIT znode was cleaned up.
380       for (int i=0; i<100; i++) {
381         // wait a bit (10s max) for the node to disappear
382         stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
383         if (stats == null) break;
384         Thread.sleep(100);
385       }
386       LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
387       assertTrue(stats == null);
388     } finally {
389       // Set this flag back.
390       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
391       admin.setBalancerRunning(true, false);
392       cluster.getMaster().setCatalogJanitorEnabled(true);
393       t.close();
394     }
395   }
396 
397   @Test (timeout = 300000) public void testExistingZnodeBlocksSplitAndWeRollback()
398   throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException {
399     final byte [] tableName =
400       Bytes.toBytes("testExistingZnodeBlocksSplitAndWeRollback");
401 
402     // Create table then get the single region for our new table.
403     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
404     List<HRegion> regions = cluster.getRegions(tableName);
405     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
406 
407     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
408 
409     // Turn off balancer so it doesn't cut in and mess up our placements.
410     this.admin.setBalancerRunning(false, true);
411     // Turn off the meta scanner so it don't remove parent on us.
412     cluster.getMaster().setCatalogJanitorEnabled(false);
413     try {
414       // Add a bit of load up into the table so splittable.
415       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
416       // Get region pre-split.
417       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
418       printOutRegions(server, "Initial regions: ");
419       int regionCount = ProtobufUtil.getOnlineRegions(server).size();
420       // Insert into zk a blocking znode, a znode of same name as region
421       // so it gets in way of our splitting.
422       ServerName fakedServer = ServerName.valueOf("any.old.server", 1234, -1);
423       ZKAssign.createNodeClosing(TESTING_UTIL.getZooKeeperWatcher(),
424         hri, fakedServer);
425       // Now try splitting.... should fail.  And each should successfully
426       // rollback.
427       this.admin.split(hri.getRegionNameAsString());
428       this.admin.split(hri.getRegionNameAsString());
429       this.admin.split(hri.getRegionNameAsString());
430       // Wait around a while and assert count of regions remains constant.
431       for (int i = 0; i < 10; i++) {
432         Thread.sleep(100);
433         assertEquals(regionCount, ProtobufUtil.getOnlineRegions(server).size());
434       }
435       // Now clear the zknode
436       ZKAssign.deleteClosingNode(TESTING_UTIL.getZooKeeperWatcher(),
437         hri, fakedServer);
438       // Now try splitting and it should work.
439       split(hri, server, regionCount);
440       // Get daughters
441       checkAndGetDaughters(tableName);
442       // OK, so split happened after we cleared the blocking node.
443     } finally {
444       admin.setBalancerRunning(true, false);
445       cluster.getMaster().setCatalogJanitorEnabled(true);
446       t.close();
447     }
448   }
449 
450   /**
451    * Test that if daughter split on us, we won't do the shutdown handler fixup
452    * just because we can't find the immediate daughter of an offlined parent.
453    * @throws IOException
454    * @throws InterruptedException
455    */
456   @Test (timeout=300000) public void testShutdownFixupWhenDaughterHasSplit()
457   throws IOException, InterruptedException, ServiceException {
458     final byte [] tableName =
459       Bytes.toBytes("testShutdownFixupWhenDaughterHasSplit");
460 
461     // Create table then get the single region for our new table.
462     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
463     List<HRegion> regions = cluster.getRegions(tableName);
464     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
465 
466     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
467 
468     // Turn off balancer so it doesn't cut in and mess up our placements.
469     this.admin.setBalancerRunning(false, true);
470     // Turn off the meta scanner so it don't remove parent on us.
471     cluster.getMaster().setCatalogJanitorEnabled(false);
472     try {
473       // Add a bit of load up into the table so splittable.
474       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
475       // Get region pre-split.
476       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
477       printOutRegions(server, "Initial regions: ");
478       int regionCount = ProtobufUtil.getOnlineRegions(server).size();
479       // Now split.
480       split(hri, server, regionCount);
481       // Get daughters
482       List<HRegion> daughters = checkAndGetDaughters(tableName);
483       // Now split one of the daughters.
484       regionCount = ProtobufUtil.getOnlineRegions(server).size();
485       HRegionInfo daughter = daughters.get(0).getRegionInfo();
486       LOG.info("Daughter we are going to split: " + daughter);
487       // Compact first to ensure we have cleaned up references -- else the split
488       // will fail.
489       this.admin.compact(daughter.getRegionName());
490       daughters = cluster.getRegions(tableName);
491       HRegion daughterRegion = null;
492       for (HRegion r: daughters) {
493         if (r.getRegionInfo().equals(daughter)) {
494           daughterRegion = r;
495           LOG.info("Found matching HRI: " + daughterRegion);
496           break;
497         }
498       }
499       assertTrue(daughterRegion != null);
500       for (int i=0; i<100; i++) {
501         if (!daughterRegion.hasReferences()) break;
502         Threads.sleep(100);
503       }
504       assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
505       LOG.info("Daughter hri before split (has been compacted): " + daughter);
506       split(daughter, server, regionCount);
507       // Get list of daughters
508       daughters = cluster.getRegions(tableName);
509       for (HRegion d: daughters) {
510         LOG.info("Regions before crash: " + d);
511       }
512       // Now crash the server
513       cluster.abortRegionServer(tableRegionIndex);
514       waitUntilRegionServerDead();
515       awaitDaughters(tableName, daughters.size());
516       // Assert daughters are online and ONLY the original daughters -- that
517       // fixup didn't insert one during server shutdown recover.
518       regions = cluster.getRegions(tableName);
519       for (HRegion d: daughters) {
520         LOG.info("Regions after crash: " + d);
521       }
522       assertEquals(daughters.size(), regions.size());
523       for (HRegion r: regions) {
524         LOG.info("Regions post crash " + r);
525         assertTrue("Missing region post crash " + r, daughters.contains(r));
526       }
527     } finally {
528       admin.setBalancerRunning(true, false);
529       cluster.getMaster().setCatalogJanitorEnabled(true);
530       t.close();
531     }
532   }
533 
534   @Test(timeout = 180000)
535   public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
536     Configuration conf = TESTING_UTIL.getConfiguration();
537     TableName userTableName =
538         TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
539     HTableDescriptor htd = new HTableDescriptor(userTableName);
540     HColumnDescriptor hcd = new HColumnDescriptor("col");
541     htd.addFamily(hcd);
542     admin.createTable(htd);
543     HTable table = new HTable(conf, userTableName);
544     try {
545       for (int i = 0; i <= 5; i++) {
546         String row = "row" + i;
547         Put p = new Put(row.getBytes());
548         String val = "Val" + i;
549         p.add("col".getBytes(), "ql".getBytes(), val.getBytes());
550         table.put(p);
551         admin.flush(userTableName.getName());
552         Delete d = new Delete(row.getBytes());
553         // Do a normal delete
554         table.delete(d);
555         admin.flush(userTableName.getName());
556       }
557       admin.majorCompact(userTableName.getName());
558       List<HRegionInfo> regionsOfTable = TESTING_UTIL.getMiniHBaseCluster()
559           .getMaster().getAssignmentManager().getRegionStates()
560           .getRegionsOfTable(userTableName);
561       HRegionInfo hRegionInfo = regionsOfTable.get(0);
562       Put p = new Put("row6".getBytes());
563       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
564       table.put(p);
565       p = new Put("row7".getBytes());
566       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
567       table.put(p);
568       p = new Put("row8".getBytes());
569       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
570       table.put(p);
571       admin.flush(userTableName.getName());
572       admin.split(hRegionInfo.getRegionName(), "row7".getBytes());
573       regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
574           .getAssignmentManager().getRegionStates()
575           .getRegionsOfTable(userTableName);
576 
577       while (regionsOfTable.size() != 2) {
578         Thread.sleep(2000);
579         regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
580             .getAssignmentManager().getRegionStates()
581             .getRegionsOfTable(userTableName);
582       }
583       Assert.assertEquals(2, regionsOfTable.size());
584       Scan s = new Scan();
585       ResultScanner scanner = table.getScanner(s);
586       int mainTableCount = 0;
587       for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
588         mainTableCount++;
589       }
590       Assert.assertEquals(3, mainTableCount);
591     } finally {
592       table.close();
593     }
594   }
595 
596   /**
597    * Noop Abortable implementation used below in tests.
598    */
599   static class UselessTestAbortable implements Abortable {
600     boolean aborted = false;
601     @Override
602     public void abort(String why, Throwable e) {
603       LOG.warn("ABORTED (But nothing to abort): why=" + why, e);
604       aborted = true;
605     }
606 
607     @Override
608     public boolean isAborted() {
609       return this.aborted;
610     }
611   }
612 
613   /**
614    * Verifies HBASE-5806.  When splitting is partially done and the master goes down
615    * when the SPLIT node is in either SPLIT or SPLITTING state.
616    *
617    * @throws IOException
618    * @throws InterruptedException
619    * @throws NodeExistsException
620    * @throws KeeperException
621    * @throws DeserializationException
622    */
623   @Test(timeout = 400000)
624   public void testMasterRestartWhenSplittingIsPartial()
625       throws IOException, InterruptedException, NodeExistsException,
626       KeeperException, DeserializationException, ServiceException {
627     final byte[] tableName = Bytes.toBytes("testMasterRestartWhenSplittingIsPartial");
628 
629     // Create table then get the single region for our new table.
630     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
631     List<HRegion> regions = cluster.getRegions(tableName);
632     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
633 
634     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
635 
636     // Turn off balancer so it doesn't cut in and mess up our placements.
637     this.admin.setBalancerRunning(false, true);
638     // Turn off the meta scanner so it don't remove parent on us.
639     cluster.getMaster().setCatalogJanitorEnabled(false);
640     ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
641       "testMasterRestartWhenSplittingIsPartial", new UselessTestAbortable());
642     try {
643       // Add a bit of load up into the table so splittable.
644       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
645       // Get region pre-split.
646       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
647       printOutRegions(server, "Initial regions: ");
648       // Now, before we split, set special flag in master, a flag that has
649       // it FAIL the processing of split.
650       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
651       // Now try splitting and it should work.
652 
653       this.admin.split(hri.getRegionNameAsString());
654       checkAndGetDaughters(tableName);
655       // Assert the ephemeral node is up in zk.
656       String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
657       Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
658       LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
659           + stats);
660       byte[] bytes = ZKAssign.getData(zkw, hri.getEncodedName());
661       RegionTransition rtd = RegionTransition.parseFrom(bytes);
662       // State could be SPLIT or SPLITTING.
663       assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
664           || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
665 
666       // abort and wait for new master.
667       MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
668 
669       this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
670 
671       // Update the region to be offline and split, so that HRegionInfo#equals
672       // returns true in checking rebuilt region states map.
673       hri.setOffline(true);
674       hri.setSplit(true);
675       ServerName regionServerOfRegion = master.getAssignmentManager()
676         .getRegionStates().getRegionServerOfRegion(hri);
677       assertTrue(regionServerOfRegion != null);
678 
679       // Remove the block so that split can move ahead.
680       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
681       String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
682       Stat stat = new Stat();
683       byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
684       // ZKUtil.create
685       for (int i=0; data != null && i<60; i++) {
686         Thread.sleep(1000);
687         data = ZKUtil.getDataNoWatch(zkw, node, stat);
688       }
689       assertNull("Waited too long for ZK node to be removed: "+node, data);
690       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
691       assertTrue("Split parent should be in SPLIT state",
692         regionStates.isRegionInState(hri, State.SPLIT));
693       regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
694       assertTrue(regionServerOfRegion == null);
695     } finally {
696       // Set this flag back.
697       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
698       admin.setBalancerRunning(true, false);
699       cluster.getMaster().setCatalogJanitorEnabled(true);
700       t.close();
701       zkw.close();
702     }
703   }
704 
705   /**
706    * Verifies HBASE-5806.  Here the case is that splitting is completed but before the
707    * CJ could remove the parent region the master is killed and restarted.
708    * @throws IOException
709    * @throws InterruptedException
710    * @throws NodeExistsException
711    * @throws KeeperException
712    */
713   @Test (timeout = 300000)
714   public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
715       throws IOException, InterruptedException, NodeExistsException,
716       KeeperException, ServiceException {
717     final byte[] tableName = Bytes.toBytes("testMasterRestartAtRegionSplitPendingCatalogJanitor");
718 
719     // Create table then get the single region for our new table.
720     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
721     List<HRegion> regions = cluster.getRegions(tableName);
722     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
723 
724     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
725 
726     // Turn off balancer so it doesn't cut in and mess up our placements.
727     this.admin.setBalancerRunning(false, true);
728     // Turn off the meta scanner so it don't remove parent on us.
729     cluster.getMaster().setCatalogJanitorEnabled(false);
730     ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
731       "testMasterRestartAtRegionSplitPendingCatalogJanitor", new UselessTestAbortable());
732     try {
733       // Add a bit of load up into the table so splittable.
734       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
735       // Get region pre-split.
736       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
737       printOutRegions(server, "Initial regions: ");
738 
739       this.admin.split(hri.getRegionNameAsString());
740       checkAndGetDaughters(tableName);
741       // Assert the ephemeral node is up in zk.
742       String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
743       Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
744       LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
745           + stats);
746       String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
747       Stat stat = new Stat();
748       byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
749       // ZKUtil.create
750       for (int i=0; data != null && i<60; i++) {
751         Thread.sleep(1000);
752         data = ZKUtil.getDataNoWatch(zkw, node, stat);
753       }
754       assertNull("Waited too long for ZK node to be removed: "+node, data);
755 
756       MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
757 
758       this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
759 
760       // Update the region to be offline and split, so that HRegionInfo#equals
761       // returns true in checking rebuilt region states map.
762       hri.setOffline(true);
763       hri.setSplit(true);
764       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
765       assertTrue("Split parent should be in SPLIT state",
766         regionStates.isRegionInState(hri, State.SPLIT));
767       ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
768       assertTrue(regionServerOfRegion == null);
769     } finally {
770       this.admin.setBalancerRunning(true, false);
771       cluster.getMaster().setCatalogJanitorEnabled(true);
772       t.close();
773       zkw.close();
774     }
775   }
776 
777   /**
778    *
779    * While transitioning node from RS_ZK_REGION_SPLITTING to
780    * RS_ZK_REGION_SPLITTING during region split,if zookeper went down split always
781    * fails for the region. HBASE-6088 fixes this scenario.
782    * This test case is to test the znode is deleted(if created) or not in roll back.
783    *
784    * @throws IOException
785    * @throws InterruptedException
786    * @throws KeeperException
787    */
788   @Test
789   public void testSplitBeforeSettingSplittingInZK() throws Exception,
790       InterruptedException, KeeperException {
791     testSplitBeforeSettingSplittingInZKInternals();
792   }
793 
794   @Test(timeout = 60000)
795   public void testTableExistsIfTheSpecifiedTableRegionIsSplitParent() throws Exception {
796     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
797     final TableName tableName =
798         TableName.valueOf("testTableExistsIfTheSpecifiedTableRegionIsSplitParent");
799     // Create table then get the single region for our new table.
800     HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
801     List<HRegion> regions = null;
802     try {
803       regions = cluster.getRegions(tableName);
804       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
805       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
806       insertData(tableName.getName(), admin, t);
807       // Turn off balancer so it doesn't cut in and mess up our placements.
808       admin.setBalancerRunning(false, true);
809       // Turn off the meta scanner so it don't remove parent on us.
810       cluster.getMaster().setCatalogJanitorEnabled(false);
811       boolean tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
812           tableName);
813       assertEquals("The specified table should present.", true, tableExists);
814       final HRegion region = findSplittableRegion(regions);
815       assertTrue("not able to find a splittable region", region != null);
816       SplitTransaction st = new SplitTransaction(region, Bytes.toBytes("row2"));
817       try {
818         st.prepare();
819         st.createDaughters(regionServer, regionServer);
820       } catch (IOException e) {
821 
822       }
823       tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
824           tableName);
825       assertEquals("The specified table should present.", true, tableExists);
826     } finally {
827       if (regions != null) {
828         String node = ZKAssign.getNodeName(zkw, regions.get(0).getRegionInfo()
829             .getEncodedName());
830         ZKUtil.deleteNodeFailSilent(zkw, node);
831       }
832       admin.setBalancerRunning(true, false);
833       cluster.getMaster().setCatalogJanitorEnabled(true);
834       t.close();
835     }
836   }
837 
838   private void insertData(final byte[] tableName, HBaseAdmin admin, HTable t) throws IOException,
839       InterruptedException {
840     Put p = new Put(Bytes.toBytes("row1"));
841     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
842     t.put(p);
843     p = new Put(Bytes.toBytes("row2"));
844     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
845     t.put(p);
846     p = new Put(Bytes.toBytes("row3"));
847     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
848     t.put(p);
849     p = new Put(Bytes.toBytes("row4"));
850     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
851     t.put(p);
852     admin.flush(tableName);
853   }
854 
855   /**
856    * If a table has regions that have no store files in a region, they should split successfully
857    * into two regions with no store files.
858    */
859   @Test
860   public void testSplitRegionWithNoStoreFiles()
861       throws Exception {
862     final TableName tableName =
863         TableName.valueOf("testSplitRegionWithNoStoreFiles");
864     // Create table then get the single region for our new table.
865     createTableAndWait(tableName.getName(), HConstants.CATALOG_FAMILY);
866     List<HRegion> regions = cluster.getRegions(tableName);
867     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
868     ensureTableRegionNotOnSameServerAsMeta(admin, hri);
869     int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
870     HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
871     // Turn off balancer so it doesn't cut in and mess up our placements.
872     this.admin.setBalancerRunning(false, true);
873     // Turn off the meta scanner so it don't remove parent on us.
874     cluster.getMaster().setCatalogJanitorEnabled(false);
875     try {
876       // Precondition: we created a table with no data, no store files.
877       printOutRegions(regionServer, "Initial regions: ");
878       Configuration conf = cluster.getConfiguration();
879       HBaseFsck.debugLsr(conf, new Path("/"));
880       Path rootDir = FSUtils.getRootDir(conf);
881       FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
882       Map<String, Path> storefiles =
883           FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
884       assertEquals("Expected nothing but found " + storefiles.toString(), storefiles.size(), 0);
885 
886       // find a splittable region.  Refresh the regions list
887       regions = cluster.getRegions(tableName);
888       final HRegion region = findSplittableRegion(regions);
889       assertTrue("not able to find a splittable region", region != null);
890 
891       // Now split.
892       SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
893       try {
894         st.prepare();
895         st.execute(regionServer, regionServer);
896       } catch (IOException e) {
897         fail("Split execution should have succeeded with no exceptions thrown");
898       }
899 
900       // Postcondition: split the table with no store files into two regions, but still have not
901       // store files
902       List<HRegion> daughters = cluster.getRegions(tableName);
903       assertTrue(daughters.size() == 2);
904 
905       // check dirs
906       HBaseFsck.debugLsr(conf, new Path("/"));
907       Map<String, Path> storefilesAfter =
908           FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
909       assertEquals("Expected nothing but found " + storefilesAfter.toString(),
910           storefilesAfter.size(), 0);
911 
912       hri = region.getRegionInfo(); // split parent
913       AssignmentManager am = cluster.getMaster().getAssignmentManager();
914       RegionStates regionStates = am.getRegionStates();
915       long start = EnvironmentEdgeManager.currentTimeMillis();
916       while (!regionStates.isRegionInState(hri, State.SPLIT)) {
917         assertFalse("Timed out in waiting split parent to be in state SPLIT",
918           EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
919         Thread.sleep(500);
920       }
921 
922       // We should not be able to assign it again
923       am.assign(hri, true, true);
924       assertFalse("Split region can't be assigned",
925         regionStates.isRegionInTransition(hri));
926       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
927 
928       // We should not be able to unassign it either
929       am.unassign(hri, true, null);
930       assertFalse("Split region can't be unassigned",
931         regionStates.isRegionInTransition(hri));
932       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
933     } finally {
934       admin.setBalancerRunning(true, false);
935       cluster.getMaster().setCatalogJanitorEnabled(true);
936     }
937   }
938 
939   @Test(timeout = 180000)
940   public void testSplitHooksBeforeAndAfterPONR() throws Exception {
941     String firstTable = "testSplitHooksBeforeAndAfterPONR_1";
942     String secondTable = "testSplitHooksBeforeAndAfterPONR_2";
943     HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(firstTable));
944     desc.addCoprocessor(MockedRegionObserver.class.getName());
945     HColumnDescriptor hcd = new HColumnDescriptor("cf");
946     desc.addFamily(hcd);
947     admin.createTable(desc);
948     desc = new HTableDescriptor(TableName.valueOf(secondTable));
949     hcd = new HColumnDescriptor("cf");
950     desc.addFamily(hcd);
951     admin.createTable(desc);
952     List<HRegion> firstTableregions = cluster.getRegions(TableName.valueOf(firstTable));
953     List<HRegion> secondTableRegions = cluster.getRegions(TableName.valueOf(secondTable));
954     ServerName serverName =
955         cluster.getServerHoldingRegion(firstTableregions.get(0).getRegionName());
956     admin.move(secondTableRegions.get(0).getRegionInfo().getEncodedNameAsBytes(),
957       Bytes.toBytes(serverName.getServerName()));
958     HTable table1 = null;
959     HTable table2 = null;
960     try {
961       table1 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
962       table2 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
963       insertData(Bytes.toBytes(firstTable), admin, table1);
964       insertData(Bytes.toBytes(secondTable), admin, table2);
965       admin.split(Bytes.toBytes(firstTable), "row2".getBytes());
966       firstTableregions = cluster.getRegions(Bytes.toBytes(firstTable));
967       while (firstTableregions.size() != 2) {
968         Thread.sleep(1000);
969         firstTableregions = cluster.getRegions(Bytes.toBytes(firstTable));
970       }
971       assertEquals("Number of regions after split should be 2.", 2, firstTableregions.size());
972       secondTableRegions = cluster.getRegions(Bytes.toBytes(secondTable));
973       assertEquals("Number of regions after split should be 2.", 2, secondTableRegions.size());
974     } finally {
975       if (table1 != null) {
976         table1.close();
977       }
978       if (table2 != null) {
979         table2.close();
980       }
981       TESTING_UTIL.deleteTable(firstTable);
982       TESTING_UTIL.deleteTable(secondTable);
983     }
984   }
985 
986   private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
987     final byte[] tableName = Bytes.toBytes("testSplitBeforeSettingSplittingInZK");
988     try {
989       // Create table then get the single region for our new table.
990       createTableAndWait(tableName, Bytes.toBytes("cf"));
991 
992       List<HRegion> regions = awaitTableRegions(tableName);
993       assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
994 
995       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
996       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
997       final HRegion region = findSplittableRegion(regions);
998       assertTrue("not able to find a splittable region", region != null);
999       SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
1000         @Override
1001         public PairOfSameType<HRegion> stepsBeforePONR(final Server server,
1002             final RegionServerServices services, boolean testing) throws IOException {
1003           throw new SplittingNodeCreationFailedException ();
1004         }
1005       };
1006       String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1007           region.getRegionInfo().getEncodedName());
1008       regionServer.getZooKeeper().sync(node);
1009       for (int i = 0; i < 100; i++) {
1010         // We expect the znode to be deleted by this time. Here the
1011         // znode could be in OPENED state and the
1012         // master has not yet deleted the znode.
1013         if (ZKUtil.checkExists(regionServer.getZooKeeper(), node) != -1) {
1014           Thread.sleep(100);
1015         }
1016       }
1017       try {
1018         st.prepare();
1019         st.execute(regionServer, regionServer);
1020       } catch (IOException e) {
1021         // check for the specific instance in case the Split failed due to the
1022         // existence of the znode in OPENED state.
1023         // This will at least make the test to fail;
1024         assertTrue("Should be instance of CreateSplittingNodeFailedException",
1025             e instanceof SplittingNodeCreationFailedException );
1026         node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1027             region.getRegionInfo().getEncodedName());
1028         {
1029           assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1030         }
1031         assertTrue(st.rollback(regionServer, regionServer));
1032         assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1033       }
1034     } finally {
1035       TESTING_UTIL.deleteTable(tableName);
1036     }
1037   }
1038 
1039   public static class MockedSplitTransaction extends SplitTransaction {
1040 
1041     private HRegion currentRegion;
1042     public MockedSplitTransaction(HRegion r, byte[] splitrow) {
1043       super(r, splitrow);
1044       this.currentRegion = r;
1045     }
1046 
1047     @Override
1048     void transitionZKNode(Server server, RegionServerServices services, HRegion a, HRegion b)
1049         throws IOException {
1050       if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1051           .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1052         try {
1053           if (!secondSplit){
1054             callRollBack = true;
1055             latch.await();
1056           }
1057         } catch (InterruptedException e) {
1058         }
1059 
1060       }
1061       super.transitionZKNode(server, services, a, b);
1062       if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1063           .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1064         firstSplitCompleted = true;
1065       }
1066     }
1067     @Override
1068     public boolean rollback(Server server, RegionServerServices services) throws IOException {
1069       if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1070           .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1071         if(secondSplit){
1072           super.rollback(server, services);
1073           latch.countDown();
1074           return true;
1075         }
1076       }
1077       return super.rollback(server, services);
1078     }
1079 
1080   }
1081 
1082   private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
1083     for (int i = 0; i < 5; ++i) {
1084       for (HRegion r: regions) {
1085         if (r.isSplittable()) {
1086           return(r);
1087         }
1088       }
1089       Thread.sleep(100);
1090     }
1091     return(null);
1092   }
1093 
1094   private List<HRegion> checkAndGetDaughters(byte[] tableName)
1095       throws InterruptedException {
1096     List<HRegion> daughters = null;
1097     // try up to 10s
1098     for (int i=0; i<100; i++) {
1099       daughters = cluster.getRegions(tableName);
1100       if (daughters.size() >= 2) break;
1101       Thread.sleep(100);
1102     }
1103     assertTrue(daughters.size() >= 2);
1104     return daughters;
1105   }
1106 
1107   private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
1108   throws IOException, InterruptedException {
1109     cluster.abortMaster(0);
1110     cluster.waitOnMaster(0);
1111     cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
1112     		MockMasterWithoutCatalogJanitor.class, HMaster.class);
1113     MockMasterWithoutCatalogJanitor master = null;
1114     master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
1115     cluster.waitForActiveAndReadyMaster();
1116     return master;
1117   }
1118 
1119   private void split(final HRegionInfo hri, final HRegionServer server, final int regionCount)
1120       throws IOException, InterruptedException {
1121     this.admin.split(hri.getRegionNameAsString());
1122     for (int i = 0; ProtobufUtil.getOnlineRegions(server).size() <= regionCount && i < 300; i++) {
1123       LOG.debug("Waiting on region to split");
1124       Thread.sleep(100);
1125     }
1126 
1127     assertFalse("Waited too long for split",
1128         ProtobufUtil.getOnlineRegions(server).size() <= regionCount);
1129   }
1130 
1131   /**
1132    * Ensure single table region is not on same server as the single hbase:meta table
1133    * region.
1134    * @param admin
1135    * @param hri
1136    * @return Index of the server hosting the single table region
1137    * @throws UnknownRegionException
1138    * @throws MasterNotRunningException
1139    * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
1140    * @throws InterruptedException
1141    */
1142   private int ensureTableRegionNotOnSameServerAsMeta(final HBaseAdmin admin,
1143       final HRegionInfo hri)
1144   throws HBaseIOException, MasterNotRunningException,
1145   ZooKeeperConnectionException, InterruptedException {
1146     // Now make sure that the table region is not on same server as that hosting
1147     // hbase:meta  We don't want hbase:meta replay polluting our test when we later crash
1148     // the table region serving server.
1149     int metaServerIndex = cluster.getServerWithMeta();
1150     assertTrue(metaServerIndex != -1);
1151     HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1152     int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1153     assertTrue(tableRegionIndex != -1);
1154     HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1155     if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1156       HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1157       assertNotNull(hrs);
1158       assertNotNull(hri);
1159       LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
1160         metaRegionServer.getServerName() + " to " +
1161         hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1162       admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
1163     }
1164     // Wait till table region is up on the server that is NOT carrying hbase:meta.
1165     for (int i = 0; i < 100; i++) {
1166       tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1167       if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1168       LOG.debug("Waiting on region move off the hbase:meta server; current index " +
1169         tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1170       Thread.sleep(100);
1171     }
1172     assertTrue("Region not moved off hbase:meta server", tableRegionIndex != -1
1173         && tableRegionIndex != metaServerIndex);
1174     // Verify for sure table region is not on same server as hbase:meta
1175     tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1176     assertTrue(tableRegionIndex != -1);
1177     assertNotSame(metaServerIndex, tableRegionIndex);
1178     return tableRegionIndex;
1179   }
1180 
1181   /**
1182    * Find regionserver other than the one passed.
1183    * Can't rely on indexes into list of regionservers since crashed servers
1184    * occupy an index.
1185    * @param cluster
1186    * @param notThisOne
1187    * @return A regionserver that is not <code>notThisOne</code> or null if none
1188    * found
1189    */
1190   private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1191       final HRegionServer notThisOne) {
1192     for (RegionServerThread rst: cluster.getRegionServerThreads()) {
1193       HRegionServer hrs = rst.getRegionServer();
1194       if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1195       if (hrs.isStopping() || hrs.isStopped()) continue;
1196       return hrs;
1197     }
1198     return null;
1199   }
1200 
1201   private void printOutRegions(final HRegionServer hrs, final String prefix)
1202       throws IOException {
1203     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
1204     for (HRegionInfo region: regions) {
1205       LOG.info(prefix + region.getRegionNameAsString());
1206     }
1207   }
1208 
1209   private void waitUntilRegionServerDead() throws InterruptedException {
1210     // Wait until the master processes the RS shutdown
1211     for (int i=0; cluster.getMaster().getClusterStatus().
1212         getServers().size() == NB_SERVERS && i<100; i++) {
1213       LOG.info("Waiting on server to go down");
1214       Thread.sleep(100);
1215     }
1216     assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
1217         getServers().size() == NB_SERVERS);
1218   }
1219 
1220   private void awaitDaughters(byte[] tableName, int numDaughters) throws InterruptedException {
1221     // Wait till regions are back on line again.
1222     for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
1223       LOG.info("Waiting for repair to happen");
1224       Thread.sleep(1000);
1225     }
1226     if (cluster.getRegions(tableName).size() < numDaughters) {
1227       fail("Waiting too long for daughter regions");
1228     }
1229   }
1230 
1231   private List<HRegion> awaitTableRegions(final byte[] tableName) throws InterruptedException {
1232     List<HRegion> regions = null;
1233     for (int i = 0; i < 100; i++) {
1234       regions = cluster.getRegions(tableName);
1235       if (regions.size() > 0) break;
1236       Thread.sleep(100);
1237     }
1238     return regions;
1239   }
1240 
1241   private HTable createTableAndWait(byte[] tableName, byte[] cf) throws IOException,
1242       InterruptedException {
1243     HTable t = TESTING_UTIL.createTable(tableName, cf);
1244     awaitTableRegions(tableName);
1245     assertTrue("Table not online: " + Bytes.toString(tableName),
1246       cluster.getRegions(tableName).size() != 0);
1247     return t;
1248   }
1249 
1250   public static class MockMasterWithoutCatalogJanitor extends HMaster {
1251 
1252     public MockMasterWithoutCatalogJanitor(Configuration conf) throws IOException, KeeperException,
1253         InterruptedException {
1254       super(conf);
1255     }
1256 
1257     @Override
1258     protected void startCatalogJanitorChore() {
1259       LOG.debug("Customised master executed.");
1260     }
1261   }
1262 
1263   private static class SplittingNodeCreationFailedException  extends IOException {
1264     private static final long serialVersionUID = 1652404976265623004L;
1265 
1266     public SplittingNodeCreationFailedException () {
1267       super();
1268     }
1269   }
1270 
1271   public static class MockedRegionObserver extends BaseRegionObserver {
1272     private SplitTransaction st = null;
1273     private PairOfSameType<HRegion> daughterRegions = null;
1274 
1275     @Override
1276     public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
1277         byte[] splitKey, List<Mutation> metaEntries) throws IOException {
1278       RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1279       HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1280       List<HRegion> onlineRegions =
1281           rs.getOnlineRegions(TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2"));
1282       HRegion region = onlineRegions.get(0);
1283       for (HRegion r : onlineRegions) {
1284         if (r.getRegionInfo().containsRow(splitKey)) {
1285           region = r;
1286           break;
1287         }
1288       }
1289       st = new SplitTransaction(region, splitKey);
1290       if (!st.prepare()) {
1291         LOG.error("Prepare for the table " + region.getTableDesc().getNameAsString()
1292             + " failed. So returning null. ");
1293         ctx.bypass();
1294         return;
1295       }
1296       region.forceSplit(splitKey);
1297       daughterRegions = st.stepsBeforePONR(rs, rs, false);
1298       HRegionInfo copyOfParent = new HRegionInfo(region.getRegionInfo());
1299       copyOfParent.setOffline(true);
1300       copyOfParent.setSplit(true);
1301       // Put for parent
1302       Put putParent = MetaEditor.makePutFromRegionInfo(copyOfParent);
1303       MetaEditor.addDaughtersToPut(putParent, daughterRegions.getFirst().getRegionInfo(),
1304         daughterRegions.getSecond().getRegionInfo());
1305       metaEntries.add(putParent);
1306       // Puts for daughters
1307       Put putA = MetaEditor.makePutFromRegionInfo(daughterRegions.getFirst().getRegionInfo());
1308       Put putB = MetaEditor.makePutFromRegionInfo(daughterRegions.getSecond().getRegionInfo());
1309       st.addLocation(putA, rs.getServerName(), 1);
1310       st.addLocation(putB, rs.getServerName(), 1);
1311       metaEntries.add(putA);
1312       metaEntries.add(putB);
1313     }
1314 
1315     @Override
1316     public void preSplitAfterPONR(ObserverContext<RegionCoprocessorEnvironment> ctx)
1317         throws IOException {
1318       RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1319       HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1320       st.stepsAfterPONR(rs, rs, daughterRegions);
1321     }
1322 
1323   }
1324 }
1325