View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24  import static org.junit.Assert.assertEquals;
25  import static org.junit.Assert.assertFalse;
26  import static org.junit.Assert.assertNotEquals;
27  import static org.junit.Assert.assertNotNull;
28  import static org.junit.Assert.assertTrue;
29  import static org.junit.Assert.fail;
30  
31  import java.io.IOException;
32  import java.util.ArrayList;
33  import java.util.Collection;
34  import java.util.HashMap;
35  import java.util.LinkedList;
36  import java.util.List;
37  import java.util.Map;
38  import java.util.Map.Entry;
39  import java.util.concurrent.CountDownLatch;
40  import java.util.concurrent.ExecutorService;
41  import java.util.concurrent.ScheduledThreadPoolExecutor;
42  import java.util.concurrent.SynchronousQueue;
43  import java.util.concurrent.ThreadPoolExecutor;
44  import java.util.concurrent.TimeUnit;
45  
46  import org.apache.commons.io.IOUtils;
47  import org.apache.commons.logging.Log;
48  import org.apache.commons.logging.LogFactory;
49  import org.apache.hadoop.conf.Configuration;
50  import org.apache.hadoop.fs.FileStatus;
51  import org.apache.hadoop.fs.FileSystem;
52  import org.apache.hadoop.fs.Path;
53  import org.apache.hadoop.hbase.ClusterStatus;
54  import org.apache.hadoop.hbase.HBaseTestingUtility;
55  import org.apache.hadoop.hbase.HColumnDescriptor;
56  import org.apache.hadoop.hbase.HConstants;
57  import org.apache.hadoop.hbase.HRegionInfo;
58  import org.apache.hadoop.hbase.HRegionLocation;
59  import org.apache.hadoop.hbase.HTableDescriptor;
60  import org.apache.hadoop.hbase.LargeTests;
61  import org.apache.hadoop.hbase.MiniHBaseCluster;
62  import org.apache.hadoop.hbase.ServerName;
63  import org.apache.hadoop.hbase.TableName;
64  import org.apache.hadoop.hbase.catalog.MetaEditor;
65  import org.apache.hadoop.hbase.client.Delete;
66  import org.apache.hadoop.hbase.client.Durability;
67  import org.apache.hadoop.hbase.client.Get;
68  import org.apache.hadoop.hbase.client.HBaseAdmin;
69  import org.apache.hadoop.hbase.client.HConnection;
70  import org.apache.hadoop.hbase.client.HConnectionManager;
71  import org.apache.hadoop.hbase.client.HTable;
72  import org.apache.hadoop.hbase.client.MetaScanner;
73  import org.apache.hadoop.hbase.client.Put;
74  import org.apache.hadoop.hbase.client.Result;
75  import org.apache.hadoop.hbase.client.ResultScanner;
76  import org.apache.hadoop.hbase.client.Scan;
77  import org.apache.hadoop.hbase.io.hfile.TestHFile;
78  import org.apache.hadoop.hbase.master.AssignmentManager;
79  import org.apache.hadoop.hbase.master.HMaster;
80  import org.apache.hadoop.hbase.master.RegionStates;
81  import org.apache.hadoop.hbase.master.TableLockManager;
82  import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
83  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
84  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
85  import org.apache.hadoop.hbase.regionserver.HRegion;
86  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
87  import org.apache.hadoop.hbase.regionserver.HRegionServer;
88  import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
89  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
90  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
91  import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
92  import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
93  import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
94  import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
95  import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
96  import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
97  import org.apache.zookeeper.KeeperException;
98  import org.junit.AfterClass;
99  import org.junit.BeforeClass;
100 import org.junit.Ignore;
101 import org.junit.Test;
102 import org.junit.experimental.categories.Category;
103 import org.junit.rules.TestName;
104 
105 import com.google.common.collect.Multimap;
106 
107 /**
108  * This tests HBaseFsck's ability to detect reasons for inconsistent tables.
109  */
110 @Category(LargeTests.class)
111 public class TestHBaseFsck {
112   final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
113   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
114   private final static Configuration conf = TEST_UTIL.getConfiguration();
115   private final static String FAM_STR = "fam";
116   private final static byte[] FAM = Bytes.toBytes(FAM_STR);
117   private final static int REGION_ONLINE_TIMEOUT = 800;
118   private static RegionStates regionStates;
119   private static ExecutorService executorService;
120 
121   // for the instance, reset every test run
122   private HTable tbl;
123   private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
124     Bytes.toBytes("B"), Bytes.toBytes("C") };
125   // one row per region.
126   private final static byte[][] ROWKEYS= new byte[][] {
127     Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
128     Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
129 
130   @BeforeClass
131   public static void setUpBeforeClass() throws Exception {
132     TEST_UTIL.getConfiguration().setInt("hbase.regionserver.handler.count", 2);
133     TEST_UTIL.getConfiguration().setInt("hbase.regionserver.metahandler.count", 2);
134     TEST_UTIL.startMiniCluster(3);
135 
136     executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
137         new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
138 
139     AssignmentManager assignmentManager =
140       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
141     regionStates = assignmentManager.getRegionStates();
142   }
143 
144   @AfterClass
145   public static void tearDownAfterClass() throws Exception {
146     TEST_UTIL.shutdownMiniCluster();
147   }
148 
149   @Test
150   public void testHBaseFsck() throws Exception {
151     assertNoErrors(doFsck(conf, false));
152     String table = "tableBadMetaAssign";
153     TEST_UTIL.createTable(Bytes.toBytes(table), FAM);
154 
155     // We created 1 table, should be fine
156     assertNoErrors(doFsck(conf, false));
157 
158     // Now let's mess it up and change the assignment in hbase:meta to
159     // point to a different region server
160     HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName(),
161         executorService);
162     Scan scan = new Scan();
163     scan.setStartRow(Bytes.toBytes(table+",,"));
164     ResultScanner scanner = meta.getScanner(scan);
165     HRegionInfo hri = null;
166 
167     Result res = scanner.next();
168     ServerName currServer =
169       ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
170           HConstants.SERVER_QUALIFIER));
171     long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
172         HConstants.STARTCODE_QUALIFIER));
173 
174     for (JVMClusterUtil.RegionServerThread rs :
175         TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
176 
177       ServerName sn = rs.getRegionServer().getServerName();
178 
179       // When we find a diff RS, change the assignment and break
180       if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
181           startCode != sn.getStartcode()) {
182         Put put = new Put(res.getRow());
183         put.setDurability(Durability.SKIP_WAL);
184         put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
185           Bytes.toBytes(sn.getHostAndPort()));
186         put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
187           Bytes.toBytes(sn.getStartcode()));
188         meta.put(put);
189         hri = HRegionInfo.getHRegionInfo(res);
190         break;
191       }
192     }
193 
194     // Try to fix the data
195     assertErrors(doFsck(conf, true), new ERROR_CODE[]{
196         ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
197 
198     TEST_UTIL.getHBaseCluster().getMaster()
199       .getAssignmentManager().waitForAssignment(hri);
200 
201     // Should be fixed now
202     assertNoErrors(doFsck(conf, false));
203 
204     // comment needed - what is the purpose of this line
205     HTable t = new HTable(conf, Bytes.toBytes(table), executorService);
206     ResultScanner s = t.getScanner(new Scan());
207     s.close();
208     t.close();
209 
210     scanner.close();
211     meta.close();
212   }
213 
214   @Test(timeout=180000)
215   public void testFixAssignmentsWhenMETAinTransition() throws Exception {
216     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
217     HBaseAdmin admin = null;
218     try {
219       admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
220       admin.closeRegion(cluster.getServerHoldingMeta(),
221           HRegionInfo.FIRST_META_REGIONINFO);
222     } finally {
223       if (admin != null) {
224         admin.close();
225       }
226     }
227     regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
228     MetaRegionTracker.deleteMetaLocation(cluster.getMaster().getZooKeeper());
229     assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
230     HBaseFsck hbck = doFsck(conf, true);
231     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
232         ERROR_CODE.NULL_META_REGION });
233     assertNoErrors(doFsck(conf, false));
234   }
235 
236   /**
237    * Create a new region in META.
238    */
239   private HRegionInfo createRegion(Configuration conf, final HTableDescriptor
240       htd, byte[] startKey, byte[] endKey)
241       throws IOException {
242     HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
243     HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
244     MetaEditor.addRegionToMeta(meta, hri);
245     meta.close();
246     return hri;
247   }
248 
249   /**
250    * Debugging method to dump the contents of meta.
251    */
252   private void dumpMeta(TableName tableName) throws IOException {
253     List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
254     for (byte[] row : metaRows) {
255       LOG.info(Bytes.toString(row));
256     }
257   }
258 
259   /**
260    * This method is used to undeploy a region -- close it and attempt to
261    * remove its state from the Master.
262    */
263   private void undeployRegion(HBaseAdmin admin, ServerName sn,
264       HRegionInfo hri) throws IOException, InterruptedException {
265     try {
266       HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
267       if (!hri.isMetaTable()) {
268         admin.offline(hri.getRegionName());
269       }
270     } catch (IOException ioe) {
271       LOG.warn("Got exception when attempting to offline region "
272           + Bytes.toString(hri.getRegionName()), ioe);
273     }
274   }
275   /**
276    * Delete a region from assignments, meta, or completely from hdfs.
277    * @param unassign if true unassign region if assigned
278    * @param metaRow  if true remove region's row from META
279    * @param hdfs if true remove region's dir in HDFS
280    */
281   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
282       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
283       boolean hdfs) throws IOException, InterruptedException {
284     deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
285   }
286 
287   /**
288    * Delete a region from assignments, meta, or completely from hdfs.
289    * @param unassign if true unassign region if assigned
290    * @param metaRow  if true remove region's row from META
291    * @param hdfs if true remove region's dir in HDFS
292    * @param regionInfoOnly if true remove a region dir's .regioninfo file
293    */
294   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
295       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
296       boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
297     LOG.info("** Before delete:");
298     dumpMeta(htd.getTableName());
299 
300     Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
301     for (Entry<HRegionInfo, ServerName> e: hris.entrySet()) {
302       HRegionInfo hri = e.getKey();
303       ServerName hsa = e.getValue();
304       if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
305           && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
306 
307         LOG.info("RegionName: " +hri.getRegionNameAsString());
308         byte[] deleteRow = hri.getRegionName();
309 
310         if (unassign) {
311           LOG.info("Undeploying region " + hri + " from server " + hsa);
312           undeployRegion(new HBaseAdmin(conf), hsa, hri);
313         }
314 
315         if (regionInfoOnly) {
316           LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
317           Path rootDir = FSUtils.getRootDir(conf);
318           FileSystem fs = rootDir.getFileSystem(conf);
319           Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
320               hri.getEncodedName());
321           Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
322           fs.delete(hriPath, true);
323         }
324 
325         if (hdfs) {
326           LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
327           Path rootDir = FSUtils.getRootDir(conf);
328           FileSystem fs = rootDir.getFileSystem(conf);
329           Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
330               hri.getEncodedName());
331           HBaseFsck.debugLsr(conf, p);
332           boolean success = fs.delete(p, true);
333           LOG.info("Deleted " + p + " sucessfully? " + success);
334           HBaseFsck.debugLsr(conf, p);
335         }
336 
337         if (metaRow) {
338           HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
339           Delete delete = new Delete(deleteRow);
340           meta.delete(delete);
341         }
342       }
343       LOG.info(hri.toString() + hsa.toString());
344     }
345 
346     TEST_UTIL.getMetaTableRows(htd.getTableName());
347     LOG.info("*** After delete:");
348     dumpMeta(htd.getTableName());
349   }
350 
351   /**
352    * Setup a clean table before we start mucking with it.
353    *
354    * @throws IOException
355    * @throws InterruptedException
356    * @throws KeeperException
357    */
358   HTable setupTable(TableName tablename) throws Exception {
359     HTableDescriptor desc = new HTableDescriptor(tablename);
360     HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
361     desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
362     TEST_UTIL.getHBaseAdmin().createTable(desc, SPLITS);
363     tbl = new HTable(TEST_UTIL.getConfiguration(), tablename, executorService);
364 
365     List<Put> puts = new ArrayList<Put>();
366     for (byte[] row : ROWKEYS) {
367       Put p = new Put(row);
368       p.add(FAM, Bytes.toBytes("val"), row);
369       puts.add(p);
370     }
371     tbl.put(puts);
372     tbl.flushCommits();
373     return tbl;
374   }
375 
376   /**
377    * Counts the number of row to verify data loss or non-dataloss.
378    */
379   int countRows() throws IOException {
380      Scan s = new Scan();
381      ResultScanner rs = tbl.getScanner(s);
382      int i = 0;
383      while(rs.next() !=null) {
384        i++;
385      }
386      return i;
387   }
388 
389   /**
390    * delete table in preparation for next test
391    *
392    * @param tablename
393    * @throws IOException
394    */
395   void deleteTable(TableName tablename) throws IOException {
396     HBaseAdmin admin = new HBaseAdmin(conf);
397     admin.getConnection().clearRegionCache();
398     if (admin.isTableEnabled(tablename)) {
399       admin.disableTableAsync(tablename);
400     }
401     long totalWait = 0;
402     long maxWait = 30*1000;
403     long sleepTime = 250;
404     while (!admin.isTableDisabled(tablename)) {
405       try {
406         Thread.sleep(sleepTime);
407         totalWait += sleepTime;
408         if (totalWait >= maxWait) {
409           fail("Waited too long for table to be disabled + " + tablename);
410         }
411       } catch (InterruptedException e) {
412         e.printStackTrace();
413         fail("Interrupted when trying to disable table " + tablename);
414       }
415     }
416     admin.deleteTable(tablename);
417   }
418 
419   /**
420    * This creates a clean table and confirms that the table is clean.
421    */
422   @Test
423   public void testHBaseFsckClean() throws Exception {
424     assertNoErrors(doFsck(conf, false));
425     TableName table = TableName.valueOf("tableClean");
426     try {
427       HBaseFsck hbck = doFsck(conf, false);
428       assertNoErrors(hbck);
429 
430       setupTable(table);
431       assertEquals(ROWKEYS.length, countRows());
432 
433       // We created 1 table, should be fine
434       hbck = doFsck(conf, false);
435       assertNoErrors(hbck);
436       assertEquals(0, hbck.getOverlapGroups(table).size());
437       assertEquals(ROWKEYS.length, countRows());
438     } finally {
439       deleteTable(table);
440     }
441   }
442 
443   /**
444    * Test thread pooling in the case where there are more regions than threads
445    */
446   @Test
447   public void testHbckThreadpooling() throws Exception {
448     TableName table =
449         TableName.valueOf("tableDupeStartKey");
450     try {
451       // Create table with 4 regions
452       setupTable(table);
453 
454       // limit number of threads to 1.
455       Configuration newconf = new Configuration(conf);
456       newconf.setInt("hbasefsck.numthreads", 1);
457       assertNoErrors(doFsck(newconf, false));
458 
459       // We should pass without triggering a RejectedExecutionException
460     } finally {
461       deleteTable(table);
462     }
463   }
464 
465   @Test
466   public void testHbckFixOrphanTable() throws Exception {
467     TableName table = TableName.valueOf("tableInfo");
468     FileSystem fs = null;
469     Path tableinfo = null;
470     try {
471       setupTable(table);
472       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
473 
474       Path hbaseTableDir = FSUtils.getTableDir(
475           FSUtils.getRootDir(conf), table);
476       fs = hbaseTableDir.getFileSystem(conf);
477       FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
478       tableinfo = status.getPath();
479       fs.rename(tableinfo, new Path("/.tableinfo"));
480 
481       //to report error if .tableinfo is missing.
482       HBaseFsck hbck = doFsck(conf, false);
483       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
484 
485       // fix OrphanTable with default .tableinfo (htd not yet cached on master)
486       hbck = doFsck(conf, true);
487       assertNoErrors(hbck);
488       status = null;
489       status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
490       assertNotNull(status);
491 
492       HTableDescriptor htd = admin.getTableDescriptor(table);
493       htd.setValue("NOT_DEFAULT", "true");
494       admin.disableTable(table);
495       admin.modifyTable(table, htd);
496       admin.enableTable(table);
497       fs.delete(status.getPath(), true);
498 
499       // fix OrphanTable with cache
500       htd = admin.getTableDescriptor(table); // warms up cached htd on master
501       hbck = doFsck(conf, true);
502       assertNoErrors(hbck);
503       status = null;
504       status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
505       assertNotNull(status);
506       htd = admin.getTableDescriptor(table);
507       assertEquals(htd.getValue("NOT_DEFAULT"), "true");
508     } finally {
509       fs.rename(new Path("/.tableinfo"), tableinfo);
510       deleteTable(table);
511     }
512   }
513 
514   /**
515    * This create and fixes a bad table with regions that have a duplicate
516    * start key
517    */
518   @Test
519   public void testDupeStartKey() throws Exception {
520     TableName table =
521         TableName.valueOf("tableDupeStartKey");
522     try {
523       setupTable(table);
524       assertNoErrors(doFsck(conf, false));
525       assertEquals(ROWKEYS.length, countRows());
526 
527       // Now let's mess it up, by adding a region with a duplicate startkey
528       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
529           Bytes.toBytes("A"), Bytes.toBytes("A2"));
530       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
531       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
532           .waitForAssignment(hriDupe);
533       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
534       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
535 
536       HBaseFsck hbck = doFsck(conf, false);
537       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
538             ERROR_CODE.DUPE_STARTKEYS});
539       assertEquals(2, hbck.getOverlapGroups(table).size());
540       assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
541 
542       // fix the degenerate region.
543       doFsck(conf,true);
544 
545       // check that the degenerate region is gone and no data loss
546       HBaseFsck hbck2 = doFsck(conf,false);
547       assertNoErrors(hbck2);
548       assertEquals(0, hbck2.getOverlapGroups(table).size());
549       assertEquals(ROWKEYS.length, countRows());
550     } finally {
551       deleteTable(table);
552     }
553   }
554 
555   /**
556    * Get region info from local cluster.
557    */
558   Map<ServerName, List<String>> getDeployedHRIs(
559       final HBaseAdmin admin) throws IOException {
560     ClusterStatus status = admin.getClusterStatus();
561     Collection<ServerName> regionServers = status.getServers();
562     Map<ServerName, List<String>> mm =
563         new HashMap<ServerName, List<String>>();
564     HConnection connection = admin.getConnection();
565     for (ServerName hsi : regionServers) {
566       AdminProtos.AdminService.BlockingInterface server = connection.getAdmin(hsi);
567 
568       // list all online regions from this region server
569       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
570       List<String> regionNames = new ArrayList<String>();
571       for (HRegionInfo hri : regions) {
572         regionNames.add(hri.getRegionNameAsString());
573       }
574       mm.put(hsi, regionNames);
575     }
576     return mm;
577   }
578 
579   /**
580    * Returns the HSI a region info is on.
581    */
582   ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
583     for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
584       if (e.getValue().contains(hri.getRegionNameAsString())) {
585         return e.getKey();
586       }
587     }
588     return null;
589   }
590 
591   /**
592    * This create and fixes a bad table with regions that have a duplicate
593    * start key
594    */
595   @Test
596   public void testDupeRegion() throws Exception {
597     TableName table =
598         TableName.valueOf("tableDupeRegion");
599     try {
600       setupTable(table);
601       assertNoErrors(doFsck(conf, false));
602       assertEquals(ROWKEYS.length, countRows());
603 
604       // Now let's mess it up, by adding a region with a duplicate startkey
605       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
606           Bytes.toBytes("A"), Bytes.toBytes("B"));
607 
608       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
609       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
610           .waitForAssignment(hriDupe);
611       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
612       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
613 
614       // Yikes! The assignment manager can't tell between diff between two
615       // different regions with the same start/endkeys since it doesn't
616       // differentiate on ts/regionId!  We actually need to recheck
617       // deployments!
618       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
619       while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
620         Thread.sleep(250);
621       }
622 
623       LOG.debug("Finished assignment of dupe region");
624 
625       // TODO why is dupe region different from dupe start keys?
626       HBaseFsck hbck = doFsck(conf, false);
627       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
628             ERROR_CODE.DUPE_STARTKEYS});
629       assertEquals(2, hbck.getOverlapGroups(table).size());
630       assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
631 
632       // fix the degenerate region.
633       doFsck(conf,true);
634 
635       // check that the degenerate region is gone and no data loss
636       HBaseFsck hbck2 = doFsck(conf,false);
637       assertNoErrors(hbck2);
638       assertEquals(0, hbck2.getOverlapGroups(table).size());
639       assertEquals(ROWKEYS.length, countRows());
640     } finally {
641       deleteTable(table);
642     }
643   }
644 
645   /**
646    * This creates and fixes a bad table with regions that has startkey == endkey
647    */
648   @Test
649   public void testDegenerateRegions() throws Exception {
650     TableName table =
651         TableName.valueOf("tableDegenerateRegions");
652     try {
653       setupTable(table);
654       assertNoErrors(doFsck(conf,false));
655       assertEquals(ROWKEYS.length, countRows());
656 
657       // Now let's mess it up, by adding a region with a duplicate startkey
658       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
659           Bytes.toBytes("B"), Bytes.toBytes("B"));
660       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
661       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
662           .waitForAssignment(hriDupe);
663       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
664       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
665 
666       HBaseFsck hbck = doFsck(conf,false);
667       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION,
668           ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS});
669       assertEquals(2, hbck.getOverlapGroups(table).size());
670       assertEquals(ROWKEYS.length, countRows());
671 
672       // fix the degenerate region.
673       doFsck(conf,true);
674 
675       // check that the degenerate region is gone and no data loss
676       HBaseFsck hbck2 = doFsck(conf,false);
677       assertNoErrors(hbck2);
678       assertEquals(0, hbck2.getOverlapGroups(table).size());
679       assertEquals(ROWKEYS.length, countRows());
680     } finally {
681       deleteTable(table);
682     }
683   }
684 
685   /**
686    * This creates and fixes a bad table where a region is completely contained
687    * by another region.
688    */
689   @Test
690   public void testContainedRegionOverlap() throws Exception {
691     TableName table =
692         TableName.valueOf("tableContainedRegionOverlap");
693     try {
694       setupTable(table);
695       assertEquals(ROWKEYS.length, countRows());
696 
697       // Mess it up by creating an overlap in the metadata
698       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
699           Bytes.toBytes("A2"), Bytes.toBytes("B"));
700       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
701       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
702           .waitForAssignment(hriOverlap);
703       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
704       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
705 
706       HBaseFsck hbck = doFsck(conf, false);
707       assertErrors(hbck, new ERROR_CODE[] {
708           ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
709       assertEquals(2, hbck.getOverlapGroups(table).size());
710       assertEquals(ROWKEYS.length, countRows());
711 
712       // fix the problem.
713       doFsck(conf, true);
714 
715       // verify that overlaps are fixed
716       HBaseFsck hbck2 = doFsck(conf,false);
717       assertNoErrors(hbck2);
718       assertEquals(0, hbck2.getOverlapGroups(table).size());
719       assertEquals(ROWKEYS.length, countRows());
720     } finally {
721        deleteTable(table);
722     }
723   }
724 
725   /**
726    * This creates and fixes a bad table where an overlap group of
727    * 3 regions. Set HBaseFsck.maxMerge to 2 to trigger sideline overlapped
728    * region. Mess around the meta data so that closeRegion/offlineRegion
729    * throws exceptions.
730    */
731   @Test
732   public void testSidelineOverlapRegion() throws Exception {
733     TableName table =
734         TableName.valueOf("testSidelineOverlapRegion");
735     try {
736       setupTable(table);
737       assertEquals(ROWKEYS.length, countRows());
738 
739       // Mess it up by creating an overlap: assign two extra regions, [A,AB) and [AB,B)
740       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
741       HMaster master = cluster.getMaster();
742       HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(),
743         Bytes.toBytes("A"), Bytes.toBytes("AB"));
744       master.assignRegion(hriOverlap1);
745       master.getAssignmentManager().waitForAssignment(hriOverlap1);
746       HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(),
747         Bytes.toBytes("AB"), Bytes.toBytes("B"));
748       master.assignRegion(hriOverlap2);
749       master.getAssignmentManager().waitForAssignment(hriOverlap2);
750 
751       HBaseFsck hbck = doFsck(conf, false);
752       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
753         ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
754       assertEquals(3, hbck.getOverlapGroups(table).size());
755       assertEquals(ROWKEYS.length, countRows());
756 
757       // mess around the overlapped regions, to trigger NotServingRegionException
758       Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
759       ServerName serverName = null;
760       byte[] regionName = null;
761       for (HbckInfo hbi: overlapGroups.values()) {
762         if ("A".equals(Bytes.toString(hbi.getStartKey()))
763             && "B".equals(Bytes.toString(hbi.getEndKey()))) {
764           regionName = hbi.getRegionName();
765 
766           // pick an RS (among indexes 0..2) not serving the region, to force bad assignment info into META.
767           int k = cluster.getServerWith(regionName);
768           for (int i = 0; i < 3; i++) {
769             if (i != k) {
770               HRegionServer rs = cluster.getRegionServer(i);
771               serverName = rs.getServerName();
772               break;
773             }
774           }
775           // close the region on the server that hosts it (index k), then offline it
776           HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
777           HBaseFsckRepair.closeRegionSilentlyAndWait(admin,
778             cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
779           admin.offline(regionName);
780           break;
781         }
782       }
783       // Both must have been set by the loop above; then write the wrong server into the region's meta row.
784       assertNotNull(regionName);
785       assertNotNull(serverName);
786       HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
787       Put put = new Put(regionName);
788       put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
789         Bytes.toBytes(serverName.getHostAndPort()));
790       meta.put(put);
791       // NOTE(review): 'meta' is not closed anywhere in this method — confirm whether that is intended.
792       // fix the problem, with sidelining enabled and at most 2 regions merged per overlap group.
793       HBaseFsck fsck = new HBaseFsck(conf);
794       fsck.connect();
795       fsck.setDisplayFullReport(); // i.e. -details
796       fsck.setTimeLag(0);
797       fsck.setFixAssignments(true);
798       fsck.setFixMeta(true);
799       fsck.setFixHdfsHoles(true);
800       fsck.setFixHdfsOverlaps(true);
801       fsck.setFixHdfsOrphans(true);
802       fsck.setFixVersionFile(true);
803       fsck.setSidelineBigOverlaps(true);
804       fsck.setMaxMerge(2);
805       fsck.onlineHbck();
806 
807       // verify that overlaps are fixed, and there are fewer rows
808       // since one region is sidelined.
809       HBaseFsck hbck2 = doFsck(conf,false);
810       assertNoErrors(hbck2);
811       assertEquals(0, hbck2.getOverlapGroups(table).size());
812       assertTrue(ROWKEYS.length > countRows());
813     } finally {
814        deleteTable(table);
815     }
816   }
817 
818   /**
819    * This creates and fixes a bad table where a region is completely contained
820    * by another region, and there is a hole (sort of like a bad split).
821    */
822   @Test
823   public void testOverlapAndOrphan() throws Exception {
824     TableName table =
825         TableName.valueOf("tableOverlapAndOrphan");
826     try {
827       setupTable(table);
828       assertEquals(ROWKEYS.length, countRows());
829 
830       // Mess it up: deleteRegion on [A,B) while the table is disabled, then assign a new region [A2,B)
831       TEST_UTIL.getHBaseAdmin().disableTable(table);
832       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
833           Bytes.toBytes("B"), true, true, false, true);
834       TEST_UTIL.getHBaseAdmin().enableTable(table);
835 
836       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
837           Bytes.toBytes("A2"), Bytes.toBytes("B"));
838       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
839       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
840           .waitForAssignment(hriOverlap);
841       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
842       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
843 
844       HBaseFsck hbck = doFsck(conf, false);
845       assertErrors(hbck, new ERROR_CODE[] {
846           ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
847           ERROR_CODE.HOLE_IN_REGION_CHAIN});
848 
849       // fix the problem.
850       doFsck(conf, true);
851 
852       // verify that overlaps are fixed and the row count is unchanged
853       HBaseFsck hbck2 = doFsck(conf,false);
854       assertNoErrors(hbck2);
855       assertEquals(0, hbck2.getOverlapGroups(table).size());
856       assertEquals(ROWKEYS.length, countRows());
857     } finally {
858        deleteTable(table);
859     }
860   }
861 
862   /**
863    * This creates and fixes a bad table where a region overlaps two regions --
864    * its start key is contained in one region and its end key is contained in
865    * yet another region.
866    */
867   @Test
868   public void testCoveredStartKey() throws Exception {
869     TableName table =
870         TableName.valueOf("tableCoveredStartKey");
871     try {
872       setupTable(table);
873       assertEquals(ROWKEYS.length, countRows());
874 
875       // Mess it up by creating and assigning an extra region [A2,B2)
876       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
877           Bytes.toBytes("A2"), Bytes.toBytes("B2"));
878       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
879       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
880           .waitForAssignment(hriOverlap);
881       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
882       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
883 
884       HBaseFsck hbck = doFsck(conf, false);
885       assertErrors(hbck, new ERROR_CODE[] {
886           ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
887           ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
888       assertEquals(3, hbck.getOverlapGroups(table).size());
889       assertEquals(ROWKEYS.length, countRows());
890 
891       // fix the problem.
892       doFsck(conf, true);
893 
894       // verify that overlaps are fixed and the row count is unchanged
895       HBaseFsck hbck2 = doFsck(conf, false);
896       assertErrors(hbck2, new ERROR_CODE[0]);
897       assertEquals(0, hbck2.getOverlapGroups(table).size());
898       assertEquals(ROWKEYS.length, countRows());
899     } finally {
900       deleteTable(table);
901     }
902   }
903 
904   /**
905    * This creates and fixes a bad table with a missing region -- hole in meta
906    * and data missing in the fs. The repaired table ends up with 2 fewer rows.
907    */
908   @Test
909   public void testRegionHole() throws Exception {
910     TableName table =
911         TableName.valueOf("tableRegionHole");
912     try {
913       setupTable(table);
914       assertEquals(ROWKEYS.length, countRows());
915 
916       // Mess it up by leaving a hole (region [B,C)) in the assignment, meta, and hdfs data
917       TEST_UTIL.getHBaseAdmin().disableTable(table);
918       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
919           Bytes.toBytes("C"), true, true, true);
920       TEST_UTIL.getHBaseAdmin().enableTable(table);
921 
922       HBaseFsck hbck = doFsck(conf, false);
923       assertErrors(hbck, new ERROR_CODE[] {
924           ERROR_CODE.HOLE_IN_REGION_CHAIN});
925       // holes are separate from overlap groups
926       assertEquals(0, hbck.getOverlapGroups(table).size());
927 
928       // fix hole
929       doFsck(conf, true);
930 
931       // check that hole fixed
932       assertNoErrors(doFsck(conf,false));
933       assertEquals(ROWKEYS.length - 2 , countRows()); // lost a region, so 2 rows are gone
934     } finally {
935       deleteTable(table);
936     }
937   }
938 
939   /**
940    * This creates and fixes a bad table with a missing region -- hole in meta
941    * and data present but .regioninfo missing (an orphan hdfs region) in the fs.
942    */
943   @Test
944   public void testHDFSRegioninfoMissing() throws Exception {
945     TableName table =
946         TableName.valueOf("tableHDFSRegioininfoMissing");
947     try {
948       setupTable(table);
949       assertEquals(ROWKEYS.length, countRows());
950 
951       // Mess it up by leaving a hole in the meta data for region [B,C)
952       TEST_UTIL.getHBaseAdmin().disableTable(table);
953       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
954           Bytes.toBytes("C"), true, true, false, true);
955       TEST_UTIL.getHBaseAdmin().enableTable(table);
956 
957       HBaseFsck hbck = doFsck(conf, false);
958       assertErrors(hbck, new ERROR_CODE[] {
959           ERROR_CODE.ORPHAN_HDFS_REGION,
960           ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
961           ERROR_CODE.HOLE_IN_REGION_CHAIN});
962       // holes are separate from overlap groups
963       assertEquals(0, hbck.getOverlapGroups(table).size());
964 
965       // fix hole
966       doFsck(conf, true);
967 
968       // check that hole fixed and the row count is unchanged
969       assertNoErrors(doFsck(conf, false));
970       assertEquals(ROWKEYS.length, countRows());
971     } finally {
972       deleteTable(table);
973     }
974   }
975 
976   /**
977    * This creates and fixes a bad table with a region that is missing from meta and
978    * not assigned to a region server.
979    */
980   @Test
981   public void testNotInMetaOrDeployedHole() throws Exception {
982     TableName table =
983         TableName.valueOf("tableNotInMetaOrDeployedHole");
984     try {
985       setupTable(table);
986       assertEquals(ROWKEYS.length, countRows());
987 
988       // Mess it up by leaving a hole in the meta data for region [B,C)
989       TEST_UTIL.getHBaseAdmin().disableTable(table);
990       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
991           Bytes.toBytes("C"), true, true, false); // don't rm from fs
992       TEST_UTIL.getHBaseAdmin().enableTable(table);
993 
994       HBaseFsck hbck = doFsck(conf, false);
995       assertErrors(hbck, new ERROR_CODE[] {
996           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
997       // holes are separate from overlap groups
998       assertEquals(0, hbck.getOverlapGroups(table).size());
999 
1000       // fix hole; the fixing run itself is expected to report the same two errors
1001       assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1002           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1003 
1004       // check that hole fixed and the row count is unchanged
1005       assertNoErrors(doFsck(conf,false));
1006       assertEquals(ROWKEYS.length, countRows());
1007     } finally {
1008       deleteTable(table);
1009     }
1010   }
1011 
1012   /**
1013    * This creates and fixes a bad table with a hole in meta.
1014    */
1015   @Test
1016   public void testNotInMetaHole() throws Exception {
1017     TableName table =
1018         TableName.valueOf("tableNotInMetaHole");
1019     try {
1020       setupTable(table);
1021       assertEquals(ROWKEYS.length, countRows());
1022 
1023       // Mess it up by leaving a hole in the meta data for region [B,C)
1024       TEST_UTIL.getHBaseAdmin().disableTable(table);
1025       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1026           Bytes.toBytes("C"), false, true, false); // don't rm from fs
1027       TEST_UTIL.getHBaseAdmin().enableTable(table);
1028 
1029       HBaseFsck hbck = doFsck(conf, false);
1030       assertErrors(hbck, new ERROR_CODE[] {
1031           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1032       // holes are separate from overlap groups
1033       assertEquals(0, hbck.getOverlapGroups(table).size());
1034 
1035       // fix hole; the fixing run itself is expected to report the same two errors
1036       assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1037           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1038 
1039       // check that hole fixed and the row count is unchanged
1040       assertNoErrors(doFsck(conf,false));
1041       assertEquals(ROWKEYS.length, countRows());
1042     } finally {
1043       deleteTable(table);
1044     }
1045   }
1046 
1047   /**
1048    * This creates and fixes a bad table with a region that is in meta but has
1049    * no deployment or data in hdfs.
1050    */
1051   @Test
1052   public void testNotInHdfs() throws Exception {
1053     TableName table =
1054         TableName.valueOf("tableNotInHdfs");
1055     try {
1056       setupTable(table);
1057       assertEquals(ROWKEYS.length, countRows());
1058 
1059       // make sure data in regions, if in hlog only there is no data loss
1060       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1061 
1062       // Mess it up by leaving a hole in the hdfs data for region [B,C)
1063       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1064           Bytes.toBytes("C"), false, false, true); // don't rm meta
1065 
1066       HBaseFsck hbck = doFsck(conf, false);
1067       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1068       // holes are separate from overlap groups
1069       assertEquals(0, hbck.getOverlapGroups(table).size());
1070 
1071       // fix hole
1072       doFsck(conf, true);
1073 
1074       // check that hole fixed; the row count is expected to drop by 2
1075       assertNoErrors(doFsck(conf,false));
1076       assertEquals(ROWKEYS.length - 2, countRows());
1077     } finally {
1078       deleteTable(table);
1079     }
1080   }
1081 
1082   /**
1083    * This creates entries in hbase:meta with no hdfs data.  This should cleanly
1084    * remove the table.
1085    */
1086   @Test
1087   public void testNoHdfsTable() throws Exception {
1088     TableName table = TableName.valueOf("NoHdfsTable");
1089     setupTable(table);
1090     assertEquals(ROWKEYS.length, countRows());
1091 
1092     // make sure data in regions, if in hlog only there is no data loss
1093     TEST_UTIL.getHBaseAdmin().flush(table.getName());
1094 
1095     // Mess it up by deleting the hdfs dirs of all four regions
1096     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1097         Bytes.toBytes("A"), false, false, true); // don't rm meta
1098     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1099         Bytes.toBytes("B"), false, false, true); // don't rm meta
1100     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1101         Bytes.toBytes("C"), false, false, true); // don't rm meta
1102     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1103         Bytes.toBytes(""), false, false, true); // don't rm meta
1104 
1105     // also remove the table directory in hdfs
1106     deleteTableDir(table);
1107 
1108     HBaseFsck hbck = doFsck(conf, false);
1109     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1110         ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1111         ERROR_CODE.NOT_IN_HDFS,});
1112     // holes are separate from overlap groups
1113     assertEquals(0, hbck.getOverlapGroups(table).size());
1114 
1115     // fix hole
1116     doFsck(conf, true); // detect dangling regions and remove those
1117 
1118     // check that hole fixed and that the table itself no longer exists
1119     assertNoErrors(doFsck(conf,false));
1120     assertFalse("Table "+ table + " should have been deleted",
1121         TEST_UTIL.getHBaseAdmin().tableExists(table));
1122   }
1123   /** Recursively deletes the table's directory under the HBase root dir and logs whether it succeeded. */
1124   public void deleteTableDir(TableName table) throws IOException {
1125     Path rootDir = FSUtils.getRootDir(conf);
1126     FileSystem fs = rootDir.getFileSystem(conf);
1127     Path p = FSUtils.getTableDir(rootDir, table);
1128     HBaseFsck.debugLsr(conf, p); // presumably dumps a recursive listing of p for debugging — confirm
1129     boolean success = fs.delete(p, true);
1130     LOG.info("Deleted " + p + " sucessfully? " + success);
1131   }
1132 
1133   /**
1134    * When the hbase.version file is missing, hbck should report NO_VERSION_FILE and be able to fix it.
1135    */
1136   @Test
1137   public void testNoVersionFile() throws Exception {
1138     // delete the hbase.version file
1139     Path rootDir = FSUtils.getRootDir(conf);
1140     FileSystem fs = rootDir.getFileSystem(conf);
1141     Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1142     fs.delete(versionFile, true);
1143 
1144     // test: a read-only run must report the missing version file
1145     HBaseFsck hbck = doFsck(conf, false);
1146     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1147     // fix hbase.version missing
1148     doFsck(conf, true);
1149 
1150     // no version file fixed
1151     assertNoErrors(doFsck(conf, false));
1152   }
1153 
1154   /**
1155    * A region must not be deployed while its table is disabled; hbck should report and fix it.
1156    */
1157   @Test
1158   public void testRegionShouldNotBeDeployed() throws Exception {
1159     TableName table =
1160         TableName.valueOf("tableRegionShouldNotBeDeployed");
1161     try {
1162       LOG.info("Starting testRegionShouldNotBeDeployed.");
1163       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1164       assertTrue(cluster.waitForActiveAndReadyMaster());
1165 
1166       // Keys handed to createMultiRegionsInMeta below; the first one is empty
1167       byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1168           Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1169       HTableDescriptor htdDisabled = new HTableDescriptor(table);
1170       htdDisabled.addFamily(new HColumnDescriptor(FAM));
1171 
1172       // Write the .tableinfo
1173       FSTableDescriptors fstd = new FSTableDescriptors(conf);
1174       fstd.createTableDescriptor(htdDisabled);
1175       List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
1176           TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
1177 
1178       // Let's just assign everything to first RS
1179       HRegionServer hrs = cluster.getRegionServer(0);
1180 
1181       // Create region files.
1182       TEST_UTIL.getHBaseAdmin().disableTable(table);
1183       TEST_UTIL.getHBaseAdmin().enableTable(table);
1184 
1185       // Disable the table and close its regions
1186       TEST_UTIL.getHBaseAdmin().disableTable(table);
1187       HRegionInfo region = disabledRegions.remove(0);
1188       byte[] regionName = region.getRegionName();
1189 
1190       // The region should not be assigned currently
1191       assertTrue(cluster.getServerWith(regionName) == -1);
1192 
1193       // Directly open a region on a region server.
1194       // If going through AM/ZK, the region won't be open.
1195       // Even if it is opened, AM will close it, which causes
1196       // flakiness of this test.
1197       HRegion r = HRegion.openHRegion(
1198         region, htdDisabled, hrs.getWAL(region), conf);
1199       hrs.addToOnlineRegions(r);
1200 
1201       HBaseFsck hbck = doFsck(conf, false);
1202       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1203 
1204       // fix this fault
1205       doFsck(conf, true);
1206 
1207       // check result
1208       assertNoErrors(doFsck(conf, false));
1209     } finally {
1210       TEST_UTIL.getHBaseAdmin().enableTable(table); // the test body left the table disabled
1211       deleteTable(table);
1212     }
1213   }
1214 
1215   /**
1216    * This creates two tables, messes up both of them, and fixes them one table at a time.
1217    */
1218   @Test
1219   public void testFixByTable() throws Exception {
1220     TableName table1 =
1221         TableName.valueOf("testFixByTable1");
1222     TableName table2 =
1223         TableName.valueOf("testFixByTable2");
1224     try {
1225       setupTable(table1);
1226       // make sure data in regions, if in hlog only there is no data loss
1227       TEST_UTIL.getHBaseAdmin().flush(table1.getName());
1228       // Mess them up by leaving a hole in the hdfs data (tbl is presumably the table just set up — confirm)
1229       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1230         Bytes.toBytes("C"), false, false, true); // don't rm meta
1231 
1232       setupTable(table2);
1233       // make sure data in regions, if in hlog only there is no data loss
1234       TEST_UTIL.getHBaseAdmin().flush(table2.getName());
1235       // Mess them up by leaving a hole in the hdfs data
1236       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1237         Bytes.toBytes("C"), false, false, true); // don't rm meta
1238 
1239       HBaseFsck hbck = doFsck(conf, false);
1240       assertErrors(hbck, new ERROR_CODE[] {
1241         ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1242 
1243       // fix hole in table 1
1244       doFsck(conf, true, table1);
1245       // check that hole in table 1 fixed
1246       assertNoErrors(doFsck(conf, false, table1));
1247       // check that hole in table 2 still there
1248       assertErrors(doFsck(conf, false, table2),
1249         new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1250 
1251       // fix hole in table 2
1252       doFsck(conf, true, table2);
1253       // check that the holes in both tables are fixed
1254       assertNoErrors(doFsck(conf, false));
1255       assertEquals(ROWKEYS.length - 2, countRows());
1256     } finally {
1257       deleteTable(table1);
1258       deleteTable(table2);
1259     }
1260   }
1261   /**
1262    * A split parent in meta, in hdfs, and not deployed; a regular repair does not fix it, fixSplitParents does.
1263    */
1264   @Test
1265   public void testLingeringSplitParent() throws Exception {
1266     TableName table =
1267         TableName.valueOf("testLingeringSplitParent");
1268     HTable meta = null;
1269     try {
1270       setupTable(table);
1271       assertEquals(ROWKEYS.length, countRows());
1272 
1273       // make sure data in regions, if in hlog only there is no data loss
1274       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1275       HRegionLocation location = tbl.getRegionLocation("B");
1276 
1277       // Delete one region from meta, but not hdfs, unassign it.
1278       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1279         Bytes.toBytes("C"), true, true, false);
1280 
1281       // Create a new meta entry to fake it as a split parent.
1282       meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName(),
1283           executorService);
1284       HRegionInfo hri = location.getRegionInfo();
1285       // fake daughters [B,BM) and [BM,C); they are only referenced from the parent's meta entry
1286       HRegionInfo a = new HRegionInfo(tbl.getName(),
1287         Bytes.toBytes("B"), Bytes.toBytes("BM"));
1288       HRegionInfo b = new HRegionInfo(tbl.getName(),
1289         Bytes.toBytes("BM"), Bytes.toBytes("C"));
1290 
1291       hri.setOffline(true);
1292       hri.setSplit(true);
1293 
1294       MetaEditor.addRegionToMeta(meta, hri, a, b);
1295       meta.flushCommits();
1296       TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1297 
1298       HBaseFsck hbck = doFsck(conf, false);
1299       assertErrors(hbck, new ERROR_CODE[] {
1300         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1301 
1302       // regular repair cannot fix lingering split parent
1303       hbck = doFsck(conf, true);
1304       assertErrors(hbck, new ERROR_CODE[] {
1305         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1306       assertFalse(hbck.shouldRerun());
1307       hbck = doFsck(conf, false);
1308       assertErrors(hbck, new ERROR_CODE[] {
1309         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1310 
1311       // fix lingering split parent
1312       hbck = new HBaseFsck(conf);
1313       hbck.connect();
1314       hbck.setDisplayFullReport(); // i.e. -details
1315       hbck.setTimeLag(0);
1316       hbck.setFixSplitParents(true);
1317       hbck.onlineHbck();
1318       assertTrue(hbck.shouldRerun());
1319       // the parent's meta row must no longer carry the splitA/splitB daughter columns
1320       Get get = new Get(hri.getRegionName());
1321       Result result = meta.get(get);
1322       assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1323         HConstants.SPLITA_QUALIFIER).isEmpty());
1324       assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1325         HConstants.SPLITB_QUALIFIER).isEmpty());
1326       TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1327 
1328       // fix other issues
1329       doFsck(conf, true);
1330 
1331       // check that all are fixed
1332       assertNoErrors(doFsck(conf, false));
1333       assertEquals(ROWKEYS.length, countRows());
1334     } finally {
1335       deleteTable(table);
1336       IOUtils.closeQuietly(meta);
1337     }
1338   }
1339 
1340   /**
1341    * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
1342    * valid cases where the daughters are there.
1343    */
1344   @Test
1345   public void testValidLingeringSplitParent() throws Exception {
1346     TableName table =
1347         TableName.valueOf("testLingeringSplitParent"); // NOTE(review): same name as in testLingeringSplitParent() — confirm
1348     HTable meta = null;
1349     try {
1350       setupTable(table);
1351       assertEquals(ROWKEYS.length, countRows());
1352 
1353       // make sure data in regions, if in hlog only there is no data loss
1354       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1355       HRegionLocation location = tbl.getRegionLocation("B");
1356 
1357       meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName());
1358       HRegionInfo hri = location.getRegionInfo();
1359 
1360       // do a regular split at "BM" and wait (up to 60000, presumably ms) for it to complete
1361       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1362       byte[] regionName = location.getRegionInfo().getRegionName();
1363       admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1364       TestEndToEndSplitTransaction.blockUntilRegionSplit(
1365           TEST_UTIL.getConfiguration(), 60000, regionName, true);
1366 
1367       // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
1368       // for some time until children references are deleted. HBCK erroneously sees this as
1369       // overlapping regions
1370       HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, false, null);
1371       assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
1372 
1373       // assert that the split hbase:meta entry is still there.
1374       Get get = new Get(hri.getRegionName());
1375       Result result = meta.get(get);
1376       assertNotNull(result);
1377       assertNotNull(HRegionInfo.getHRegionInfo(result));
1378 
1379       assertEquals(ROWKEYS.length, countRows());
1380 
1381       // assert that we still have the split regions
1382       assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split. NOTE(review): args are (actual, expected)
1383       assertNoErrors(doFsck(conf, false));
1384     } finally {
1385       deleteTable(table);
1386       IOUtils.closeQuietly(meta);
1387     }
1388   }
1389 
1390   /**
1391    * Split crashed after the write to hbase:meta finished for the parent region, but
1392    * failed to write the daughters (pre HBASE-7721 codebase)
1393    */
1394   @Test(timeout=75000)
1395   public void testSplitDaughtersNotInMeta() throws Exception {
1396     TableName table =
1397         TableName.valueOf("testSplitdaughtersNotInMeta");
1398     HTable meta = null;
1399     try {
1400       setupTable(table);
1401       assertEquals(ROWKEYS.length, countRows());
1402 
1403       // make sure data in regions, if in hlog only there is no data loss
1404       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1405       HRegionLocation location = tbl.getRegionLocation("B");
1406 
1407       meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName());
1408       HRegionInfo hri = location.getRegionInfo();
1409 
1410       // do a regular split at "BM" and wait (up to 60000, presumably ms) for it to complete
1411       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1412       byte[] regionName = location.getRegionInfo().getRegionName();
1413       admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1414       TestEndToEndSplitTransaction.blockUntilRegionSplit(
1415           TEST_UTIL.getConfiguration(), 60000, regionName, true);
1416       // read the two daughters back from the parent's meta row
1417       PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(meta.get(new Get(regionName)));
1418 
1419       // Delete daughter regions from meta, but not hdfs, unassign them.
1420       Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1421       undeployRegion(admin, hris.get(daughters.getFirst()), daughters.getFirst());
1422       undeployRegion(admin, hris.get(daughters.getSecond()), daughters.getSecond());
1423 
1424       meta.delete(new Delete(daughters.getFirst().getRegionName()));
1425       meta.delete(new Delete(daughters.getSecond().getRegionName()));
1426       meta.flushCommits();
1427 
1428       HBaseFsck hbck = doFsck(conf, false);
1429       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1430           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN}); //no LINGERING_SPLIT_PARENT
1431 
1432       // now fix it. The fix should not revert the region split, but add daughters to META
1433       hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, false, null);
1434       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1435           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1436 
1437       // assert that the split hbase:meta entry is still there.
1438       Get get = new Get(hri.getRegionName());
1439       Result result = meta.get(get);
1440       assertNotNull(result);
1441       assertNotNull(HRegionInfo.getHRegionInfo(result));
1442 
1443       assertEquals(ROWKEYS.length, countRows());
1444 
1445       // assert that we still have the split regions
1446       assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split. NOTE(review): args are (actual, expected)
1447       assertNoErrors(doFsck(conf, false)); //should be fixed by now
1448     } finally {
1449       deleteTable(table);
1450       IOUtils.closeQuietly(meta);
1451     }
1452   }
1453 
1454   /**
1455    * This creates and fixes a bad table whose 1st region is missing -- hole in
1456    * meta and data missing in the fs.
1457    */
1458   @Test(timeout=120000)
1459   public void testMissingFirstRegion() throws Exception {
1460     TableName table =
1461         TableName.valueOf("testMissingFirstRegion");
1462     try {
1463       setupTable(table);
1464       assertEquals(ROWKEYS.length, countRows());
1465 
1466       // Mess it up by leaving a hole in the assignment, meta, and hdfs data for region ["", A)
1467       TEST_UTIL.getHBaseAdmin().disableTable(table);
1468       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1469           true, true);
1470       TEST_UTIL.getHBaseAdmin().enableTable(table);
1471 
1472       HBaseFsck hbck = doFsck(conf, false);
1473       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1474       // fix hole
1475       doFsck(conf, true);
1476       // check that hole fixed
1477       assertNoErrors(doFsck(conf, false));
1478     } finally {
1479       deleteTable(table);
1480     }
1481   }
1482 
1483   /**
1484    * This creates and fixes a bad table whose last region is missing -- hole in meta and data
1485    * missing in the fs.
1486    */
1487   @Test(timeout=120000)
1488   public void testMissingLastRegion() throws Exception {
1489     TableName table =
1490         TableName.valueOf("testMissingLastRegion");
1491     try {
1492       setupTable(table);
1493       assertEquals(ROWKEYS.length, countRows());
1494 
1495       // Mess it up by leaving a hole in the assignment, meta, and hdfs data for region [C, "")
1496       TEST_UTIL.getHBaseAdmin().disableTable(table);
1497       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1498           true, true);
1499       TEST_UTIL.getHBaseAdmin().enableTable(table);
1500 
1501       HBaseFsck hbck = doFsck(conf, false);
1502       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1503       // fix hole
1504       doFsck(conf, true);
1505       // check that hole fixed
1506       assertNoErrors(doFsck(conf, false));
1507     } finally {
1508       deleteTable(table);
1509     }
1510   }
1511 
1512   /**
1513    * Tests that the -noHdfsChecking option can still detect and fix assignment issues.
1514    */
1515   @Test
1516   public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1517     TableName table =
1518         TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1519     try {
1520       setupTable(table);
1521       assertEquals(ROWKEYS.length, countRows());
1522 
1523       // Mess it up by closing a region
1524       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1525         Bytes.toBytes("B"), true, false, false, false);
1526 
1527       // verify there are no other errors
1528       HBaseFsck hbck = doFsck(conf, false);
1529       assertErrors(hbck, new ERROR_CODE[] {
1530         ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1531 
1532       // verify that noHdfsChecking reports the same errors
1533       HBaseFsck fsck = new HBaseFsck(conf);
1534       fsck.connect();
1535       fsck.setDisplayFullReport(); // i.e. -details
1536       fsck.setTimeLag(0);
1537       fsck.setCheckHdfs(false);
1538       fsck.onlineHbck();
1539       assertErrors(fsck, new ERROR_CODE[] {
1540         ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1541 
1542       // verify that fixAssignments works fine with noHdfsChecking
1543       fsck = new HBaseFsck(conf);
1544       fsck.connect();
1545       fsck.setDisplayFullReport(); // i.e. -details
1546       fsck.setTimeLag(0);
1547       fsck.setCheckHdfs(false);
1548       fsck.setFixAssignments(true);
1549       fsck.onlineHbck();
1550       assertTrue(fsck.shouldRerun());
1551       fsck.onlineHbck(); // second pass on the same instance, after the first one asked for a rerun
1552       assertNoErrors(fsck);
1553 
1554       assertEquals(ROWKEYS.length, countRows());
1555     } finally {
1556       deleteTable(table);
1557     }
1558   }
1559 
1560   /**
1561    * Test -noHdfsChecking option can detect region is not in meta but deployed.
1562    * However, it can not fix it without checking Hdfs because we need to get
1563    * the region info from Hdfs in this case, then to patch the meta.
1564    */
1565   @Test
1566   public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1567     TableName table =
1568         TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
1569     try {
1570       setupTable(table);
1571       assertEquals(ROWKEYS.length, countRows());
1572 
1573       // Mess it up by deleting a region from the metadata
1574       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1575         Bytes.toBytes("B"), false, true, false, false);
1576 
1577       // verify there is no other errors
1578       HBaseFsck hbck = doFsck(conf, false);
1579       assertErrors(hbck, new ERROR_CODE[] {
1580         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1581 
1582       // verify that noHdfsChecking report the same errors
1583       HBaseFsck fsck = new HBaseFsck(conf);
1584       fsck.connect();
1585       fsck.setDisplayFullReport(); // i.e. -details
1586       fsck.setTimeLag(0);
1587       fsck.setCheckHdfs(false);
1588       fsck.onlineHbck();
1589       assertErrors(fsck, new ERROR_CODE[] {
1590         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1591 
1592       // verify that fixMeta doesn't work with noHdfsChecking
1593       fsck = new HBaseFsck(conf);
1594       fsck.connect();
1595       fsck.setDisplayFullReport(); // i.e. -details
1596       fsck.setTimeLag(0);
1597       fsck.setCheckHdfs(false);
1598       fsck.setFixAssignments(true);
1599       fsck.setFixMeta(true);
1600       fsck.onlineHbck();
1601       assertFalse(fsck.shouldRerun());
1602       assertErrors(fsck, new ERROR_CODE[] {
1603         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1604     } finally {
1605       deleteTable(table);
1606     }
1607   }
1608 
1609   /**
1610    * Test -fixHdfsHoles doesn't work with -noHdfsChecking option,
1611    * and -noHdfsChecking can't detect orphan Hdfs region.
1612    */
1613   @Test
1614   public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1615     TableName table =
1616         TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
1617     try {
1618       setupTable(table);
1619       assertEquals(ROWKEYS.length, countRows());
1620 
1621       // Mess it up by creating an overlap in the metadata
1622       TEST_UTIL.getHBaseAdmin().disableTable(table);
1623       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1624         Bytes.toBytes("B"), true, true, false, true);
1625       TEST_UTIL.getHBaseAdmin().enableTable(table);
1626 
1627       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
1628         Bytes.toBytes("A2"), Bytes.toBytes("B"));
1629       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1630       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1631         .waitForAssignment(hriOverlap);
1632       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1633       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1634 
1635       HBaseFsck hbck = doFsck(conf, false);
1636       assertErrors(hbck, new ERROR_CODE[] {
1637         ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1638         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1639 
1640       // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
1641       HBaseFsck fsck = new HBaseFsck(conf);
1642       fsck.connect();
1643       fsck.setDisplayFullReport(); // i.e. -details
1644       fsck.setTimeLag(0);
1645       fsck.setCheckHdfs(false);
1646       fsck.onlineHbck();
1647       assertErrors(fsck, new ERROR_CODE[] {
1648         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1649 
1650       // verify that fixHdfsHoles doesn't work with noHdfsChecking
1651       fsck = new HBaseFsck(conf);
1652       fsck.connect();
1653       fsck.setDisplayFullReport(); // i.e. -details
1654       fsck.setTimeLag(0);
1655       fsck.setCheckHdfs(false);
1656       fsck.setFixHdfsHoles(true);
1657       fsck.setFixHdfsOverlaps(true);
1658       fsck.setFixHdfsOrphans(true);
1659       fsck.onlineHbck();
1660       assertFalse(fsck.shouldRerun());
1661       assertErrors(fsck, new ERROR_CODE[] {
1662         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1663     } finally {
1664       if (TEST_UTIL.getHBaseAdmin().isTableDisabled(table)) {
1665         TEST_UTIL.getHBaseAdmin().enableTable(table);
1666       }
1667       deleteTable(table);
1668     }
1669   }
1670 
1671   /**
1672    * We don't have an easy way to verify that a flush completed, so we loop until we find a
1673    * legitimate hfile and return it.
1674    * @param fs
1675    * @param table
1676    * @return Path of a flushed hfile.
1677    * @throws IOException
1678    */
1679   Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
1680     Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1681     Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1682     Path famDir = new Path(regionDir, FAM_STR);
1683 
1684     // keep doing this until we get a legit hfile
1685     while (true) {
1686       FileStatus[] hfFss = fs.listStatus(famDir);
1687       if (hfFss.length == 0) {
1688         continue;
1689       }
1690       for (FileStatus hfs : hfFss) {
1691         if (!hfs.isDir()) {
1692           return hfs.getPath();
1693         }
1694       }
1695     }
1696   }
1697 
1698   /**
1699    * This creates a table and then corrupts an hfile.  Hbck should quarantine the file.
1700    */
1701   @Test(timeout=180000)
1702   public void testQuarantineCorruptHFile() throws Exception {
1703     TableName table = TableName.valueOf(name.getMethodName());
1704     try {
1705       setupTable(table);
1706       assertEquals(ROWKEYS.length, countRows());
1707       TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.
1708 
1709       FileSystem fs = FileSystem.get(conf);
1710       Path hfile = getFlushedHFile(fs, table);
1711 
1712       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1713       TEST_UTIL.getHBaseAdmin().disableTable(table);
1714 
1715       // create new corrupt file called deadbeef (valid hfile name)
1716       Path corrupt = new Path(hfile.getParent(), "deadbeef");
1717       TestHFile.truncateFile(fs, hfile, corrupt);
1718       LOG.info("Created corrupted file " + corrupt);
1719       HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
1720 
1721       // we cannot enable here because enable never finished due to the corrupt region.
1722       HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
1723       assertEquals(res.getRetCode(), 0);
1724       HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1725       assertEquals(hfcc.getHFilesChecked(), 5);
1726       assertEquals(hfcc.getCorrupted().size(), 1);
1727       assertEquals(hfcc.getFailures().size(), 0);
1728       assertEquals(hfcc.getQuarantined().size(), 1);
1729       assertEquals(hfcc.getMissing().size(), 0);
1730 
1731       // Its been fixed, verify that we can enable.
1732       TEST_UTIL.getHBaseAdmin().enableTable(table);
1733     } finally {
1734       deleteTable(table);
1735     }
1736   }
1737 
1738   /**
1739   * Test that use this should have a timeout, because this method could potentially wait forever.
1740   */
1741   private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
1742                                 int corrupt, int fail, int quar, int missing) throws Exception {
1743     try {
1744       setupTable(table);
1745       assertEquals(ROWKEYS.length, countRows());
1746       TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.
1747 
1748       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1749       TEST_UTIL.getHBaseAdmin().disableTable(table);
1750 
1751       String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
1752           table.getNameAsString()};
1753       ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1754       HBaseFsck res = hbck.exec(exec, args);
1755 
1756       HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1757       assertEquals(hfcc.getHFilesChecked(), check);
1758       assertEquals(hfcc.getCorrupted().size(), corrupt);
1759       assertEquals(hfcc.getFailures().size(), fail);
1760       assertEquals(hfcc.getQuarantined().size(), quar);
1761       assertEquals(hfcc.getMissing().size(), missing);
1762 
1763       // its been fixed, verify that we can enable
1764       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1765       admin.enableTableAsync(table);
1766       while (!admin.isTableEnabled(table)) {
1767         try {
1768           Thread.sleep(250);
1769         } catch (InterruptedException e) {
1770           e.printStackTrace();
1771           fail("Interrupted when trying to enable table " + table);
1772         }
1773       }
1774     } finally {
1775       deleteTable(table);
1776     }
1777   }
1778 
1779   /**
1780    * This creates a table and simulates the race situation where a concurrent compaction or split
1781    * has removed an hfile after the corruption checker learned about it.
1782    */
1783   @Test(timeout=180000)
1784   public void testQuarantineMissingHFile() throws Exception {
1785     TableName table = TableName.valueOf(name.getMethodName());
1786     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1787     // inject a fault in the hfcc created.
1788     final FileSystem fs = FileSystem.get(conf);
1789     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1790       @Override
1791       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1792         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1793           boolean attemptedFirstHFile = false;
1794           @Override
1795           protected void checkHFile(Path p) throws IOException {
1796             if (!attemptedFirstHFile) {
1797               attemptedFirstHFile = true;
1798               assertTrue(fs.delete(p, true)); // make sure delete happened.
1799             }
1800             super.checkHFile(p);
1801           }
1802         };
1803       }
1804     };
1805     doQuarantineTest(table, hbck, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing.
1806   }
1807 
1808   /**
1809    * This creates a table and simulates the race situation where a concurrent compaction or split
1810    * has removed an colfam dir before the corruption checker got to it.
1811    */
1812   // Disabled because fails sporadically.  Is this test right?  Timing-wise, there could be no
1813   // files in a column family on initial creation -- as suggested by Matteo.
1814   @Ignore @Test(timeout=180000)
1815   public void testQuarantineMissingFamdir() throws Exception {
1816     TableName table = TableName.valueOf(name.getMethodName());
1817     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1818     // inject a fault in the hfcc created.
1819     final FileSystem fs = FileSystem.get(conf);
1820     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1821       @Override
1822       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1823         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1824           boolean attemptedFirstFamDir = false;
1825           @Override
1826           protected void checkColFamDir(Path p) throws IOException {
1827             if (!attemptedFirstFamDir) {
1828               attemptedFirstFamDir = true;
1829               assertTrue(fs.delete(p, true)); // make sure delete happened.
1830             }
1831             super.checkColFamDir(p);
1832           }
1833         };
1834       }
1835     };
1836     doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1837   }
1838 
1839   /**
1840    * This creates a table and simulates the race situation where a concurrent compaction or split
1841    * has removed a region dir before the corruption checker got to it.
1842    */
1843   @Test(timeout=180000)
1844   public void testQuarantineMissingRegionDir() throws Exception {
1845     TableName table = TableName.valueOf(name.getMethodName());
1846     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1847     // inject a fault in the hfcc created.
1848     final FileSystem fs = FileSystem.get(conf);
1849     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1850       @Override
1851       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1852         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1853           boolean attemptedFirstRegionDir = false;
1854           @Override
1855           protected void checkRegionDir(Path p) throws IOException {
1856             if (!attemptedFirstRegionDir) {
1857               attemptedFirstRegionDir = true;
1858               assertTrue(fs.delete(p, true)); // make sure delete happened.
1859             }
1860             super.checkRegionDir(p);
1861           }
1862         };
1863       }
1864     };
1865     doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1866   }
1867 
1868   /**
1869    * Test fixing lingering reference file.
1870    */
1871   @Test
1872   public void testLingeringReferenceFile() throws Exception {
1873     TableName table =
1874         TableName.valueOf("testLingeringReferenceFile");
1875     try {
1876       setupTable(table);
1877       assertEquals(ROWKEYS.length, countRows());
1878 
1879       // Mess it up by creating a fake reference file
1880       FileSystem fs = FileSystem.get(conf);
1881       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1882       Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1883       Path famDir = new Path(regionDir, FAM_STR);
1884       Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
1885       fs.create(fakeReferenceFile);
1886 
1887       HBaseFsck hbck = doFsck(conf, false);
1888       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
1889       // fix reference file
1890       doFsck(conf, true);
1891       // check that reference file fixed
1892       assertNoErrors(doFsck(conf, false));
1893     } finally {
1894       deleteTable(table);
1895     }
1896   }
1897 
1898   /**
1899    * Test mission REGIONINFO_QUALIFIER in hbase:meta
1900    */
1901   @Test
1902   public void testMissingRegionInfoQualifier() throws Exception {
1903     TableName table =
1904         TableName.valueOf("testMissingRegionInfoQualifier");
1905     try {
1906       setupTable(table);
1907 
1908       // Mess it up by removing the RegionInfo for one region.
1909       final List<Delete> deletes = new LinkedList<Delete>();
1910       HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName());
1911       MetaScanner.metaScan(conf, new MetaScanner.MetaScannerVisitor() {
1912 
1913         @Override
1914         public boolean processRow(Result rowResult) throws IOException {
1915           HRegionInfo hri = MetaScanner.getHRegionInfo(rowResult);
1916           if (hri != null && !hri.getTable().isSystemTable()) {
1917             Delete delete = new Delete(rowResult.getRow());
1918             delete.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1919             deletes.add(delete);
1920           }
1921           return true;
1922         }
1923 
1924         @Override
1925         public void close() throws IOException {
1926         }
1927       });
1928       meta.delete(deletes);
1929 
1930       // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
1931       meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
1932         HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
1933       meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
1934         HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
1935       meta.close();
1936 
1937       HBaseFsck hbck = doFsck(conf, false);
1938       assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
1939 
1940       // fix reference file
1941       hbck = doFsck(conf, true);
1942 
1943       // check that reference file fixed
1944       assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
1945     } finally {
1946       deleteTable(table);
1947     }
1948   }
1949 
1950 
1951   /**
1952    * Test pluggable error reporter. It can be plugged in
1953    * from system property or configuration.
1954    */
1955   @Test
1956   public void testErrorReporter() throws Exception {
1957     try {
1958       MockErrorReporter.calledCount = 0;
1959       doFsck(conf, false);
1960       assertEquals(MockErrorReporter.calledCount, 0);
1961 
1962       conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
1963       doFsck(conf, false);
1964       assertTrue(MockErrorReporter.calledCount > 20);
1965     } finally {
1966       conf.set("hbasefsck.errorreporter",
1967         PrintingErrorReporter.class.getName());
1968       MockErrorReporter.calledCount = 0;
1969     }
1970   }
1971 
1972   static class MockErrorReporter implements ErrorReporter {
1973     static int calledCount = 0;
1974 
1975     @Override
1976     public void clear() {
1977       calledCount++;
1978     }
1979 
1980     @Override
1981     public void report(String message) {
1982       calledCount++;
1983     }
1984 
1985     @Override
1986     public void reportError(String message) {
1987       calledCount++;
1988     }
1989 
1990     @Override
1991     public void reportError(ERROR_CODE errorCode, String message) {
1992       calledCount++;
1993     }
1994 
1995     @Override
1996     public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
1997       calledCount++;
1998     }
1999 
2000     @Override
2001     public void reportError(ERROR_CODE errorCode,
2002         String message, TableInfo table, HbckInfo info) {
2003       calledCount++;
2004     }
2005 
2006     @Override
2007     public void reportError(ERROR_CODE errorCode, String message,
2008         TableInfo table, HbckInfo info1, HbckInfo info2) {
2009       calledCount++;
2010     }
2011 
2012     @Override
2013     public int summarize() {
2014       return ++calledCount;
2015     }
2016 
2017     @Override
2018     public void detail(String details) {
2019       calledCount++;
2020     }
2021 
2022     @Override
2023     public ArrayList<ERROR_CODE> getErrorList() {
2024       calledCount++;
2025       return new ArrayList<ERROR_CODE>();
2026     }
2027 
2028     @Override
2029     public void progress() {
2030       calledCount++;
2031     }
2032 
2033     @Override
2034     public void print(String message) {
2035       calledCount++;
2036     }
2037 
2038     @Override
2039     public void resetErrors() {
2040       calledCount++;
2041     }
2042 
2043     @Override
2044     public boolean tableHasErrors(TableInfo table) {
2045       calledCount++;
2046       return false;
2047     }
2048   }
2049 
2050   @Test(timeout=60000)
2051   public void testCheckTableLocks() throws Exception {
2052     IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
2053     EnvironmentEdgeManager.injectEdge(edge);
2054     // check no errors
2055     HBaseFsck hbck = doFsck(conf, false);
2056     assertNoErrors(hbck);
2057 
2058     ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
2059 
2060     // obtain one lock
2061     final TableLockManager tableLockManager = TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
2062     TableLock writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2063         "testCheckTableLocks");
2064     writeLock.acquire();
2065     hbck = doFsck(conf, false);
2066     assertNoErrors(hbck); // should not have expired, no problems
2067 
2068     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2069         TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
2070 
2071     hbck = doFsck(conf, false);
2072     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2073 
2074     final CountDownLatch latch = new CountDownLatch(1);
2075     new Thread() {
2076       @Override
2077       public void run() {
2078         TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2079             "testCheckTableLocks");
2080         try {
2081           latch.countDown();
2082           readLock.acquire();
2083         } catch (IOException ex) {
2084           fail();
2085         } catch (IllegalStateException ex) {
2086           return; // expected, since this will be reaped under us.
2087         }
2088         fail("should not have come here");
2089       };
2090     }.start();
2091 
2092     latch.await(); // wait until thread starts
2093     Threads.sleep(300); // wait some more to ensure writeLock.acquire() is called
2094 
2095     hbck = doFsck(conf, false);
2096     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK}); // still one expired, one not-expired
2097 
2098     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2099         TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
2100 
2101     hbck = doFsck(conf, false);
2102     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK}); // both are expired
2103 
2104     conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1); // reaping from ZKInterProcessWriteLock uses znode cTime,
2105                                                                  // which is not injectable through EnvironmentEdge
2106     Threads.sleep(10);
2107     hbck = doFsck(conf, true); // now fix both cases
2108 
2109     hbck = doFsck(conf, false);
2110     assertNoErrors(hbck);
2111 
2112     // ensure that locks are deleted
2113     writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2114         "should acquire without blocking");
2115     writeLock.acquire(); // this should not block.
2116     writeLock.release(); // release for clean state
2117   }
2118 
2119   @Test
2120   public void testMetaOffline() throws Exception {
2121     // check no errors
2122     HBaseFsck hbck = doFsck(conf, false);
2123     assertNoErrors(hbck);
2124     deleteMetaRegion(conf, true, false, false);
2125     hbck = doFsck(conf, false);
2126     // ERROR_CODE.UNKNOWN is coming because we reportError with a message for the hbase:meta
2127     // inconsistency and whether we will be fixing it or not.
2128     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2129     hbck = doFsck(conf, true);
2130     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2131     hbck = doFsck(conf, false);
2132     assertNoErrors(hbck);
2133   }
2134 
2135   private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
2136       boolean regionInfoOnly) throws IOException, InterruptedException {
2137     HConnection connection = HConnectionManager.getConnection(conf);
2138     HRegionLocation metaLocation = connection.locateRegion(TableName.META_TABLE_NAME,
2139         HConstants.EMPTY_START_ROW);
2140     ServerName hsa = ServerName.valueOf(metaLocation.getHostnamePort(), 0L);
2141     HRegionInfo hri = metaLocation.getRegionInfo();
2142     if (unassign) {
2143       LOG.info("Undeploying meta region " + hri + " from server " + hsa);
2144       undeployRegion(new HBaseAdmin(conf), hsa, hri);
2145     }
2146 
2147     if (regionInfoOnly) {
2148       LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
2149       Path rootDir = FSUtils.getRootDir(conf);
2150       FileSystem fs = rootDir.getFileSystem(conf);
2151       Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
2152           hri.getEncodedName());
2153       Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
2154       fs.delete(hriPath, true);
2155     }
2156 
2157     if (hdfs) {
2158       LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
2159       Path rootDir = FSUtils.getRootDir(conf);
2160       FileSystem fs = rootDir.getFileSystem(conf);
2161       Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
2162           hri.getEncodedName());
2163       HBaseFsck.debugLsr(conf, p);
2164       boolean success = fs.delete(p, true);
2165       LOG.info("Deleted " + p + " sucessfully? " + success);
2166       HBaseFsck.debugLsr(conf, p);
2167     }
2168   }
2169 
2170   @Test
2171   public void testTableWithNoRegions() throws Exception {
2172     // We might end up with empty regions in a table
2173     // see also testNoHdfsTable()
2174     TableName table =
2175         TableName.valueOf(name.getMethodName());
2176     try {
2177       // create table with one region
2178       HTableDescriptor desc = new HTableDescriptor(table);
2179       HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
2180       desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
2181       TEST_UTIL.getHBaseAdmin().createTable(desc);
2182       tbl = new HTable(TEST_UTIL.getConfiguration(), table, executorService);
2183 
2184       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
2185       deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false,
2186           false, true);
2187 
2188       HBaseFsck hbck = doFsck(conf, false);
2189       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
2190 
2191       doFsck(conf, true);
2192 
2193       // fix hole
2194       doFsck(conf, true);
2195 
2196       // check that hole fixed
2197       assertNoErrors(doFsck(conf, false));
2198     } finally {
2199       deleteTable(table);
2200     }
2201 
2202   }
2203 
2204   @Test
2205   public void testHbckAfterRegionMerge() throws Exception {
2206     TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
2207     HTable meta = null;
2208     try {
2209       // disable CatalogJanitor
2210       TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
2211       setupTable(table);
2212       assertEquals(ROWKEYS.length, countRows());
2213 
2214       // make sure data in regions, if in hlog only there is no data loss
2215       TEST_UTIL.getHBaseAdmin().flush(table.getName());
2216       HRegionInfo region1 = tbl.getRegionLocation("A").getRegionInfo();
2217       HRegionInfo region2 = tbl.getRegionLocation("B").getRegionInfo();
2218 
2219       int regionCountBeforeMerge = tbl.getRegionLocations().size();
2220 
2221       assertNotEquals(region1, region2);
2222 
2223       // do a region merge
2224       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
2225       admin.mergeRegions(region1.getEncodedNameAsBytes(),
2226           region2.getEncodedNameAsBytes(), false);
2227 
2228       // wait until region merged
2229       long timeout = System.currentTimeMillis() + 30 * 1000;
2230       while (true) {
2231         if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
2232           break;
2233         } else if (System.currentTimeMillis() > timeout) {
2234           fail("Time out waiting on region " + region1.getEncodedName()
2235               + " and " + region2.getEncodedName() + " be merged");
2236         }
2237         Thread.sleep(10);
2238       }
2239 
2240       assertEquals(ROWKEYS.length, countRows());
2241 
2242       HBaseFsck hbck = doFsck(conf, false);
2243       assertNoErrors(hbck); // no errors
2244 
2245     } finally {
2246       TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
2247       deleteTable(table);
2248       IOUtils.closeQuietly(meta);
2249     }
2250   }
2251 
  /**
   * JUnit rule exposing the name of the currently running test method; several tests in
   * this class derive their table name from it via {@code name.getMethodName()}.
   */
  @org.junit.Rule
  public TestName name = new TestName();
2254 }