View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24  import static org.junit.Assert.assertEquals;
25  import static org.junit.Assert.assertFalse;
26  import static org.junit.Assert.assertNotEquals;
27  import static org.junit.Assert.assertNotNull;
28  import static org.junit.Assert.assertTrue;
29  import static org.junit.Assert.fail;
30  
31  import java.io.IOException;
32  import java.util.ArrayList;
33  import java.util.Collection;
34  import java.util.HashMap;
35  import java.util.LinkedList;
36  import java.util.List;
37  import java.util.Map;
38  import java.util.Map.Entry;
39  import java.util.concurrent.Callable;
40  import java.util.concurrent.CountDownLatch;
41  import java.util.concurrent.ExecutorService;
42  import java.util.concurrent.Executors;
43  import java.util.concurrent.Future;
44  import java.util.concurrent.ScheduledThreadPoolExecutor;
45  import java.util.concurrent.SynchronousQueue;
46  import java.util.concurrent.ThreadPoolExecutor;
47  import java.util.concurrent.TimeUnit;
48  
49  import org.apache.commons.io.IOUtils;
50  import org.apache.commons.logging.Log;
51  import org.apache.commons.logging.LogFactory;
52  import org.apache.hadoop.conf.Configuration;
53  import org.apache.hadoop.fs.FileStatus;
54  import org.apache.hadoop.fs.FileSystem;
55  import org.apache.hadoop.fs.Path;
56  import org.apache.hadoop.hbase.ClusterStatus;
57  import org.apache.hadoop.hbase.HBaseTestingUtility;
58  import org.apache.hadoop.hbase.HColumnDescriptor;
59  import org.apache.hadoop.hbase.HConstants;
60  import org.apache.hadoop.hbase.HRegionInfo;
61  import org.apache.hadoop.hbase.HRegionLocation;
62  import org.apache.hadoop.hbase.HTableDescriptor;
63  import org.apache.hadoop.hbase.LargeTests;
64  import org.apache.hadoop.hbase.MiniHBaseCluster;
65  import org.apache.hadoop.hbase.ServerName;
66  import org.apache.hadoop.hbase.TableName;
67  import org.apache.hadoop.hbase.catalog.MetaEditor;
68  import org.apache.hadoop.hbase.client.Delete;
69  import org.apache.hadoop.hbase.client.Durability;
70  import org.apache.hadoop.hbase.client.Get;
71  import org.apache.hadoop.hbase.client.HBaseAdmin;
72  import org.apache.hadoop.hbase.client.HConnection;
73  import org.apache.hadoop.hbase.client.HConnectionManager;
74  import org.apache.hadoop.hbase.client.HTable;
75  import org.apache.hadoop.hbase.client.MetaScanner;
76  import org.apache.hadoop.hbase.client.Put;
77  import org.apache.hadoop.hbase.client.Result;
78  import org.apache.hadoop.hbase.client.ResultScanner;
79  import org.apache.hadoop.hbase.client.Scan;
80  import org.apache.hadoop.hbase.io.hfile.TestHFile;
81  import org.apache.hadoop.hbase.master.AssignmentManager;
82  import org.apache.hadoop.hbase.master.HMaster;
83  import org.apache.hadoop.hbase.master.RegionStates;
84  import org.apache.hadoop.hbase.master.TableLockManager;
85  import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
86  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
87  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
88  import org.apache.hadoop.hbase.regionserver.HRegion;
89  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
90  import org.apache.hadoop.hbase.regionserver.HRegionServer;
91  import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
92  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
93  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
94  import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
95  import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
96  import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
97  import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
98  import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
99  import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
100 import org.apache.zookeeper.KeeperException;
101 import org.junit.AfterClass;
102 import org.junit.BeforeClass;
103 import org.junit.Ignore;
104 import org.junit.Test;
105 import org.junit.experimental.categories.Category;
106 import org.junit.rules.TestName;
107 
108 import com.google.common.collect.Multimap;
109 
110 /**
111  * This tests HBaseFsck's ability to detect reasons for inconsistent tables.
112  */
113 @Category(LargeTests.class)
114 public class TestHBaseFsck {
115   final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
116   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
117   private final static Configuration conf = TEST_UTIL.getConfiguration();
118   private final static String FAM_STR = "fam";
119   private final static byte[] FAM = Bytes.toBytes(FAM_STR);
120   private final static int REGION_ONLINE_TIMEOUT = 800;
121   private static RegionStates regionStates;
122   private static ExecutorService executorService;
123 
124   // for the instance, reset every test run
125   private HTable tbl;
126   private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
127     Bytes.toBytes("B"), Bytes.toBytes("C") };
128   // one row per region.
129   private final static byte[][] ROWKEYS= new byte[][] {
130     Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
131     Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
132 
133   @SuppressWarnings("deprecation")
134   @BeforeClass
135   public static void setUpBeforeClass() throws Exception {
136     TEST_UTIL.getConfiguration().setInt("hbase.regionserver.handler.count", 2);
137     TEST_UTIL.getConfiguration().setInt("hbase.regionserver.metahandler.count", 2);
138     TEST_UTIL.startMiniCluster(3);
139     TEST_UTIL.setHDFSClientRetry(0);
140 
141     executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
142         new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
143 
144     AssignmentManager assignmentManager =
145       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
146     regionStates = assignmentManager.getRegionStates();
147     TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
148   }
149 
150   @AfterClass
151   public static void tearDownAfterClass() throws Exception {
152     TEST_UTIL.shutdownMiniCluster();
153   }
154 
155   @Test
156   public void testHBaseFsck() throws Exception {
157     assertNoErrors(doFsck(conf, false));
158     String table = "tableBadMetaAssign";
159     TEST_UTIL.createTable(Bytes.toBytes(table), FAM);
160 
161     // We created 1 table, should be fine
162     assertNoErrors(doFsck(conf, false));
163 
164     // Now let's mess it up and change the assignment in hbase:meta to
165     // point to a different region server
166     HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName(),
167         executorService);
168     Scan scan = new Scan();
169     scan.setStartRow(Bytes.toBytes(table+",,"));
170     ResultScanner scanner = meta.getScanner(scan);
171     HRegionInfo hri = null;
172 
173     Result res = scanner.next();
174     ServerName currServer =
175       ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
176           HConstants.SERVER_QUALIFIER));
177     long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
178         HConstants.STARTCODE_QUALIFIER));
179 
180     for (JVMClusterUtil.RegionServerThread rs :
181         TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
182 
183       ServerName sn = rs.getRegionServer().getServerName();
184 
185       // When we find a diff RS, change the assignment and break
186       if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
187           startCode != sn.getStartcode()) {
188         Put put = new Put(res.getRow());
189         put.setDurability(Durability.SKIP_WAL);
190         put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
191           Bytes.toBytes(sn.getHostAndPort()));
192         put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
193           Bytes.toBytes(sn.getStartcode()));
194         meta.put(put);
195         hri = HRegionInfo.getHRegionInfo(res);
196         break;
197       }
198     }
199 
200     // Try to fix the data
201     assertErrors(doFsck(conf, true), new ERROR_CODE[]{
202         ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
203 
204     TEST_UTIL.getHBaseCluster().getMaster()
205       .getAssignmentManager().waitForAssignment(hri);
206 
207     // Should be fixed now
208     assertNoErrors(doFsck(conf, false));
209 
210     // comment needed - what is the purpose of this line
211     HTable t = new HTable(conf, Bytes.toBytes(table), executorService);
212     ResultScanner s = t.getScanner(new Scan());
213     s.close();
214     t.close();
215 
216     scanner.close();
217     meta.close();
218   }
219 
220   @Test(timeout=180000)
221   public void testFixAssignmentsWhenMETAinTransition() throws Exception {
222     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
223     HBaseAdmin admin = null;
224     try {
225       admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
226       admin.closeRegion(cluster.getServerHoldingMeta(),
227           HRegionInfo.FIRST_META_REGIONINFO);
228     } finally {
229       if (admin != null) {
230         admin.close();
231       }
232     }
233     regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
234     MetaRegionTracker.deleteMetaLocation(cluster.getMaster().getZooKeeper());
235     assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
236     HBaseFsck hbck = doFsck(conf, true);
237     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
238         ERROR_CODE.NULL_META_REGION });
239     assertNoErrors(doFsck(conf, false));
240   }
241 
242   /**
243    * Create a new region in META.
244    */
245   private HRegionInfo createRegion(Configuration conf, final HTableDescriptor
246       htd, byte[] startKey, byte[] endKey)
247       throws IOException {
248     HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
249     HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
250     MetaEditor.addRegionToMeta(meta, hri);
251     meta.close();
252     return hri;
253   }
254 
255   /**
256    * Debugging method to dump the contents of meta.
257    */
258   private void dumpMeta(TableName tableName) throws IOException {
259     List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
260     for (byte[] row : metaRows) {
261       LOG.info(Bytes.toString(row));
262     }
263   }
264 
265   /**
266    * This method is used to undeploy a region -- close it and attempt to
267    * remove its state from the Master.
268    */
269   private void undeployRegion(HBaseAdmin admin, ServerName sn,
270       HRegionInfo hri) throws IOException, InterruptedException {
271     try {
272       HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
273       if (!hri.isMetaTable()) {
274         admin.offline(hri.getRegionName());
275       }
276     } catch (IOException ioe) {
277       LOG.warn("Got exception when attempting to offline region "
278           + Bytes.toString(hri.getRegionName()), ioe);
279     }
280   }
281   /**
282    * Delete a region from assignments, meta, or completely from hdfs.
283    * @param unassign if true unassign region if assigned
284    * @param metaRow  if true remove region's row from META
285    * @param hdfs if true remove region's dir in HDFS
286    */
287   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
288       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
289       boolean hdfs) throws IOException, InterruptedException {
290     deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
291   }
292 
293   /**
294    * Delete a region from assignments, meta, or completely from hdfs.
295    * @param unassign if true unassign region if assigned
296    * @param metaRow  if true remove region's row from META
297    * @param hdfs if true remove region's dir in HDFS
298    * @param regionInfoOnly if true remove a region dir's .regioninfo file
299    */
300   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
301       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
302       boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
303     LOG.info("** Before delete:");
304     dumpMeta(htd.getTableName());
305 
306     Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
307     for (Entry<HRegionInfo, ServerName> e: hris.entrySet()) {
308       HRegionInfo hri = e.getKey();
309       ServerName hsa = e.getValue();
310       if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
311           && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
312 
313         LOG.info("RegionName: " +hri.getRegionNameAsString());
314         byte[] deleteRow = hri.getRegionName();
315 
316         if (unassign) {
317           LOG.info("Undeploying region " + hri + " from server " + hsa);
318           undeployRegion(new HBaseAdmin(conf), hsa, hri);
319         }
320 
321         if (regionInfoOnly) {
322           LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
323           Path rootDir = FSUtils.getRootDir(conf);
324           FileSystem fs = rootDir.getFileSystem(conf);
325           Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
326               hri.getEncodedName());
327           Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
328           fs.delete(hriPath, true);
329         }
330 
331         if (hdfs) {
332           LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
333           Path rootDir = FSUtils.getRootDir(conf);
334           FileSystem fs = rootDir.getFileSystem(conf);
335           Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
336               hri.getEncodedName());
337           HBaseFsck.debugLsr(conf, p);
338           boolean success = fs.delete(p, true);
339           LOG.info("Deleted " + p + " sucessfully? " + success);
340           HBaseFsck.debugLsr(conf, p);
341         }
342 
343         if (metaRow) {
344           HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
345           Delete delete = new Delete(deleteRow);
346           meta.delete(delete);
347         }
348       }
349       LOG.info(hri.toString() + hsa.toString());
350     }
351 
352     TEST_UTIL.getMetaTableRows(htd.getTableName());
353     LOG.info("*** After delete:");
354     dumpMeta(htd.getTableName());
355   }
356 
357   /**
358    * Setup a clean table before we start mucking with it.
359    *
360    * @throws IOException
361    * @throws InterruptedException
362    * @throws KeeperException
363    */
364   HTable setupTable(TableName tablename) throws Exception {
365     HTableDescriptor desc = new HTableDescriptor(tablename);
366     HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
367     desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
368     TEST_UTIL.getHBaseAdmin().createTable(desc, SPLITS);
369     tbl = new HTable(TEST_UTIL.getConfiguration(), tablename, executorService);
370 
371     List<Put> puts = new ArrayList<Put>();
372     for (byte[] row : ROWKEYS) {
373       Put p = new Put(row);
374       p.add(FAM, Bytes.toBytes("val"), row);
375       puts.add(p);
376     }
377     tbl.put(puts);
378     tbl.flushCommits();
379     return tbl;
380   }
381 
382   /**
383    * Counts the number of row to verify data loss or non-dataloss.
384    */
385   int countRows() throws IOException {
386      Scan s = new Scan();
387      ResultScanner rs = tbl.getScanner(s);
388      int i = 0;
389      while(rs.next() !=null) {
390        i++;
391      }
392      return i;
393   }
394 
395   /**
396    * delete table in preparation for next test
397    *
398    * @param tablename
399    * @throws IOException
400    */
401   void deleteTable(TableName tablename) throws IOException {
402     HBaseAdmin admin = new HBaseAdmin(conf);
403     admin.getConnection().clearRegionCache();
404     if (admin.isTableEnabled(tablename)) {
405       admin.disableTableAsync(tablename);
406     }
407     long totalWait = 0;
408     long maxWait = 30*1000;
409     long sleepTime = 250;
410     while (!admin.isTableDisabled(tablename)) {
411       try {
412         Thread.sleep(sleepTime);
413         totalWait += sleepTime;
414         if (totalWait >= maxWait) {
415           fail("Waited too long for table to be disabled + " + tablename);
416         }
417       } catch (InterruptedException e) {
418         e.printStackTrace();
419         fail("Interrupted when trying to disable table " + tablename);
420       }
421     }
422     admin.deleteTable(tablename);
423   }
424 
425   /**
426    * This creates a clean table and confirms that the table is clean.
427    */
428   @Test
429   public void testHBaseFsckClean() throws Exception {
430     assertNoErrors(doFsck(conf, false));
431     TableName table = TableName.valueOf("tableClean");
432     try {
433       HBaseFsck hbck = doFsck(conf, false);
434       assertNoErrors(hbck);
435 
436       setupTable(table);
437       assertEquals(ROWKEYS.length, countRows());
438 
439       // We created 1 table, should be fine
440       hbck = doFsck(conf, false);
441       assertNoErrors(hbck);
442       assertEquals(0, hbck.getOverlapGroups(table).size());
443       assertEquals(ROWKEYS.length, countRows());
444     } finally {
445       deleteTable(table);
446     }
447   }
448 
449   /**
450    * Test thread pooling in the case where there are more regions than threads
451    */
452   @Test
453   public void testHbckThreadpooling() throws Exception {
454     TableName table =
455         TableName.valueOf("tableDupeStartKey");
456     try {
457       // Create table with 4 regions
458       setupTable(table);
459 
460       // limit number of threads to 1.
461       Configuration newconf = new Configuration(conf);
462       newconf.setInt("hbasefsck.numthreads", 1);
463       assertNoErrors(doFsck(newconf, false));
464 
465       // We should pass without triggering a RejectedExecutionException
466     } finally {
467       deleteTable(table);
468     }
469   }
470 
471   @Test
472   public void testHbckFixOrphanTable() throws Exception {
473     TableName table = TableName.valueOf("tableInfo");
474     FileSystem fs = null;
475     Path tableinfo = null;
476     try {
477       setupTable(table);
478       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
479 
480       Path hbaseTableDir = FSUtils.getTableDir(
481           FSUtils.getRootDir(conf), table);
482       fs = hbaseTableDir.getFileSystem(conf);
483       FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
484       tableinfo = status.getPath();
485       fs.rename(tableinfo, new Path("/.tableinfo"));
486 
487       //to report error if .tableinfo is missing.
488       HBaseFsck hbck = doFsck(conf, false);
489       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
490 
491       // fix OrphanTable with default .tableinfo (htd not yet cached on master)
492       hbck = doFsck(conf, true);
493       assertNoErrors(hbck);
494       status = null;
495       status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
496       assertNotNull(status);
497 
498       HTableDescriptor htd = admin.getTableDescriptor(table);
499       htd.setValue("NOT_DEFAULT", "true");
500       admin.disableTable(table);
501       admin.modifyTable(table, htd);
502       admin.enableTable(table);
503       fs.delete(status.getPath(), true);
504 
505       // fix OrphanTable with cache
506       htd = admin.getTableDescriptor(table); // warms up cached htd on master
507       hbck = doFsck(conf, true);
508       assertNoErrors(hbck);
509       status = null;
510       status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
511       assertNotNull(status);
512       htd = admin.getTableDescriptor(table);
513       assertEquals(htd.getValue("NOT_DEFAULT"), "true");
514     } finally {
515       fs.rename(new Path("/.tableinfo"), tableinfo);
516       deleteTable(table);
517     }
518   }
519 
520   /**
521    * This test makes sure that parallel instances of Hbck is disabled.
522    *
523    * @throws Exception
524    */
525   @Test
526   public void testParallelHbck() throws Exception {
527     final ExecutorService service;
528     final Future<HBaseFsck> hbck1,hbck2;
529 
530     class RunHbck implements Callable<HBaseFsck>{
531       boolean fail = true;
532       public HBaseFsck call(){
533         try{
534           return doFsck(conf, false);
535         } catch(Exception e){
536           if (e.getMessage().contains("Duplicate hbck")) {
537             fail = false;
538           } else {
539             LOG.fatal("hbck failed.", e);
540           }
541         }
542         // If we reach here, then an exception was caught
543         if (fail) fail();
544         return null;
545       }
546     }
547     service = Executors.newFixedThreadPool(2);
548     hbck1 = service.submit(new RunHbck());
549     hbck2 = service.submit(new RunHbck());
550     service.shutdown();
551     //wait for 15 seconds, for both hbck calls finish
552     service.awaitTermination(15, TimeUnit.SECONDS);
553     HBaseFsck h1 = hbck1.get();
554     HBaseFsck h2 = hbck2.get();
555     // Make sure only one of the calls was successful
556     assert(h1 == null || h2 == null);
557     if (h1 != null) {
558       assert(h1.getRetCode() >= 0);
559     }
560     if (h2 != null) {
561       assert(h2.getRetCode() >= 0);
562     }
563   }
564 
565   /**
566    * This create and fixes a bad table with regions that have a duplicate
567    * start key
568    */
569   @Test
570   public void testDupeStartKey() throws Exception {
571     TableName table =
572         TableName.valueOf("tableDupeStartKey");
573     try {
574       setupTable(table);
575       assertNoErrors(doFsck(conf, false));
576       assertEquals(ROWKEYS.length, countRows());
577 
578       // Now let's mess it up, by adding a region with a duplicate startkey
579       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
580           Bytes.toBytes("A"), Bytes.toBytes("A2"));
581       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
582       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
583           .waitForAssignment(hriDupe);
584       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
585       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
586 
587       HBaseFsck hbck = doFsck(conf, false);
588       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
589             ERROR_CODE.DUPE_STARTKEYS});
590       assertEquals(2, hbck.getOverlapGroups(table).size());
591       assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
592 
593       // fix the degenerate region.
594       doFsck(conf,true);
595 
596       // check that the degenerate region is gone and no data loss
597       HBaseFsck hbck2 = doFsck(conf,false);
598       assertNoErrors(hbck2);
599       assertEquals(0, hbck2.getOverlapGroups(table).size());
600       assertEquals(ROWKEYS.length, countRows());
601     } finally {
602       deleteTable(table);
603     }
604   }
605 
606   /**
607    * Get region info from local cluster.
608    */
609   Map<ServerName, List<String>> getDeployedHRIs(
610       final HBaseAdmin admin) throws IOException {
611     ClusterStatus status = admin.getClusterStatus();
612     Collection<ServerName> regionServers = status.getServers();
613     Map<ServerName, List<String>> mm =
614         new HashMap<ServerName, List<String>>();
615     HConnection connection = admin.getConnection();
616     for (ServerName hsi : regionServers) {
617       AdminProtos.AdminService.BlockingInterface server = connection.getAdmin(hsi);
618 
619       // list all online regions from this region server
620       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
621       List<String> regionNames = new ArrayList<String>();
622       for (HRegionInfo hri : regions) {
623         regionNames.add(hri.getRegionNameAsString());
624       }
625       mm.put(hsi, regionNames);
626     }
627     return mm;
628   }
629 
630   /**
631    * Returns the HSI a region info is on.
632    */
633   ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
634     for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
635       if (e.getValue().contains(hri.getRegionNameAsString())) {
636         return e.getKey();
637       }
638     }
639     return null;
640   }
641 
642   /**
643    * This create and fixes a bad table with regions that have a duplicate
644    * start key
645    */
646   @Test
647   public void testDupeRegion() throws Exception {
648     TableName table =
649         TableName.valueOf("tableDupeRegion");
650     try {
651       setupTable(table);
652       assertNoErrors(doFsck(conf, false));
653       assertEquals(ROWKEYS.length, countRows());
654 
655       // Now let's mess it up, by adding a region with a duplicate startkey
656       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
657           Bytes.toBytes("A"), Bytes.toBytes("B"));
658 
659       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
660       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
661           .waitForAssignment(hriDupe);
662       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
663       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
664 
665       // Yikes! The assignment manager can't tell between diff between two
666       // different regions with the same start/endkeys since it doesn't
667       // differentiate on ts/regionId!  We actually need to recheck
668       // deployments!
669       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
670       while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
671         Thread.sleep(250);
672       }
673 
674       LOG.debug("Finished assignment of dupe region");
675 
676       // TODO why is dupe region different from dupe start keys?
677       HBaseFsck hbck = doFsck(conf, false);
678       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
679             ERROR_CODE.DUPE_STARTKEYS});
680       assertEquals(2, hbck.getOverlapGroups(table).size());
681       assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
682 
683       // fix the degenerate region.
684       doFsck(conf,true);
685 
686       // check that the degenerate region is gone and no data loss
687       HBaseFsck hbck2 = doFsck(conf,false);
688       assertNoErrors(hbck2);
689       assertEquals(0, hbck2.getOverlapGroups(table).size());
690       assertEquals(ROWKEYS.length, countRows());
691     } finally {
692       deleteTable(table);
693     }
694   }
695 
696   /**
697    * This creates and fixes a bad table with regions that has startkey == endkey
698    */
699   @Test
700   public void testDegenerateRegions() throws Exception {
701     TableName table =
702         TableName.valueOf("tableDegenerateRegions");
703     try {
704       setupTable(table);
705       assertNoErrors(doFsck(conf,false));
706       assertEquals(ROWKEYS.length, countRows());
707 
708       // Now let's mess it up, by adding a region with a duplicate startkey
709       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
710           Bytes.toBytes("B"), Bytes.toBytes("B"));
711       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
712       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
713           .waitForAssignment(hriDupe);
714       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
715       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
716 
717       HBaseFsck hbck = doFsck(conf,false);
718       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION,
719           ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS});
720       assertEquals(2, hbck.getOverlapGroups(table).size());
721       assertEquals(ROWKEYS.length, countRows());
722 
723       // fix the degenerate region.
724       doFsck(conf,true);
725 
726       // check that the degenerate region is gone and no data loss
727       HBaseFsck hbck2 = doFsck(conf,false);
728       assertNoErrors(hbck2);
729       assertEquals(0, hbck2.getOverlapGroups(table).size());
730       assertEquals(ROWKEYS.length, countRows());
731     } finally {
732       deleteTable(table);
733     }
734   }
735 
736   /**
737    * This creates and fixes a bad table where a region is completely contained
738    * by another region.
739    */
740   @Test
741   public void testContainedRegionOverlap() throws Exception {
742     TableName table =
743         TableName.valueOf("tableContainedRegionOverlap");
744     try {
745       setupTable(table);
746       assertEquals(ROWKEYS.length, countRows());
747 
748       // Mess it up by creating an overlap in the metadata
749       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
750           Bytes.toBytes("A2"), Bytes.toBytes("B"));
751       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
752       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
753           .waitForAssignment(hriOverlap);
754       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
755       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
756 
757       HBaseFsck hbck = doFsck(conf, false);
758       assertErrors(hbck, new ERROR_CODE[] {
759           ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
760       assertEquals(2, hbck.getOverlapGroups(table).size());
761       assertEquals(ROWKEYS.length, countRows());
762 
763       // fix the problem.
764       doFsck(conf, true);
765 
766       // verify that overlaps are fixed
767       HBaseFsck hbck2 = doFsck(conf,false);
768       assertNoErrors(hbck2);
769       assertEquals(0, hbck2.getOverlapGroups(table).size());
770       assertEquals(ROWKEYS.length, countRows());
771     } finally {
772        deleteTable(table);
773     }
774   }
775 
776   /**
777    * This creates and fixes a bad table where an overlap group of
778    * 3 regions. Set HBaseFsck.maxMerge to 2 to trigger sideline overlapped
779    * region. Mess around the meta data so that closeRegion/offlineRegion
780    * throws exceptions.
781    */
782   @Test
783   public void testSidelineOverlapRegion() throws Exception {
784     TableName table =
785         TableName.valueOf("testSidelineOverlapRegion");
786     try {
787       setupTable(table);
788       assertEquals(ROWKEYS.length, countRows());
789 
790       // Mess it up by creating an overlap
791       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
792       HMaster master = cluster.getMaster();
793       HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(),
794         Bytes.toBytes("A"), Bytes.toBytes("AB"));
795       master.assignRegion(hriOverlap1);
796       master.getAssignmentManager().waitForAssignment(hriOverlap1);
797       HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(),
798         Bytes.toBytes("AB"), Bytes.toBytes("B"));
799       master.assignRegion(hriOverlap2);
800       master.getAssignmentManager().waitForAssignment(hriOverlap2);
801 
802       HBaseFsck hbck = doFsck(conf, false);
803       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
804         ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
805       assertEquals(3, hbck.getOverlapGroups(table).size());
806       assertEquals(ROWKEYS.length, countRows());
807 
808       // mess around the overlapped regions, to trigger NotServingRegionException
809       Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
810       ServerName serverName = null;
811       byte[] regionName = null;
812       for (HbckInfo hbi: overlapGroups.values()) {
813         if ("A".equals(Bytes.toString(hbi.getStartKey()))
814             && "B".equals(Bytes.toString(hbi.getEndKey()))) {
815           regionName = hbi.getRegionName();
816 
817           // get an RS not serving the region to force bad assignment info in to META.
818           int k = cluster.getServerWith(regionName);
819           for (int i = 0; i < 3; i++) {
820             if (i != k) {
821               HRegionServer rs = cluster.getRegionServer(i);
822               serverName = rs.getServerName();
823               break;
824             }
825           }
826 
827           HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
828           HBaseFsckRepair.closeRegionSilentlyAndWait(admin,
829             cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
830           admin.offline(regionName);
831           break;
832         }
833       }
834 
835       assertNotNull(regionName);
836       assertNotNull(serverName);
837       HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
838       Put put = new Put(regionName);
839       put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
840         Bytes.toBytes(serverName.getHostAndPort()));
841       meta.put(put);
842 
843       // fix the problem.
844       HBaseFsck fsck = new HBaseFsck(conf);
845       fsck.connect();
846       fsck.setDisplayFullReport(); // i.e. -details
847       fsck.setTimeLag(0);
848       fsck.setFixAssignments(true);
849       fsck.setFixMeta(true);
850       fsck.setFixHdfsHoles(true);
851       fsck.setFixHdfsOverlaps(true);
852       fsck.setFixHdfsOrphans(true);
853       fsck.setFixVersionFile(true);
854       fsck.setSidelineBigOverlaps(true);
855       fsck.setMaxMerge(2);
856       fsck.onlineHbck();
857 
858       // verify that overlaps are fixed, and there are less rows
859       // since one region is sidelined.
860       HBaseFsck hbck2 = doFsck(conf,false);
861       assertNoErrors(hbck2);
862       assertEquals(0, hbck2.getOverlapGroups(table).size());
863       assertTrue(ROWKEYS.length > countRows());
864     } finally {
865        deleteTable(table);
866     }
867   }
868 
869   /**
870    * This creates and fixes a bad table where a region is completely contained
871    * by another region, and there is a hole (sort of like a bad split)
872    */
873   @Test
874   public void testOverlapAndOrphan() throws Exception {
875     TableName table =
876         TableName.valueOf("tableOverlapAndOrphan");
877     try {
878       setupTable(table);
879       assertEquals(ROWKEYS.length, countRows());
880 
881       // Mess it up by creating an overlap in the metadata
882       TEST_UTIL.getHBaseAdmin().disableTable(table);
883       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
884           Bytes.toBytes("B"), true, true, false, true);
885       TEST_UTIL.getHBaseAdmin().enableTable(table);
886 
887       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
888           Bytes.toBytes("A2"), Bytes.toBytes("B"));
889       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
890       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
891           .waitForAssignment(hriOverlap);
892       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
893       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
894 
895       HBaseFsck hbck = doFsck(conf, false);
896       assertErrors(hbck, new ERROR_CODE[] {
897           ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
898           ERROR_CODE.HOLE_IN_REGION_CHAIN});
899 
900       // fix the problem.
901       doFsck(conf, true);
902 
903       // verify that overlaps are fixed
904       HBaseFsck hbck2 = doFsck(conf,false);
905       assertNoErrors(hbck2);
906       assertEquals(0, hbck2.getOverlapGroups(table).size());
907       assertEquals(ROWKEYS.length, countRows());
908     } finally {
909        deleteTable(table);
910     }
911   }
912 
913   /**
914    * This creates and fixes a bad table where a region overlaps two regions --
915    * a start key contained in another region and its end key is contained in
916    * yet another region.
917    */
918   @Test
919   public void testCoveredStartKey() throws Exception {
920     TableName table =
921         TableName.valueOf("tableCoveredStartKey");
922     try {
923       setupTable(table);
924       assertEquals(ROWKEYS.length, countRows());
925 
926       // Mess it up by creating an overlap in the metadata
927       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
928           Bytes.toBytes("A2"), Bytes.toBytes("B2"));
929       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
930       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
931           .waitForAssignment(hriOverlap);
932       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
933       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
934 
935       HBaseFsck hbck = doFsck(conf, false);
936       assertErrors(hbck, new ERROR_CODE[] {
937           ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
938           ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
939       assertEquals(3, hbck.getOverlapGroups(table).size());
940       assertEquals(ROWKEYS.length, countRows());
941 
942       // fix the problem.
943       doFsck(conf, true);
944 
945       // verify that overlaps are fixed
946       HBaseFsck hbck2 = doFsck(conf, false);
947       assertErrors(hbck2, new ERROR_CODE[0]);
948       assertEquals(0, hbck2.getOverlapGroups(table).size());
949       assertEquals(ROWKEYS.length, countRows());
950     } finally {
951       deleteTable(table);
952     }
953   }
954 
955   /**
956    * This creates and fixes a bad table with a missing region -- hole in meta
957    * and data missing in the fs.
958    */
959   @Test
960   public void testRegionHole() throws Exception {
961     TableName table =
962         TableName.valueOf("tableRegionHole");
963     try {
964       setupTable(table);
965       assertEquals(ROWKEYS.length, countRows());
966 
967       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
968       TEST_UTIL.getHBaseAdmin().disableTable(table);
969       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
970           Bytes.toBytes("C"), true, true, true);
971       TEST_UTIL.getHBaseAdmin().enableTable(table);
972 
973       HBaseFsck hbck = doFsck(conf, false);
974       assertErrors(hbck, new ERROR_CODE[] {
975           ERROR_CODE.HOLE_IN_REGION_CHAIN});
976       // holes are separate from overlap groups
977       assertEquals(0, hbck.getOverlapGroups(table).size());
978 
979       // fix hole
980       doFsck(conf, true);
981 
982       // check that hole fixed
983       assertNoErrors(doFsck(conf,false));
984       assertEquals(ROWKEYS.length - 2 , countRows()); // lost a region so lost a row
985     } finally {
986       deleteTable(table);
987     }
988   }
989 
990   /**
991    * This creates and fixes a bad table with a missing region -- hole in meta
992    * and data present but .regioinfino missing (an orphan hdfs region)in the fs.
993    */
994   @Test
995   public void testHDFSRegioninfoMissing() throws Exception {
996     TableName table =
997         TableName.valueOf("tableHDFSRegioininfoMissing");
998     try {
999       setupTable(table);
1000       assertEquals(ROWKEYS.length, countRows());
1001 
1002       // Mess it up by leaving a hole in the meta data
1003       TEST_UTIL.getHBaseAdmin().disableTable(table);
1004       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1005           Bytes.toBytes("C"), true, true, false, true);
1006       TEST_UTIL.getHBaseAdmin().enableTable(table);
1007 
1008       HBaseFsck hbck = doFsck(conf, false);
1009       assertErrors(hbck, new ERROR_CODE[] {
1010           ERROR_CODE.ORPHAN_HDFS_REGION,
1011           ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1012           ERROR_CODE.HOLE_IN_REGION_CHAIN});
1013       // holes are separate from overlap groups
1014       assertEquals(0, hbck.getOverlapGroups(table).size());
1015 
1016       // fix hole
1017       doFsck(conf, true);
1018 
1019       // check that hole fixed
1020       assertNoErrors(doFsck(conf, false));
1021       assertEquals(ROWKEYS.length, countRows());
1022     } finally {
1023       deleteTable(table);
1024     }
1025   }
1026 
1027   /**
1028    * This creates and fixes a bad table with a region that is missing meta and
1029    * not assigned to a region server.
1030    */
1031   @Test
1032   public void testNotInMetaOrDeployedHole() throws Exception {
1033     TableName table =
1034         TableName.valueOf("tableNotInMetaOrDeployedHole");
1035     try {
1036       setupTable(table);
1037       assertEquals(ROWKEYS.length, countRows());
1038 
1039       // Mess it up by leaving a hole in the meta data
1040       TEST_UTIL.getHBaseAdmin().disableTable(table);
1041       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1042           Bytes.toBytes("C"), true, true, false); // don't rm from fs
1043       TEST_UTIL.getHBaseAdmin().enableTable(table);
1044 
1045       HBaseFsck hbck = doFsck(conf, false);
1046       assertErrors(hbck, new ERROR_CODE[] {
1047           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1048       // holes are separate from overlap groups
1049       assertEquals(0, hbck.getOverlapGroups(table).size());
1050 
1051       // fix hole
1052       assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1053           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1054 
1055       // check that hole fixed
1056       assertNoErrors(doFsck(conf,false));
1057       assertEquals(ROWKEYS.length, countRows());
1058     } finally {
1059       deleteTable(table);
1060     }
1061   }
1062 
1063   /**
1064    * This creates fixes a bad table with a hole in meta.
1065    */
1066   @Test
1067   public void testNotInMetaHole() throws Exception {
1068     TableName table =
1069         TableName.valueOf("tableNotInMetaHole");
1070     try {
1071       setupTable(table);
1072       assertEquals(ROWKEYS.length, countRows());
1073 
1074       // Mess it up by leaving a hole in the meta data
1075       TEST_UTIL.getHBaseAdmin().disableTable(table);
1076       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1077           Bytes.toBytes("C"), false, true, false); // don't rm from fs
1078       TEST_UTIL.getHBaseAdmin().enableTable(table);
1079 
1080       HBaseFsck hbck = doFsck(conf, false);
1081       assertErrors(hbck, new ERROR_CODE[] {
1082           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1083       // holes are separate from overlap groups
1084       assertEquals(0, hbck.getOverlapGroups(table).size());
1085 
1086       // fix hole
1087       assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1088           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1089 
1090       // check that hole fixed
1091       assertNoErrors(doFsck(conf,false));
1092       assertEquals(ROWKEYS.length, countRows());
1093     } finally {
1094       deleteTable(table);
1095     }
1096   }
1097 
1098   /**
1099    * This creates and fixes a bad table with a region that is in meta but has
1100    * no deployment or data hdfs
1101    */
1102   @Test
1103   public void testNotInHdfs() throws Exception {
1104     TableName table =
1105         TableName.valueOf("tableNotInHdfs");
1106     try {
1107       setupTable(table);
1108       assertEquals(ROWKEYS.length, countRows());
1109 
1110       // make sure data in regions, if in hlog only there is no data loss
1111       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1112 
1113       // Mess it up by leaving a hole in the hdfs data
1114       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1115           Bytes.toBytes("C"), false, false, true); // don't rm meta
1116 
1117       HBaseFsck hbck = doFsck(conf, false);
1118       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1119       // holes are separate from overlap groups
1120       assertEquals(0, hbck.getOverlapGroups(table).size());
1121 
1122       // fix hole
1123       doFsck(conf, true);
1124 
1125       // check that hole fixed
1126       assertNoErrors(doFsck(conf,false));
1127       assertEquals(ROWKEYS.length - 2, countRows());
1128     } finally {
1129       deleteTable(table);
1130     }
1131   }
1132 
1133   /**
1134    * This creates entries in hbase:meta with no hdfs data.  This should cleanly
1135    * remove the table.
1136    */
1137   @Test
1138   public void testNoHdfsTable() throws Exception {
1139     TableName table = TableName.valueOf("NoHdfsTable");
1140     setupTable(table);
1141     assertEquals(ROWKEYS.length, countRows());
1142 
1143     // make sure data in regions, if in hlog only there is no data loss
1144     TEST_UTIL.getHBaseAdmin().flush(table.getName());
1145 
1146     // Mess it up by deleting hdfs dirs
1147     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1148         Bytes.toBytes("A"), false, false, true); // don't rm meta
1149     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1150         Bytes.toBytes("B"), false, false, true); // don't rm meta
1151     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1152         Bytes.toBytes("C"), false, false, true); // don't rm meta
1153     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1154         Bytes.toBytes(""), false, false, true); // don't rm meta
1155 
1156     // also remove the table directory in hdfs
1157     deleteTableDir(table);
1158 
1159     HBaseFsck hbck = doFsck(conf, false);
1160     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1161         ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1162         ERROR_CODE.NOT_IN_HDFS,});
1163     // holes are separate from overlap groups
1164     assertEquals(0, hbck.getOverlapGroups(table).size());
1165 
1166     // fix hole
1167     doFsck(conf, true); // detect dangling regions and remove those
1168 
1169     // check that hole fixed
1170     assertNoErrors(doFsck(conf,false));
1171     assertFalse("Table "+ table + " should have been deleted",
1172         TEST_UTIL.getHBaseAdmin().tableExists(table));
1173   }
1174 
1175   public void deleteTableDir(TableName table) throws IOException {
1176     Path rootDir = FSUtils.getRootDir(conf);
1177     FileSystem fs = rootDir.getFileSystem(conf);
1178     Path p = FSUtils.getTableDir(rootDir, table);
1179     HBaseFsck.debugLsr(conf, p);
1180     boolean success = fs.delete(p, true);
1181     LOG.info("Deleted " + p + " sucessfully? " + success);
1182   }
1183 
1184   /**
1185    * when the hbase.version file missing, It is fix the fault.
1186    */
1187   @Test
1188   public void testNoVersionFile() throws Exception {
1189     // delete the hbase.version file
1190     Path rootDir = FSUtils.getRootDir(conf);
1191     FileSystem fs = rootDir.getFileSystem(conf);
1192     Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1193     fs.delete(versionFile, true);
1194 
1195     // test
1196     HBaseFsck hbck = doFsck(conf, false);
1197     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1198     // fix hbase.version missing
1199     doFsck(conf, true);
1200 
1201     // no version file fixed
1202     assertNoErrors(doFsck(conf, false));
1203   }
1204 
1205   /**
1206    * The region is not deployed when the table is disabled.
1207    */
1208   @Test
1209   public void testRegionShouldNotBeDeployed() throws Exception {
1210     TableName table =
1211         TableName.valueOf("tableRegionShouldNotBeDeployed");
1212     try {
1213       LOG.info("Starting testRegionShouldNotBeDeployed.");
1214       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1215       assertTrue(cluster.waitForActiveAndReadyMaster());
1216 
1217 
1218       byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1219           Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1220       HTableDescriptor htdDisabled = new HTableDescriptor(table);
1221       htdDisabled.addFamily(new HColumnDescriptor(FAM));
1222 
1223       // Write the .tableinfo
1224       FSTableDescriptors fstd = new FSTableDescriptors(conf);
1225       fstd.createTableDescriptor(htdDisabled);
1226       List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
1227           TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
1228 
1229       // Let's just assign everything to first RS
1230       HRegionServer hrs = cluster.getRegionServer(0);
1231 
1232       // Create region files.
1233       TEST_UTIL.getHBaseAdmin().disableTable(table);
1234       TEST_UTIL.getHBaseAdmin().enableTable(table);
1235 
1236       // Disable the table and close its regions
1237       TEST_UTIL.getHBaseAdmin().disableTable(table);
1238       HRegionInfo region = disabledRegions.remove(0);
1239       byte[] regionName = region.getRegionName();
1240 
1241       // The region should not be assigned currently
1242       assertTrue(cluster.getServerWith(regionName) == -1);
1243 
1244       // Directly open a region on a region server.
1245       // If going through AM/ZK, the region won't be open.
1246       // Even it is opened, AM will close it which causes
1247       // flakiness of this test.
1248       HRegion r = HRegion.openHRegion(
1249         region, htdDisabled, hrs.getWAL(region), conf);
1250       hrs.addToOnlineRegions(r);
1251 
1252       HBaseFsck hbck = doFsck(conf, false);
1253       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1254 
1255       // fix this fault
1256       doFsck(conf, true);
1257 
1258       // check result
1259       assertNoErrors(doFsck(conf, false));
1260     } finally {
1261       TEST_UTIL.getHBaseAdmin().enableTable(table);
1262       deleteTable(table);
1263     }
1264   }
1265 
1266   /**
1267    * This creates two tables and mess both of them and fix them one by one
1268    */
1269   @Test
1270   public void testFixByTable() throws Exception {
1271     TableName table1 =
1272         TableName.valueOf("testFixByTable1");
1273     TableName table2 =
1274         TableName.valueOf("testFixByTable2");
1275     try {
1276       setupTable(table1);
1277       // make sure data in regions, if in hlog only there is no data loss
1278       TEST_UTIL.getHBaseAdmin().flush(table1.getName());
1279       // Mess them up by leaving a hole in the hdfs data
1280       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1281         Bytes.toBytes("C"), false, false, true); // don't rm meta
1282 
1283       setupTable(table2);
1284       // make sure data in regions, if in hlog only there is no data loss
1285       TEST_UTIL.getHBaseAdmin().flush(table2.getName());
1286       // Mess them up by leaving a hole in the hdfs data
1287       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1288         Bytes.toBytes("C"), false, false, true); // don't rm meta
1289 
1290       HBaseFsck hbck = doFsck(conf, false);
1291       assertErrors(hbck, new ERROR_CODE[] {
1292         ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1293 
1294       // fix hole in table 1
1295       doFsck(conf, true, table1);
1296       // check that hole in table 1 fixed
1297       assertNoErrors(doFsck(conf, false, table1));
1298       // check that hole in table 2 still there
1299       assertErrors(doFsck(conf, false, table2),
1300         new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1301 
1302       // fix hole in table 2
1303       doFsck(conf, true, table2);
1304       // check that hole in both tables fixed
1305       assertNoErrors(doFsck(conf, false));
1306       assertEquals(ROWKEYS.length - 2, countRows());
1307     } finally {
1308       deleteTable(table1);
1309       deleteTable(table2);
1310     }
1311   }
1312   /**
1313    * A split parent in meta, in hdfs, and not deployed
1314    */
1315   @Test
1316   public void testLingeringSplitParent() throws Exception {
1317     TableName table =
1318         TableName.valueOf("testLingeringSplitParent");
1319     HTable meta = null;
1320     try {
1321       setupTable(table);
1322       assertEquals(ROWKEYS.length, countRows());
1323 
1324       // make sure data in regions, if in hlog only there is no data loss
1325       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1326       HRegionLocation location = tbl.getRegionLocation("B");
1327 
1328       // Delete one region from meta, but not hdfs, unassign it.
1329       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1330         Bytes.toBytes("C"), true, true, false);
1331 
1332       // Create a new meta entry to fake it as a split parent.
1333       meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName(),
1334           executorService);
1335       HRegionInfo hri = location.getRegionInfo();
1336 
1337       HRegionInfo a = new HRegionInfo(tbl.getName(),
1338         Bytes.toBytes("B"), Bytes.toBytes("BM"));
1339       HRegionInfo b = new HRegionInfo(tbl.getName(),
1340         Bytes.toBytes("BM"), Bytes.toBytes("C"));
1341 
1342       hri.setOffline(true);
1343       hri.setSplit(true);
1344 
1345       MetaEditor.addRegionToMeta(meta, hri, a, b);
1346       meta.flushCommits();
1347       TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1348 
1349       HBaseFsck hbck = doFsck(conf, false);
1350       assertErrors(hbck, new ERROR_CODE[] {
1351         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1352 
1353       // regular repair cannot fix lingering split parent
1354       hbck = doFsck(conf, true);
1355       assertErrors(hbck, new ERROR_CODE[] {
1356         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1357       assertFalse(hbck.shouldRerun());
1358       hbck = doFsck(conf, false);
1359       assertErrors(hbck, new ERROR_CODE[] {
1360         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1361 
1362       // fix lingering split parent
1363       hbck = new HBaseFsck(conf);
1364       hbck.connect();
1365       hbck.setDisplayFullReport(); // i.e. -details
1366       hbck.setTimeLag(0);
1367       hbck.setFixSplitParents(true);
1368       hbck.onlineHbck();
1369       assertTrue(hbck.shouldRerun());
1370 
1371       Get get = new Get(hri.getRegionName());
1372       Result result = meta.get(get);
1373       assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1374         HConstants.SPLITA_QUALIFIER).isEmpty());
1375       assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1376         HConstants.SPLITB_QUALIFIER).isEmpty());
1377       TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1378 
1379       // fix other issues
1380       doFsck(conf, true);
1381 
1382       // check that all are fixed
1383       assertNoErrors(doFsck(conf, false));
1384       assertEquals(ROWKEYS.length, countRows());
1385     } finally {
1386       deleteTable(table);
1387       IOUtils.closeQuietly(meta);
1388     }
1389   }
1390 
1391   /**
1392    * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
1393    * valid cases where the daughters are there.
1394    */
1395   @Test
1396   public void testValidLingeringSplitParent() throws Exception {
1397     TableName table =
1398         TableName.valueOf("testLingeringSplitParent");
1399     HTable meta = null;
1400     try {
1401       setupTable(table);
1402       assertEquals(ROWKEYS.length, countRows());
1403 
1404       // make sure data in regions, if in hlog only there is no data loss
1405       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1406       HRegionLocation location = tbl.getRegionLocation("B");
1407 
1408       meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName());
1409       HRegionInfo hri = location.getRegionInfo();
1410 
1411       // do a regular split
1412       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1413       byte[] regionName = location.getRegionInfo().getRegionName();
1414       admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1415       TestEndToEndSplitTransaction.blockUntilRegionSplit(
1416           TEST_UTIL.getConfiguration(), 60000, regionName, true);
1417 
1418       // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
1419       // for some time until children references are deleted. HBCK erroneously sees this as
1420       // overlapping regions
1421       HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, false, null);
1422       assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
1423 
1424       // assert that the split hbase:meta entry is still there.
1425       Get get = new Get(hri.getRegionName());
1426       Result result = meta.get(get);
1427       assertNotNull(result);
1428       assertNotNull(HRegionInfo.getHRegionInfo(result));
1429 
1430       assertEquals(ROWKEYS.length, countRows());
1431 
1432       // assert that we still have the split regions
1433       assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split.
1434       assertNoErrors(doFsck(conf, false));
1435     } finally {
1436       deleteTable(table);
1437       IOUtils.closeQuietly(meta);
1438     }
1439   }
1440 
1441   /**
1442    * Split crashed after write to hbase:meta finished for the parent region, but
1443    * failed to write daughters (pre HBASE-7721 codebase)
1444    */
1445   @Test(timeout=75000)
1446   public void testSplitDaughtersNotInMeta() throws Exception {
1447     TableName table =
1448         TableName.valueOf("testSplitdaughtersNotInMeta");
1449     HTable meta = null;
1450     try {
1451       setupTable(table);
1452       assertEquals(ROWKEYS.length, countRows());
1453 
1454       // make sure data in regions, if in hlog only there is no data loss
1455       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1456       HRegionLocation location = tbl.getRegionLocation("B");
1457 
1458       meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName());
1459       HRegionInfo hri = location.getRegionInfo();
1460 
1461       // do a regular split
1462       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1463       byte[] regionName = location.getRegionInfo().getRegionName();
1464       admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1465       TestEndToEndSplitTransaction.blockUntilRegionSplit(
1466           TEST_UTIL.getConfiguration(), 60000, regionName, true);
1467 
1468       PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(meta.get(new Get(regionName)));
1469 
1470       // Delete daughter regions from meta, but not hdfs, unassign it.
1471       Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1472       undeployRegion(admin, hris.get(daughters.getFirst()), daughters.getFirst());
1473       undeployRegion(admin, hris.get(daughters.getSecond()), daughters.getSecond());
1474 
1475       meta.delete(new Delete(daughters.getFirst().getRegionName()));
1476       meta.delete(new Delete(daughters.getSecond().getRegionName()));
1477       meta.flushCommits();
1478 
1479       HBaseFsck hbck = doFsck(conf, false);
1480       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1481           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN}); //no LINGERING_SPLIT_PARENT
1482 
1483       // now fix it. The fix should not revert the region split, but add daughters to META
1484       hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, false, null);
1485       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1486           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1487 
1488       // assert that the split hbase:meta entry is still there.
1489       Get get = new Get(hri.getRegionName());
1490       Result result = meta.get(get);
1491       assertNotNull(result);
1492       assertNotNull(HRegionInfo.getHRegionInfo(result));
1493 
1494       assertEquals(ROWKEYS.length, countRows());
1495 
1496       // assert that we still have the split regions
1497       assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split.
1498       assertNoErrors(doFsck(conf, false)); //should be fixed by now
1499     } finally {
1500       deleteTable(table);
1501       IOUtils.closeQuietly(meta);
1502     }
1503   }
1504 
1505   /**
1506    * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
1507    * meta and data missing in the fs.
1508    */
1509   @Test(timeout=120000)
1510   public void testMissingFirstRegion() throws Exception {
1511     TableName table =
1512         TableName.valueOf("testMissingFirstRegion");
1513     try {
1514       setupTable(table);
1515       assertEquals(ROWKEYS.length, countRows());
1516 
1517       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1518       TEST_UTIL.getHBaseAdmin().disableTable(table);
1519       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1520           true, true);
1521       TEST_UTIL.getHBaseAdmin().enableTable(table);
1522 
1523       HBaseFsck hbck = doFsck(conf, false);
1524       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1525       // fix hole
1526       doFsck(conf, true);
1527       // check that hole fixed
1528       assertNoErrors(doFsck(conf, false));
1529     } finally {
1530       deleteTable(table);
1531     }
1532   }
1533 
1534   /**
1535    * This creates and fixes a bad table with missing last region -- hole in meta and data missing in
1536    * the fs.
1537    */
1538   @Test(timeout=120000)
1539   public void testMissingLastRegion() throws Exception {
1540     TableName table =
1541         TableName.valueOf("testMissingLastRegion");
1542     try {
1543       setupTable(table);
1544       assertEquals(ROWKEYS.length, countRows());
1545 
1546       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1547       TEST_UTIL.getHBaseAdmin().disableTable(table);
1548       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1549           true, true);
1550       TEST_UTIL.getHBaseAdmin().enableTable(table);
1551 
1552       HBaseFsck hbck = doFsck(conf, false);
1553       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1554       // fix hole
1555       doFsck(conf, true);
1556       // check that hole fixed
1557       assertNoErrors(doFsck(conf, false));
1558     } finally {
1559       deleteTable(table);
1560     }
1561   }
1562 
1563   /**
1564    * Test -noHdfsChecking option can detect and fix assignments issue.
1565    */
1566   @Test
1567   public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1568     TableName table =
1569         TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1570     try {
1571       setupTable(table);
1572       assertEquals(ROWKEYS.length, countRows());
1573 
1574       // Mess it up by closing a region
1575       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1576         Bytes.toBytes("B"), true, false, false, false);
1577 
1578       // verify there is no other errors
1579       HBaseFsck hbck = doFsck(conf, false);
1580       assertErrors(hbck, new ERROR_CODE[] {
1581         ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1582 
1583       // verify that noHdfsChecking report the same errors
1584       HBaseFsck fsck = new HBaseFsck(conf);
1585       fsck.connect();
1586       fsck.setDisplayFullReport(); // i.e. -details
1587       fsck.setTimeLag(0);
1588       fsck.setCheckHdfs(false);
1589       fsck.onlineHbck();
1590       assertErrors(fsck, new ERROR_CODE[] {
1591         ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1592 
1593       // verify that fixAssignments works fine with noHdfsChecking
1594       fsck = new HBaseFsck(conf);
1595       fsck.connect();
1596       fsck.setDisplayFullReport(); // i.e. -details
1597       fsck.setTimeLag(0);
1598       fsck.setCheckHdfs(false);
1599       fsck.setFixAssignments(true);
1600       fsck.onlineHbck();
1601       assertTrue(fsck.shouldRerun());
1602       fsck.onlineHbck();
1603       assertNoErrors(fsck);
1604 
1605       assertEquals(ROWKEYS.length, countRows());
1606     } finally {
1607       deleteTable(table);
1608     }
1609   }
1610 
1611   /**
1612    * Test -noHdfsChecking option can detect region is not in meta but deployed.
1613    * However, it can not fix it without checking Hdfs because we need to get
1614    * the region info from Hdfs in this case, then to patch the meta.
1615    */
1616   @Test
1617   public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1618     TableName table =
1619         TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
1620     try {
1621       setupTable(table);
1622       assertEquals(ROWKEYS.length, countRows());
1623 
1624       // Mess it up by deleting a region from the metadata
1625       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1626         Bytes.toBytes("B"), false, true, false, false);
1627 
1628       // verify there is no other errors
1629       HBaseFsck hbck = doFsck(conf, false);
1630       assertErrors(hbck, new ERROR_CODE[] {
1631         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1632 
1633       // verify that noHdfsChecking report the same errors
1634       HBaseFsck fsck = new HBaseFsck(conf);
1635       fsck.connect();
1636       fsck.setDisplayFullReport(); // i.e. -details
1637       fsck.setTimeLag(0);
1638       fsck.setCheckHdfs(false);
1639       fsck.onlineHbck();
1640       assertErrors(fsck, new ERROR_CODE[] {
1641         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1642 
1643       // verify that fixMeta doesn't work with noHdfsChecking
1644       fsck = new HBaseFsck(conf);
1645       fsck.connect();
1646       fsck.setDisplayFullReport(); // i.e. -details
1647       fsck.setTimeLag(0);
1648       fsck.setCheckHdfs(false);
1649       fsck.setFixAssignments(true);
1650       fsck.setFixMeta(true);
1651       fsck.onlineHbck();
1652       assertFalse(fsck.shouldRerun());
1653       assertErrors(fsck, new ERROR_CODE[] {
1654         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1655 
1656       // fix the cluster so other tests won't be impacted
1657       fsck = doFsck(conf, true);
1658       assertTrue(fsck.shouldRerun());
1659       fsck = doFsck(conf, true);
1660       assertNoErrors(fsck);
1661     } finally {
1662       deleteTable(table);
1663     }
1664   }
1665 
1666   /**
1667    * Test -fixHdfsHoles doesn't work with -noHdfsChecking option,
1668    * and -noHdfsChecking can't detect orphan Hdfs region.
1669    */
1670   @Test
1671   public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1672     TableName table =
1673         TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
1674     try {
1675       setupTable(table);
1676       assertEquals(ROWKEYS.length, countRows());
1677 
1678       // Mess it up by creating an overlap in the metadata
1679       TEST_UTIL.getHBaseAdmin().disableTable(table);
1680       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1681         Bytes.toBytes("B"), true, true, false, true);
1682       TEST_UTIL.getHBaseAdmin().enableTable(table);
1683 
1684       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
1685         Bytes.toBytes("A2"), Bytes.toBytes("B"));
1686       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1687       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1688         .waitForAssignment(hriOverlap);
1689       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1690       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1691 
1692       HBaseFsck hbck = doFsck(conf, false);
1693       assertErrors(hbck, new ERROR_CODE[] {
1694         ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1695         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1696 
1697       // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
1698       HBaseFsck fsck = new HBaseFsck(conf);
1699       fsck.connect();
1700       fsck.setDisplayFullReport(); // i.e. -details
1701       fsck.setTimeLag(0);
1702       fsck.setCheckHdfs(false);
1703       fsck.onlineHbck();
1704       assertErrors(fsck, new ERROR_CODE[] {
1705         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1706 
1707       // verify that fixHdfsHoles doesn't work with noHdfsChecking
1708       fsck = new HBaseFsck(conf);
1709       fsck.connect();
1710       fsck.setDisplayFullReport(); // i.e. -details
1711       fsck.setTimeLag(0);
1712       fsck.setCheckHdfs(false);
1713       fsck.setFixHdfsHoles(true);
1714       fsck.setFixHdfsOverlaps(true);
1715       fsck.setFixHdfsOrphans(true);
1716       fsck.onlineHbck();
1717       assertFalse(fsck.shouldRerun());
1718       assertErrors(fsck, new ERROR_CODE[] {
1719         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1720     } finally {
1721       if (TEST_UTIL.getHBaseAdmin().isTableDisabled(table)) {
1722         TEST_UTIL.getHBaseAdmin().enableTable(table);
1723       }
1724       deleteTable(table);
1725     }
1726   }
1727 
1728   /**
1729    * We don't have an easy way to verify that a flush completed, so we loop until we find a
1730    * legitimate hfile and return it.
1731    * @param fs
1732    * @param table
1733    * @return Path of a flushed hfile.
1734    * @throws IOException
1735    */
1736   Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
1737     Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1738     Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1739     Path famDir = new Path(regionDir, FAM_STR);
1740 
1741     // keep doing this until we get a legit hfile
1742     while (true) {
1743       FileStatus[] hfFss = fs.listStatus(famDir);
1744       if (hfFss.length == 0) {
1745         continue;
1746       }
1747       for (FileStatus hfs : hfFss) {
1748         if (!hfs.isDir()) {
1749           return hfs.getPath();
1750         }
1751       }
1752     }
1753   }
1754 
1755   /**
1756    * This creates a table and then corrupts an hfile.  Hbck should quarantine the file.
1757    */
1758   @Test(timeout=180000)
1759   public void testQuarantineCorruptHFile() throws Exception {
1760     TableName table = TableName.valueOf(name.getMethodName());
1761     try {
1762       setupTable(table);
1763       assertEquals(ROWKEYS.length, countRows());
1764       TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.
1765 
1766       FileSystem fs = FileSystem.get(conf);
1767       Path hfile = getFlushedHFile(fs, table);
1768 
1769       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1770       TEST_UTIL.getHBaseAdmin().disableTable(table);
1771 
1772       // create new corrupt file called deadbeef (valid hfile name)
1773       Path corrupt = new Path(hfile.getParent(), "deadbeef");
1774       TestHFile.truncateFile(fs, hfile, corrupt);
1775       LOG.info("Created corrupted file " + corrupt);
1776       HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
1777 
1778       // we cannot enable here because enable never finished due to the corrupt region.
1779       HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
1780       assertEquals(res.getRetCode(), 0);
1781       HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1782       assertEquals(hfcc.getHFilesChecked(), 5);
1783       assertEquals(hfcc.getCorrupted().size(), 1);
1784       assertEquals(hfcc.getFailures().size(), 0);
1785       assertEquals(hfcc.getQuarantined().size(), 1);
1786       assertEquals(hfcc.getMissing().size(), 0);
1787 
1788       // Its been fixed, verify that we can enable.
1789       TEST_UTIL.getHBaseAdmin().enableTable(table);
1790     } finally {
1791       deleteTable(table);
1792     }
1793   }
1794 
1795   /**
1796   * Test that use this should have a timeout, because this method could potentially wait forever.
1797   */
1798   private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
1799                                 int corrupt, int fail, int quar, int missing) throws Exception {
1800     try {
1801       setupTable(table);
1802       assertEquals(ROWKEYS.length, countRows());
1803       TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.
1804 
1805       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1806       TEST_UTIL.getHBaseAdmin().disableTable(table);
1807 
1808       String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
1809           table.getNameAsString()};
1810       ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1811       HBaseFsck res = hbck.exec(exec, args);
1812 
1813       HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1814       assertEquals(hfcc.getHFilesChecked(), check);
1815       assertEquals(hfcc.getCorrupted().size(), corrupt);
1816       assertEquals(hfcc.getFailures().size(), fail);
1817       assertEquals(hfcc.getQuarantined().size(), quar);
1818       assertEquals(hfcc.getMissing().size(), missing);
1819 
1820       // its been fixed, verify that we can enable
1821       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1822       admin.enableTableAsync(table);
1823       while (!admin.isTableEnabled(table)) {
1824         try {
1825           Thread.sleep(250);
1826         } catch (InterruptedException e) {
1827           e.printStackTrace();
1828           fail("Interrupted when trying to enable table " + table);
1829         }
1830       }
1831     } finally {
1832       deleteTable(table);
1833     }
1834   }
1835 
1836   /**
1837    * This creates a table and simulates the race situation where a concurrent compaction or split
1838    * has removed an hfile after the corruption checker learned about it.
1839    */
1840   @Test(timeout=180000)
1841   public void testQuarantineMissingHFile() throws Exception {
1842     TableName table = TableName.valueOf(name.getMethodName());
1843     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1844     // inject a fault in the hfcc created.
1845     final FileSystem fs = FileSystem.get(conf);
1846     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1847       @Override
1848       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1849         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1850           boolean attemptedFirstHFile = false;
1851           @Override
1852           protected void checkHFile(Path p) throws IOException {
1853             if (!attemptedFirstHFile) {
1854               attemptedFirstHFile = true;
1855               assertTrue(fs.delete(p, true)); // make sure delete happened.
1856             }
1857             super.checkHFile(p);
1858           }
1859         };
1860       }
1861     };
1862     doQuarantineTest(table, hbck, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing.
1863   }
1864 
1865   /**
1866    * This creates a table and simulates the race situation where a concurrent compaction or split
1867    * has removed an colfam dir before the corruption checker got to it.
1868    */
1869   // Disabled because fails sporadically.  Is this test right?  Timing-wise, there could be no
1870   // files in a column family on initial creation -- as suggested by Matteo.
1871   @Ignore @Test(timeout=180000)
1872   public void testQuarantineMissingFamdir() throws Exception {
1873     TableName table = TableName.valueOf(name.getMethodName());
1874     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1875     // inject a fault in the hfcc created.
1876     final FileSystem fs = FileSystem.get(conf);
1877     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1878       @Override
1879       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1880         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1881           boolean attemptedFirstFamDir = false;
1882           @Override
1883           protected void checkColFamDir(Path p) throws IOException {
1884             if (!attemptedFirstFamDir) {
1885               attemptedFirstFamDir = true;
1886               assertTrue(fs.delete(p, true)); // make sure delete happened.
1887             }
1888             super.checkColFamDir(p);
1889           }
1890         };
1891       }
1892     };
1893     doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1894   }
1895 
1896   /**
1897    * This creates a table and simulates the race situation where a concurrent compaction or split
1898    * has removed a region dir before the corruption checker got to it.
1899    */
1900   @Test(timeout=180000)
1901   public void testQuarantineMissingRegionDir() throws Exception {
1902     TableName table = TableName.valueOf(name.getMethodName());
1903     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1904     // inject a fault in the hfcc created.
1905     final FileSystem fs = FileSystem.get(conf);
1906     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1907       @Override
1908       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1909         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1910           boolean attemptedFirstRegionDir = false;
1911           @Override
1912           protected void checkRegionDir(Path p) throws IOException {
1913             if (!attemptedFirstRegionDir) {
1914               attemptedFirstRegionDir = true;
1915               assertTrue(fs.delete(p, true)); // make sure delete happened.
1916             }
1917             super.checkRegionDir(p);
1918           }
1919         };
1920       }
1921     };
1922     doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1923   }
1924 
1925   /**
1926    * Test fixing lingering reference file.
1927    */
1928   @Test
1929   public void testLingeringReferenceFile() throws Exception {
1930     TableName table =
1931         TableName.valueOf("testLingeringReferenceFile");
1932     try {
1933       setupTable(table);
1934       assertEquals(ROWKEYS.length, countRows());
1935 
1936       // Mess it up by creating a fake reference file
1937       FileSystem fs = FileSystem.get(conf);
1938       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1939       Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1940       Path famDir = new Path(regionDir, FAM_STR);
1941       Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
1942       fs.create(fakeReferenceFile);
1943 
1944       HBaseFsck hbck = doFsck(conf, false);
1945       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
1946       // fix reference file
1947       doFsck(conf, true);
1948       // check that reference file fixed
1949       assertNoErrors(doFsck(conf, false));
1950     } finally {
1951       deleteTable(table);
1952     }
1953   }
1954 
1955   /**
1956    * Test mission REGIONINFO_QUALIFIER in hbase:meta
1957    */
1958   @Test
1959   public void testMissingRegionInfoQualifier() throws Exception {
1960     TableName table =
1961         TableName.valueOf("testMissingRegionInfoQualifier");
1962     try {
1963       setupTable(table);
1964 
1965       // Mess it up by removing the RegionInfo for one region.
1966       final List<Delete> deletes = new LinkedList<Delete>();
1967       HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName());
1968       MetaScanner.metaScan(conf, new MetaScanner.MetaScannerVisitor() {
1969 
1970         @Override
1971         public boolean processRow(Result rowResult) throws IOException {
1972           HRegionInfo hri = MetaScanner.getHRegionInfo(rowResult);
1973           if (hri != null && !hri.getTable().isSystemTable()) {
1974             Delete delete = new Delete(rowResult.getRow());
1975             delete.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1976             deletes.add(delete);
1977           }
1978           return true;
1979         }
1980 
1981         @Override
1982         public void close() throws IOException {
1983         }
1984       });
1985       meta.delete(deletes);
1986 
1987       // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
1988       meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
1989         HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
1990       meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
1991         HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
1992       meta.close();
1993 
1994       HBaseFsck hbck = doFsck(conf, false);
1995       assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
1996 
1997       // fix reference file
1998       hbck = doFsck(conf, true);
1999 
2000       // check that reference file fixed
2001       assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2002     } finally {
2003       deleteTable(table);
2004     }
2005   }
2006 
2007 
2008   /**
2009    * Test pluggable error reporter. It can be plugged in
2010    * from system property or configuration.
2011    */
2012   @Test
2013   public void testErrorReporter() throws Exception {
2014     try {
2015       MockErrorReporter.calledCount = 0;
2016       doFsck(conf, false);
2017       assertEquals(MockErrorReporter.calledCount, 0);
2018 
2019       conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
2020       doFsck(conf, false);
2021       assertTrue(MockErrorReporter.calledCount > 20);
2022     } finally {
2023       conf.set("hbasefsck.errorreporter",
2024         PrintingErrorReporter.class.getName());
2025       MockErrorReporter.calledCount = 0;
2026     }
2027   }
2028 
2029   static class MockErrorReporter implements ErrorReporter {
2030     static int calledCount = 0;
2031 
2032     @Override
2033     public void clear() {
2034       calledCount++;
2035     }
2036 
2037     @Override
2038     public void report(String message) {
2039       calledCount++;
2040     }
2041 
2042     @Override
2043     public void reportError(String message) {
2044       calledCount++;
2045     }
2046 
2047     @Override
2048     public void reportError(ERROR_CODE errorCode, String message) {
2049       calledCount++;
2050     }
2051 
2052     @Override
2053     public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2054       calledCount++;
2055     }
2056 
2057     @Override
2058     public void reportError(ERROR_CODE errorCode,
2059         String message, TableInfo table, HbckInfo info) {
2060       calledCount++;
2061     }
2062 
2063     @Override
2064     public void reportError(ERROR_CODE errorCode, String message,
2065         TableInfo table, HbckInfo info1, HbckInfo info2) {
2066       calledCount++;
2067     }
2068 
2069     @Override
2070     public int summarize() {
2071       return ++calledCount;
2072     }
2073 
2074     @Override
2075     public void detail(String details) {
2076       calledCount++;
2077     }
2078 
2079     @Override
2080     public ArrayList<ERROR_CODE> getErrorList() {
2081       calledCount++;
2082       return new ArrayList<ERROR_CODE>();
2083     }
2084 
2085     @Override
2086     public void progress() {
2087       calledCount++;
2088     }
2089 
2090     @Override
2091     public void print(String message) {
2092       calledCount++;
2093     }
2094 
2095     @Override
2096     public void resetErrors() {
2097       calledCount++;
2098     }
2099 
2100     @Override
2101     public boolean tableHasErrors(TableInfo table) {
2102       calledCount++;
2103       return false;
2104     }
2105   }
2106 
2107   @Test(timeout=60000)
2108   public void testCheckTableLocks() throws Exception {
2109     IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
2110     EnvironmentEdgeManager.injectEdge(edge);
2111     // check no errors
2112     HBaseFsck hbck = doFsck(conf, false);
2113     assertNoErrors(hbck);
2114 
2115     ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
2116 
2117     // obtain one lock
2118     final TableLockManager tableLockManager = TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
2119     TableLock writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2120         "testCheckTableLocks");
2121     writeLock.acquire();
2122     hbck = doFsck(conf, false);
2123     assertNoErrors(hbck); // should not have expired, no problems
2124 
2125     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2126         TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
2127 
2128     hbck = doFsck(conf, false);
2129     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2130 
2131     final CountDownLatch latch = new CountDownLatch(1);
2132     new Thread() {
2133       @Override
2134       public void run() {
2135         TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2136             "testCheckTableLocks");
2137         try {
2138           latch.countDown();
2139           readLock.acquire();
2140         } catch (IOException ex) {
2141           fail();
2142         } catch (IllegalStateException ex) {
2143           return; // expected, since this will be reaped under us.
2144         }
2145         fail("should not have come here");
2146       };
2147     }.start();
2148 
2149     latch.await(); // wait until thread starts
2150     Threads.sleep(300); // wait some more to ensure writeLock.acquire() is called
2151 
2152     hbck = doFsck(conf, false);
2153     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK}); // still one expired, one not-expired
2154 
2155     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2156         TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
2157 
2158     hbck = doFsck(conf, false);
2159     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK}); // both are expired
2160 
2161     conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1); // reaping from ZKInterProcessWriteLock uses znode cTime,
2162                                                                  // which is not injectable through EnvironmentEdge
2163     Threads.sleep(10);
2164     hbck = doFsck(conf, true); // now fix both cases
2165 
2166     hbck = doFsck(conf, false);
2167     assertNoErrors(hbck);
2168 
2169     // ensure that locks are deleted
2170     writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2171         "should acquire without blocking");
2172     writeLock.acquire(); // this should not block.
2173     writeLock.release(); // release for clean state
2174   }
2175 
2176   @Test
2177   public void testMetaOffline() throws Exception {
2178     // check no errors
2179     HBaseFsck hbck = doFsck(conf, false);
2180     assertNoErrors(hbck);
2181     deleteMetaRegion(conf, true, false, false);
2182     hbck = doFsck(conf, false);
2183     // ERROR_CODE.UNKNOWN is coming because we reportError with a message for the hbase:meta
2184     // inconsistency and whether we will be fixing it or not.
2185     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2186     hbck = doFsck(conf, true);
2187     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2188     hbck = doFsck(conf, false);
2189     assertNoErrors(hbck);
2190   }
2191 
2192   private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
2193       boolean regionInfoOnly) throws IOException, InterruptedException {
2194     HConnection connection = HConnectionManager.getConnection(conf);
2195     HRegionLocation metaLocation = connection.locateRegion(TableName.META_TABLE_NAME,
2196         HConstants.EMPTY_START_ROW);
2197     ServerName hsa = metaLocation.getServerName();
2198     HRegionInfo hri = metaLocation.getRegionInfo();
2199     if (unassign) {
2200       LOG.info("Undeploying meta region " + hri + " from server " + hsa);
2201       undeployRegion(new HBaseAdmin(conf), hsa, hri);
2202     }
2203 
2204     if (regionInfoOnly) {
2205       LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
2206       Path rootDir = FSUtils.getRootDir(conf);
2207       FileSystem fs = rootDir.getFileSystem(conf);
2208       Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
2209           hri.getEncodedName());
2210       Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
2211       fs.delete(hriPath, true);
2212     }
2213 
2214     if (hdfs) {
2215       LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
2216       Path rootDir = FSUtils.getRootDir(conf);
2217       FileSystem fs = rootDir.getFileSystem(conf);
2218       Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
2219           hri.getEncodedName());
2220       HBaseFsck.debugLsr(conf, p);
2221       boolean success = fs.delete(p, true);
2222       LOG.info("Deleted " + p + " sucessfully? " + success);
2223       HBaseFsck.debugLsr(conf, p);
2224     }
2225   }
2226 
2227   @Test
2228   public void testTableWithNoRegions() throws Exception {
2229     // We might end up with empty regions in a table
2230     // see also testNoHdfsTable()
2231     TableName table =
2232         TableName.valueOf(name.getMethodName());
2233     try {
2234       // create table with one region
2235       HTableDescriptor desc = new HTableDescriptor(table);
2236       HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
2237       desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
2238       TEST_UTIL.getHBaseAdmin().createTable(desc);
2239       tbl = new HTable(TEST_UTIL.getConfiguration(), table, executorService);
2240 
2241       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
2242       deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false,
2243           false, true);
2244 
2245       HBaseFsck hbck = doFsck(conf, false);
2246       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
2247 
2248       doFsck(conf, true);
2249 
2250       // fix hole
2251       doFsck(conf, true);
2252 
2253       // check that hole fixed
2254       assertNoErrors(doFsck(conf, false));
2255     } finally {
2256       deleteTable(table);
2257     }
2258 
2259   }
2260 
2261   @Test
2262   public void testHbckAfterRegionMerge() throws Exception {
2263     TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
2264     HTable meta = null;
2265     try {
2266       // disable CatalogJanitor
2267       TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
2268       setupTable(table);
2269       assertEquals(ROWKEYS.length, countRows());
2270 
2271       // make sure data in regions, if in hlog only there is no data loss
2272       TEST_UTIL.getHBaseAdmin().flush(table.getName());
2273       HRegionInfo region1 = tbl.getRegionLocation("A").getRegionInfo();
2274       HRegionInfo region2 = tbl.getRegionLocation("B").getRegionInfo();
2275 
2276       int regionCountBeforeMerge = tbl.getRegionLocations().size();
2277 
2278       assertNotEquals(region1, region2);
2279 
2280       // do a region merge
2281       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
2282       admin.mergeRegions(region1.getEncodedNameAsBytes(),
2283           region2.getEncodedNameAsBytes(), false);
2284 
2285       // wait until region merged
2286       long timeout = System.currentTimeMillis() + 30 * 1000;
2287       while (true) {
2288         if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
2289           break;
2290         } else if (System.currentTimeMillis() > timeout) {
2291           fail("Time out waiting on region " + region1.getEncodedName()
2292               + " and " + region2.getEncodedName() + " be merged");
2293         }
2294         Thread.sleep(10);
2295       }
2296 
2297       assertEquals(ROWKEYS.length, countRows());
2298 
2299       HBaseFsck hbck = doFsck(conf, false);
2300       assertNoErrors(hbck); // no errors
2301 
2302     } finally {
2303       TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
2304       deleteTable(table);
2305       IOUtils.closeQuietly(meta);
2306     }
2307   }
2308 
2309   @Test
2310   public void testRegionBoundariesCheck() throws Exception {
2311     HBaseFsck hbck = doFsck(conf, false);
2312     assertNoErrors(hbck); // no errors
2313     try {
2314       hbck.checkRegionBoundaries();
2315     } catch (IllegalArgumentException e) {
2316       if (e.getMessage().endsWith("not a valid DFS filename.")) {
2317         fail("Table directory path is not valid." + e.getMessage());
2318       }
2319     }
2320   }
2321 
2322   @org.junit.Rule
2323   public TestName name = new TestName();
2324 }