View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24  import static org.junit.Assert.assertEquals;
25  import static org.junit.Assert.assertFalse;
26  import static org.junit.Assert.assertNotEquals;
27  import static org.junit.Assert.assertNotNull;
28  import static org.junit.Assert.assertTrue;
29  import static org.junit.Assert.fail;
30  
31  import java.io.IOException;
32  import java.util.ArrayList;
33  import java.util.Collection;
34  import java.util.HashMap;
35  import java.util.LinkedList;
36  import java.util.List;
37  import java.util.Map;
38  import java.util.Map.Entry;
39  import java.util.concurrent.Callable;
40  import java.util.concurrent.CountDownLatch;
41  import java.util.concurrent.ExecutorService;
42  import java.util.concurrent.Executors;
43  import java.util.concurrent.Future;
44  import java.util.concurrent.ScheduledThreadPoolExecutor;
45  import java.util.concurrent.SynchronousQueue;
46  import java.util.concurrent.ThreadPoolExecutor;
47  import java.util.concurrent.TimeUnit;
48  import java.util.concurrent.atomic.AtomicBoolean;
49  
50  import org.apache.commons.io.IOUtils;
51  import org.apache.commons.logging.Log;
52  import org.apache.commons.logging.LogFactory;
53  import org.apache.hadoop.conf.Configuration;
54  import org.apache.hadoop.fs.FileStatus;
55  import org.apache.hadoop.fs.FileSystem;
56  import org.apache.hadoop.fs.Path;
57  import org.apache.hadoop.hbase.ClusterStatus;
58  import org.apache.hadoop.hbase.HBaseTestingUtility;
59  import org.apache.hadoop.hbase.HColumnDescriptor;
60  import org.apache.hadoop.hbase.HConstants;
61  import org.apache.hadoop.hbase.HRegionInfo;
62  import org.apache.hadoop.hbase.HRegionLocation;
63  import org.apache.hadoop.hbase.HTableDescriptor;
64  import org.apache.hadoop.hbase.testclassification.LargeTests;
65  import org.apache.hadoop.hbase.MiniHBaseCluster;
66  import org.apache.hadoop.hbase.ServerName;
67  import org.apache.hadoop.hbase.TableName;
68  import org.apache.hadoop.hbase.catalog.MetaEditor;
69  import org.apache.hadoop.hbase.client.Delete;
70  import org.apache.hadoop.hbase.client.Durability;
71  import org.apache.hadoop.hbase.client.Get;
72  import org.apache.hadoop.hbase.client.HBaseAdmin;
73  import org.apache.hadoop.hbase.client.HConnection;
74  import org.apache.hadoop.hbase.client.HConnectionManager;
75  import org.apache.hadoop.hbase.client.HTable;
76  import org.apache.hadoop.hbase.client.MetaScanner;
77  import org.apache.hadoop.hbase.client.Put;
78  import org.apache.hadoop.hbase.client.Result;
79  import org.apache.hadoop.hbase.client.ResultScanner;
80  import org.apache.hadoop.hbase.client.Scan;
81  import org.apache.hadoop.hbase.io.hfile.TestHFile;
82  import org.apache.hadoop.hbase.master.AssignmentManager;
83  import org.apache.hadoop.hbase.master.HMaster;
84  import org.apache.hadoop.hbase.master.RegionStates;
85  import org.apache.hadoop.hbase.master.TableLockManager;
86  import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
87  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
88  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
89  import org.apache.hadoop.hbase.regionserver.HRegion;
90  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
91  import org.apache.hadoop.hbase.regionserver.HRegionServer;
92  import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
93  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
94  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
95  import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
96  import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
97  import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
98  import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
99  import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
100 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
101 import org.apache.zookeeper.KeeperException;
102 import org.junit.AfterClass;
103 import org.junit.Assert;
104 import org.junit.BeforeClass;
105 import org.junit.Ignore;
106 import org.junit.Test;
107 import org.junit.experimental.categories.Category;
108 import org.junit.rules.TestName;
109 
110 import com.google.common.collect.Multimap;
111 
112 /**
113  * This tests HBaseFsck's ability to detect reasons for inconsistent tables.
114  */
115 @Category(LargeTests.class)
116 public class TestHBaseFsck {
117   final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
118   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
119   private final static Configuration conf = TEST_UTIL.getConfiguration();
120   private final static String FAM_STR = "fam";
121   private final static byte[] FAM = Bytes.toBytes(FAM_STR);
122   private final static int REGION_ONLINE_TIMEOUT = 800;
123   private static RegionStates regionStates;
124   private static ExecutorService executorService;
125 
126   // for the instance, reset every test run
127   private HTable tbl;
128   private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
129     Bytes.toBytes("B"), Bytes.toBytes("C") };
130   // one row per region.
131   private final static byte[][] ROWKEYS= new byte[][] {
132     Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
133     Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
134 
135   @SuppressWarnings("deprecation")
136   @BeforeClass
137   public static void setUpBeforeClass() throws Exception {
138     TEST_UTIL.getConfiguration().setInt("hbase.regionserver.handler.count", 2);
139     TEST_UTIL.getConfiguration().setInt("hbase.regionserver.metahandler.count", 2);
140     TEST_UTIL.startMiniCluster(3);
141     TEST_UTIL.setHDFSClientRetry(0);
142 
143     executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
144         new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
145 
146     AssignmentManager assignmentManager =
147       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
148     regionStates = assignmentManager.getRegionStates();
149     TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
150   }
151 
152   @AfterClass
153   public static void tearDownAfterClass() throws Exception {
154     TEST_UTIL.shutdownMiniCluster();
155   }
156 
157   @Test
158   public void testHBaseFsck() throws Exception {
159     assertNoErrors(doFsck(conf, false));
160     String table = "tableBadMetaAssign";
161     TEST_UTIL.createTable(Bytes.toBytes(table), FAM);
162 
163     // We created 1 table, should be fine
164     assertNoErrors(doFsck(conf, false));
165 
166     // Now let's mess it up and change the assignment in hbase:meta to
167     // point to a different region server
168     HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
169     Scan scan = new Scan();
170     scan.setStartRow(Bytes.toBytes(table+",,"));
171     ResultScanner scanner = meta.getScanner(scan);
172     HRegionInfo hri = null;
173 
174     Result res = scanner.next();
175     ServerName currServer =
176       ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
177           HConstants.SERVER_QUALIFIER));
178     long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
179         HConstants.STARTCODE_QUALIFIER));
180 
181     for (JVMClusterUtil.RegionServerThread rs :
182         TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
183 
184       ServerName sn = rs.getRegionServer().getServerName();
185 
186       // When we find a diff RS, change the assignment and break
187       if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
188           startCode != sn.getStartcode()) {
189         Put put = new Put(res.getRow());
190         put.setDurability(Durability.SKIP_WAL);
191         put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
192           Bytes.toBytes(sn.getHostAndPort()));
193         put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
194           Bytes.toBytes(sn.getStartcode()));
195         meta.put(put);
196         hri = HRegionInfo.getHRegionInfo(res);
197         break;
198       }
199     }
200 
201     // Try to fix the data
202     assertErrors(doFsck(conf, true), new ERROR_CODE[]{
203         ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
204 
205     TEST_UTIL.getHBaseCluster().getMaster()
206       .getAssignmentManager().waitForAssignment(hri);
207 
208     // Should be fixed now
209     assertNoErrors(doFsck(conf, false));
210 
211     // comment needed - what is the purpose of this line
212     HTable t = new HTable(conf, Bytes.toBytes(table), executorService);
213     ResultScanner s = t.getScanner(new Scan());
214     s.close();
215     t.close();
216 
217     scanner.close();
218     meta.close();
219   }
220 
221   @Test(timeout=180000)
222   public void testFixAssignmentsWhenMETAinTransition() throws Exception {
223     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
224     HBaseAdmin admin = null;
225     try {
226       admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
227       admin.closeRegion(cluster.getServerHoldingMeta(),
228           HRegionInfo.FIRST_META_REGIONINFO);
229     } finally {
230       if (admin != null) {
231         admin.close();
232       }
233     }
234     regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
235     MetaRegionTracker.deleteMetaLocation(cluster.getMaster().getZooKeeper());
236     assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
237     HBaseFsck hbck = doFsck(conf, true);
238     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
239         ERROR_CODE.NULL_META_REGION });
240     assertNoErrors(doFsck(conf, false));
241   }
242 
243   /**
244    * Create a new region in META.
245    */
246   private HRegionInfo createRegion(Configuration conf, final HTableDescriptor
247       htd, byte[] startKey, byte[] endKey)
248       throws IOException {
249     HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
250     HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
251     MetaEditor.addRegionToMeta(meta, hri);
252     meta.close();
253     return hri;
254   }
255 
256   /**
257    * Debugging method to dump the contents of meta.
258    */
259   private void dumpMeta(TableName tableName) throws IOException {
260     List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
261     for (byte[] row : metaRows) {
262       LOG.info(Bytes.toString(row));
263     }
264   }
265 
266   /**
267    * This method is used to undeploy a region -- close it and attempt to
268    * remove its state from the Master.
269    */
270   private void undeployRegion(HBaseAdmin admin, ServerName sn,
271       HRegionInfo hri) throws IOException, InterruptedException {
272     try {
273       HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
274       if (!hri.isMetaTable()) {
275         admin.offline(hri.getRegionName());
276       }
277     } catch (IOException ioe) {
278       LOG.warn("Got exception when attempting to offline region "
279           + Bytes.toString(hri.getRegionName()), ioe);
280     }
281   }
282   /**
283    * Delete a region from assignments, meta, or completely from hdfs.
284    * @param unassign if true unassign region if assigned
285    * @param metaRow  if true remove region's row from META
286    * @param hdfs if true remove region's dir in HDFS
287    */
288   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
289       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
290       boolean hdfs) throws IOException, InterruptedException {
291     deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
292   }
293 
294   /**
295    * Delete a region from assignments, meta, or completely from hdfs.
296    * @param unassign if true unassign region if assigned
297    * @param metaRow  if true remove region's row from META
298    * @param hdfs if true remove region's dir in HDFS
299    * @param regionInfoOnly if true remove a region dir's .regioninfo file
300    */
301   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
302       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
303       boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
304     LOG.info("** Before delete:");
305     dumpMeta(htd.getTableName());
306 
307     Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
308     for (Entry<HRegionInfo, ServerName> e: hris.entrySet()) {
309       HRegionInfo hri = e.getKey();
310       ServerName hsa = e.getValue();
311       if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
312           && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
313 
314         LOG.info("RegionName: " +hri.getRegionNameAsString());
315         byte[] deleteRow = hri.getRegionName();
316 
317         if (unassign) {
318           LOG.info("Undeploying region " + hri + " from server " + hsa);
319           undeployRegion(new HBaseAdmin(conf), hsa, hri);
320         }
321 
322         if (regionInfoOnly) {
323           LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
324           Path rootDir = FSUtils.getRootDir(conf);
325           FileSystem fs = rootDir.getFileSystem(conf);
326           Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
327               hri.getEncodedName());
328           Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
329           fs.delete(hriPath, true);
330         }
331 
332         if (hdfs) {
333           LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
334           Path rootDir = FSUtils.getRootDir(conf);
335           FileSystem fs = rootDir.getFileSystem(conf);
336           Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
337               hri.getEncodedName());
338           HBaseFsck.debugLsr(conf, p);
339           boolean success = fs.delete(p, true);
340           LOG.info("Deleted " + p + " sucessfully? " + success);
341           HBaseFsck.debugLsr(conf, p);
342         }
343 
344         if (metaRow) {
345           HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
346           Delete delete = new Delete(deleteRow);
347           meta.delete(delete);
348         }
349       }
350       LOG.info(hri.toString() + hsa.toString());
351     }
352 
353     TEST_UTIL.getMetaTableRows(htd.getTableName());
354     LOG.info("*** After delete:");
355     dumpMeta(htd.getTableName());
356   }
357 
358   /**
359    * Setup a clean table before we start mucking with it.
360    *
361    * @throws IOException
362    * @throws InterruptedException
363    * @throws KeeperException
364    */
365   HTable setupTable(TableName tablename) throws Exception {
366     HTableDescriptor desc = new HTableDescriptor(tablename);
367     HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
368     desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
369     TEST_UTIL.getHBaseAdmin().createTable(desc, SPLITS);
370     tbl = new HTable(TEST_UTIL.getConfiguration(), tablename, executorService);
371 
372     List<Put> puts = new ArrayList<Put>();
373     for (byte[] row : ROWKEYS) {
374       Put p = new Put(row);
375       p.add(FAM, Bytes.toBytes("val"), row);
376       puts.add(p);
377     }
378     tbl.put(puts);
379     tbl.flushCommits();
380     return tbl;
381   }
382 
383   /**
384    * Counts the number of row to verify data loss or non-dataloss.
385    */
386   int countRows() throws IOException {
387      Scan s = new Scan();
388      ResultScanner rs = tbl.getScanner(s);
389      int i = 0;
390      while(rs.next() !=null) {
391        i++;
392      }
393      return i;
394   }
395 
396   /**
397    * delete table in preparation for next test
398    *
399    * @param tablename
400    * @throws IOException
401    */
402   void deleteTable(TableName tablename) throws IOException {
403     HBaseAdmin admin = new HBaseAdmin(conf);
404     admin.getConnection().clearRegionCache();
405     if (admin.isTableEnabled(tablename)) {
406       admin.disableTableAsync(tablename);
407     }
408     long totalWait = 0;
409     long maxWait = 30*1000;
410     long sleepTime = 250;
411     while (!admin.isTableDisabled(tablename)) {
412       try {
413         Thread.sleep(sleepTime);
414         totalWait += sleepTime;
415         if (totalWait >= maxWait) {
416           fail("Waited too long for table to be disabled + " + tablename);
417         }
418       } catch (InterruptedException e) {
419         e.printStackTrace();
420         fail("Interrupted when trying to disable table " + tablename);
421       }
422     }
423     admin.deleteTable(tablename);
424   }
425 
426   /**
427    * This creates a clean table and confirms that the table is clean.
428    */
429   @Test
430   public void testHBaseFsckClean() throws Exception {
431     assertNoErrors(doFsck(conf, false));
432     TableName table = TableName.valueOf("tableClean");
433     try {
434       HBaseFsck hbck = doFsck(conf, false);
435       assertNoErrors(hbck);
436 
437       setupTable(table);
438       assertEquals(ROWKEYS.length, countRows());
439 
440       // We created 1 table, should be fine
441       hbck = doFsck(conf, false);
442       assertNoErrors(hbck);
443       assertEquals(0, hbck.getOverlapGroups(table).size());
444       assertEquals(ROWKEYS.length, countRows());
445     } finally {
446       deleteTable(table);
447     }
448   }
449 
450   /**
451    * Test thread pooling in the case where there are more regions than threads
452    */
453   @Test
454   public void testHbckThreadpooling() throws Exception {
455     TableName table =
456         TableName.valueOf("tableDupeStartKey");
457     try {
458       // Create table with 4 regions
459       setupTable(table);
460 
461       // limit number of threads to 1.
462       Configuration newconf = new Configuration(conf);
463       newconf.setInt("hbasefsck.numthreads", 1);
464       assertNoErrors(doFsck(newconf, false));
465 
466       // We should pass without triggering a RejectedExecutionException
467     } finally {
468       deleteTable(table);
469     }
470   }
471 
472   @Test
473   public void testHbckFixOrphanTable() throws Exception {
474     TableName table = TableName.valueOf("tableInfo");
475     FileSystem fs = null;
476     Path tableinfo = null;
477     try {
478       setupTable(table);
479       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
480 
481       Path hbaseTableDir = FSUtils.getTableDir(
482           FSUtils.getRootDir(conf), table);
483       fs = hbaseTableDir.getFileSystem(conf);
484       FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
485       tableinfo = status.getPath();
486       fs.rename(tableinfo, new Path("/.tableinfo"));
487 
488       //to report error if .tableinfo is missing.
489       HBaseFsck hbck = doFsck(conf, false);
490       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
491 
492       // fix OrphanTable with default .tableinfo (htd not yet cached on master)
493       hbck = doFsck(conf, true);
494       assertNoErrors(hbck);
495       status = null;
496       status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
497       assertNotNull(status);
498 
499       HTableDescriptor htd = admin.getTableDescriptor(table);
500       htd.setValue("NOT_DEFAULT", "true");
501       admin.disableTable(table);
502       admin.modifyTable(table, htd);
503       admin.enableTable(table);
504       fs.delete(status.getPath(), true);
505 
506       // fix OrphanTable with cache
507       htd = admin.getTableDescriptor(table); // warms up cached htd on master
508       hbck = doFsck(conf, true);
509       assertNoErrors(hbck);
510       status = null;
511       status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
512       assertNotNull(status);
513       htd = admin.getTableDescriptor(table);
514       assertEquals(htd.getValue("NOT_DEFAULT"), "true");
515     } finally {
516       fs.rename(new Path("/.tableinfo"), tableinfo);
517       deleteTable(table);
518     }
519   }
520 
521   /**
522    * This test makes sure that parallel instances of Hbck is disabled.
523    *
524    * @throws Exception
525    */
526   @Test
527   public void testParallelHbck() throws Exception {
528     final ExecutorService service;
529     final Future<HBaseFsck> hbck1,hbck2;
530 
531     class RunHbck implements Callable<HBaseFsck>{
532       boolean fail = true;
533       @Override
534       public HBaseFsck call(){
535         try{
536           return doFsck(conf, false);
537         } catch(Exception e){
538           if (e.getMessage().contains("Duplicate hbck")) {
539             fail = false;
540           } else {
541             LOG.fatal("hbck failed.", e);
542           }
543         }
544         // If we reach here, then an exception was caught
545         if (fail) fail();
546         return null;
547       }
548     }
549     service = Executors.newFixedThreadPool(2);
550     hbck1 = service.submit(new RunHbck());
551     hbck2 = service.submit(new RunHbck());
552     service.shutdown();
553     //wait for 15 seconds, for both hbck calls finish
554     service.awaitTermination(15, TimeUnit.SECONDS);
555     HBaseFsck h1 = hbck1.get();
556     HBaseFsck h2 = hbck2.get();
557     // Make sure only one of the calls was successful
558     assert(h1 == null || h2 == null);
559     if (h1 != null) {
560       assert(h1.getRetCode() >= 0);
561     }
562     if (h2 != null) {
563       assert(h2.getRetCode() >= 0);
564     }
565   }
566 
567   /**
568    * This create and fixes a bad table with regions that have a duplicate
569    * start key
570    */
571   @Test
572   public void testDupeStartKey() throws Exception {
573     TableName table =
574         TableName.valueOf("tableDupeStartKey");
575     try {
576       setupTable(table);
577       assertNoErrors(doFsck(conf, false));
578       assertEquals(ROWKEYS.length, countRows());
579 
580       // Now let's mess it up, by adding a region with a duplicate startkey
581       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
582           Bytes.toBytes("A"), Bytes.toBytes("A2"));
583       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
584       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
585           .waitForAssignment(hriDupe);
586       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
587       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
588 
589       HBaseFsck hbck = doFsck(conf, false);
590       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
591             ERROR_CODE.DUPE_STARTKEYS});
592       assertEquals(2, hbck.getOverlapGroups(table).size());
593       assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
594 
595       // fix the degenerate region.
596       doFsck(conf,true);
597 
598       // check that the degenerate region is gone and no data loss
599       HBaseFsck hbck2 = doFsck(conf,false);
600       assertNoErrors(hbck2);
601       assertEquals(0, hbck2.getOverlapGroups(table).size());
602       assertEquals(ROWKEYS.length, countRows());
603     } finally {
604       deleteTable(table);
605     }
606   }
607 
608   /**
609    * Get region info from local cluster.
610    */
611   Map<ServerName, List<String>> getDeployedHRIs(
612       final HBaseAdmin admin) throws IOException {
613     ClusterStatus status = admin.getClusterStatus();
614     Collection<ServerName> regionServers = status.getServers();
615     Map<ServerName, List<String>> mm =
616         new HashMap<ServerName, List<String>>();
617     HConnection connection = admin.getConnection();
618     for (ServerName hsi : regionServers) {
619       AdminProtos.AdminService.BlockingInterface server = connection.getAdmin(hsi);
620 
621       // list all online regions from this region server
622       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
623       List<String> regionNames = new ArrayList<String>();
624       for (HRegionInfo hri : regions) {
625         regionNames.add(hri.getRegionNameAsString());
626       }
627       mm.put(hsi, regionNames);
628     }
629     return mm;
630   }
631 
632   /**
633    * Returns the HSI a region info is on.
634    */
635   ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
636     for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
637       if (e.getValue().contains(hri.getRegionNameAsString())) {
638         return e.getKey();
639       }
640     }
641     return null;
642   }
643 
644   /**
645    * This create and fixes a bad table with regions that have a duplicate
646    * start key
647    */
648   @Test
649   public void testDupeRegion() throws Exception {
650     TableName table =
651         TableName.valueOf("tableDupeRegion");
652     try {
653       setupTable(table);
654       assertNoErrors(doFsck(conf, false));
655       assertEquals(ROWKEYS.length, countRows());
656 
657       // Now let's mess it up, by adding a region with a duplicate startkey
658       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
659           Bytes.toBytes("A"), Bytes.toBytes("B"));
660 
661       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
662       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
663           .waitForAssignment(hriDupe);
664       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
665       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
666 
667       // Yikes! The assignment manager can't tell between diff between two
668       // different regions with the same start/endkeys since it doesn't
669       // differentiate on ts/regionId!  We actually need to recheck
670       // deployments!
671       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
672       while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
673         Thread.sleep(250);
674       }
675 
676       LOG.debug("Finished assignment of dupe region");
677 
678       // TODO why is dupe region different from dupe start keys?
679       HBaseFsck hbck = doFsck(conf, false);
680       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
681             ERROR_CODE.DUPE_STARTKEYS});
682       assertEquals(2, hbck.getOverlapGroups(table).size());
683       assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
684 
685       // fix the degenerate region.
686       doFsck(conf,true);
687 
688       // check that the degenerate region is gone and no data loss
689       HBaseFsck hbck2 = doFsck(conf,false);
690       assertNoErrors(hbck2);
691       assertEquals(0, hbck2.getOverlapGroups(table).size());
692       assertEquals(ROWKEYS.length, countRows());
693     } finally {
694       deleteTable(table);
695     }
696   }
697 
698   /**
699    * This creates and fixes a bad table with regions that has startkey == endkey
700    */
701   @Test
702   public void testDegenerateRegions() throws Exception {
703     TableName table =
704         TableName.valueOf("tableDegenerateRegions");
705     try {
706       setupTable(table);
707       assertNoErrors(doFsck(conf,false));
708       assertEquals(ROWKEYS.length, countRows());
709 
710       // Now let's mess it up, by adding a region with a duplicate startkey
711       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
712           Bytes.toBytes("B"), Bytes.toBytes("B"));
713       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
714       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
715           .waitForAssignment(hriDupe);
716       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
717       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
718 
719       HBaseFsck hbck = doFsck(conf,false);
720       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION,
721           ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS});
722       assertEquals(2, hbck.getOverlapGroups(table).size());
723       assertEquals(ROWKEYS.length, countRows());
724 
725       // fix the degenerate region.
726       doFsck(conf,true);
727 
728       // check that the degenerate region is gone and no data loss
729       HBaseFsck hbck2 = doFsck(conf,false);
730       assertNoErrors(hbck2);
731       assertEquals(0, hbck2.getOverlapGroups(table).size());
732       assertEquals(ROWKEYS.length, countRows());
733     } finally {
734       deleteTable(table);
735     }
736   }
737 
738   /**
739    * This creates and fixes a bad table where a region is completely contained
740    * by another region.
741    */
742   @Test
743   public void testContainedRegionOverlap() throws Exception {
744     TableName table =
745         TableName.valueOf("tableContainedRegionOverlap");
746     try {
747       setupTable(table);
748       assertEquals(ROWKEYS.length, countRows());
749 
750       // Mess it up by creating an overlap in the metadata
751       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
752           Bytes.toBytes("A2"), Bytes.toBytes("B"));
753       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
754       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
755           .waitForAssignment(hriOverlap);
756       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
757       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
758 
759       HBaseFsck hbck = doFsck(conf, false);
760       assertErrors(hbck, new ERROR_CODE[] {
761           ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
762       assertEquals(2, hbck.getOverlapGroups(table).size());
763       assertEquals(ROWKEYS.length, countRows());
764 
765       // fix the problem.
766       doFsck(conf, true);
767 
768       // verify that overlaps are fixed
769       HBaseFsck hbck2 = doFsck(conf,false);
770       assertNoErrors(hbck2);
771       assertEquals(0, hbck2.getOverlapGroups(table).size());
772       assertEquals(ROWKEYS.length, countRows());
773     } finally {
774        deleteTable(table);
775     }
776   }
777 
778   /**
779    * This creates and fixes a bad table where an overlap group of
780    * 3 regions. Set HBaseFsck.maxMerge to 2 to trigger sideline overlapped
781    * region. Mess around the meta data so that closeRegion/offlineRegion
782    * throws exceptions.
783    */
784   @Test
785   public void testSidelineOverlapRegion() throws Exception {
786     TableName table =
787         TableName.valueOf("testSidelineOverlapRegion");
788     try {
789       setupTable(table);
790       assertEquals(ROWKEYS.length, countRows());
791 
792       // Mess it up by creating an overlap
793       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
794       HMaster master = cluster.getMaster();
795       HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(),
796         Bytes.toBytes("A"), Bytes.toBytes("AB"));
797       master.assignRegion(hriOverlap1);
798       master.getAssignmentManager().waitForAssignment(hriOverlap1);
799       HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(),
800         Bytes.toBytes("AB"), Bytes.toBytes("B"));
801       master.assignRegion(hriOverlap2);
802       master.getAssignmentManager().waitForAssignment(hriOverlap2);
803 
804       HBaseFsck hbck = doFsck(conf, false);
805       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
806         ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
807       assertEquals(3, hbck.getOverlapGroups(table).size());
808       assertEquals(ROWKEYS.length, countRows());
809 
810       // mess around the overlapped regions, to trigger NotServingRegionException
811       Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
812       ServerName serverName = null;
813       byte[] regionName = null;
814       for (HbckInfo hbi: overlapGroups.values()) {
815         if ("A".equals(Bytes.toString(hbi.getStartKey()))
816             && "B".equals(Bytes.toString(hbi.getEndKey()))) {
817           regionName = hbi.getRegionName();
818 
819           // get an RS not serving the region to force bad assignment info in to META.
820           int k = cluster.getServerWith(regionName);
821           for (int i = 0; i < 3; i++) {
822             if (i != k) {
823               HRegionServer rs = cluster.getRegionServer(i);
824               serverName = rs.getServerName();
825               break;
826             }
827           }
828 
829           HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
830           HBaseFsckRepair.closeRegionSilentlyAndWait(admin,
831             cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
832           admin.offline(regionName);
833           break;
834         }
835       }
836 
837       assertNotNull(regionName);
838       assertNotNull(serverName);
839       HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
840       Put put = new Put(regionName);
841       put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
842         Bytes.toBytes(serverName.getHostAndPort()));
843       meta.put(put);
844 
845       // fix the problem.
846       HBaseFsck fsck = new HBaseFsck(conf);
847       fsck.connect();
848       fsck.setDisplayFullReport(); // i.e. -details
849       fsck.setTimeLag(0);
850       fsck.setFixAssignments(true);
851       fsck.setFixMeta(true);
852       fsck.setFixHdfsHoles(true);
853       fsck.setFixHdfsOverlaps(true);
854       fsck.setFixHdfsOrphans(true);
855       fsck.setFixVersionFile(true);
856       fsck.setSidelineBigOverlaps(true);
857       fsck.setMaxMerge(2);
858       fsck.onlineHbck();
859 
860       // verify that overlaps are fixed, and there are less rows
861       // since one region is sidelined.
862       HBaseFsck hbck2 = doFsck(conf,false);
863       assertNoErrors(hbck2);
864       assertEquals(0, hbck2.getOverlapGroups(table).size());
865       assertTrue(ROWKEYS.length > countRows());
866     } finally {
867        deleteTable(table);
868     }
869   }
870 
871   /**
872    * This creates and fixes a bad table where a region is completely contained
873    * by another region, and there is a hole (sort of like a bad split)
874    */
875   @Test
876   public void testOverlapAndOrphan() throws Exception {
877     TableName table =
878         TableName.valueOf("tableOverlapAndOrphan");
879     try {
880       setupTable(table);
881       assertEquals(ROWKEYS.length, countRows());
882 
883       // Mess it up by creating an overlap in the metadata
884       TEST_UTIL.getHBaseAdmin().disableTable(table);
885       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
886           Bytes.toBytes("B"), true, true, false, true);
887       TEST_UTIL.getHBaseAdmin().enableTable(table);
888 
889       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
890           Bytes.toBytes("A2"), Bytes.toBytes("B"));
891       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
892       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
893           .waitForAssignment(hriOverlap);
894       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
895       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
896 
897       HBaseFsck hbck = doFsck(conf, false);
898       assertErrors(hbck, new ERROR_CODE[] {
899           ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
900           ERROR_CODE.HOLE_IN_REGION_CHAIN});
901 
902       // fix the problem.
903       doFsck(conf, true);
904 
905       // verify that overlaps are fixed
906       HBaseFsck hbck2 = doFsck(conf,false);
907       assertNoErrors(hbck2);
908       assertEquals(0, hbck2.getOverlapGroups(table).size());
909       assertEquals(ROWKEYS.length, countRows());
910     } finally {
911        deleteTable(table);
912     }
913   }
914 
915   /**
916    * This creates and fixes a bad table where a region overlaps two regions --
917    * a start key contained in another region and its end key is contained in
918    * yet another region.
919    */
920   @Test
921   public void testCoveredStartKey() throws Exception {
922     TableName table =
923         TableName.valueOf("tableCoveredStartKey");
924     try {
925       setupTable(table);
926       assertEquals(ROWKEYS.length, countRows());
927 
928       // Mess it up by creating an overlap in the metadata
929       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
930           Bytes.toBytes("A2"), Bytes.toBytes("B2"));
931       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
932       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
933           .waitForAssignment(hriOverlap);
934       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
935       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
936 
937       HBaseFsck hbck = doFsck(conf, false);
938       assertErrors(hbck, new ERROR_CODE[] {
939           ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
940           ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
941       assertEquals(3, hbck.getOverlapGroups(table).size());
942       assertEquals(ROWKEYS.length, countRows());
943 
944       // fix the problem.
945       doFsck(conf, true);
946 
947       // verify that overlaps are fixed
948       HBaseFsck hbck2 = doFsck(conf, false);
949       assertErrors(hbck2, new ERROR_CODE[0]);
950       assertEquals(0, hbck2.getOverlapGroups(table).size());
951       assertEquals(ROWKEYS.length, countRows());
952     } finally {
953       deleteTable(table);
954     }
955   }
956 
957   /**
958    * This creates and fixes a bad table with a missing region -- hole in meta
959    * and data missing in the fs.
960    */
961   @Test
962   public void testRegionHole() throws Exception {
963     TableName table =
964         TableName.valueOf("tableRegionHole");
965     try {
966       setupTable(table);
967       assertEquals(ROWKEYS.length, countRows());
968 
969       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
970       TEST_UTIL.getHBaseAdmin().disableTable(table);
971       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
972           Bytes.toBytes("C"), true, true, true);
973       TEST_UTIL.getHBaseAdmin().enableTable(table);
974 
975       HBaseFsck hbck = doFsck(conf, false);
976       assertErrors(hbck, new ERROR_CODE[] {
977           ERROR_CODE.HOLE_IN_REGION_CHAIN});
978       // holes are separate from overlap groups
979       assertEquals(0, hbck.getOverlapGroups(table).size());
980 
981       // fix hole
982       doFsck(conf, true);
983 
984       // check that hole fixed
985       assertNoErrors(doFsck(conf,false));
986       assertEquals(ROWKEYS.length - 2 , countRows()); // lost a region so lost a row
987     } finally {
988       deleteTable(table);
989     }
990   }
991 
992   /**
993    * This creates and fixes a bad table with a missing region -- hole in meta
994    * and data present but .regioinfino missing (an orphan hdfs region)in the fs.
995    */
996   @Test
997   public void testHDFSRegioninfoMissing() throws Exception {
998     TableName table =
999         TableName.valueOf("tableHDFSRegioininfoMissing");
1000     try {
1001       setupTable(table);
1002       assertEquals(ROWKEYS.length, countRows());
1003 
1004       // Mess it up by leaving a hole in the meta data
1005       TEST_UTIL.getHBaseAdmin().disableTable(table);
1006       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1007           Bytes.toBytes("C"), true, true, false, true);
1008       TEST_UTIL.getHBaseAdmin().enableTable(table);
1009 
1010       HBaseFsck hbck = doFsck(conf, false);
1011       assertErrors(hbck, new ERROR_CODE[] {
1012           ERROR_CODE.ORPHAN_HDFS_REGION,
1013           ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1014           ERROR_CODE.HOLE_IN_REGION_CHAIN});
1015       // holes are separate from overlap groups
1016       assertEquals(0, hbck.getOverlapGroups(table).size());
1017 
1018       // fix hole
1019       doFsck(conf, true);
1020 
1021       // check that hole fixed
1022       assertNoErrors(doFsck(conf, false));
1023       assertEquals(ROWKEYS.length, countRows());
1024     } finally {
1025       deleteTable(table);
1026     }
1027   }
1028 
1029   /**
1030    * This creates and fixes a bad table with a region that is missing meta and
1031    * not assigned to a region server.
1032    */
1033   @Test
1034   public void testNotInMetaOrDeployedHole() throws Exception {
1035     TableName table =
1036         TableName.valueOf("tableNotInMetaOrDeployedHole");
1037     try {
1038       setupTable(table);
1039       assertEquals(ROWKEYS.length, countRows());
1040 
1041       // Mess it up by leaving a hole in the meta data
1042       TEST_UTIL.getHBaseAdmin().disableTable(table);
1043       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1044           Bytes.toBytes("C"), true, true, false); // don't rm from fs
1045       TEST_UTIL.getHBaseAdmin().enableTable(table);
1046 
1047       HBaseFsck hbck = doFsck(conf, false);
1048       assertErrors(hbck, new ERROR_CODE[] {
1049           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1050       // holes are separate from overlap groups
1051       assertEquals(0, hbck.getOverlapGroups(table).size());
1052 
1053       // fix hole
1054       assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1055           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1056 
1057       // check that hole fixed
1058       assertNoErrors(doFsck(conf,false));
1059       assertEquals(ROWKEYS.length, countRows());
1060     } finally {
1061       deleteTable(table);
1062     }
1063   }
1064 
1065   /**
1066    * This creates fixes a bad table with a hole in meta.
1067    */
1068   @Test
1069   public void testNotInMetaHole() throws Exception {
1070     TableName table =
1071         TableName.valueOf("tableNotInMetaHole");
1072     try {
1073       setupTable(table);
1074       assertEquals(ROWKEYS.length, countRows());
1075 
1076       // Mess it up by leaving a hole in the meta data
1077       TEST_UTIL.getHBaseAdmin().disableTable(table);
1078       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1079           Bytes.toBytes("C"), false, true, false); // don't rm from fs
1080       TEST_UTIL.getHBaseAdmin().enableTable(table);
1081 
1082       HBaseFsck hbck = doFsck(conf, false);
1083       assertErrors(hbck, new ERROR_CODE[] {
1084           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1085       // holes are separate from overlap groups
1086       assertEquals(0, hbck.getOverlapGroups(table).size());
1087 
1088       // fix hole
1089       assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1090           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1091 
1092       // check that hole fixed
1093       assertNoErrors(doFsck(conf,false));
1094       assertEquals(ROWKEYS.length, countRows());
1095     } finally {
1096       deleteTable(table);
1097     }
1098   }
1099 
1100   /**
1101    * This creates and fixes a bad table with a region that is in meta but has
1102    * no deployment or data hdfs
1103    */
1104   @Test
1105   public void testNotInHdfs() throws Exception {
1106     TableName table =
1107         TableName.valueOf("tableNotInHdfs");
1108     try {
1109       setupTable(table);
1110       assertEquals(ROWKEYS.length, countRows());
1111 
1112       // make sure data in regions, if in hlog only there is no data loss
1113       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1114 
1115       // Mess it up by leaving a hole in the hdfs data
1116       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1117           Bytes.toBytes("C"), false, false, true); // don't rm meta
1118 
1119       HBaseFsck hbck = doFsck(conf, false);
1120       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1121       // holes are separate from overlap groups
1122       assertEquals(0, hbck.getOverlapGroups(table).size());
1123 
1124       // fix hole
1125       doFsck(conf, true);
1126 
1127       // check that hole fixed
1128       assertNoErrors(doFsck(conf,false));
1129       assertEquals(ROWKEYS.length - 2, countRows());
1130     } finally {
1131       deleteTable(table);
1132     }
1133   }
1134 
1135   /**
1136    * This creates entries in hbase:meta with no hdfs data.  This should cleanly
1137    * remove the table.
1138    */
1139   @Test
1140   public void testNoHdfsTable() throws Exception {
1141     TableName table = TableName.valueOf("NoHdfsTable");
1142     setupTable(table);
1143     assertEquals(ROWKEYS.length, countRows());
1144 
1145     // make sure data in regions, if in hlog only there is no data loss
1146     TEST_UTIL.getHBaseAdmin().flush(table.getName());
1147 
1148     // Mess it up by deleting hdfs dirs
1149     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1150         Bytes.toBytes("A"), false, false, true); // don't rm meta
1151     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1152         Bytes.toBytes("B"), false, false, true); // don't rm meta
1153     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1154         Bytes.toBytes("C"), false, false, true); // don't rm meta
1155     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1156         Bytes.toBytes(""), false, false, true); // don't rm meta
1157 
1158     // also remove the table directory in hdfs
1159     deleteTableDir(table);
1160 
1161     HBaseFsck hbck = doFsck(conf, false);
1162     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1163         ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1164         ERROR_CODE.NOT_IN_HDFS,});
1165     // holes are separate from overlap groups
1166     assertEquals(0, hbck.getOverlapGroups(table).size());
1167 
1168     // fix hole
1169     doFsck(conf, true); // detect dangling regions and remove those
1170 
1171     // check that hole fixed
1172     assertNoErrors(doFsck(conf,false));
1173     assertFalse("Table "+ table + " should have been deleted",
1174         TEST_UTIL.getHBaseAdmin().tableExists(table));
1175   }
1176 
1177   public void deleteTableDir(TableName table) throws IOException {
1178     Path rootDir = FSUtils.getRootDir(conf);
1179     FileSystem fs = rootDir.getFileSystem(conf);
1180     Path p = FSUtils.getTableDir(rootDir, table);
1181     HBaseFsck.debugLsr(conf, p);
1182     boolean success = fs.delete(p, true);
1183     LOG.info("Deleted " + p + " sucessfully? " + success);
1184   }
1185 
1186   /**
1187    * when the hbase.version file missing, It is fix the fault.
1188    */
1189   @Test
1190   public void testNoVersionFile() throws Exception {
1191     // delete the hbase.version file
1192     Path rootDir = FSUtils.getRootDir(conf);
1193     FileSystem fs = rootDir.getFileSystem(conf);
1194     Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1195     fs.delete(versionFile, true);
1196 
1197     // test
1198     HBaseFsck hbck = doFsck(conf, false);
1199     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1200     // fix hbase.version missing
1201     doFsck(conf, true);
1202 
1203     // no version file fixed
1204     assertNoErrors(doFsck(conf, false));
1205   }
1206 
1207   /**
1208    * The region is not deployed when the table is disabled.
1209    */
1210   @Test
1211   public void testRegionShouldNotBeDeployed() throws Exception {
1212     TableName table =
1213         TableName.valueOf("tableRegionShouldNotBeDeployed");
1214     try {
1215       LOG.info("Starting testRegionShouldNotBeDeployed.");
1216       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1217       assertTrue(cluster.waitForActiveAndReadyMaster());
1218 
1219 
1220       byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1221           Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1222       HTableDescriptor htdDisabled = new HTableDescriptor(table);
1223       htdDisabled.addFamily(new HColumnDescriptor(FAM));
1224 
1225       // Write the .tableinfo
1226       FSTableDescriptors fstd = new FSTableDescriptors(conf);
1227       fstd.createTableDescriptor(htdDisabled);
1228       List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
1229           TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
1230 
1231       // Let's just assign everything to first RS
1232       HRegionServer hrs = cluster.getRegionServer(0);
1233 
1234       // Create region files.
1235       TEST_UTIL.getHBaseAdmin().disableTable(table);
1236       TEST_UTIL.getHBaseAdmin().enableTable(table);
1237 
1238       // Disable the table and close its regions
1239       TEST_UTIL.getHBaseAdmin().disableTable(table);
1240       HRegionInfo region = disabledRegions.remove(0);
1241       byte[] regionName = region.getRegionName();
1242 
1243       // The region should not be assigned currently
1244       assertTrue(cluster.getServerWith(regionName) == -1);
1245 
1246       // Directly open a region on a region server.
1247       // If going through AM/ZK, the region won't be open.
1248       // Even it is opened, AM will close it which causes
1249       // flakiness of this test.
1250       HRegion r = HRegion.openHRegion(
1251         region, htdDisabled, hrs.getWAL(region), conf);
1252       hrs.addToOnlineRegions(r);
1253 
1254       HBaseFsck hbck = doFsck(conf, false);
1255       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1256 
1257       // fix this fault
1258       doFsck(conf, true);
1259 
1260       // check result
1261       assertNoErrors(doFsck(conf, false));
1262     } finally {
1263       TEST_UTIL.getHBaseAdmin().enableTable(table);
1264       deleteTable(table);
1265     }
1266   }
1267 
1268   /**
1269    * This creates two tables and mess both of them and fix them one by one
1270    */
1271   @Test
1272   public void testFixByTable() throws Exception {
1273     TableName table1 =
1274         TableName.valueOf("testFixByTable1");
1275     TableName table2 =
1276         TableName.valueOf("testFixByTable2");
1277     try {
1278       setupTable(table1);
1279       // make sure data in regions, if in hlog only there is no data loss
1280       TEST_UTIL.getHBaseAdmin().flush(table1.getName());
1281       // Mess them up by leaving a hole in the hdfs data
1282       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1283         Bytes.toBytes("C"), false, false, true); // don't rm meta
1284 
1285       setupTable(table2);
1286       // make sure data in regions, if in hlog only there is no data loss
1287       TEST_UTIL.getHBaseAdmin().flush(table2.getName());
1288       // Mess them up by leaving a hole in the hdfs data
1289       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1290         Bytes.toBytes("C"), false, false, true); // don't rm meta
1291 
1292       HBaseFsck hbck = doFsck(conf, false);
1293       assertErrors(hbck, new ERROR_CODE[] {
1294         ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1295 
1296       // fix hole in table 1
1297       doFsck(conf, true, table1);
1298       // check that hole in table 1 fixed
1299       assertNoErrors(doFsck(conf, false, table1));
1300       // check that hole in table 2 still there
1301       assertErrors(doFsck(conf, false, table2),
1302         new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1303 
1304       // fix hole in table 2
1305       doFsck(conf, true, table2);
1306       // check that hole in both tables fixed
1307       assertNoErrors(doFsck(conf, false));
1308       assertEquals(ROWKEYS.length - 2, countRows());
1309     } finally {
1310       deleteTable(table1);
1311       deleteTable(table2);
1312     }
1313   }
1314   /**
1315    * A split parent in meta, in hdfs, and not deployed
1316    */
1317   @Test
1318   public void testLingeringSplitParent() throws Exception {
1319     TableName table =
1320         TableName.valueOf("testLingeringSplitParent");
1321     HTable meta = null;
1322     try {
1323       setupTable(table);
1324       assertEquals(ROWKEYS.length, countRows());
1325 
1326       // make sure data in regions, if in hlog only there is no data loss
1327       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1328       HRegionLocation location = tbl.getRegionLocation("B");
1329 
1330       // Delete one region from meta, but not hdfs, unassign it.
1331       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1332         Bytes.toBytes("C"), true, true, false);
1333 
1334       // Create a new meta entry to fake it as a split parent.
1335       meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
1336       HRegionInfo hri = location.getRegionInfo();
1337 
1338       HRegionInfo a = new HRegionInfo(tbl.getName(),
1339         Bytes.toBytes("B"), Bytes.toBytes("BM"));
1340       HRegionInfo b = new HRegionInfo(tbl.getName(),
1341         Bytes.toBytes("BM"), Bytes.toBytes("C"));
1342 
1343       hri.setOffline(true);
1344       hri.setSplit(true);
1345 
1346       MetaEditor.addRegionToMeta(meta, hri, a, b);
1347       meta.flushCommits();
1348       TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1349 
1350       HBaseFsck hbck = doFsck(conf, false);
1351       assertErrors(hbck, new ERROR_CODE[] {
1352         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1353 
1354       // regular repair cannot fix lingering split parent
1355       hbck = doFsck(conf, true);
1356       assertErrors(hbck, new ERROR_CODE[] {
1357         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1358       assertFalse(hbck.shouldRerun());
1359       hbck = doFsck(conf, false);
1360       assertErrors(hbck, new ERROR_CODE[] {
1361         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1362 
1363       // fix lingering split parent
1364       hbck = new HBaseFsck(conf);
1365       hbck.connect();
1366       hbck.setDisplayFullReport(); // i.e. -details
1367       hbck.setTimeLag(0);
1368       hbck.setFixSplitParents(true);
1369       hbck.onlineHbck();
1370       assertTrue(hbck.shouldRerun());
1371 
1372       Get get = new Get(hri.getRegionName());
1373       Result result = meta.get(get);
1374       assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1375         HConstants.SPLITA_QUALIFIER).isEmpty());
1376       assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1377         HConstants.SPLITB_QUALIFIER).isEmpty());
1378       TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1379 
1380       // fix other issues
1381       doFsck(conf, true);
1382 
1383       // check that all are fixed
1384       assertNoErrors(doFsck(conf, false));
1385       assertEquals(ROWKEYS.length, countRows());
1386     } finally {
1387       deleteTable(table);
1388       IOUtils.closeQuietly(meta);
1389     }
1390   }
1391 
1392   /**
1393    * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
1394    * valid cases where the daughters are there.
1395    */
1396   @Test
1397   public void testValidLingeringSplitParent() throws Exception {
1398     TableName table =
1399         TableName.valueOf("testLingeringSplitParent");
1400     HTable meta = null;
1401     try {
1402       setupTable(table);
1403       assertEquals(ROWKEYS.length, countRows());
1404 
1405       // make sure data in regions, if in hlog only there is no data loss
1406       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1407       HRegionLocation location = tbl.getRegionLocation("B");
1408 
1409       meta = new HTable(conf, TableName.META_TABLE_NAME);
1410       HRegionInfo hri = location.getRegionInfo();
1411 
1412       // do a regular split
1413       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1414       byte[] regionName = location.getRegionInfo().getRegionName();
1415       admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1416       TestEndToEndSplitTransaction.blockUntilRegionSplit(
1417           TEST_UTIL.getConfiguration(), 60000, regionName, true);
1418 
1419       // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
1420       // for some time until children references are deleted. HBCK erroneously sees this as
1421       // overlapping regions
1422       HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, false, null);
1423       assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
1424 
1425       // assert that the split hbase:meta entry is still there.
1426       Get get = new Get(hri.getRegionName());
1427       Result result = meta.get(get);
1428       assertNotNull(result);
1429       assertNotNull(HRegionInfo.getHRegionInfo(result));
1430 
1431       assertEquals(ROWKEYS.length, countRows());
1432 
1433       // assert that we still have the split regions
1434       assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split.
1435       assertNoErrors(doFsck(conf, false));
1436     } finally {
1437       deleteTable(table);
1438       IOUtils.closeQuietly(meta);
1439     }
1440   }
1441 
1442   /**
1443    * Split crashed after write to hbase:meta finished for the parent region, but
1444    * failed to write daughters (pre HBASE-7721 codebase)
1445    */
1446   @Test(timeout=75000)
1447   public void testSplitDaughtersNotInMeta() throws Exception {
1448     TableName table =
1449         TableName.valueOf("testSplitdaughtersNotInMeta");
1450     HTable meta = null;
1451     try {
1452       setupTable(table);
1453       assertEquals(ROWKEYS.length, countRows());
1454 
1455       // make sure data in regions, if in hlog only there is no data loss
1456       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1457       HRegionLocation location = tbl.getRegionLocation("B");
1458 
1459       meta = new HTable(conf, TableName.META_TABLE_NAME);
1460       HRegionInfo hri = location.getRegionInfo();
1461 
1462       // do a regular split
1463       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1464       byte[] regionName = location.getRegionInfo().getRegionName();
1465       admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1466       TestEndToEndSplitTransaction.blockUntilRegionSplit(
1467           TEST_UTIL.getConfiguration(), 60000, regionName, true);
1468 
1469       PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(meta.get(new Get(regionName)));
1470 
1471       // Delete daughter regions from meta, but not hdfs, unassign it.
1472       Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1473       undeployRegion(admin, hris.get(daughters.getFirst()), daughters.getFirst());
1474       undeployRegion(admin, hris.get(daughters.getSecond()), daughters.getSecond());
1475 
1476       meta.delete(new Delete(daughters.getFirst().getRegionName()));
1477       meta.delete(new Delete(daughters.getSecond().getRegionName()));
1478       meta.flushCommits();
1479 
1480       HBaseFsck hbck = doFsck(conf, false);
1481       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1482           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN}); //no LINGERING_SPLIT_PARENT
1483 
1484       // now fix it. The fix should not revert the region split, but add daughters to META
1485       hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, false, null);
1486       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1487           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1488 
1489       // assert that the split hbase:meta entry is still there.
1490       Get get = new Get(hri.getRegionName());
1491       Result result = meta.get(get);
1492       assertNotNull(result);
1493       assertNotNull(HRegionInfo.getHRegionInfo(result));
1494 
1495       assertEquals(ROWKEYS.length, countRows());
1496 
1497       // assert that we still have the split regions
1498       assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split.
1499       assertNoErrors(doFsck(conf, false)); //should be fixed by now
1500     } finally {
1501       deleteTable(table);
1502       IOUtils.closeQuietly(meta);
1503     }
1504   }
1505 
1506   /**
1507    * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
1508    * meta and data missing in the fs.
1509    */
1510   @Test(timeout=120000)
1511   public void testMissingFirstRegion() throws Exception {
1512     TableName table =
1513         TableName.valueOf("testMissingFirstRegion");
1514     try {
1515       setupTable(table);
1516       assertEquals(ROWKEYS.length, countRows());
1517 
1518       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1519       TEST_UTIL.getHBaseAdmin().disableTable(table);
1520       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1521           true, true);
1522       TEST_UTIL.getHBaseAdmin().enableTable(table);
1523 
1524       HBaseFsck hbck = doFsck(conf, false);
1525       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1526       // fix hole
1527       doFsck(conf, true);
1528       // check that hole fixed
1529       assertNoErrors(doFsck(conf, false));
1530     } finally {
1531       deleteTable(table);
1532     }
1533   }
1534 
1535   /**
1536    * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
1537    * meta and data missing in the fs.
1538    */
1539   @Test(timeout=120000)
1540   public void testRegionDeployedNotInHdfs() throws Exception {
1541     TableName table =
1542         TableName.valueOf("testSingleRegionDeployedNotInHdfs");
1543     try {
1544       setupTable(table);
1545       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1546 
1547       // Mess it up by deleting region dir
1548       deleteRegion(conf, tbl.getTableDescriptor(),
1549         HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
1550         false, true);
1551 
1552       HBaseFsck hbck = doFsck(conf, false);
1553       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
1554       // fix hole
1555       doFsck(conf, true);
1556       // check that hole fixed
1557       assertNoErrors(doFsck(conf, false));
1558     } finally {
1559       deleteTable(table);
1560     }
1561   }
1562 
1563   /**
1564    * This creates and fixes a bad table with missing last region -- hole in meta and data missing in
1565    * the fs.
1566    */
1567   @Test(timeout=120000)
1568   public void testMissingLastRegion() throws Exception {
1569     TableName table =
1570         TableName.valueOf("testMissingLastRegion");
1571     try {
1572       setupTable(table);
1573       assertEquals(ROWKEYS.length, countRows());
1574 
1575       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1576       TEST_UTIL.getHBaseAdmin().disableTable(table);
1577       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1578           true, true);
1579       TEST_UTIL.getHBaseAdmin().enableTable(table);
1580 
1581       HBaseFsck hbck = doFsck(conf, false);
1582       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1583       // fix hole
1584       doFsck(conf, true);
1585       // check that hole fixed
1586       assertNoErrors(doFsck(conf, false));
1587     } finally {
1588       deleteTable(table);
1589     }
1590   }
1591 
1592   /**
1593    * Test -noHdfsChecking option can detect and fix assignments issue.
1594    */
1595   @Test
1596   public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1597     TableName table =
1598         TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1599     try {
1600       setupTable(table);
1601       assertEquals(ROWKEYS.length, countRows());
1602 
1603       // Mess it up by closing a region
1604       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1605         Bytes.toBytes("B"), true, false, false, false);
1606 
1607       // verify there is no other errors
1608       HBaseFsck hbck = doFsck(conf, false);
1609       assertErrors(hbck, new ERROR_CODE[] {
1610         ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1611 
1612       // verify that noHdfsChecking report the same errors
1613       HBaseFsck fsck = new HBaseFsck(conf);
1614       fsck.connect();
1615       fsck.setDisplayFullReport(); // i.e. -details
1616       fsck.setTimeLag(0);
1617       fsck.setCheckHdfs(false);
1618       fsck.onlineHbck();
1619       assertErrors(fsck, new ERROR_CODE[] {
1620         ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1621 
1622       // verify that fixAssignments works fine with noHdfsChecking
1623       fsck = new HBaseFsck(conf);
1624       fsck.connect();
1625       fsck.setDisplayFullReport(); // i.e. -details
1626       fsck.setTimeLag(0);
1627       fsck.setCheckHdfs(false);
1628       fsck.setFixAssignments(true);
1629       fsck.onlineHbck();
1630       assertTrue(fsck.shouldRerun());
1631       fsck.onlineHbck();
1632       assertNoErrors(fsck);
1633 
1634       assertEquals(ROWKEYS.length, countRows());
1635     } finally {
1636       deleteTable(table);
1637     }
1638   }
1639 
1640   /**
1641    * Test -noHdfsChecking option can detect region is not in meta but deployed.
1642    * However, it can not fix it without checking Hdfs because we need to get
1643    * the region info from Hdfs in this case, then to patch the meta.
1644    */
1645   @Test
1646   public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1647     TableName table =
1648         TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
1649     try {
1650       setupTable(table);
1651       assertEquals(ROWKEYS.length, countRows());
1652 
1653       // Mess it up by deleting a region from the metadata
1654       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1655         Bytes.toBytes("B"), false, true, false, false);
1656 
1657       // verify there is no other errors
1658       HBaseFsck hbck = doFsck(conf, false);
1659       assertErrors(hbck, new ERROR_CODE[] {
1660         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1661 
1662       // verify that noHdfsChecking report the same errors
1663       HBaseFsck fsck = new HBaseFsck(conf);
1664       fsck.connect();
1665       fsck.setDisplayFullReport(); // i.e. -details
1666       fsck.setTimeLag(0);
1667       fsck.setCheckHdfs(false);
1668       fsck.onlineHbck();
1669       assertErrors(fsck, new ERROR_CODE[] {
1670         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1671 
1672       // verify that fixMeta doesn't work with noHdfsChecking
1673       fsck = new HBaseFsck(conf);
1674       fsck.connect();
1675       fsck.setDisplayFullReport(); // i.e. -details
1676       fsck.setTimeLag(0);
1677       fsck.setCheckHdfs(false);
1678       fsck.setFixAssignments(true);
1679       fsck.setFixMeta(true);
1680       fsck.onlineHbck();
1681       assertFalse(fsck.shouldRerun());
1682       assertErrors(fsck, new ERROR_CODE[] {
1683         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1684 
1685       // fix the cluster so other tests won't be impacted
1686       fsck = doFsck(conf, true);
1687       assertTrue(fsck.shouldRerun());
1688       fsck = doFsck(conf, true);
1689       assertNoErrors(fsck);
1690     } finally {
1691       deleteTable(table);
1692     }
1693   }
1694 
1695   /**
1696    * Test -fixHdfsHoles doesn't work with -noHdfsChecking option,
1697    * and -noHdfsChecking can't detect orphan Hdfs region.
1698    */
1699   @Test
1700   public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1701     TableName table =
1702         TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
1703     try {
1704       setupTable(table);
1705       assertEquals(ROWKEYS.length, countRows());
1706 
1707       // Mess it up by creating an overlap in the metadata
1708       TEST_UTIL.getHBaseAdmin().disableTable(table);
1709       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1710         Bytes.toBytes("B"), true, true, false, true);
1711       TEST_UTIL.getHBaseAdmin().enableTable(table);
1712 
1713       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
1714         Bytes.toBytes("A2"), Bytes.toBytes("B"));
1715       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1716       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1717         .waitForAssignment(hriOverlap);
1718       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1719       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1720 
1721       HBaseFsck hbck = doFsck(conf, false);
1722       assertErrors(hbck, new ERROR_CODE[] {
1723         ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1724         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1725 
1726       // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
1727       HBaseFsck fsck = new HBaseFsck(conf);
1728       fsck.connect();
1729       fsck.setDisplayFullReport(); // i.e. -details
1730       fsck.setTimeLag(0);
1731       fsck.setCheckHdfs(false);
1732       fsck.onlineHbck();
1733       assertErrors(fsck, new ERROR_CODE[] {
1734         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1735 
1736       // verify that fixHdfsHoles doesn't work with noHdfsChecking
1737       fsck = new HBaseFsck(conf);
1738       fsck.connect();
1739       fsck.setDisplayFullReport(); // i.e. -details
1740       fsck.setTimeLag(0);
1741       fsck.setCheckHdfs(false);
1742       fsck.setFixHdfsHoles(true);
1743       fsck.setFixHdfsOverlaps(true);
1744       fsck.setFixHdfsOrphans(true);
1745       fsck.onlineHbck();
1746       assertFalse(fsck.shouldRerun());
1747       assertErrors(fsck, new ERROR_CODE[] {
1748         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1749     } finally {
1750       if (TEST_UTIL.getHBaseAdmin().isTableDisabled(table)) {
1751         TEST_UTIL.getHBaseAdmin().enableTable(table);
1752       }
1753       deleteTable(table);
1754     }
1755   }
1756 
1757   /**
1758    * We don't have an easy way to verify that a flush completed, so we loop until we find a
1759    * legitimate hfile and return it.
1760    * @param fs
1761    * @param table
1762    * @return Path of a flushed hfile.
1763    * @throws IOException
1764    */
1765   Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
1766     Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1767     Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1768     Path famDir = new Path(regionDir, FAM_STR);
1769 
1770     // keep doing this until we get a legit hfile
1771     while (true) {
1772       FileStatus[] hfFss = fs.listStatus(famDir);
1773       if (hfFss.length == 0) {
1774         continue;
1775       }
1776       for (FileStatus hfs : hfFss) {
1777         if (!hfs.isDir()) {
1778           return hfs.getPath();
1779         }
1780       }
1781     }
1782   }
1783 
1784   /**
1785    * This creates a table and then corrupts an hfile.  Hbck should quarantine the file.
1786    */
1787   @Test(timeout=180000)
1788   public void testQuarantineCorruptHFile() throws Exception {
1789     TableName table = TableName.valueOf(name.getMethodName());
1790     try {
1791       setupTable(table);
1792       assertEquals(ROWKEYS.length, countRows());
1793       TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.
1794 
1795       FileSystem fs = FileSystem.get(conf);
1796       Path hfile = getFlushedHFile(fs, table);
1797 
1798       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1799       TEST_UTIL.getHBaseAdmin().disableTable(table);
1800 
1801       // create new corrupt file called deadbeef (valid hfile name)
1802       Path corrupt = new Path(hfile.getParent(), "deadbeef");
1803       TestHFile.truncateFile(fs, hfile, corrupt);
1804       LOG.info("Created corrupted file " + corrupt);
1805       HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
1806 
1807       // we cannot enable here because enable never finished due to the corrupt region.
1808       HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
1809       assertEquals(res.getRetCode(), 0);
1810       HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1811       assertEquals(hfcc.getHFilesChecked(), 5);
1812       assertEquals(hfcc.getCorrupted().size(), 1);
1813       assertEquals(hfcc.getFailures().size(), 0);
1814       assertEquals(hfcc.getQuarantined().size(), 1);
1815       assertEquals(hfcc.getMissing().size(), 0);
1816 
1817       // Its been fixed, verify that we can enable.
1818       TEST_UTIL.getHBaseAdmin().enableTable(table);
1819     } finally {
1820       deleteTable(table);
1821     }
1822   }
1823 
1824   /**
1825   * Test that use this should have a timeout, because this method could potentially wait forever.
1826   */
1827   private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
1828                                 int corrupt, int fail, int quar, int missing) throws Exception {
1829     try {
1830       setupTable(table);
1831       assertEquals(ROWKEYS.length, countRows());
1832       TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.
1833 
1834       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1835       TEST_UTIL.getHBaseAdmin().disableTable(table);
1836 
1837       String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
1838           table.getNameAsString()};
1839       ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1840       HBaseFsck res = hbck.exec(exec, args);
1841 
1842       HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1843       assertEquals(hfcc.getHFilesChecked(), check);
1844       assertEquals(hfcc.getCorrupted().size(), corrupt);
1845       assertEquals(hfcc.getFailures().size(), fail);
1846       assertEquals(hfcc.getQuarantined().size(), quar);
1847       assertEquals(hfcc.getMissing().size(), missing);
1848 
1849       // its been fixed, verify that we can enable
1850       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1851       admin.enableTableAsync(table);
1852       while (!admin.isTableEnabled(table)) {
1853         try {
1854           Thread.sleep(250);
1855         } catch (InterruptedException e) {
1856           e.printStackTrace();
1857           fail("Interrupted when trying to enable table " + table);
1858         }
1859       }
1860     } finally {
1861       deleteTable(table);
1862     }
1863   }
1864 
1865   /**
1866    * This creates a table and simulates the race situation where a concurrent compaction or split
1867    * has removed an hfile after the corruption checker learned about it.
1868    */
1869   @Test(timeout=180000)
1870   public void testQuarantineMissingHFile() throws Exception {
1871     TableName table = TableName.valueOf(name.getMethodName());
1872     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1873     // inject a fault in the hfcc created.
1874     final FileSystem fs = FileSystem.get(conf);
1875     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1876       @Override
1877       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1878         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1879           AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1880           @Override
1881           protected void checkHFile(Path p) throws IOException {
1882             if (attemptedFirstHFile.compareAndSet(false, true)) {
1883               assertTrue(fs.delete(p, true)); // make sure delete happened.
1884             }
1885             super.checkHFile(p);
1886           }
1887         };
1888       }
1889     };
1890     doQuarantineTest(table, hbck, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing.
1891   }
1892 
1893   /**
1894    * This creates a table and simulates the race situation where a concurrent compaction or split
1895    * has removed an colfam dir before the corruption checker got to it.
1896    */
1897   // Disabled because fails sporadically.  Is this test right?  Timing-wise, there could be no
1898   // files in a column family on initial creation -- as suggested by Matteo.
1899   @Ignore @Test(timeout=180000)
1900   public void testQuarantineMissingFamdir() throws Exception {
1901     TableName table = TableName.valueOf(name.getMethodName());
1902     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1903     // inject a fault in the hfcc created.
1904     final FileSystem fs = FileSystem.get(conf);
1905     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1906       @Override
1907       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1908         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1909           AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1910           @Override
1911           protected void checkColFamDir(Path p) throws IOException {
1912             if (attemptedFirstHFile.compareAndSet(false, true)) {
1913               assertTrue(fs.delete(p, true)); // make sure delete happened.
1914             }
1915             super.checkColFamDir(p);
1916           }
1917         };
1918       }
1919     };
1920     doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1921   }
1922 
1923   /**
1924    * This creates a table and simulates the race situation where a concurrent compaction or split
1925    * has removed a region dir before the corruption checker got to it.
1926    */
1927   @Test(timeout=180000)
1928   public void testQuarantineMissingRegionDir() throws Exception {
1929     TableName table = TableName.valueOf(name.getMethodName());
1930     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1931     // inject a fault in the hfcc created.
1932     final FileSystem fs = FileSystem.get(conf);
1933     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1934       @Override
1935       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1936         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1937           AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1938           @Override
1939           protected void checkRegionDir(Path p) throws IOException {
1940             if (attemptedFirstHFile.compareAndSet(false, true)) {
1941               assertTrue(fs.delete(p, true)); // make sure delete happened.
1942             }
1943             super.checkRegionDir(p);
1944           }
1945         };
1946       }
1947     };
1948     doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1949   }
1950 
1951   /**
1952    * Test fixing lingering reference file.
1953    */
1954   @Test
1955   public void testLingeringReferenceFile() throws Exception {
1956     TableName table =
1957         TableName.valueOf("testLingeringReferenceFile");
1958     try {
1959       setupTable(table);
1960       assertEquals(ROWKEYS.length, countRows());
1961 
1962       // Mess it up by creating a fake reference file
1963       FileSystem fs = FileSystem.get(conf);
1964       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1965       Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1966       Path famDir = new Path(regionDir, FAM_STR);
1967       Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
1968       fs.create(fakeReferenceFile);
1969 
1970       HBaseFsck hbck = doFsck(conf, false);
1971       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
1972       // fix reference file
1973       doFsck(conf, true);
1974       // check that reference file fixed
1975       assertNoErrors(doFsck(conf, false));
1976     } finally {
1977       deleteTable(table);
1978     }
1979   }
1980 
1981   /**
1982    * Test mission REGIONINFO_QUALIFIER in hbase:meta
1983    */
1984   @Test
1985   public void testMissingRegionInfoQualifier() throws Exception {
1986     TableName table =
1987         TableName.valueOf("testMissingRegionInfoQualifier");
1988     try {
1989       setupTable(table);
1990 
1991       // Mess it up by removing the RegionInfo for one region.
1992       final List<Delete> deletes = new LinkedList<Delete>();
1993       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
1994       MetaScanner.metaScan(conf, new MetaScanner.MetaScannerVisitor() {
1995 
1996         @Override
1997         public boolean processRow(Result rowResult) throws IOException {
1998           HRegionInfo hri = MetaScanner.getHRegionInfo(rowResult);
1999           if (hri != null && !hri.getTable().isSystemTable()) {
2000             Delete delete = new Delete(rowResult.getRow());
2001             delete.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2002             deletes.add(delete);
2003           }
2004           return true;
2005         }
2006 
2007         @Override
2008         public void close() throws IOException {
2009         }
2010       });
2011       meta.delete(deletes);
2012 
2013       // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
2014       meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2015         HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
2016       meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2017         HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
2018       meta.close();
2019 
2020       HBaseFsck hbck = doFsck(conf, false);
2021       assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2022 
2023       // fix reference file
2024       hbck = doFsck(conf, true);
2025 
2026       // check that reference file fixed
2027       assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2028     } finally {
2029       deleteTable(table);
2030     }
2031   }
2032 
2033 
2034   /**
2035    * Test pluggable error reporter. It can be plugged in
2036    * from system property or configuration.
2037    */
2038   @Test
2039   public void testErrorReporter() throws Exception {
2040     try {
2041       MockErrorReporter.calledCount = 0;
2042       doFsck(conf, false);
2043       assertEquals(MockErrorReporter.calledCount, 0);
2044 
2045       conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
2046       doFsck(conf, false);
2047       assertTrue(MockErrorReporter.calledCount > 20);
2048     } finally {
2049       conf.set("hbasefsck.errorreporter",
2050         PrintingErrorReporter.class.getName());
2051       MockErrorReporter.calledCount = 0;
2052     }
2053   }
2054 
2055   static class MockErrorReporter implements ErrorReporter {
2056     static int calledCount = 0;
2057 
2058     @Override
2059     public void clear() {
2060       calledCount++;
2061     }
2062 
2063     @Override
2064     public void report(String message) {
2065       calledCount++;
2066     }
2067 
2068     @Override
2069     public void reportError(String message) {
2070       calledCount++;
2071     }
2072 
2073     @Override
2074     public void reportError(ERROR_CODE errorCode, String message) {
2075       calledCount++;
2076     }
2077 
2078     @Override
2079     public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2080       calledCount++;
2081     }
2082 
2083     @Override
2084     public void reportError(ERROR_CODE errorCode,
2085         String message, TableInfo table, HbckInfo info) {
2086       calledCount++;
2087     }
2088 
2089     @Override
2090     public void reportError(ERROR_CODE errorCode, String message,
2091         TableInfo table, HbckInfo info1, HbckInfo info2) {
2092       calledCount++;
2093     }
2094 
2095     @Override
2096     public int summarize() {
2097       return ++calledCount;
2098     }
2099 
2100     @Override
2101     public void detail(String details) {
2102       calledCount++;
2103     }
2104 
2105     @Override
2106     public ArrayList<ERROR_CODE> getErrorList() {
2107       calledCount++;
2108       return new ArrayList<ERROR_CODE>();
2109     }
2110 
2111     @Override
2112     public void progress() {
2113       calledCount++;
2114     }
2115 
2116     @Override
2117     public void print(String message) {
2118       calledCount++;
2119     }
2120 
2121     @Override
2122     public void resetErrors() {
2123       calledCount++;
2124     }
2125 
2126     @Override
2127     public boolean tableHasErrors(TableInfo table) {
2128       calledCount++;
2129       return false;
2130     }
2131   }
2132 
2133   @Test(timeout=60000)
2134   public void testCheckTableLocks() throws Exception {
2135     IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
2136     EnvironmentEdgeManager.injectEdge(edge);
2137     // check no errors
2138     HBaseFsck hbck = doFsck(conf, false);
2139     assertNoErrors(hbck);
2140 
2141     ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
2142 
2143     // obtain one lock
2144     final TableLockManager tableLockManager = TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
2145     TableLock writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2146         "testCheckTableLocks");
2147     writeLock.acquire();
2148     hbck = doFsck(conf, false);
2149     assertNoErrors(hbck); // should not have expired, no problems
2150 
2151     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2152         TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
2153 
2154     hbck = doFsck(conf, false);
2155     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2156 
2157     final CountDownLatch latch = new CountDownLatch(1);
2158     new Thread() {
2159       @Override
2160       public void run() {
2161         TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2162             "testCheckTableLocks");
2163         try {
2164           latch.countDown();
2165           readLock.acquire();
2166         } catch (IOException ex) {
2167           fail();
2168         } catch (IllegalStateException ex) {
2169           return; // expected, since this will be reaped under us.
2170         }
2171         fail("should not have come here");
2172       };
2173     }.start();
2174 
2175     latch.await(); // wait until thread starts
2176     Threads.sleep(300); // wait some more to ensure writeLock.acquire() is called
2177 
2178     hbck = doFsck(conf, false);
2179     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK}); // still one expired, one not-expired
2180 
2181     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2182         TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
2183 
2184     hbck = doFsck(conf, false);
2185     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK}); // both are expired
2186 
2187     conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1); // reaping from ZKInterProcessWriteLock uses znode cTime,
2188                                                                  // which is not injectable through EnvironmentEdge
2189     Threads.sleep(10);
2190     hbck = doFsck(conf, true); // now fix both cases
2191 
2192     hbck = doFsck(conf, false);
2193     assertNoErrors(hbck);
2194 
2195     // ensure that locks are deleted
2196     writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2197         "should acquire without blocking");
2198     writeLock.acquire(); // this should not block.
2199     writeLock.release(); // release for clean state
2200   }
2201 
2202   @Test
2203   public void testMetaOffline() throws Exception {
2204     // check no errors
2205     HBaseFsck hbck = doFsck(conf, false);
2206     assertNoErrors(hbck);
2207     deleteMetaRegion(conf, true, false, false);
2208     hbck = doFsck(conf, false);
2209     // ERROR_CODE.UNKNOWN is coming because we reportError with a message for the hbase:meta
2210     // inconsistency and whether we will be fixing it or not.
2211     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2212     hbck = doFsck(conf, true);
2213     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2214     hbck = doFsck(conf, false);
2215     assertNoErrors(hbck);
2216   }
2217 
2218   private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
2219       boolean regionInfoOnly) throws IOException, InterruptedException {
2220     HConnection connection = HConnectionManager.getConnection(conf);
2221     HRegionLocation metaLocation = connection.locateRegion(TableName.META_TABLE_NAME,
2222         HConstants.EMPTY_START_ROW);
2223     ServerName hsa = metaLocation.getServerName();
2224     HRegionInfo hri = metaLocation.getRegionInfo();
2225     if (unassign) {
2226       LOG.info("Undeploying meta region " + hri + " from server " + hsa);
2227       undeployRegion(new HBaseAdmin(conf), hsa, hri);
2228     }
2229 
2230     if (regionInfoOnly) {
2231       LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
2232       Path rootDir = FSUtils.getRootDir(conf);
2233       FileSystem fs = rootDir.getFileSystem(conf);
2234       Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2235           hri.getEncodedName());
2236       Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
2237       fs.delete(hriPath, true);
2238     }
2239 
2240     if (hdfs) {
2241       LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
2242       Path rootDir = FSUtils.getRootDir(conf);
2243       FileSystem fs = rootDir.getFileSystem(conf);
2244       Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2245           hri.getEncodedName());
2246       HBaseFsck.debugLsr(conf, p);
2247       boolean success = fs.delete(p, true);
2248       LOG.info("Deleted " + p + " sucessfully? " + success);
2249       HBaseFsck.debugLsr(conf, p);
2250     }
2251   }
2252 
2253   @Test
2254   public void testTableWithNoRegions() throws Exception {
2255     // We might end up with empty regions in a table
2256     // see also testNoHdfsTable()
2257     TableName table =
2258         TableName.valueOf(name.getMethodName());
2259     try {
2260       // create table with one region
2261       HTableDescriptor desc = new HTableDescriptor(table);
2262       HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
2263       desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
2264       TEST_UTIL.getHBaseAdmin().createTable(desc);
2265       tbl = new HTable(TEST_UTIL.getConfiguration(), table, executorService);
2266 
2267       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
2268       deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false,
2269           false, true);
2270 
2271       HBaseFsck hbck = doFsck(conf, false);
2272       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
2273 
2274       doFsck(conf, true);
2275 
2276       // fix hole
2277       doFsck(conf, true);
2278 
2279       // check that hole fixed
2280       assertNoErrors(doFsck(conf, false));
2281     } finally {
2282       deleteTable(table);
2283     }
2284 
2285   }
2286 
2287   @Test
2288   public void testHbckAfterRegionMerge() throws Exception {
2289     TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
2290     HTable meta = null;
2291     try {
2292       // disable CatalogJanitor
2293       TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
2294       setupTable(table);
2295       assertEquals(ROWKEYS.length, countRows());
2296 
2297       // make sure data in regions, if in hlog only there is no data loss
2298       TEST_UTIL.getHBaseAdmin().flush(table.getName());
2299       HRegionInfo region1 = tbl.getRegionLocation("A").getRegionInfo();
2300       HRegionInfo region2 = tbl.getRegionLocation("B").getRegionInfo();
2301 
2302       int regionCountBeforeMerge = tbl.getRegionLocations().size();
2303 
2304       assertNotEquals(region1, region2);
2305 
2306       // do a region merge
2307       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
2308       admin.mergeRegions(region1.getEncodedNameAsBytes(),
2309           region2.getEncodedNameAsBytes(), false);
2310 
2311       // wait until region merged
2312       long timeout = System.currentTimeMillis() + 30 * 1000;
2313       while (true) {
2314         if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
2315           break;
2316         } else if (System.currentTimeMillis() > timeout) {
2317           fail("Time out waiting on region " + region1.getEncodedName()
2318               + " and " + region2.getEncodedName() + " be merged");
2319         }
2320         Thread.sleep(10);
2321       }
2322 
2323       assertEquals(ROWKEYS.length, countRows());
2324 
2325       HBaseFsck hbck = doFsck(conf, false);
2326       assertNoErrors(hbck); // no errors
2327 
2328     } finally {
2329       TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
2330       deleteTable(table);
2331       IOUtils.closeQuietly(meta);
2332     }
2333   }
2334 
2335   @Test
2336   public void testRegionBoundariesCheck() throws Exception {
2337     HBaseFsck hbck = doFsck(conf, false);
2338     assertNoErrors(hbck); // no errors
2339     try {
2340       hbck.checkRegionBoundaries();
2341     } catch (IllegalArgumentException e) {
2342       if (e.getMessage().endsWith("not a valid DFS filename.")) {
2343         fail("Table directory path is not valid." + e.getMessage());
2344       }
2345     }
2346   }
2347 
  // JUnit rule exposing the name of the running test method; several tests in this class call
  // name.getMethodName() to derive a per-test table name.
  @org.junit.Rule
  public TestName name = new TestName();
2350 
2351   @Test
2352   public void testReadOnlyProperty() throws Exception {
2353     HBaseFsck hbck = doFsck(conf, false);
2354     Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2355       hbck.shouldIgnorePreCheckPermission());
2356 
2357     hbck = doFsck(conf, true);
2358     Assert.assertEquals("shouldIgnorePreCheckPermission", false,
2359       hbck.shouldIgnorePreCheckPermission());
2360 
2361     hbck = doFsck(conf, true);
2362     hbck.setIgnorePreCheckPermission(true);
2363     Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2364       hbck.shouldIgnorePreCheckPermission());
2365   }
2366 }