View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24  import static org.junit.Assert.assertEquals;
25  import static org.junit.Assert.assertFalse;
26  import static org.junit.Assert.assertNotEquals;
27  import static org.junit.Assert.assertNotNull;
28  import static org.junit.Assert.assertTrue;
29  import static org.junit.Assert.fail;
30  
31  import java.io.IOException;
32  import java.util.ArrayList;
33  import java.util.Collection;
34  import java.util.HashMap;
35  import java.util.LinkedList;
36  import java.util.List;
37  import java.util.Map;
38  import java.util.Map.Entry;
39  import java.util.concurrent.Callable;
40  import java.util.concurrent.CountDownLatch;
41  import java.util.concurrent.ExecutorService;
42  import java.util.concurrent.Executors;
43  import java.util.concurrent.Future;
44  import java.util.concurrent.ScheduledThreadPoolExecutor;
45  import java.util.concurrent.SynchronousQueue;
46  import java.util.concurrent.ThreadPoolExecutor;
47  import java.util.concurrent.TimeUnit;
48  import java.util.concurrent.atomic.AtomicBoolean;
49  
50  import org.apache.commons.io.IOUtils;
51  import org.apache.commons.logging.Log;
52  import org.apache.commons.logging.LogFactory;
53  import org.apache.hadoop.conf.Configuration;
54  import org.apache.hadoop.fs.FileStatus;
55  import org.apache.hadoop.fs.FileSystem;
56  import org.apache.hadoop.fs.Path;
57  import org.apache.hadoop.hbase.ClusterStatus;
58  import org.apache.hadoop.hbase.HBaseTestingUtility;
59  import org.apache.hadoop.hbase.HColumnDescriptor;
60  import org.apache.hadoop.hbase.HConstants;
61  import org.apache.hadoop.hbase.HRegionInfo;
62  import org.apache.hadoop.hbase.HRegionLocation;
63  import org.apache.hadoop.hbase.HTableDescriptor;
64  import org.apache.hadoop.hbase.LargeTests;
65  import org.apache.hadoop.hbase.MiniHBaseCluster;
66  import org.apache.hadoop.hbase.ServerName;
67  import org.apache.hadoop.hbase.TableName;
68  import org.apache.hadoop.hbase.catalog.MetaEditor;
69  import org.apache.hadoop.hbase.client.Delete;
70  import org.apache.hadoop.hbase.client.Durability;
71  import org.apache.hadoop.hbase.client.Get;
72  import org.apache.hadoop.hbase.client.HBaseAdmin;
73  import org.apache.hadoop.hbase.client.HConnection;
74  import org.apache.hadoop.hbase.client.HConnectionManager;
75  import org.apache.hadoop.hbase.client.HTable;
76  import org.apache.hadoop.hbase.client.MetaScanner;
77  import org.apache.hadoop.hbase.client.Put;
78  import org.apache.hadoop.hbase.client.Result;
79  import org.apache.hadoop.hbase.client.ResultScanner;
80  import org.apache.hadoop.hbase.client.Scan;
81  import org.apache.hadoop.hbase.io.hfile.TestHFile;
82  import org.apache.hadoop.hbase.master.AssignmentManager;
83  import org.apache.hadoop.hbase.master.HMaster;
84  import org.apache.hadoop.hbase.master.RegionStates;
85  import org.apache.hadoop.hbase.master.TableLockManager;
86  import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
87  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
88  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
89  import org.apache.hadoop.hbase.regionserver.HRegion;
90  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
91  import org.apache.hadoop.hbase.regionserver.HRegionServer;
92  import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
93  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
94  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
95  import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
96  import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
97  import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
98  import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
99  import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
100 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
101 import org.apache.zookeeper.KeeperException;
102 import org.junit.AfterClass;
103 import org.junit.Assert;
104 import org.junit.BeforeClass;
105 import org.junit.Ignore;
106 import org.junit.Test;
107 import org.junit.experimental.categories.Category;
108 import org.junit.rules.TestName;
109 
110 import com.google.common.collect.Multimap;
111 
112 /**
113  * This tests HBaseFsck's ability to detect reasons for inconsistent tables.
114  */
115 @Category(LargeTests.class)
116 public class TestHBaseFsck {
117   final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
      // Shared testing utility; its mini cluster is started in setUpBeforeClass().
118   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
119   private final static Configuration conf = TEST_UTIL.getConfiguration();
      // Column family used when the tests in this class create tables.
120   private final static String FAM_STR = "fam";
121   private final static byte[] FAM = Bytes.toBytes(FAM_STR);
      // Timeout handed to TEST_UTIL.assertRegionOnServer(); presumably milliseconds -- TODO confirm.
122   private final static int REGION_ONLINE_TIMEOUT = 800;
      // Both are initialized once in setUpBeforeClass().
123   private static RegionStates regionStates;
124   private static ExecutorService executorService;
125 
126   // Table under test; assigned by setupTable() and read by countRows() and deleteRegion().
127   private HTable tbl;
      // Split points passed to createTable() in setupTable(): regions [,A) [A,B) [B,C) [C,).
128   private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
129     Bytes.toBytes("B"), Bytes.toBytes("C") };
130   // Two rows per region (given SPLITS above); written by setupTable().
131   private final static byte[][] ROWKEYS= new byte[][] {
132     Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
133     Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
134 
135   @SuppressWarnings("deprecation")
136   @BeforeClass
137   public static void setUpBeforeClass() throws Exception {
        // One-time fixture shared by every test in this class.
        // The handler counts are lowered to 2 before the cluster is started.
138     TEST_UTIL.getConfiguration().setInt("hbase.regionserver.handler.count", 2);
139     TEST_UTIL.getConfiguration().setInt("hbase.regionserver.metahandler.count", 2);
        // presumably 3 region servers (testSidelineOverlapRegion probes indexes 0..2) -- TODO confirm
140     TEST_UTIL.startMiniCluster(3);
141     TEST_UTIL.setHDFSClientRetry(0);
142 
        // Pool handed to the HTable constructors in this class: no queueing (SynchronousQueue),
        // unbounded thread count, 60 second keep-alive, threads from a daemon factory named "testhbck".
143     executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
144         new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
145 
146     AssignmentManager assignmentManager =
147       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
148     regionStates = assignmentManager.getRegionStates();
        // presumably switches the balancer off so regions stay where the tests put them -- TODO confirm
149     TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
150   }
151 
152   @AfterClass
153   public static void tearDownAfterClass() throws Exception {
        // Stops the mini cluster started in setUpBeforeClass().
        // NOTE(review): executorService is not shut down here; its threads come from a daemon factory.
154     TEST_UTIL.shutdownMiniCluster();
155   }
156 
157   @Test
158   public void testHBaseFsck() throws Exception {
        // Scenario: overwrite a region's server/startcode columns in hbase:meta so that they
        // name a different region server, then check that hbck reports the mismatch and that
        // a run in fix mode repairs it.
159     assertNoErrors(doFsck(conf, false));
160     String table = "tableBadMetaAssign";
161     TEST_UTIL.createTable(Bytes.toBytes(table), FAM);
162 
163     // We created 1 table, should be fine
164     assertNoErrors(doFsck(conf, false));
165 
166     // Now let's mess it up and change the assignment in hbase:meta to
167     // point to a different region server
168     HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
169     Scan scan = new Scan();
170     scan.setStartRow(Bytes.toBytes(table+",,"));
171     ResultScanner scanner = meta.getScanner(scan);
172     HRegionInfo hri = null;
173 
        // Only the first row at or after "<table>,," is read; it is treated as the table's region row.
174     Result res = scanner.next();
175     ServerName currServer =
176       ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
177           HConstants.SERVER_QUALIFIER));
178     long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
179         HConstants.STARTCODE_QUALIFIER));
180 
181     for (JVMClusterUtil.RegionServerThread rs :
182         TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
183 
184       ServerName sn = rs.getRegionServer().getServerName();
185 
186       // When we find a diff RS, change the assignment and break
187       if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
188           startCode != sn.getStartcode()) {
189         Put put = new Put(res.getRow());
190         put.setDurability(Durability.SKIP_WAL);
191         put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
192           Bytes.toBytes(sn.getHostAndPort()));
193         put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
194           Bytes.toBytes(sn.getStartcode()));
195         meta.put(put);
196         hri = HRegionInfo.getHRegionInfo(res);
197         break;
198       }
199     }
200 
201     // Try to fix the data
202     assertErrors(doFsck(conf, true), new ERROR_CODE[]{
203         ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
204 
        // hri stays null if no differing region server was found in the loop above.
205     TEST_UTIL.getHBaseCluster().getMaster()
206       .getAssignmentManager().waitForAssignment(hri);
207 
208     // Should be fixed now
209     assertNoErrors(doFsck(conf, false));
210 
211     // NOTE(review): opens the table and a scanner and closes both without reading a row; presumably a smoke check that the table is reachable after the fix -- confirm intent
212     HTable t = new HTable(conf, Bytes.toBytes(table), executorService);
213     ResultScanner s = t.getScanner(new Scan());
214     s.close();
215     t.close();
216 
        // NOTE(review): scanner and meta are closed only on the success path.
217     scanner.close();
218     meta.close();
219   }
220 
221   @Test(timeout=180000)
222   public void testFixAssignmentsWhenMETAinTransition() throws Exception {
        // Scenario: close the first meta region, mark it offline in the master's region states
        // and delete its location via MetaRegionTracker, then run hbck in fix mode.
223     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
224     HBaseAdmin admin = null;
225     try {
226       admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
227       admin.closeRegion(cluster.getServerHoldingMeta(),
228           HRegionInfo.FIRST_META_REGIONINFO);
229     } finally {
230       if (admin != null) {
231         admin.close();
232       }
233     }
234     regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
235     MetaRegionTracker.deleteMetaLocation(cluster.getMaster().getZooKeeper());
236     assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
        // The fixing run must report exactly these codes ...
237     HBaseFsck hbck = doFsck(conf, true);
238     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
239         ERROR_CODE.NULL_META_REGION });
        // ... and a following check-only run must come back clean.
240     assertNoErrors(doFsck(conf, false));
241   }
242 
243   /**
244    * Create a new region in META.
       *
       * Only the hbase:meta row is written through MetaEditor.addRegionToMeta(); callers
       * in this class assign the region themselves afterwards.
       *
       * @param conf configuration used to open the hbase:meta table
       * @param htd descriptor of the table the new region belongs to
       * @param startKey start key of the new region
       * @param endKey end key of the new region
       * @return the region info that was added to hbase:meta
       * @throws IOException if hbase:meta cannot be opened, written or closed
245    */
246   private HRegionInfo createRegion(Configuration conf, final HTableDescriptor
247       htd, byte[] startKey, byte[] endKey)
248       throws IOException {
249     HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
250     HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
        try {
251       MetaEditor.addRegionToMeta(meta, hri);
        } finally {
          // Close the meta handle even when the insert fails.
252       meta.close();
        }
253     return hri;
254   }
255 
256   /**
257    * Debugging aid: logs, at INFO, each row returned by TEST_UTIL.getMetaTableRows() for the table.
258    */
259   private void dumpMeta(TableName tableName) throws IOException {
260     for (byte[] metaRowKey : TEST_UTIL.getMetaTableRows(tableName)) {
261       String printable = Bytes.toString(metaRowKey);
262       LOG.info(printable);
263     }
264   }
265 
266   /**
267    * This method is used to undeploy a region -- close it and attempt to
268    * remove its state from the Master.
       *
       * Best effort: an IOException from either step is logged at WARN and not rethrown.
       *
       * @param admin admin used to close and offline the region; not closed by this method
       * @param sn server the region is closed on
       * @param hri region to undeploy
269    */
270   private void undeployRegion(HBaseAdmin admin, ServerName sn,
271       HRegionInfo hri) throws IOException, InterruptedException {
272     try {
273       HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
          // Meta regions are only closed, never offlined through the admin.
274       if (!hri.isMetaTable()) {
275         admin.offline(hri.getRegionName());
276       }
277     } catch (IOException ioe) {
278       LOG.warn("Got exception when attempting to offline region "
279           + Bytes.toString(hri.getRegionName()), ioe);
280     }
281   }
282   /**
283    * Delete a region from assignments, meta, or completely from hdfs.
       * Convenience overload that delegates to the eight-argument form with
       * regionInfoOnly set to false.
       *
       * @param conf configuration passed through to the eight-argument overload
       * @param htd descriptor of the table owning the region
       * @param startKey start key of the region to delete
       * @param endKey end key of the region to delete
284    * @param unassign if true unassign region if assigned
285    * @param metaRow  if true remove region's row from META
286    * @param hdfs if true remove region's dir in HDFS
287    */
288   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
289       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
290       boolean hdfs) throws IOException, InterruptedException {
291     deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
292   }
293 
294   /**
295    * Delete a region from assignments, meta, or completely from hdfs.
296    * @param unassign if true unassign region if assigned
297    * @param metaRow  if true remove region's row from META
298    * @param hdfs if true remove region's dir in HDFS
299    * @param regionInfoOnly if true remove a region dir's .regioninfo file
300    */
301   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
302       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
303       boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
304     LOG.info("** Before delete:");
305     dumpMeta(htd.getTableName());
306 
307     Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
308     for (Entry<HRegionInfo, ServerName> e: hris.entrySet()) {
309       HRegionInfo hri = e.getKey();
310       ServerName hsa = e.getValue();
311       if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
312           && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
313 
314         LOG.info("RegionName: " +hri.getRegionNameAsString());
315         byte[] deleteRow = hri.getRegionName();
316 
317         if (unassign) {
318           LOG.info("Undeploying region " + hri + " from server " + hsa);
319           undeployRegion(new HBaseAdmin(conf), hsa, hri);
320         }
321 
322         if (regionInfoOnly) {
323           LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
324           Path rootDir = FSUtils.getRootDir(conf);
325           FileSystem fs = rootDir.getFileSystem(conf);
326           Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
327               hri.getEncodedName());
328           Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
329           fs.delete(hriPath, true);
330         }
331 
332         if (hdfs) {
333           LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
334           Path rootDir = FSUtils.getRootDir(conf);
335           FileSystem fs = rootDir.getFileSystem(conf);
336           Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
337               hri.getEncodedName());
338           HBaseFsck.debugLsr(conf, p);
339           boolean success = fs.delete(p, true);
340           LOG.info("Deleted " + p + " sucessfully? " + success);
341           HBaseFsck.debugLsr(conf, p);
342         }
343 
344         if (metaRow) {
345           HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
346           Delete delete = new Delete(deleteRow);
347           meta.delete(delete);
348         }
349       }
350       LOG.info(hri.toString() + hsa.toString());
351     }
352 
353     TEST_UTIL.getMetaTableRows(htd.getTableName());
354     LOG.info("*** After delete:");
355     dumpMeta(htd.getTableName());
356   }
357 
358   /**
359    * Setup a clean table before we start mucking with it.
360    *
361    * Creates the table pre-split at SPLITS with the single family FAM, writes one
362    * cell per entry of ROWKEYS, flushes, and stores the handle in the tbl field.
363    *
       * @param tablename name of the table to create
       * @return the HTable that was also assigned to the tbl field
       * @throws Exception if creating the table or writing the rows fails
364    */
365   HTable setupTable(TableName tablename) throws Exception {
366     HTableDescriptor desc = new HTableDescriptor(tablename);
367     HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
368     desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
369     TEST_UTIL.getHBaseAdmin().createTable(desc, SPLITS);
370     tbl = new HTable(TEST_UTIL.getConfiguration(), tablename, executorService);
371 
        // Each row stores its own key as the value of qualifier "val".
372     List<Put> puts = new ArrayList<Put>();
373     for (byte[] row : ROWKEYS) {
374       Put p = new Put(row);
375       p.add(FAM, Bytes.toBytes("val"), row);
376       puts.add(p);
377     }
378     tbl.put(puts);
379     tbl.flushCommits();
380     return tbl;
381   }
382 
383   /**
384    * Counts the number of rows in the tbl field's table, to verify data loss or non-dataloss.
       *
       * @return number of rows returned by a full scan of tbl
       * @throws IOException if the scan fails
385    */
386   int countRows() throws IOException {
387     Scan s = new Scan();
388     ResultScanner rs = tbl.getScanner(s);
        try {
389       int i = 0;
390       while (rs.next() != null) {
391         i++;
392       }
393       return i;
        } finally {
          // Close the scanner on every path, including a failed scan.
          rs.close();
        }
394   }
395 
396   /**
397    * delete table in preparation for next test
398    *
       * Requests an asynchronous disable if the table is enabled, polls every 250 ms for
       * up to 30 seconds until it is disabled, then deletes it. The test is failed if the
       * wait times out or is interrupted.
       *
399    * @param tablename table to disable and delete
400    * @throws IOException if an admin call fails
401    */
402   void deleteTable(TableName tablename) throws IOException {
403     HBaseAdmin admin = new HBaseAdmin(conf);
        try {
404       admin.getConnection().clearRegionCache();
405       if (admin.isTableEnabled(tablename)) {
406         admin.disableTableAsync(tablename);
407       }
408       long totalWait = 0;
409       long maxWait = 30*1000;
410       long sleepTime = 250;
411       while (!admin.isTableDisabled(tablename)) {
412         try {
413           Thread.sleep(sleepTime);
414           totalWait += sleepTime;
415           if (totalWait >= maxWait) {
416             fail("Waited too long for table to be disabled + " + tablename);
417           }
418         } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller and log it instead of printing to stderr.
                Thread.currentThread().interrupt();
419           LOG.warn("Interrupted while waiting for table " + tablename + " to be disabled", e);
420           fail("Interrupted when trying to disable table " + tablename);
421         }
422       }
423       admin.deleteTable(tablename);
        } finally {
          // Always close the admin created for this call.
          admin.close();
        }
424   }
425 
426   /**
427    * This creates a clean table and confirms that the table is clean.
       * Clean here means: hbck reports no errors, finds no overlap groups for the table,
       * and a scan still returns every row written by setupTable().
428    */
429   @Test
430   public void testHBaseFsckClean() throws Exception {
431     assertNoErrors(doFsck(conf, false));
432     TableName table = TableName.valueOf("tableClean");
433     try {
434       HBaseFsck hbck = doFsck(conf, false);
435       assertNoErrors(hbck);
436 
437       setupTable(table);
438       assertEquals(ROWKEYS.length, countRows());
439 
440       // We created 1 table, should be fine
441       hbck = doFsck(conf, false);
442       assertNoErrors(hbck);
443       assertEquals(0, hbck.getOverlapGroups(table).size());
444       assertEquals(ROWKEYS.length, countRows());
445     } finally {
446       deleteTable(table);
447     }
448   }
449 
450   /**
451    * Test thread pooling in the case where there are more regions than threads
       * (four regions from setupTable() against "hbasefsck.numthreads" set to 1).
452    */
453   @Test
454   public void testHbckThreadpooling() throws Exception {
        // NOTE(review): reuses the table name "tableDupeStartKey" that testDupeStartKey also uses.
455     TableName table =
456         TableName.valueOf("tableDupeStartKey");
457     try {
458       // Create table with 4 regions
459       setupTable(table);
460 
461       // limit number of threads to 1.
          // A copy of the configuration is modified so the shared conf is left untouched.
462       Configuration newconf = new Configuration(conf);
463       newconf.setInt("hbasefsck.numthreads", 1);
464       assertNoErrors(doFsck(newconf, false));
465 
466       // We should pass without triggering a RejectedExecutionException
467     } finally {
468       deleteTable(table);
469     }
470   }
471 
472   @Test
473   public void testHbckFixOrphanTable() throws Exception {
        // Scenario: move the table's tableinfo file away so hbck sees an orphan table, let
        // hbck recreate it, then delete it again after modifying the descriptor and check
        // that the second repair keeps the modified value.
474     TableName table = TableName.valueOf("tableInfo");
475     FileSystem fs = null;
476     Path tableinfo = null;
477     try {
478       setupTable(table);
479       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
480 
481       Path hbaseTableDir = FSUtils.getTableDir(
482           FSUtils.getRootDir(conf), table);
483       fs = hbaseTableDir.getFileSystem(conf);
484       FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
485       tableinfo = status.getPath();
486       fs.rename(tableinfo, new Path("/.tableinfo"));
487 
488       //to report error if .tableinfo is missing.
489       HBaseFsck hbck = doFsck(conf, false);
490       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
491 
492       // fix OrphanTable with default .tableinfo (htd not yet cached on master)
493       hbck = doFsck(conf, true);
494       assertNoErrors(hbck);
496       status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
497       assertNotNull(status);
498 
          // Mark the descriptor with a non-default value, then delete the tableinfo file again.
499       HTableDescriptor htd = admin.getTableDescriptor(table);
500       htd.setValue("NOT_DEFAULT", "true");
501       admin.disableTable(table);
502       admin.modifyTable(table, htd);
503       admin.enableTable(table);
504       fs.delete(status.getPath(), true);
505 
506       // fix OrphanTable with cache
507       htd = admin.getTableDescriptor(table); // warms up cached htd on master
508       hbck = doFsck(conf, true);
509       assertNoErrors(hbck);
511       status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
512       assertNotNull(status);
513       htd = admin.getTableDescriptor(table);
          // JUnit's argument order is (expected, actual).
514       assertEquals("true", htd.getValue("NOT_DEFAULT"));
515     } finally {
          // fs and tableinfo are still null when the test failed before locating the file;
          // skip the restore then so the original failure is not hidden by an NPE.
          if (fs != null && tableinfo != null) {
516         fs.rename(new Path("/.tableinfo"), tableinfo);
          }
517       deleteTable(table);
518     }
519   }
520 
521   /**
522    * This test makes sure that parallel instances of Hbck are disabled.
523    *
       * Two hbck runs are submitted to a two-thread pool. A run whose exception message
       * contains "Duplicate hbck" yields null; any other exception fails that task.
       *
524    * @throws Exception if either task fails or waiting for the tasks is interrupted
525    */
526   @Test
527   public void testParallelHbck() throws Exception {
528     final ExecutorService service;
529     final Future<HBaseFsck> hbck1,hbck2;
530 
531     class RunHbck implements Callable<HBaseFsck>{
532       boolean fail = true;
533       @Override
534       public HBaseFsck call(){
535         try{
536           return doFsck(conf, false);
537         } catch(Exception e){
              // getMessage() may be null; treat that as an unexpected failure rather than an NPE.
              String message = e.getMessage();
538           if (message != null && message.contains("Duplicate hbck")) {
539             fail = false;
540           } else {
541             LOG.fatal("hbck failed.", e);
542           }
543         }
544         // If we reach here, then an exception was caught
545         if (fail) fail();
546         return null;
547       }
548     }
549     service = Executors.newFixedThreadPool(2);
550     hbck1 = service.submit(new RunHbck());
551     hbck2 = service.submit(new RunHbck());
552     service.shutdown();
553     //wait for 15 seconds, for both hbck calls finish
554     service.awaitTermination(15, TimeUnit.SECONDS);
555     HBaseFsck h1 = hbck1.get();
556     HBaseFsck h2 = hbck2.get();
557     // Make sure only one of the calls was successful
        // JUnit assertions are used because the Java assert keyword is skipped without -ea.
558     assertTrue(h1 == null || h2 == null);
559     if (h1 != null) {
560       assertTrue(h1.getRetCode() >= 0);
561     }
562     if (h2 != null) {
563       assertTrue(h2.getRetCode() >= 0);
564     }
565   }
566 
567   /**
568    * This creates and fixes a bad table with regions that have a duplicate
569    * start key: an extra region [A,A2) is added next to the existing region starting at A.
570    */
571   @Test
572   public void testDupeStartKey() throws Exception {
573     TableName table =
574         TableName.valueOf("tableDupeStartKey");
575     try {
576       setupTable(table);
577       assertNoErrors(doFsck(conf, false));
578       assertEquals(ROWKEYS.length, countRows());
579 
580       // Now let's mess it up, by adding a region with a duplicate startkey
581       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
582           Bytes.toBytes("A"), Bytes.toBytes("A2"));
583       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
584       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
585           .waitForAssignment(hriDupe);
586       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
587       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
588 
          // DUPE_STARTKEYS is expected twice.
589       HBaseFsck hbck = doFsck(conf, false);
590       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
591             ERROR_CODE.DUPE_STARTKEYS});
592       assertEquals(2, hbck.getOverlapGroups(table).size());
593       assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
594 
595       // fix the region with the duplicate start key.
596       doFsck(conf,true);
597 
598       // check that the duplicate region is gone and no data loss
599       HBaseFsck hbck2 = doFsck(conf,false);
600       assertNoErrors(hbck2);
601       assertEquals(0, hbck2.getOverlapGroups(table).size());
602       assertEquals(ROWKEYS.length, countRows());
603     } finally {
604       deleteTable(table);
605     }
606   }
607 
608   /**
609    * Get region info from local cluster.
       *
       * Asks every server listed in the cluster status for its online regions.
       *
       * @param admin admin whose cluster status and connection are used
       * @return map from each server to the names (as strings) of its online regions
       * @throws IOException if the cluster status or a server's region list cannot be fetched
610    */
611   Map<ServerName, List<String>> getDeployedHRIs(
612       final HBaseAdmin admin) throws IOException {
613     ClusterStatus status = admin.getClusterStatus();
614     Collection<ServerName> regionServers = status.getServers();
615     Map<ServerName, List<String>> mm =
616         new HashMap<ServerName, List<String>>();
617     HConnection connection = admin.getConnection();
618     for (ServerName hsi : regionServers) {
619       AdminProtos.AdminService.BlockingInterface server = connection.getAdmin(hsi);
620 
621       // list all online regions from this region server
622       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
623       List<String> regionNames = new ArrayList<String>();
624       for (HRegionInfo hri : regions) {
625         regionNames.add(hri.getRegionNameAsString());
626       }
627       mm.put(hsi, regionNames);
628     }
629     return mm;
630   }
631 
632   /**
633    * Finds a server in mm whose region-name list contains hri's region name; null when none does.
634    */
635   ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
636     for (ServerName candidate : mm.keySet()) {
637       if (mm.get(candidate).contains(hri.getRegionNameAsString())) {
638         return candidate;
639       }
640     }
641     return null;
642   }
643 
644   /**
645    * This creates and fixes a bad table that contains a second region with the same
646    * start and end keys ([A,B)) as one of the regions created by setupTable().
647    */
648   @Test
649   public void testDupeRegion() throws Exception {
650     TableName table =
651         TableName.valueOf("tableDupeRegion");
652     try {
653       setupTable(table);
654       assertNoErrors(doFsck(conf, false));
655       assertEquals(ROWKEYS.length, countRows());
656 
657       // Now let's mess it up, by adding a region with the same start and end keys
658       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
659           Bytes.toBytes("A"), Bytes.toBytes("B"));
660 
661       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
662       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
663           .waitForAssignment(hriDupe);
664       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
665       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
666 
667       // Yikes! The assignment manager can't tell between diff between two
668       // different regions with the same start/endkeys since it doesn't
669       // differentiate on ts/regionId!  We actually need to recheck
670       // deployments!
          // NOTE(review): this poll has no upper bound and the test declares no timeout.
671       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
672       while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
673         Thread.sleep(250);
674       }
675 
676       LOG.debug("Finished assignment of dupe region");
677 
678       // TODO why is dupe region different from dupe start keys?
679       HBaseFsck hbck = doFsck(conf, false);
680       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
681             ERROR_CODE.DUPE_STARTKEYS});
682       assertEquals(2, hbck.getOverlapGroups(table).size());
683       assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
684 
685       // fix the duplicate region.
686       doFsck(conf,true);
687 
688       // check that the duplicate region is gone and no data loss
689       HBaseFsck hbck2 = doFsck(conf,false);
690       assertNoErrors(hbck2);
691       assertEquals(0, hbck2.getOverlapGroups(table).size());
692       assertEquals(ROWKEYS.length, countRows());
693     } finally {
694       deleteTable(table);
695     }
696   }
697 
698   /**
699    * This creates and fixes a bad table with a region that has startkey == endkey
       * (both "B" here).
700    */
701   @Test
702   public void testDegenerateRegions() throws Exception {
703     TableName table =
704         TableName.valueOf("tableDegenerateRegions");
705     try {
706       setupTable(table);
707       assertNoErrors(doFsck(conf,false));
708       assertEquals(ROWKEYS.length, countRows());
709 
710       // Now let's mess it up, by adding a region whose start key equals its end key
711       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
712           Bytes.toBytes("B"), Bytes.toBytes("B"));
713       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
714       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
715           .waitForAssignment(hriDupe);
716       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
717       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
718 
          // Besides DEGENERATE_REGION, DUPE_STARTKEYS is expected twice.
719       HBaseFsck hbck = doFsck(conf,false);
720       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION,
721           ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS});
722       assertEquals(2, hbck.getOverlapGroups(table).size());
723       assertEquals(ROWKEYS.length, countRows());
724 
725       // fix the degenerate region.
726       doFsck(conf,true);
727 
728       // check that the degenerate region is gone and no data loss
729       HBaseFsck hbck2 = doFsck(conf,false);
730       assertNoErrors(hbck2);
731       assertEquals(0, hbck2.getOverlapGroups(table).size());
732       assertEquals(ROWKEYS.length, countRows());
733     } finally {
734       deleteTable(table);
735     }
736   }
737 
738   /**
739    * This creates and fixes a bad table where a region is completely contained
740    * by another region: [A2,B) is added inside the [A,B) range created by setupTable().
741    */
742   @Test
743   public void testContainedRegionOverlap() throws Exception {
744     TableName table =
745         TableName.valueOf("tableContainedRegionOverlap");
746     try {
747       setupTable(table);
748       assertEquals(ROWKEYS.length, countRows());
749 
750       // Mess it up by creating an overlap in the metadata
751       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
752           Bytes.toBytes("A2"), Bytes.toBytes("B"));
753       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
754       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
755           .waitForAssignment(hriOverlap);
756       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
757       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
758 
          // A single OVERLAP_IN_REGION_CHAIN error and two overlap group entries are expected.
759       HBaseFsck hbck = doFsck(conf, false);
760       assertErrors(hbck, new ERROR_CODE[] {
761           ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
762       assertEquals(2, hbck.getOverlapGroups(table).size());
763       assertEquals(ROWKEYS.length, countRows());
764 
765       // fix the problem.
766       doFsck(conf, true);
767 
768       // verify that overlaps are fixed
769       HBaseFsck hbck2 = doFsck(conf,false);
770       assertNoErrors(hbck2);
771       assertEquals(0, hbck2.getOverlapGroups(table).size());
772       assertEquals(ROWKEYS.length, countRows());
773     } finally {
774        deleteTable(table);
775     }
776   }
777 
778   /**
779    * This creates and fixes a bad table where an overlap group of
780    * 3 regions. Set HBaseFsck.maxMerge to 2 to trigger sideline overlapped
781    * region. Mess around the meta data so that closeRegion/offlineRegion
782    * throws exceptions.
783    */
784   @Test
785   public void testSidelineOverlapRegion() throws Exception {
786     TableName table =
787         TableName.valueOf("testSidelineOverlapRegion");
788     try {
789       setupTable(table);
790       assertEquals(ROWKEYS.length, countRows());
791 
792       // Mess it up by creating an overlap
793       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
794       HMaster master = cluster.getMaster();
795       HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(),
796         Bytes.toBytes("A"), Bytes.toBytes("AB"));
797       master.assignRegion(hriOverlap1);
798       master.getAssignmentManager().waitForAssignment(hriOverlap1);
799       HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(),
800         Bytes.toBytes("AB"), Bytes.toBytes("B"));
801       master.assignRegion(hriOverlap2);
802       master.getAssignmentManager().waitForAssignment(hriOverlap2);
803 
804       HBaseFsck hbck = doFsck(conf, false);
805       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
806         ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
807       assertEquals(3, hbck.getOverlapGroups(table).size());
808       assertEquals(ROWKEYS.length, countRows());
809 
810       // mess around the overlapped regions, to trigger NotServingRegionException
811       Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
812       ServerName serverName = null;
813       byte[] regionName = null;
814       for (HbckInfo hbi: overlapGroups.values()) {
815         if ("A".equals(Bytes.toString(hbi.getStartKey()))
816             && "B".equals(Bytes.toString(hbi.getEndKey()))) {
817           regionName = hbi.getRegionName();
818 
819           // get an RS not serving the region to force bad assignment info in to META.
820           int k = cluster.getServerWith(regionName);
821           for (int i = 0; i < 3; i++) {
822             if (i != k) {
823               HRegionServer rs = cluster.getRegionServer(i);
824               serverName = rs.getServerName();
825               break;
826             }
827           }
828 
829           HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
830           HBaseFsckRepair.closeRegionSilentlyAndWait(admin,
831             cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
832           admin.offline(regionName);
833           break;
834         }
835       }
836 
837       assertNotNull(regionName);
838       assertNotNull(serverName);
839       HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
840       Put put = new Put(regionName);
841       put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
842         Bytes.toBytes(serverName.getHostAndPort()));
843       meta.put(put);
844 
845       // fix the problem.
846       HBaseFsck fsck = new HBaseFsck(conf);
847       fsck.connect();
848       fsck.setDisplayFullReport(); // i.e. -details
849       fsck.setTimeLag(0);
850       fsck.setFixAssignments(true);
851       fsck.setFixMeta(true);
852       fsck.setFixHdfsHoles(true);
853       fsck.setFixHdfsOverlaps(true);
854       fsck.setFixHdfsOrphans(true);
855       fsck.setFixVersionFile(true);
856       fsck.setSidelineBigOverlaps(true);
857       fsck.setMaxMerge(2);
858       fsck.onlineHbck();
859 
860       // verify that overlaps are fixed, and there are less rows
861       // since one region is sidelined.
862       HBaseFsck hbck2 = doFsck(conf,false);
863       assertNoErrors(hbck2);
864       assertEquals(0, hbck2.getOverlapGroups(table).size());
865       assertTrue(ROWKEYS.length > countRows());
866     } finally {
867        deleteTable(table);
868     }
869   }
870 
871   /**
872    * This creates and fixes a bad table where a region is completely contained
873    * by another region, and there is a hole (sort of like a bad split).
874    */
875   @Test
876   public void testOverlapAndOrphan() throws Exception {
877     TableName table =
878         TableName.valueOf("tableOverlapAndOrphan");
879     try {
880       setupTable(table);
881       assertEquals(ROWKEYS.length, countRows());
882 
883       // Mess it up by creating an overlap in the metadata
884       TEST_UTIL.getHBaseAdmin().disableTable(table);
885       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
886           Bytes.toBytes("B"), true, true, false, true); // NOTE(review): same flags as testHDFSRegioninfoMissing; presumably removes only .regioninfo from hdfs - confirm
887       TEST_UTIL.getHBaseAdmin().enableTable(table);
888 
889       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
890           Bytes.toBytes("A2"), Bytes.toBytes("B"));
891       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
892       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
893           .waitForAssignment(hriOverlap); // block until the extra region is assigned
894       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
895       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
896 
897       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
898       assertErrors(hbck, new ERROR_CODE[] {
899           ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
900           ERROR_CODE.HOLE_IN_REGION_CHAIN});
901 
902       // fix the problem.
903       doFsck(conf, true);
904 
905       // verify that overlaps are fixed
906       HBaseFsck hbck2 = doFsck(conf,false);
907       assertNoErrors(hbck2);
908       assertEquals(0, hbck2.getOverlapGroups(table).size());
909       assertEquals(ROWKEYS.length, countRows()); // the repair must not lose data
910     } finally {
911        deleteTable(table);
912     }
913   }
914 
915   /**
916    * This creates and fixes a bad table where a region overlaps two regions --
917    * its start key is contained in one region and its end key is contained in
918    * yet another region. The extra region created here has keys "A2".."B2".
919    */
920   @Test
921   public void testCoveredStartKey() throws Exception {
922     TableName table =
923         TableName.valueOf("tableCoveredStartKey");
924     try {
925       setupTable(table);
926       assertEquals(ROWKEYS.length, countRows());
927 
928       // Mess it up by creating an overlap in the metadata
929       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
930           Bytes.toBytes("A2"), Bytes.toBytes("B2"));
931       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
932       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
933           .waitForAssignment(hriOverlap); // block until the extra region is assigned
934       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
935       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
936 
937       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
938       assertErrors(hbck, new ERROR_CODE[] {
939           ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
940           ERROR_CODE.OVERLAP_IN_REGION_CHAIN }); // the overlap error is expected twice
941       assertEquals(3, hbck.getOverlapGroups(table).size());
942       assertEquals(ROWKEYS.length, countRows());
943 
944       // fix the problem.
945       doFsck(conf, true);
946 
947       // verify that overlaps are fixed
948       HBaseFsck hbck2 = doFsck(conf, false);
949       assertErrors(hbck2, new ERROR_CODE[0]); // an empty expected-error list, i.e. no errors
950       assertEquals(0, hbck2.getOverlapGroups(table).size());
951       assertEquals(ROWKEYS.length, countRows()); // the repair must not lose data
952     } finally {
953       deleteTable(table);
954     }
955   }
956 
957   /**
958    * This creates and fixes a bad table with a missing region -- hole in meta
959    * and data missing in the fs. The data of the removed region is not recovered.
960    */
961   @Test
962   public void testRegionHole() throws Exception {
963     TableName table =
964         TableName.valueOf("tableRegionHole");
965     try {
966       setupTable(table);
967       assertEquals(ROWKEYS.length, countRows());
968 
969       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
970       TEST_UTIL.getHBaseAdmin().disableTable(table);
971       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
972           Bytes.toBytes("C"), true, true, true); // all three removal flags set
973       TEST_UTIL.getHBaseAdmin().enableTable(table);
974 
975       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
976       assertErrors(hbck, new ERROR_CODE[] {
977           ERROR_CODE.HOLE_IN_REGION_CHAIN});
978       // holes are separate from overlap groups
979       assertEquals(0, hbck.getOverlapGroups(table).size());
980 
981       // fix hole
982       doFsck(conf, true);
983 
984       // check that hole fixed
985       assertNoErrors(doFsck(conf,false));
986       assertEquals(ROWKEYS.length - 2 , countRows()); // lost a region, so the 2 rows it held are gone
987     } finally {
988       deleteTable(table);
989     }
990   }
991 
992   /**
993    * This creates and fixes a bad table with a missing region -- hole in meta
994    * and data present but .regioninfo missing (an orphan hdfs region) in the fs.
995    */
996   @Test
997   public void testHDFSRegioninfoMissing() throws Exception {
998     TableName table =
999         TableName.valueOf("tableHDFSRegioininfoMissing");
1000     try {
1001       setupTable(table);
1002       assertEquals(ROWKEYS.length, countRows());
1003 
1004       // Mess it up by leaving a hole in the meta data
1005       TEST_UTIL.getHBaseAdmin().disableTable(table);
1006       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1007           Bytes.toBytes("C"), true, true, false, true); // NOTE(review): last flag presumably removes only the .regioninfo file - confirm against deleteRegion
1008       TEST_UTIL.getHBaseAdmin().enableTable(table);
1009 
1010       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
1011       assertErrors(hbck, new ERROR_CODE[] {
1012           ERROR_CODE.ORPHAN_HDFS_REGION,
1013           ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1014           ERROR_CODE.HOLE_IN_REGION_CHAIN});
1015       // holes are separate from overlap groups
1016       assertEquals(0, hbck.getOverlapGroups(table).size());
1017 
1018       // fix hole
1019       doFsck(conf, true);
1020 
1021       // check that hole fixed
1022       assertNoErrors(doFsck(conf, false));
1023       assertEquals(ROWKEYS.length, countRows()); // region data was still in the fs, so no rows are lost
1024     } finally {
1025       deleteTable(table);
1026     }
1027   }
1028 
1029   /**
1030    * This creates and fixes a bad table with a region that is missing from meta and
1031    * not assigned to a region server. The region's data stays in the fs.
1032    */
1033   @Test
1034   public void testNotInMetaOrDeployedHole() throws Exception {
1035     TableName table =
1036         TableName.valueOf("tableNotInMetaOrDeployedHole");
1037     try {
1038       setupTable(table);
1039       assertEquals(ROWKEYS.length, countRows());
1040 
1041       // Mess it up by leaving a hole in the meta data
1042       TEST_UTIL.getHBaseAdmin().disableTable(table);
1043       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1044           Bytes.toBytes("C"), true, true, false); // don't rm from fs
1045       TEST_UTIL.getHBaseAdmin().enableTable(table);
1046 
1047       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
1048       assertErrors(hbck, new ERROR_CODE[] {
1049           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1050       // holes are separate from overlap groups
1051       assertEquals(0, hbck.getOverlapGroups(table).size());
1052 
1053       // fix hole; the fixing run itself still reports the errors it repairs
1054       assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1055           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1056 
1057       // check that hole fixed
1058       assertNoErrors(doFsck(conf,false));
1059       assertEquals(ROWKEYS.length, countRows()); // data was kept in the fs, so no rows are lost
1060     } finally {
1061       deleteTable(table);
1062     }
1063   }
1064 
1065   /**
1066    * This creates and fixes a bad table with a hole in meta.
1067    */
1068   @Test
1069   public void testNotInMetaHole() throws Exception {
1070     TableName table =
1071         TableName.valueOf("tableNotInMetaHole");
1072     try {
1073       setupTable(table);
1074       assertEquals(ROWKEYS.length, countRows());
1075 
1076       // Mess it up by leaving a hole in the meta data
1077       TEST_UTIL.getHBaseAdmin().disableTable(table);
1078       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1079           Bytes.toBytes("C"), false, true, false); // don't rm from fs (first flag is false here, unlike testNotInMetaOrDeployedHole)
1080       TEST_UTIL.getHBaseAdmin().enableTable(table);
1081 
1082       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
1083       assertErrors(hbck, new ERROR_CODE[] {
1084           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1085       // holes are separate from overlap groups
1086       assertEquals(0, hbck.getOverlapGroups(table).size());
1087 
1088       // fix hole; the fixing run itself still reports the errors it repairs
1089       assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1090           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1091 
1092       // check that hole fixed
1093       assertNoErrors(doFsck(conf,false));
1094       assertEquals(ROWKEYS.length, countRows()); // data was kept in the fs, so no rows are lost
1095     } finally {
1096       deleteTable(table);
1097     }
1098   }
1099 
1100   /**
1101    * This creates and fixes a bad table with a region that is in meta but has
1102    * no deployment or data in hdfs.
1103    */
1104   @Test
1105   public void testNotInHdfs() throws Exception {
1106     TableName table =
1107         TableName.valueOf("tableNotInHdfs");
1108     try {
1109       setupTable(table);
1110       assertEquals(ROWKEYS.length, countRows());
1111 
1112       // make sure data in regions, if in hlog only there is no data loss
1113       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1114 
1115       // Mess it up by leaving a hole in the hdfs data
1116       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1117           Bytes.toBytes("C"), false, false, true); // don't rm meta
1118 
1119       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
1120       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1121       // holes are separate from overlap groups
1122       assertEquals(0, hbck.getOverlapGroups(table).size());
1123 
1124       // fix hole
1125       doFsck(conf, true);
1126 
1127       // check that hole fixed
1128       assertNoErrors(doFsck(conf,false));
1129       assertEquals(ROWKEYS.length - 2, countRows()); // the 2 rows held by the removed region are gone
1130     } finally {
1131       deleteTable(table);
1132     }
1133   }
1134 
1135   /**
1136    * This creates entries in hbase:meta with no hdfs data.  This should cleanly
1137    * remove the table. There is no try/finally cleanup here: hbck itself is expected to drop the table.
1138    */
1139   @Test
1140   public void testNoHdfsTable() throws Exception {
1141     TableName table = TableName.valueOf("NoHdfsTable");
1142     setupTable(table);
1143     assertEquals(ROWKEYS.length, countRows());
1144 
1145     // make sure data in regions, if in hlog only there is no data loss
1146     TEST_UTIL.getHBaseAdmin().flush(table.getName());
1147 
1148     // Mess it up by deleting hdfs dirs of all four regions: ""..A, A..B, B..C, C..""
1149     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1150         Bytes.toBytes("A"), false, false, true); // don't rm meta
1151     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1152         Bytes.toBytes("B"), false, false, true); // don't rm meta
1153     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1154         Bytes.toBytes("C"), false, false, true); // don't rm meta
1155     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1156         Bytes.toBytes(""), false, false, true); // don't rm meta
1157 
1158     // also remove the table directory in hdfs
1159     deleteTableDir(table);
1160 
1161     HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
1162     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1163         ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1164         ERROR_CODE.NOT_IN_HDFS,}); // one NOT_IN_HDFS per deleted region
1165     // holes are separate from overlap groups
1166     assertEquals(0, hbck.getOverlapGroups(table).size());
1167 
1168     // fix hole
1169     doFsck(conf, true); // detect dangling regions and remove those
1170 
1171     // check that hole fixed
1172     assertNoErrors(doFsck(conf,false));
1173     assertFalse("Table "+ table + " should have been deleted",
1174         TEST_UTIL.getHBaseAdmin().tableExists(table));
1175   }
1176 
1177   public void deleteTableDir(TableName table) throws IOException { // recursively deletes the table's directory under the HBase root dir
1178     Path rootDir = FSUtils.getRootDir(conf);
1179     FileSystem fs = rootDir.getFileSystem(conf);
1180     Path p = FSUtils.getTableDir(rootDir, table);
1181     HBaseFsck.debugLsr(conf, p); // debug listing of the directory before it is removed
1182     boolean success = fs.delete(p, true); // true = recursive delete
1183     LOG.info("Deleted " + p + " sucessfully? " + success); // the outcome is only logged, never asserted
1184   }
1185 
1186   /**
1187    * When the hbase.version file is missing, hbck should report it and fix the fault.
1188    */
1189   @Test
1190   public void testNoVersionFile() throws Exception {
1191     // delete the hbase.version file
1192     Path rootDir = FSUtils.getRootDir(conf);
1193     FileSystem fs = rootDir.getFileSystem(conf);
1194     Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1195     fs.delete(versionFile, true);
1196 
1197     // test: a run without fixing must report the missing file
1198     HBaseFsck hbck = doFsck(conf, false);
1199     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1200     // fix hbase.version missing
1201     doFsck(conf, true);
1202 
1203     // no version file fixed
1204     assertNoErrors(doFsck(conf, false));
1205   }
1206 
1207   /**
1208    * A region must not be deployed while its table is disabled. This opens a region
1209    * of a disabled table directly on a region server and checks that hbck reports and fixes it.
1210    */
1211   @Test
1212   public void testRegionShouldNotBeDeployed() throws Exception {
1213     TableName table =
1214         TableName.valueOf("tableRegionShouldNotBeDeployed");
1215     try {
1216       LOG.info("Starting testRegionShouldNotBeDeployed.");
1217       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1218       assertTrue(cluster.waitForActiveAndReadyMaster());
1219 
1220 
1221       byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1222           Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1223       HTableDescriptor htdDisabled = new HTableDescriptor(table);
1224       htdDisabled.addFamily(new HColumnDescriptor(FAM));
1225 
1226       // Write the .tableinfo
1227       FSTableDescriptors fstd = new FSTableDescriptors(conf);
1228       fstd.createTableDescriptor(htdDisabled);
1229       List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
1230           TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
1231 
1232       // Let's just assign everything to first RS
1233       HRegionServer hrs = cluster.getRegionServer(0);
1234 
1235       // Create region files.
1236       TEST_UTIL.getHBaseAdmin().disableTable(table);
1237       TEST_UTIL.getHBaseAdmin().enableTable(table);
1238 
1239       // Disable the table and close its regions
1240       TEST_UTIL.getHBaseAdmin().disableTable(table);
1241       HRegionInfo region = disabledRegions.remove(0);
1242       byte[] regionName = region.getRegionName();
1243 
1244       // The region should not be assigned currently (-1 is asserted as the "no server" result)
1245       assertEquals(-1, cluster.getServerWith(regionName));
1246 
1247       // Directly open a region on a region server.
1248       // If going through AM/ZK, the region won't be open.
1249       // Even it is opened, AM will close it which causes
1250       // flakiness of this test.
1251       HRegion r = HRegion.openHRegion(
1252         region, htdDisabled, hrs.getWAL(region), conf);
1253       hrs.addToOnlineRegions(r);
1254 
1255       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
1256       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1257 
1258       // fix this fault
1259       doFsck(conf, true);
1260 
1261       // check result
1262       assertNoErrors(doFsck(conf, false));
1263     } finally {
1264       TEST_UTIL.getHBaseAdmin().enableTable(table); // re-enable the table (disabled above) before deleteTable
1265       deleteTable(table);
1266     }
1267   }
1267 
1268   /**
1269    * This creates two tables, messes up both of them and fixes them one by one.
1270    */
1271   @Test
1272   public void testFixByTable() throws Exception {
1273     TableName table1 =
1274         TableName.valueOf("testFixByTable1");
1275     TableName table2 =
1276         TableName.valueOf("testFixByTable2");
1277     try {
1278       setupTable(table1);
1279       // make sure data in regions, if in hlog only there is no data loss
1280       TEST_UTIL.getHBaseAdmin().flush(table1.getName());
1281       // Mess them up by leaving a hole in the hdfs data
1282       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1283         Bytes.toBytes("C"), false, false, true); // don't rm meta
1284 
1285       setupTable(table2); // NOTE(review): presumably re-points tbl at table2 - confirm in setupTable
1286       // make sure data in regions, if in hlog only there is no data loss
1287       TEST_UTIL.getHBaseAdmin().flush(table2.getName());
1288       // Mess them up by leaving a hole in the hdfs data
1289       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1290         Bytes.toBytes("C"), false, false, true); // don't rm meta
1291 
1292       HBaseFsck hbck = doFsck(conf, false); // run without fixing and without a table filter
1293       assertErrors(hbck, new ERROR_CODE[] {
1294         ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS}); // one error per damaged table
1295 
1296       // fix hole in table 1
1297       doFsck(conf, true, table1);
1298       // check that hole in table 1 fixed
1299       assertNoErrors(doFsck(conf, false, table1));
1300       // check that hole in table 2 still there
1301       assertErrors(doFsck(conf, false, table2),
1302         new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1303 
1304       // fix hole in table 2
1305       doFsck(conf, true, table2);
1306       // check that hole in both tables fixed
1307       assertNoErrors(doFsck(conf, false));
1308       assertEquals(ROWKEYS.length - 2, countRows()); // NOTE(review): presumably counts the rows of table2 only - confirm in countRows
1309     } finally {
1310       deleteTable(table1);
1311       deleteTable(table2);
1312     }
1313   }
1314   /**
1315    * A split parent in meta, in hdfs, and not deployed. A regular repair must leave it alone; only fixSplitParents repairs it.
1316    */
1317   @Test
1318   public void testLingeringSplitParent() throws Exception {
1319     TableName table =
1320         TableName.valueOf("testLingeringSplitParent");
1321     HTable meta = null;
1322     try {
1323       setupTable(table);
1324       assertEquals(ROWKEYS.length, countRows());
1325 
1326       // make sure data in regions, if in hlog only there is no data loss
1327       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1328       HRegionLocation location = tbl.getRegionLocation("B"); // captured before the region is removed from meta
1329 
1330       // Delete one region from meta, but not hdfs, unassign it.
1331       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1332         Bytes.toBytes("C"), true, true, false);
1333 
1334       // Create a new meta entry to fake it as a split parent.
1335       meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
1336       HRegionInfo hri = location.getRegionInfo();
1337 
1338       HRegionInfo a = new HRegionInfo(tbl.getName(),
1339         Bytes.toBytes("B"), Bytes.toBytes("BM")); // fake daughter covering B..BM
1340       HRegionInfo b = new HRegionInfo(tbl.getName(),
1341         Bytes.toBytes("BM"), Bytes.toBytes("C")); // fake daughter covering BM..C
1342 
1343       hri.setOffline(true);
1344       hri.setSplit(true);
1345 
1346       MetaEditor.addRegionToMeta(meta, hri, a, b);
1347       meta.flushCommits();
1348       TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1349 
1350       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
1351       assertErrors(hbck, new ERROR_CODE[] {
1352         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1353 
1354       // regular repair cannot fix lingering split parent
1355       hbck = doFsck(conf, true);
1356       assertErrors(hbck, new ERROR_CODE[] {
1357         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1358       assertFalse(hbck.shouldRerun());
1359       hbck = doFsck(conf, false); // the same errors must still be reported afterwards
1360       assertErrors(hbck, new ERROR_CODE[] {
1361         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1362 
1363       // fix lingering split parent
1364       hbck = new HBaseFsck(conf);
1365       hbck.connect();
1366       hbck.setDisplayFullReport(); // i.e. -details
1367       hbck.setTimeLag(0);
1368       hbck.setFixSplitParents(true);
1369       hbck.onlineHbck();
1370       assertTrue(hbck.shouldRerun());
1371 
1372       Get get = new Get(hri.getRegionName());
1373       Result result = meta.get(get);
1374       assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1375         HConstants.SPLITA_QUALIFIER).isEmpty()); // the splitA column must be gone from the parent row
1376       assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1377         HConstants.SPLITB_QUALIFIER).isEmpty()); // the splitB column must be gone from the parent row
1378       TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1379 
1380       // fix other issues
1381       doFsck(conf, true);
1382 
1383       // check that all are fixed
1384       assertNoErrors(doFsck(conf, false));
1385       assertEquals(ROWKEYS.length, countRows()); // data was kept in hdfs, so no rows are lost
1386     } finally {
1387       deleteTable(table);
1388       IOUtils.closeQuietly(meta); // meta may still be null if setup failed early
1389     }
1390   }
1391 
1392   /**
1393    * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
1394    * valid cases where the daughters are there. Uses its own table name so it cannot collide with testLingeringSplitParent.
1395    */
1396   @Test
1397   public void testValidLingeringSplitParent() throws Exception {
1398     TableName table =
1399         TableName.valueOf("testValidLingeringSplitParent");
1400     HTable meta = null;
1401     try {
1402       setupTable(table);
1403       assertEquals(ROWKEYS.length, countRows());
1404 
1405       // make sure data in regions, if in hlog only there is no data loss
1406       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1407       HRegionLocation location = tbl.getRegionLocation("B");
1408 
1409       meta = new HTable(conf, TableName.META_TABLE_NAME);
1410       HRegionInfo hri = location.getRegionInfo();
1411 
1412       // do a regular split of the region holding "B" at key "BM"
1413       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1414       byte[] regionName = location.getRegionInfo().getRegionName();
1415       admin.split(regionName, Bytes.toBytes("BM"));
1416       TestEndToEndSplitTransaction.blockUntilRegionSplit(
1417           TEST_UTIL.getConfiguration(), 60000, regionName, true);
1418 
1419       // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
1420       // for some time until children references are deleted. HBCK erroneously sees this as
1421       // overlapping regions
1422       HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, false, null); // NOTE(review): positional fix flags - confirm their order against HbckTestingUtil.doFsck
1423       assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
1424 
1425       // assert that the split hbase:meta entry is still there.
1426       Get get = new Get(hri.getRegionName());
1427       Result result = meta.get(get);
1428       assertNotNull(result);
1429       assertNotNull(HRegionInfo.getHRegionInfo(result));
1430 
1431       assertEquals(ROWKEYS.length, countRows());
1432 
1433       // assert that we still have the split regions
1434       assertEquals(SPLITS.length + 1 + 1, tbl.getStartKeys().length); //SPLITS + 1 is # regions pre-split.
1435       assertNoErrors(doFsck(conf, false));
1436     } finally {
1437       deleteTable(table);
1438       IOUtils.closeQuietly(meta); // meta may still be null if setup failed early
1439     }
1440   }
1441 
1442   /**
1443    * Split crashed after write to hbase:meta finished for the parent region, but
1444    * failed to write daughters (pre HBASE-7721 codebase)
1445    */
1446   @Test(timeout=75000)
1447   public void testSplitDaughtersNotInMeta() throws Exception {
1448     TableName table =
1449         TableName.valueOf("testSplitdaughtersNotInMeta");
1450     HTable meta = null;
1451     try {
1452       setupTable(table);
1453       assertEquals(ROWKEYS.length, countRows());
1454 
1455       // make sure data in regions, if in hlog only there is no data loss
1456       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1457       HRegionLocation location = tbl.getRegionLocation("B");
1458 
1459       meta = new HTable(conf, TableName.META_TABLE_NAME);
1460       HRegionInfo hri = location.getRegionInfo();
1461 
1462       // do a regular split
1463       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1464       byte[] regionName = location.getRegionInfo().getRegionName();
1465       admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1466       TestEndToEndSplitTransaction.blockUntilRegionSplit(
1467           TEST_UTIL.getConfiguration(), 60000, regionName, true);
1468 
1469       PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(meta.get(new Get(regionName)));
1470 
1471       // Delete daughter regions from meta, but not hdfs, unassign it.
1472       Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1473       undeployRegion(admin, hris.get(daughters.getFirst()), daughters.getFirst());
1474       undeployRegion(admin, hris.get(daughters.getSecond()), daughters.getSecond());
1475 
1476       meta.delete(new Delete(daughters.getFirst().getRegionName()));
1477       meta.delete(new Delete(daughters.getSecond().getRegionName()));
1478       meta.flushCommits();
1479 
1480       HBaseFsck hbck = doFsck(conf, false);
1481       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1482           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN}); //no LINGERING_SPLIT_PARENT
1483 
1484       // now fix it. The fix should not revert the region split, but add daughters to META
1485       hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, false, null);
1486       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1487           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1488 
1489       // assert that the split hbase:meta entry is still there.
1490       Get get = new Get(hri.getRegionName());
1491       Result result = meta.get(get);
1492       assertNotNull(result);
1493       assertNotNull(HRegionInfo.getHRegionInfo(result));
1494 
1495       assertEquals(ROWKEYS.length, countRows());
1496 
1497       // assert that we still have the split regions
1498       assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split.
1499       assertNoErrors(doFsck(conf, false)); //should be fixed by now
1500     } finally {
1501       deleteTable(table);
1502       IOUtils.closeQuietly(meta);
1503     }
1504   }
1505 
1506   /**
1507    * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
1508    * meta and data missing in the fs. The removed region is the one with an empty start key.
1509    */
1510   @Test(timeout=120000)
1511   public void testMissingFirstRegion() throws Exception {
1512     TableName table =
1513         TableName.valueOf("testMissingFirstRegion");
1514     try {
1515       setupTable(table);
1516       assertEquals(ROWKEYS.length, countRows());
1517 
1518       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1519       TEST_UTIL.getHBaseAdmin().disableTable(table);
1520       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1521           true, true); // all three removal flags set
1522       TEST_UTIL.getHBaseAdmin().enableTable(table);
1523 
1524       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
1525       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1526       // fix hole
1527       doFsck(conf, true);
1528       // check that hole fixed
1529       assertNoErrors(doFsck(conf, false));
1530     } finally {
1531       deleteTable(table);
1532     }
1533   }
1534 
1535   /**
1536    * This creates and fixes a bad table with missing last region -- hole in meta and data missing in
1537    * the fs. The removed region is the one with an empty end key.
1538    */
1539   @Test(timeout=120000)
1540   public void testMissingLastRegion() throws Exception {
1541     TableName table =
1542         TableName.valueOf("testMissingLastRegion");
1543     try {
1544       setupTable(table);
1545       assertEquals(ROWKEYS.length, countRows());
1546 
1547       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1548       TEST_UTIL.getHBaseAdmin().disableTable(table);
1549       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1550           true, true); // all three removal flags set
1551       TEST_UTIL.getHBaseAdmin().enableTable(table);
1552 
1553       HBaseFsck hbck = doFsck(conf, false); // run hbck without fixing
1554       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1555       // fix hole
1556       doFsck(conf, true);
1557       // check that hole fixed
1558       assertNoErrors(doFsck(conf, false));
1559     } finally {
1560       deleteTable(table);
1561     }
1562   }
1563 
1564   /**
1565    * Test -noHdfsChecking option can detect and fix assignments issue.
1566    */
1567   @Test
1568   public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1569     TableName table =
1570         TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1571     try {
1572       setupTable(table);
1573       assertEquals(ROWKEYS.length, countRows());
1574 
1575       // Mess it up by closing a region
1576       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1577         Bytes.toBytes("B"), true, false, false, false);
1578 
1579       // verify there is no other errors
1580       HBaseFsck hbck = doFsck(conf, false);
1581       assertErrors(hbck, new ERROR_CODE[] {
1582         ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1583 
1584       // verify that noHdfsChecking report the same errors
1585       HBaseFsck fsck = new HBaseFsck(conf);
1586       fsck.connect();
1587       fsck.setDisplayFullReport(); // i.e. -details
1588       fsck.setTimeLag(0);
1589       fsck.setCheckHdfs(false);
1590       fsck.onlineHbck();
1591       assertErrors(fsck, new ERROR_CODE[] {
1592         ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1593 
1594       // verify that fixAssignments works fine with noHdfsChecking
1595       fsck = new HBaseFsck(conf);
1596       fsck.connect();
1597       fsck.setDisplayFullReport(); // i.e. -details
1598       fsck.setTimeLag(0);
1599       fsck.setCheckHdfs(false);
1600       fsck.setFixAssignments(true);
1601       fsck.onlineHbck();
1602       assertTrue(fsck.shouldRerun());
1603       fsck.onlineHbck();
1604       assertNoErrors(fsck);
1605 
1606       assertEquals(ROWKEYS.length, countRows());
1607     } finally {
1608       deleteTable(table);
1609     }
1610   }
1611 
1612   /**
1613    * Test -noHdfsChecking option can detect region is not in meta but deployed.
1614    * However, it can not fix it without checking Hdfs because we need to get
1615    * the region info from Hdfs in this case, then to patch the meta.
1616    */
1617   @Test
1618   public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1619     TableName table =
1620         TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
1621     try {
1622       setupTable(table);
1623       assertEquals(ROWKEYS.length, countRows());
1624 
1625       // Mess it up by deleting a region from the metadata
1626       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1627         Bytes.toBytes("B"), false, true, false, false);
1628 
1629       // verify there is no other errors
1630       HBaseFsck hbck = doFsck(conf, false);
1631       assertErrors(hbck, new ERROR_CODE[] {
1632         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1633 
1634       // verify that noHdfsChecking report the same errors
1635       HBaseFsck fsck = new HBaseFsck(conf);
1636       fsck.connect();
1637       fsck.setDisplayFullReport(); // i.e. -details
1638       fsck.setTimeLag(0);
1639       fsck.setCheckHdfs(false);
1640       fsck.onlineHbck();
1641       assertErrors(fsck, new ERROR_CODE[] {
1642         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1643 
1644       // verify that fixMeta doesn't work with noHdfsChecking
1645       fsck = new HBaseFsck(conf);
1646       fsck.connect();
1647       fsck.setDisplayFullReport(); // i.e. -details
1648       fsck.setTimeLag(0);
1649       fsck.setCheckHdfs(false);
1650       fsck.setFixAssignments(true);
1651       fsck.setFixMeta(true);
1652       fsck.onlineHbck();
1653       assertFalse(fsck.shouldRerun());
1654       assertErrors(fsck, new ERROR_CODE[] {
1655         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1656 
1657       // fix the cluster so other tests won't be impacted
1658       fsck = doFsck(conf, true);
1659       assertTrue(fsck.shouldRerun());
1660       fsck = doFsck(conf, true);
1661       assertNoErrors(fsck);
1662     } finally {
1663       deleteTable(table);
1664     }
1665   }
1666 
1667   /**
1668    * Test -fixHdfsHoles doesn't work with -noHdfsChecking option,
1669    * and -noHdfsChecking can't detect orphan Hdfs region.
1670    */
1671   @Test
1672   public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1673     TableName table =
1674         TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
1675     try {
1676       setupTable(table);
1677       assertEquals(ROWKEYS.length, countRows());
1678 
1679       // Mess it up by creating an overlap in the metadata
1680       TEST_UTIL.getHBaseAdmin().disableTable(table);
1681       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1682         Bytes.toBytes("B"), true, true, false, true);
1683       TEST_UTIL.getHBaseAdmin().enableTable(table);
1684 
1685       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
1686         Bytes.toBytes("A2"), Bytes.toBytes("B"));
1687       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1688       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1689         .waitForAssignment(hriOverlap);
1690       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1691       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1692 
1693       HBaseFsck hbck = doFsck(conf, false);
1694       assertErrors(hbck, new ERROR_CODE[] {
1695         ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1696         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1697 
1698       // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
1699       HBaseFsck fsck = new HBaseFsck(conf);
1700       fsck.connect();
1701       fsck.setDisplayFullReport(); // i.e. -details
1702       fsck.setTimeLag(0);
1703       fsck.setCheckHdfs(false);
1704       fsck.onlineHbck();
1705       assertErrors(fsck, new ERROR_CODE[] {
1706         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1707 
1708       // verify that fixHdfsHoles doesn't work with noHdfsChecking
1709       fsck = new HBaseFsck(conf);
1710       fsck.connect();
1711       fsck.setDisplayFullReport(); // i.e. -details
1712       fsck.setTimeLag(0);
1713       fsck.setCheckHdfs(false);
1714       fsck.setFixHdfsHoles(true);
1715       fsck.setFixHdfsOverlaps(true);
1716       fsck.setFixHdfsOrphans(true);
1717       fsck.onlineHbck();
1718       assertFalse(fsck.shouldRerun());
1719       assertErrors(fsck, new ERROR_CODE[] {
1720         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1721     } finally {
1722       if (TEST_UTIL.getHBaseAdmin().isTableDisabled(table)) {
1723         TEST_UTIL.getHBaseAdmin().enableTable(table);
1724       }
1725       deleteTable(table);
1726     }
1727   }
1728 
1729   /**
1730    * We don't have an easy way to verify that a flush completed, so we loop until we find a
1731    * legitimate hfile and return it.
1732    * @param fs
1733    * @param table
1734    * @return Path of a flushed hfile.
1735    * @throws IOException
1736    */
1737   Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
1738     Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1739     Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1740     Path famDir = new Path(regionDir, FAM_STR);
1741 
1742     // keep doing this until we get a legit hfile
1743     while (true) {
1744       FileStatus[] hfFss = fs.listStatus(famDir);
1745       if (hfFss.length == 0) {
1746         continue;
1747       }
1748       for (FileStatus hfs : hfFss) {
1749         if (!hfs.isDir()) {
1750           return hfs.getPath();
1751         }
1752       }
1753     }
1754   }
1755 
1756   /**
1757    * This creates a table and then corrupts an hfile.  Hbck should quarantine the file.
1758    */
1759   @Test(timeout=180000)
1760   public void testQuarantineCorruptHFile() throws Exception {
1761     TableName table = TableName.valueOf(name.getMethodName());
1762     try {
1763       setupTable(table);
1764       assertEquals(ROWKEYS.length, countRows());
1765       TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.
1766 
1767       FileSystem fs = FileSystem.get(conf);
1768       Path hfile = getFlushedHFile(fs, table);
1769 
1770       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1771       TEST_UTIL.getHBaseAdmin().disableTable(table);
1772 
1773       // create new corrupt file called deadbeef (valid hfile name)
1774       Path corrupt = new Path(hfile.getParent(), "deadbeef");
1775       TestHFile.truncateFile(fs, hfile, corrupt);
1776       LOG.info("Created corrupted file " + corrupt);
1777       HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
1778 
1779       // we cannot enable here because enable never finished due to the corrupt region.
1780       HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
1781       assertEquals(res.getRetCode(), 0);
1782       HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1783       assertEquals(hfcc.getHFilesChecked(), 5);
1784       assertEquals(hfcc.getCorrupted().size(), 1);
1785       assertEquals(hfcc.getFailures().size(), 0);
1786       assertEquals(hfcc.getQuarantined().size(), 1);
1787       assertEquals(hfcc.getMissing().size(), 0);
1788 
1789       // Its been fixed, verify that we can enable.
1790       TEST_UTIL.getHBaseAdmin().enableTable(table);
1791     } finally {
1792       deleteTable(table);
1793     }
1794   }
1795 
1796   /**
1797   * Test that use this should have a timeout, because this method could potentially wait forever.
1798   */
1799   private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
1800                                 int corrupt, int fail, int quar, int missing) throws Exception {
1801     try {
1802       setupTable(table);
1803       assertEquals(ROWKEYS.length, countRows());
1804       TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.
1805 
1806       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1807       TEST_UTIL.getHBaseAdmin().disableTable(table);
1808 
1809       String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
1810           table.getNameAsString()};
1811       ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1812       HBaseFsck res = hbck.exec(exec, args);
1813 
1814       HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1815       assertEquals(hfcc.getHFilesChecked(), check);
1816       assertEquals(hfcc.getCorrupted().size(), corrupt);
1817       assertEquals(hfcc.getFailures().size(), fail);
1818       assertEquals(hfcc.getQuarantined().size(), quar);
1819       assertEquals(hfcc.getMissing().size(), missing);
1820 
1821       // its been fixed, verify that we can enable
1822       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1823       admin.enableTableAsync(table);
1824       while (!admin.isTableEnabled(table)) {
1825         try {
1826           Thread.sleep(250);
1827         } catch (InterruptedException e) {
1828           e.printStackTrace();
1829           fail("Interrupted when trying to enable table " + table);
1830         }
1831       }
1832     } finally {
1833       deleteTable(table);
1834     }
1835   }
1836 
1837   /**
1838    * This creates a table and simulates the race situation where a concurrent compaction or split
1839    * has removed an hfile after the corruption checker learned about it.
1840    */
1841   @Test(timeout=180000)
1842   public void testQuarantineMissingHFile() throws Exception {
1843     TableName table = TableName.valueOf(name.getMethodName());
1844     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1845     // inject a fault in the hfcc created.
1846     final FileSystem fs = FileSystem.get(conf);
1847     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1848       @Override
1849       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1850         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1851           AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1852           @Override
1853           protected void checkHFile(Path p) throws IOException {
1854             if (attemptedFirstHFile.compareAndSet(false, true)) {
1855               assertTrue(fs.delete(p, true)); // make sure delete happened.
1856             }
1857             super.checkHFile(p);
1858           }
1859         };
1860       }
1861     };
1862     doQuarantineTest(table, hbck, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing.
1863   }
1864 
1865   /**
1866    * This creates a table and simulates the race situation where a concurrent compaction or split
1867    * has removed an colfam dir before the corruption checker got to it.
1868    */
1869   // Disabled because fails sporadically.  Is this test right?  Timing-wise, there could be no
1870   // files in a column family on initial creation -- as suggested by Matteo.
1871   @Ignore @Test(timeout=180000)
1872   public void testQuarantineMissingFamdir() throws Exception {
1873     TableName table = TableName.valueOf(name.getMethodName());
1874     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1875     // inject a fault in the hfcc created.
1876     final FileSystem fs = FileSystem.get(conf);
1877     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1878       @Override
1879       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1880         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1881           AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1882           @Override
1883           protected void checkColFamDir(Path p) throws IOException {
1884             if (attemptedFirstHFile.compareAndSet(false, true)) {
1885               assertTrue(fs.delete(p, true)); // make sure delete happened.
1886             }
1887             super.checkColFamDir(p);
1888           }
1889         };
1890       }
1891     };
1892     doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1893   }
1894 
1895   /**
1896    * This creates a table and simulates the race situation where a concurrent compaction or split
1897    * has removed a region dir before the corruption checker got to it.
1898    */
1899   @Test(timeout=180000)
1900   public void testQuarantineMissingRegionDir() throws Exception {
1901     TableName table = TableName.valueOf(name.getMethodName());
1902     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1903     // inject a fault in the hfcc created.
1904     final FileSystem fs = FileSystem.get(conf);
1905     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1906       @Override
1907       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1908         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1909           AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1910           @Override
1911           protected void checkRegionDir(Path p) throws IOException {
1912             if (attemptedFirstHFile.compareAndSet(false, true)) {
1913               assertTrue(fs.delete(p, true)); // make sure delete happened.
1914             }
1915             super.checkRegionDir(p);
1916           }
1917         };
1918       }
1919     };
1920     doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1921   }
1922 
1923   /**
1924    * Test fixing lingering reference file.
1925    */
1926   @Test
1927   public void testLingeringReferenceFile() throws Exception {
1928     TableName table =
1929         TableName.valueOf("testLingeringReferenceFile");
1930     try {
1931       setupTable(table);
1932       assertEquals(ROWKEYS.length, countRows());
1933 
1934       // Mess it up by creating a fake reference file
1935       FileSystem fs = FileSystem.get(conf);
1936       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1937       Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1938       Path famDir = new Path(regionDir, FAM_STR);
1939       Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
1940       fs.create(fakeReferenceFile);
1941 
1942       HBaseFsck hbck = doFsck(conf, false);
1943       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
1944       // fix reference file
1945       doFsck(conf, true);
1946       // check that reference file fixed
1947       assertNoErrors(doFsck(conf, false));
1948     } finally {
1949       deleteTable(table);
1950     }
1951   }
1952 
1953   /**
1954    * Test mission REGIONINFO_QUALIFIER in hbase:meta
1955    */
1956   @Test
1957   public void testMissingRegionInfoQualifier() throws Exception {
1958     TableName table =
1959         TableName.valueOf("testMissingRegionInfoQualifier");
1960     try {
1961       setupTable(table);
1962 
1963       // Mess it up by removing the RegionInfo for one region.
1964       final List<Delete> deletes = new LinkedList<Delete>();
1965       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
1966       MetaScanner.metaScan(conf, new MetaScanner.MetaScannerVisitor() {
1967 
1968         @Override
1969         public boolean processRow(Result rowResult) throws IOException {
1970           HRegionInfo hri = MetaScanner.getHRegionInfo(rowResult);
1971           if (hri != null && !hri.getTable().isSystemTable()) {
1972             Delete delete = new Delete(rowResult.getRow());
1973             delete.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1974             deletes.add(delete);
1975           }
1976           return true;
1977         }
1978 
1979         @Override
1980         public void close() throws IOException {
1981         }
1982       });
1983       meta.delete(deletes);
1984 
1985       // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
1986       meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
1987         HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
1988       meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
1989         HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
1990       meta.close();
1991 
1992       HBaseFsck hbck = doFsck(conf, false);
1993       assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
1994 
1995       // fix reference file
1996       hbck = doFsck(conf, true);
1997 
1998       // check that reference file fixed
1999       assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2000     } finally {
2001       deleteTable(table);
2002     }
2003   }
2004 
2005 
2006   /**
2007    * Test pluggable error reporter. It can be plugged in
2008    * from system property or configuration.
2009    */
2010   @Test
2011   public void testErrorReporter() throws Exception {
2012     try {
2013       MockErrorReporter.calledCount = 0;
2014       doFsck(conf, false);
2015       assertEquals(MockErrorReporter.calledCount, 0);
2016 
2017       conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
2018       doFsck(conf, false);
2019       assertTrue(MockErrorReporter.calledCount > 20);
2020     } finally {
2021       conf.set("hbasefsck.errorreporter",
2022         PrintingErrorReporter.class.getName());
2023       MockErrorReporter.calledCount = 0;
2024     }
2025   }
2026 
2027   static class MockErrorReporter implements ErrorReporter {
2028     static int calledCount = 0;
2029 
2030     @Override
2031     public void clear() {
2032       calledCount++;
2033     }
2034 
2035     @Override
2036     public void report(String message) {
2037       calledCount++;
2038     }
2039 
2040     @Override
2041     public void reportError(String message) {
2042       calledCount++;
2043     }
2044 
2045     @Override
2046     public void reportError(ERROR_CODE errorCode, String message) {
2047       calledCount++;
2048     }
2049 
2050     @Override
2051     public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2052       calledCount++;
2053     }
2054 
2055     @Override
2056     public void reportError(ERROR_CODE errorCode,
2057         String message, TableInfo table, HbckInfo info) {
2058       calledCount++;
2059     }
2060 
2061     @Override
2062     public void reportError(ERROR_CODE errorCode, String message,
2063         TableInfo table, HbckInfo info1, HbckInfo info2) {
2064       calledCount++;
2065     }
2066 
2067     @Override
2068     public int summarize() {
2069       return ++calledCount;
2070     }
2071 
2072     @Override
2073     public void detail(String details) {
2074       calledCount++;
2075     }
2076 
2077     @Override
2078     public ArrayList<ERROR_CODE> getErrorList() {
2079       calledCount++;
2080       return new ArrayList<ERROR_CODE>();
2081     }
2082 
2083     @Override
2084     public void progress() {
2085       calledCount++;
2086     }
2087 
2088     @Override
2089     public void print(String message) {
2090       calledCount++;
2091     }
2092 
2093     @Override
2094     public void resetErrors() {
2095       calledCount++;
2096     }
2097 
2098     @Override
2099     public boolean tableHasErrors(TableInfo table) {
2100       calledCount++;
2101       return false;
2102     }
2103   }
2104 
2105   @Test(timeout=60000)
2106   public void testCheckTableLocks() throws Exception {
2107     IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
2108     EnvironmentEdgeManager.injectEdge(edge);
2109     // check no errors
2110     HBaseFsck hbck = doFsck(conf, false);
2111     assertNoErrors(hbck);
2112 
2113     ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
2114 
2115     // obtain one lock
2116     final TableLockManager tableLockManager = TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
2117     TableLock writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2118         "testCheckTableLocks");
2119     writeLock.acquire();
2120     hbck = doFsck(conf, false);
2121     assertNoErrors(hbck); // should not have expired, no problems
2122 
2123     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2124         TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
2125 
2126     hbck = doFsck(conf, false);
2127     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2128 
2129     final CountDownLatch latch = new CountDownLatch(1);
2130     new Thread() {
2131       @Override
2132       public void run() {
2133         TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2134             "testCheckTableLocks");
2135         try {
2136           latch.countDown();
2137           readLock.acquire();
2138         } catch (IOException ex) {
2139           fail();
2140         } catch (IllegalStateException ex) {
2141           return; // expected, since this will be reaped under us.
2142         }
2143         fail("should not have come here");
2144       };
2145     }.start();
2146 
2147     latch.await(); // wait until thread starts
2148     Threads.sleep(300); // wait some more to ensure writeLock.acquire() is called
2149 
2150     hbck = doFsck(conf, false);
2151     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK}); // still one expired, one not-expired
2152 
2153     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2154         TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
2155 
2156     hbck = doFsck(conf, false);
2157     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK}); // both are expired
2158 
2159     conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1); // reaping from ZKInterProcessWriteLock uses znode cTime,
2160                                                                  // which is not injectable through EnvironmentEdge
2161     Threads.sleep(10);
2162     hbck = doFsck(conf, true); // now fix both cases
2163 
2164     hbck = doFsck(conf, false);
2165     assertNoErrors(hbck);
2166 
2167     // ensure that locks are deleted
2168     writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2169         "should acquire without blocking");
2170     writeLock.acquire(); // this should not block.
2171     writeLock.release(); // release for clean state
2172   }
2173 
2174   @Test
2175   public void testMetaOffline() throws Exception {
2176     // check no errors
2177     HBaseFsck hbck = doFsck(conf, false);
2178     assertNoErrors(hbck);
2179     deleteMetaRegion(conf, true, false, false);
2180     hbck = doFsck(conf, false);
2181     // ERROR_CODE.UNKNOWN is coming because we reportError with a message for the hbase:meta
2182     // inconsistency and whether we will be fixing it or not.
2183     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2184     hbck = doFsck(conf, true);
2185     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2186     hbck = doFsck(conf, false);
2187     assertNoErrors(hbck);
2188   }
2189 
2190   private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
2191       boolean regionInfoOnly) throws IOException, InterruptedException {
2192     HConnection connection = HConnectionManager.getConnection(conf);
2193     HRegionLocation metaLocation = connection.locateRegion(TableName.META_TABLE_NAME,
2194         HConstants.EMPTY_START_ROW);
2195     ServerName hsa = metaLocation.getServerName();
2196     HRegionInfo hri = metaLocation.getRegionInfo();
2197     if (unassign) {
2198       LOG.info("Undeploying meta region " + hri + " from server " + hsa);
2199       undeployRegion(new HBaseAdmin(conf), hsa, hri);
2200     }
2201 
2202     if (regionInfoOnly) {
2203       LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
2204       Path rootDir = FSUtils.getRootDir(conf);
2205       FileSystem fs = rootDir.getFileSystem(conf);
2206       Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2207           hri.getEncodedName());
2208       Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
2209       fs.delete(hriPath, true);
2210     }
2211 
2212     if (hdfs) {
2213       LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
2214       Path rootDir = FSUtils.getRootDir(conf);
2215       FileSystem fs = rootDir.getFileSystem(conf);
2216       Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2217           hri.getEncodedName());
2218       HBaseFsck.debugLsr(conf, p);
2219       boolean success = fs.delete(p, true);
2220       LOG.info("Deleted " + p + " sucessfully? " + success);
2221       HBaseFsck.debugLsr(conf, p);
2222     }
2223   }
2224 
2225   @Test
2226   public void testTableWithNoRegions() throws Exception {
2227     // We might end up with empty regions in a table
2228     // see also testNoHdfsTable()
2229     TableName table =
2230         TableName.valueOf(name.getMethodName());
2231     try {
2232       // create table with one region
2233       HTableDescriptor desc = new HTableDescriptor(table);
2234       HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
2235       desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
2236       TEST_UTIL.getHBaseAdmin().createTable(desc);
2237       tbl = new HTable(TEST_UTIL.getConfiguration(), table, executorService);
2238 
2239       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
2240       deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false,
2241           false, true);
2242 
2243       HBaseFsck hbck = doFsck(conf, false);
2244       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
2245 
2246       doFsck(conf, true);
2247 
2248       // fix hole
2249       doFsck(conf, true);
2250 
2251       // check that hole fixed
2252       assertNoErrors(doFsck(conf, false));
2253     } finally {
2254       deleteTable(table);
2255     }
2256 
2257   }
2258 
2259   @Test
2260   public void testHbckAfterRegionMerge() throws Exception {
2261     TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
2262     HTable meta = null;
2263     try {
2264       // disable CatalogJanitor
2265       TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
2266       setupTable(table);
2267       assertEquals(ROWKEYS.length, countRows());
2268 
2269       // make sure data in regions, if in hlog only there is no data loss
2270       TEST_UTIL.getHBaseAdmin().flush(table.getName());
2271       HRegionInfo region1 = tbl.getRegionLocation("A").getRegionInfo();
2272       HRegionInfo region2 = tbl.getRegionLocation("B").getRegionInfo();
2273 
2274       int regionCountBeforeMerge = tbl.getRegionLocations().size();
2275 
2276       assertNotEquals(region1, region2);
2277 
2278       // do a region merge
2279       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
2280       admin.mergeRegions(region1.getEncodedNameAsBytes(),
2281           region2.getEncodedNameAsBytes(), false);
2282 
2283       // wait until region merged
2284       long timeout = System.currentTimeMillis() + 30 * 1000;
2285       while (true) {
2286         if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
2287           break;
2288         } else if (System.currentTimeMillis() > timeout) {
2289           fail("Time out waiting on region " + region1.getEncodedName()
2290               + " and " + region2.getEncodedName() + " be merged");
2291         }
2292         Thread.sleep(10);
2293       }
2294 
2295       assertEquals(ROWKEYS.length, countRows());
2296 
2297       HBaseFsck hbck = doFsck(conf, false);
2298       assertNoErrors(hbck); // no errors
2299 
2300     } finally {
2301       TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
2302       deleteTable(table);
2303       IOUtils.closeQuietly(meta);
2304     }
2305   }
2306 
2307   @Test
2308   public void testRegionBoundariesCheck() throws Exception {
2309     HBaseFsck hbck = doFsck(conf, false);
2310     assertNoErrors(hbck); // no errors
2311     try {
2312       hbck.checkRegionBoundaries();
2313     } catch (IllegalArgumentException e) {
2314       if (e.getMessage().endsWith("not a valid DFS filename.")) {
2315         fail("Table directory path is not valid." + e.getMessage());
2316       }
2317     }
2318   }
2319 
2320   @org.junit.Rule
2321   public TestName name = new TestName();
2322 
2323   @Test
2324   public void testReadOnlyProperty() throws Exception {
2325     HBaseFsck hbck = doFsck(conf, false);
2326     Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2327       hbck.shouldIgnorePreCheckPermission());
2328 
2329     hbck = doFsck(conf, true);
2330     Assert.assertEquals("shouldIgnorePreCheckPermission", false,
2331       hbck.shouldIgnorePreCheckPermission());
2332 
2333     hbck = doFsck(conf, true);
2334     hbck.setIgnorePreCheckPermission(true);
2335     Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2336       hbck.shouldIgnorePreCheckPermission());
2337   }
2338 }