1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.util;
20
21 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24 import static org.junit.Assert.assertEquals;
25 import static org.junit.Assert.assertFalse;
26 import static org.junit.Assert.assertNotEquals;
27 import static org.junit.Assert.assertNotNull;
28 import static org.junit.Assert.assertTrue;
29 import static org.junit.Assert.fail;
30
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.Collection;
34 import java.util.HashMap;
35 import java.util.LinkedList;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.concurrent.Callable;
39 import java.util.concurrent.CountDownLatch;
40 import java.util.concurrent.ExecutorService;
41 import java.util.concurrent.Executors;
42 import java.util.concurrent.Future;
43 import java.util.concurrent.ScheduledThreadPoolExecutor;
44 import java.util.concurrent.SynchronousQueue;
45 import java.util.concurrent.ThreadPoolExecutor;
46 import java.util.concurrent.TimeUnit;
47 import java.util.concurrent.atomic.AtomicBoolean;
48
49 import org.apache.commons.io.IOUtils;
50 import org.apache.commons.logging.Log;
51 import org.apache.commons.logging.LogFactory;
52 import org.apache.hadoop.conf.Configuration;
53 import org.apache.hadoop.fs.FileStatus;
54 import org.apache.hadoop.fs.FileSystem;
55 import org.apache.hadoop.fs.Path;
56 import org.apache.hadoop.hbase.ClusterStatus;
57 import org.apache.hadoop.hbase.HBaseTestingUtility;
58 import org.apache.hadoop.hbase.HColumnDescriptor;
59 import org.apache.hadoop.hbase.HConstants;
60 import org.apache.hadoop.hbase.HRegionInfo;
61 import org.apache.hadoop.hbase.HRegionLocation;
62 import org.apache.hadoop.hbase.HTableDescriptor;
63 import org.apache.hadoop.hbase.TableExistsException;
64 import org.apache.hadoop.hbase.testclassification.LargeTests;
65 import org.apache.hadoop.hbase.MiniHBaseCluster;
66 import org.apache.hadoop.hbase.ServerName;
67 import org.apache.hadoop.hbase.TableName;
68 import org.apache.hadoop.hbase.MetaTableAccessor;
69 import org.apache.hadoop.hbase.client.Admin;
70 import org.apache.hadoop.hbase.client.ClusterConnection;
71 import org.apache.hadoop.hbase.client.Connection;
72 import org.apache.hadoop.hbase.client.ConnectionFactory;
73 import org.apache.hadoop.hbase.client.Delete;
74 import org.apache.hadoop.hbase.client.Durability;
75 import org.apache.hadoop.hbase.client.Get;
76 import org.apache.hadoop.hbase.client.HBaseAdmin;
77 import org.apache.hadoop.hbase.client.HConnection;
78 import org.apache.hadoop.hbase.client.HTable;
79 import org.apache.hadoop.hbase.client.MetaScanner;
80 import org.apache.hadoop.hbase.client.Put;
81 import org.apache.hadoop.hbase.client.Result;
82 import org.apache.hadoop.hbase.client.ResultScanner;
83 import org.apache.hadoop.hbase.client.Scan;
84 import org.apache.hadoop.hbase.client.Table;
85 import org.apache.hadoop.hbase.io.hfile.TestHFile;
86 import org.apache.hadoop.hbase.master.AssignmentManager;
87 import org.apache.hadoop.hbase.master.HMaster;
88 import org.apache.hadoop.hbase.master.RegionState;
89 import org.apache.hadoop.hbase.master.RegionStates;
90 import org.apache.hadoop.hbase.master.TableLockManager;
91 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
92 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
93 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
94 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
95 import org.apache.hadoop.hbase.regionserver.HRegion;
96 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
97 import org.apache.hadoop.hbase.regionserver.HRegionServer;
98 import org.apache.hadoop.hbase.regionserver.SplitTransaction;
99 import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
100 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
101 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
102 import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
103 import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
104 import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
105 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
106 import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
107 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
108 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
109 import org.apache.zookeeper.KeeperException;
110 import org.junit.AfterClass;
111 import org.junit.Assert;
112 import org.junit.Before;
113 import org.junit.BeforeClass;
114 import org.junit.Ignore;
115 import org.junit.Test;
116 import org.junit.experimental.categories.Category;
117 import org.junit.rules.TestName;
118
119 import com.google.common.collect.Multimap;
120
121
122
123
124 @Category(LargeTests.class)
125 public class TestHBaseFsck {
126 static final int POOL_SIZE = 7;
127
128 final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
129 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
130 private final static Configuration conf = TEST_UTIL.getConfiguration();
131 private final static String FAM_STR = "fam";
132 private final static byte[] FAM = Bytes.toBytes(FAM_STR);
133 private final static int REGION_ONLINE_TIMEOUT = 800;
134 private static RegionStates regionStates;
135 private static ExecutorService tableExecutorService;
136 private static ScheduledThreadPoolExecutor hbfsckExecutorService;
137 private static ClusterConnection connection;
138 private static Admin admin;
139
140
141 private HTable tbl;
142 private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
143 Bytes.toBytes("B"), Bytes.toBytes("C") };
144
145 private final static byte[][] ROWKEYS= new byte[][] {
146 Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
147 Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
148
149 @BeforeClass
150 public static void setUpBeforeClass() throws Exception {
151 conf.setInt("hbase.regionserver.handler.count", 2);
152 conf.setInt("hbase.regionserver.metahandler.count", 2);
153
154 conf.setInt("hbase.htable.threads.max", POOL_SIZE);
155 conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
156 conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
157 conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
158 TEST_UTIL.startMiniCluster(3);
159
160 tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
161 new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
162
163 hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
164
165 AssignmentManager assignmentManager =
166 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
167 regionStates = assignmentManager.getRegionStates();
168
169 connection = (ClusterConnection) TEST_UTIL.getConnection();
170
171 admin = connection.getAdmin();
172 admin.setBalancerRunning(false, true);
173 }
174
175 @AfterClass
176 public static void tearDownAfterClass() throws Exception {
177 tableExecutorService.shutdown();
178 hbfsckExecutorService.shutdown();
179 admin.close();
180 TEST_UTIL.shutdownMiniCluster();
181 }
182
183 @Before
184 public void setUp() {
185 EnvironmentEdgeManager.reset();
186 }
187
188 @Test (timeout=180000)
189 public void testHBaseFsck() throws Exception {
190 assertNoErrors(doFsck(conf, false));
191 TableName table = TableName.valueOf("tableBadMetaAssign");
192 TEST_UTIL.createTable(table, FAM);
193
194
195 assertNoErrors(doFsck(conf, false));
196
197
198
199 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
200 Scan scan = new Scan();
201 scan.setStartRow(Bytes.toBytes(table+",,"));
202 ResultScanner scanner = meta.getScanner(scan);
203 HRegionInfo hri = null;
204
205 Result res = scanner.next();
206 ServerName currServer =
207 ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
208 HConstants.SERVER_QUALIFIER));
209 long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
210 HConstants.STARTCODE_QUALIFIER));
211
212 for (JVMClusterUtil.RegionServerThread rs :
213 TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
214
215 ServerName sn = rs.getRegionServer().getServerName();
216
217
218 if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
219 startCode != sn.getStartcode()) {
220 Put put = new Put(res.getRow());
221 put.setDurability(Durability.SKIP_WAL);
222 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
223 Bytes.toBytes(sn.getHostAndPort()));
224 put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
225 Bytes.toBytes(sn.getStartcode()));
226 meta.put(put);
227 hri = MetaTableAccessor.getHRegionInfo(res);
228 break;
229 }
230 }
231
232
233 assertErrors(doFsck(conf, true), new ERROR_CODE[]{
234 ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
235
236 TEST_UTIL.getHBaseCluster().getMaster()
237 .getAssignmentManager().waitForAssignment(hri);
238
239
240 assertNoErrors(doFsck(conf, false));
241
242
243 Table t = connection.getTable(table, tableExecutorService);
244 ResultScanner s = t.getScanner(new Scan());
245 s.close();
246 t.close();
247
248 scanner.close();
249 meta.close();
250 }
251
252 @Test(timeout=180000)
253 public void testFixAssignmentsWhenMETAinTransition() throws Exception {
254 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
255 admin.closeRegion(cluster.getServerHoldingMeta(), HRegionInfo.FIRST_META_REGIONINFO);
256 regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
257 new MetaTableLocator().deleteMetaLocation(cluster.getMaster().getZooKeeper());
258 assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
259 HBaseFsck hbck = doFsck(conf, true);
260 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
261 ERROR_CODE.NULL_META_REGION });
262 assertNoErrors(doFsck(conf, false));
263 }
264
265
266
267
268 private HRegionInfo createRegion(final HTableDescriptor
269 htd, byte[] startKey, byte[] endKey)
270 throws IOException {
271 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
272 HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
273 MetaTableAccessor.addRegionToMeta(meta, hri);
274 meta.close();
275 return hri;
276 }
277
278
279
280
281 private void dumpMeta(TableName tableName) throws IOException {
282 List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
283 for (byte[] row : metaRows) {
284 LOG.info(Bytes.toString(row));
285 }
286 }
287
288
289
290
291
292 private void undeployRegion(Connection conn, ServerName sn,
293 HRegionInfo hri) throws IOException, InterruptedException {
294 try {
295 HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) conn, sn, hri);
296 if (!hri.isMetaTable()) {
297 admin.offline(hri.getRegionName());
298 }
299 } catch (IOException ioe) {
300 LOG.warn("Got exception when attempting to offline region "
301 + Bytes.toString(hri.getRegionName()), ioe);
302 }
303 }
304
305
306
307
308
309
310 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
311 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
312 boolean hdfs) throws IOException, InterruptedException {
313 deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
314 }
315
316
317
318
319
320
321
322
323 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
324 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
325 boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
326 LOG.info("** Before delete:");
327 dumpMeta(htd.getTableName());
328
329 List<HRegionLocation> locations = tbl.getAllRegionLocations();
330 for (HRegionLocation location : locations) {
331 HRegionInfo hri = location.getRegionInfo();
332 ServerName hsa = location.getServerName();
333 if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
334 && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
335
336 LOG.info("RegionName: " +hri.getRegionNameAsString());
337 byte[] deleteRow = hri.getRegionName();
338
339 if (unassign) {
340 LOG.info("Undeploying region " + hri + " from server " + hsa);
341 undeployRegion(connection, hsa, hri);
342 }
343
344 if (regionInfoOnly) {
345 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
346 Path rootDir = FSUtils.getRootDir(conf);
347 FileSystem fs = rootDir.getFileSystem(conf);
348 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
349 hri.getEncodedName());
350 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
351 fs.delete(hriPath, true);
352 }
353
354 if (hdfs) {
355 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
356 Path rootDir = FSUtils.getRootDir(conf);
357 FileSystem fs = rootDir.getFileSystem(conf);
358 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
359 hri.getEncodedName());
360 HBaseFsck.debugLsr(conf, p);
361 boolean success = fs.delete(p, true);
362 LOG.info("Deleted " + p + " sucessfully? " + success);
363 HBaseFsck.debugLsr(conf, p);
364 }
365
366 if (metaRow) {
367 try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
368 Delete delete = new Delete(deleteRow);
369 meta.delete(delete);
370 }
371 }
372 }
373 LOG.info(hri.toString() + hsa.toString());
374 }
375
376 TEST_UTIL.getMetaTableRows(htd.getTableName());
377 LOG.info("*** After delete:");
378 dumpMeta(htd.getTableName());
379 }
380
381
382
383
384
385
386
387
388
389
390 void setupTable(TableName tablename) throws Exception {
391 setupTableWithRegionReplica(tablename, 1);
392 }
393
394
395
396
397
398
399
400
401
402
403 void setupTableWithRegionReplica(TableName tablename, int replicaCount) throws Exception {
404 HTableDescriptor desc = new HTableDescriptor(tablename);
405 desc.setRegionReplication(replicaCount);
406 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
407 desc.addFamily(hcd);
408 admin.createTable(desc, SPLITS);
409 tbl = (HTable) connection.getTable(tablename, tableExecutorService);
410 List<Put> puts = new ArrayList<Put>();
411 for (byte[] row : ROWKEYS) {
412 Put p = new Put(row);
413 p.add(FAM, Bytes.toBytes("val"), row);
414 puts.add(p);
415 }
416 tbl.put(puts);
417 tbl.flushCommits();
418 }
419
420
421
422
423 int countRows() throws IOException {
424 Scan s = new Scan();
425 ResultScanner rs = tbl.getScanner(s);
426 int i = 0;
427 while(rs.next() !=null) {
428 i++;
429 }
430 return i;
431 }
432
433
434
435
436
437
438
439 void cleanupTable(TableName tablename) throws IOException {
440 if (tbl != null) {
441 tbl.close();
442 tbl = null;
443 }
444
445 ((ClusterConnection) connection).clearRegionCache();
446 TEST_UTIL.deleteTable(tablename);
447
448 }
449
450
451
452
453 @Test (timeout=180000)
454 public void testHBaseFsckClean() throws Exception {
455 assertNoErrors(doFsck(conf, false));
456 TableName table = TableName.valueOf("tableClean");
457 try {
458 HBaseFsck hbck = doFsck(conf, false);
459 assertNoErrors(hbck);
460
461 setupTable(table);
462 assertEquals(ROWKEYS.length, countRows());
463
464
465 hbck = doFsck(conf, false);
466 assertNoErrors(hbck);
467 assertEquals(0, hbck.getOverlapGroups(table).size());
468 assertEquals(ROWKEYS.length, countRows());
469 } finally {
470 cleanupTable(table);
471 }
472 }
473
474
475
476
477 @Test (timeout=180000)
478 public void testHbckThreadpooling() throws Exception {
479 TableName table =
480 TableName.valueOf("tableDupeStartKey");
481 try {
482
483 setupTable(table);
484
485
486 Configuration newconf = new Configuration(conf);
487 newconf.setInt("hbasefsck.numthreads", 1);
488 assertNoErrors(doFsck(newconf, false));
489
490
491 } finally {
492 cleanupTable(table);
493 }
494 }
495
496 @Test (timeout=180000)
497 public void testHbckFixOrphanTable() throws Exception {
498 TableName table = TableName.valueOf("tableInfo");
499 FileSystem fs = null;
500 Path tableinfo = null;
501 try {
502 setupTable(table);
503
504 Path hbaseTableDir = FSUtils.getTableDir(
505 FSUtils.getRootDir(conf), table);
506 fs = hbaseTableDir.getFileSystem(conf);
507 FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
508 tableinfo = status.getPath();
509 fs.rename(tableinfo, new Path("/.tableinfo"));
510
511
512 HBaseFsck hbck = doFsck(conf, false);
513 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
514
515
516 hbck = doFsck(conf, true);
517 assertNoErrors(hbck);
518 status = null;
519 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
520 assertNotNull(status);
521
522 HTableDescriptor htd = admin.getTableDescriptor(table);
523 htd.setValue("NOT_DEFAULT", "true");
524 admin.disableTable(table);
525 admin.modifyTable(table, htd);
526 admin.enableTable(table);
527 fs.delete(status.getPath(), true);
528
529
530 htd = admin.getTableDescriptor(table);
531 hbck = doFsck(conf, true);
532 assertNoErrors(hbck);
533 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
534 assertNotNull(status);
535 htd = admin.getTableDescriptor(table);
536 assertEquals(htd.getValue("NOT_DEFAULT"), "true");
537 } finally {
538 fs.rename(new Path("/.tableinfo"), tableinfo);
539 cleanupTable(table);
540 }
541 }
542
543
544
545
546
547
548 @Test (timeout=180000)
549 public void testParallelHbck() throws Exception {
550 final ExecutorService service;
551 final Future<HBaseFsck> hbck1,hbck2;
552
553 class RunHbck implements Callable<HBaseFsck>{
554 boolean fail = true;
555 @Override
556 public HBaseFsck call(){
557 try{
558 return doFsck(conf, false);
559 } catch(Exception e){
560 if (e.getMessage().contains("Duplicate hbck")) {
561 fail = false;
562 }
563 }
564
565 if (fail) fail();
566 return null;
567 }
568 }
569 service = Executors.newFixedThreadPool(2);
570 hbck1 = service.submit(new RunHbck());
571 hbck2 = service.submit(new RunHbck());
572 service.shutdown();
573
574 service.awaitTermination(15, TimeUnit.SECONDS);
575 HBaseFsck h1 = hbck1.get();
576 HBaseFsck h2 = hbck2.get();
577
578 assert(h1 == null || h2 == null);
579 if (h1 != null) {
580 assert(h1.getRetCode() >= 0);
581 }
582 if (h2 != null) {
583 assert(h2.getRetCode() >= 0);
584 }
585 }
586
587
588
589
590
591 @Test (timeout=180000)
592 public void testDupeStartKey() throws Exception {
593 TableName table =
594 TableName.valueOf("tableDupeStartKey");
595 try {
596 setupTable(table);
597 assertNoErrors(doFsck(conf, false));
598 assertEquals(ROWKEYS.length, countRows());
599
600
601 HRegionInfo hriDupe =
602 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("A2"));
603 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
604 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
605 .waitForAssignment(hriDupe);
606 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
607 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
608
609 HBaseFsck hbck = doFsck(conf, false);
610 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
611 ERROR_CODE.DUPE_STARTKEYS});
612 assertEquals(2, hbck.getOverlapGroups(table).size());
613 assertEquals(ROWKEYS.length, countRows());
614
615
616 doFsck(conf,true);
617
618
619 HBaseFsck hbck2 = doFsck(conf,false);
620 assertNoErrors(hbck2);
621 assertEquals(0, hbck2.getOverlapGroups(table).size());
622 assertEquals(ROWKEYS.length, countRows());
623 } finally {
624 cleanupTable(table);
625 }
626 }
627
628
629
630
631
632 @Test (timeout=180000)
633 public void testHbckWithRegionReplica() throws Exception {
634 TableName table =
635 TableName.valueOf("tableWithReplica");
636 try {
637 setupTableWithRegionReplica(table, 2);
638 assertNoErrors(doFsck(conf, false));
639 assertEquals(ROWKEYS.length, countRows());
640 } finally {
641 cleanupTable(table);
642 }
643 }
644
645
646
647
648 Map<ServerName, List<String>> getDeployedHRIs(final HBaseAdmin admin) throws IOException {
649 ClusterStatus status = admin.getClusterStatus();
650 Collection<ServerName> regionServers = status.getServers();
651 Map<ServerName, List<String>> mm =
652 new HashMap<ServerName, List<String>>();
653 for (ServerName hsi : regionServers) {
654 AdminProtos.AdminService.BlockingInterface server = ((HConnection) connection).getAdmin(hsi);
655
656
657 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
658 List<String> regionNames = new ArrayList<String>();
659 for (HRegionInfo hri : regions) {
660 regionNames.add(hri.getRegionNameAsString());
661 }
662 mm.put(hsi, regionNames);
663 }
664 return mm;
665 }
666
667
668
669
670 ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
671 for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
672 if (e.getValue().contains(hri.getRegionNameAsString())) {
673 return e.getKey();
674 }
675 }
676 return null;
677 }
678
679
680
681
682
683 @Test (timeout=180000)
684 public void testDupeRegion() throws Exception {
685 TableName table =
686 TableName.valueOf("tableDupeRegion");
687 try {
688 setupTable(table);
689 assertNoErrors(doFsck(conf, false));
690 assertEquals(ROWKEYS.length, countRows());
691
692
693 HRegionInfo hriDupe =
694 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"));
695
696 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
697 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
698 .waitForAssignment(hriDupe);
699 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
700 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
701
702
703
704
705
706 while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriDupe) == null) {
707 Thread.sleep(250);
708 }
709
710 LOG.debug("Finished assignment of dupe region");
711
712
713 HBaseFsck hbck = doFsck(conf, false);
714 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
715 ERROR_CODE.DUPE_STARTKEYS});
716 assertEquals(2, hbck.getOverlapGroups(table).size());
717 assertEquals(ROWKEYS.length, countRows());
718
719
720 doFsck(conf,true);
721
722
723 HBaseFsck hbck2 = doFsck(conf,false);
724 assertNoErrors(hbck2);
725 assertEquals(0, hbck2.getOverlapGroups(table).size());
726 assertEquals(ROWKEYS.length, countRows());
727 } finally {
728 cleanupTable(table);
729 }
730 }
731
732
733
734
735 @Test (timeout=180000)
736 public void testDegenerateRegions() throws Exception {
737 TableName table = TableName.valueOf("tableDegenerateRegions");
738 try {
739 setupTable(table);
740 assertNoErrors(doFsck(conf,false));
741 assertEquals(ROWKEYS.length, countRows());
742
743
744 HRegionInfo hriDupe =
745 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
746 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
747 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
748 .waitForAssignment(hriDupe);
749 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
750 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
751
752 HBaseFsck hbck = doFsck(conf,false);
753 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION, ERROR_CODE.DUPE_STARTKEYS,
754 ERROR_CODE.DUPE_STARTKEYS });
755 assertEquals(2, hbck.getOverlapGroups(table).size());
756 assertEquals(ROWKEYS.length, countRows());
757
758
759 doFsck(conf,true);
760
761
762 HBaseFsck hbck2 = doFsck(conf,false);
763 assertNoErrors(hbck2);
764 assertEquals(0, hbck2.getOverlapGroups(table).size());
765 assertEquals(ROWKEYS.length, countRows());
766 } finally {
767 cleanupTable(table);
768 }
769 }
770
771
772
773
774
775 @Test (timeout=180000)
776 public void testContainedRegionOverlap() throws Exception {
777 TableName table =
778 TableName.valueOf("tableContainedRegionOverlap");
779 try {
780 setupTable(table);
781 assertEquals(ROWKEYS.length, countRows());
782
783
784 HRegionInfo hriOverlap =
785 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
786 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
787 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
788 .waitForAssignment(hriOverlap);
789 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
790 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
791
792 HBaseFsck hbck = doFsck(conf, false);
793 assertErrors(hbck, new ERROR_CODE[] {
794 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
795 assertEquals(2, hbck.getOverlapGroups(table).size());
796 assertEquals(ROWKEYS.length, countRows());
797
798
799 doFsck(conf, true);
800
801
802 HBaseFsck hbck2 = doFsck(conf,false);
803 assertNoErrors(hbck2);
804 assertEquals(0, hbck2.getOverlapGroups(table).size());
805 assertEquals(ROWKEYS.length, countRows());
806 } finally {
807 cleanupTable(table);
808 }
809 }
810
811
812
813
814
815
816
817 @Test (timeout=180000)
818 public void testSidelineOverlapRegion() throws Exception {
819 TableName table =
820 TableName.valueOf("testSidelineOverlapRegion");
821 try {
822 setupTable(table);
823 assertEquals(ROWKEYS.length, countRows());
824
825
826 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
827 HMaster master = cluster.getMaster();
828 HRegionInfo hriOverlap1 =
829 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("AB"));
830 master.assignRegion(hriOverlap1);
831 master.getAssignmentManager().waitForAssignment(hriOverlap1);
832 HRegionInfo hriOverlap2 =
833 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("AB"), Bytes.toBytes("B"));
834 master.assignRegion(hriOverlap2);
835 master.getAssignmentManager().waitForAssignment(hriOverlap2);
836
837 HBaseFsck hbck = doFsck(conf, false);
838 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
839 ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
840 assertEquals(3, hbck.getOverlapGroups(table).size());
841 assertEquals(ROWKEYS.length, countRows());
842
843
844 Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
845 ServerName serverName = null;
846 byte[] regionName = null;
847 for (HbckInfo hbi: overlapGroups.values()) {
848 if ("A".equals(Bytes.toString(hbi.getStartKey()))
849 && "B".equals(Bytes.toString(hbi.getEndKey()))) {
850 regionName = hbi.getRegionName();
851
852
853 int k = cluster.getServerWith(regionName);
854 for (int i = 0; i < 3; i++) {
855 if (i != k) {
856 HRegionServer rs = cluster.getRegionServer(i);
857 serverName = rs.getServerName();
858 break;
859 }
860 }
861
862 HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) connection,
863 cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
864 admin.offline(regionName);
865 break;
866 }
867 }
868
869 assertNotNull(regionName);
870 assertNotNull(serverName);
871 try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
872 Put put = new Put(regionName);
873 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
874 Bytes.toBytes(serverName.getHostAndPort()));
875 meta.put(put);
876 }
877
878
879 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
880 fsck.connect();
881 fsck.setDisplayFullReport();
882 fsck.setTimeLag(0);
883 fsck.setFixAssignments(true);
884 fsck.setFixMeta(true);
885 fsck.setFixHdfsHoles(true);
886 fsck.setFixHdfsOverlaps(true);
887 fsck.setFixHdfsOrphans(true);
888 fsck.setFixVersionFile(true);
889 fsck.setSidelineBigOverlaps(true);
890 fsck.setMaxMerge(2);
891 fsck.onlineHbck();
892 fsck.close();
893
894
895
896 HBaseFsck hbck2 = doFsck(conf,false);
897 assertNoErrors(hbck2);
898 assertEquals(0, hbck2.getOverlapGroups(table).size());
899 assertTrue(ROWKEYS.length > countRows());
900 } finally {
901 cleanupTable(table);
902 }
903 }
904
905
906
907
908
909 @Test (timeout=180000)
910 public void testOverlapAndOrphan() throws Exception {
911 TableName table =
912 TableName.valueOf("tableOverlapAndOrphan");
913 try {
914 setupTable(table);
915 assertEquals(ROWKEYS.length, countRows());
916
917
918 admin.disableTable(table);
919 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
920 Bytes.toBytes("B"), true, true, false, true);
921 admin.enableTable(table);
922
923 HRegionInfo hriOverlap =
924 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
925 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
926 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
927 .waitForAssignment(hriOverlap);
928 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
929 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
930
931 HBaseFsck hbck = doFsck(conf, false);
932 assertErrors(hbck, new ERROR_CODE[] {
933 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
934 ERROR_CODE.HOLE_IN_REGION_CHAIN});
935
936
937 doFsck(conf, true);
938
939
940 HBaseFsck hbck2 = doFsck(conf,false);
941 assertNoErrors(hbck2);
942 assertEquals(0, hbck2.getOverlapGroups(table).size());
943 assertEquals(ROWKEYS.length, countRows());
944 } finally {
945 cleanupTable(table);
946 }
947 }
948
949
950
951
952
953
954 @Test (timeout=180000)
955 public void testCoveredStartKey() throws Exception {
956 TableName table =
957 TableName.valueOf("tableCoveredStartKey");
958 try {
959 setupTable(table);
960 assertEquals(ROWKEYS.length, countRows());
961
962
963 HRegionInfo hriOverlap =
964 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
965 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
966 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
967 .waitForAssignment(hriOverlap);
968 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
969 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
970
971 HBaseFsck hbck = doFsck(conf, false);
972 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
973 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
974 assertEquals(3, hbck.getOverlapGroups(table).size());
975 assertEquals(ROWKEYS.length, countRows());
976
977
978 doFsck(conf, true);
979
980
981 HBaseFsck hbck2 = doFsck(conf, false);
982 assertErrors(hbck2, new ERROR_CODE[0]);
983 assertEquals(0, hbck2.getOverlapGroups(table).size());
984 assertEquals(ROWKEYS.length, countRows());
985 } finally {
986 cleanupTable(table);
987 }
988 }
989
990
991
992
993
994 @Test (timeout=180000)
995 public void testRegionHole() throws Exception {
996 TableName table =
997 TableName.valueOf("tableRegionHole");
998 try {
999 setupTable(table);
1000 assertEquals(ROWKEYS.length, countRows());
1001
1002
1003 admin.disableTable(table);
1004 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1005 Bytes.toBytes("C"), true, true, true);
1006 admin.enableTable(table);
1007
1008 HBaseFsck hbck = doFsck(conf, false);
1009 assertErrors(hbck, new ERROR_CODE[] {
1010 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1011
1012 assertEquals(0, hbck.getOverlapGroups(table).size());
1013
1014
1015 doFsck(conf, true);
1016
1017
1018 assertNoErrors(doFsck(conf,false));
1019 assertEquals(ROWKEYS.length - 2 , countRows());
1020 } finally {
1021 cleanupTable(table);
1022 }
1023 }
1024
1025
1026
1027
1028
1029 @Test (timeout=180000)
1030 public void testHDFSRegioninfoMissing() throws Exception {
1031 TableName table = TableName.valueOf("tableHDFSRegioninfoMissing");
1032 try {
1033 setupTable(table);
1034 assertEquals(ROWKEYS.length, countRows());
1035
1036
1037 admin.disableTable(table);
1038 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1039 Bytes.toBytes("C"), true, true, false, true);
1040 admin.enableTable(table);
1041
1042 HBaseFsck hbck = doFsck(conf, false);
1043 assertErrors(hbck, new ERROR_CODE[] {
1044 ERROR_CODE.ORPHAN_HDFS_REGION,
1045 ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1046 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1047
1048 assertEquals(0, hbck.getOverlapGroups(table).size());
1049
1050
1051 doFsck(conf, true);
1052
1053
1054 assertNoErrors(doFsck(conf, false));
1055 assertEquals(ROWKEYS.length, countRows());
1056 } finally {
1057 cleanupTable(table);
1058 }
1059 }
1060
1061
1062
1063
1064
1065 @Test (timeout=180000)
1066 public void testNotInMetaOrDeployedHole() throws Exception {
1067 TableName table =
1068 TableName.valueOf("tableNotInMetaOrDeployedHole");
1069 try {
1070 setupTable(table);
1071 assertEquals(ROWKEYS.length, countRows());
1072
1073
1074 admin.disableTable(table);
1075 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1076 Bytes.toBytes("C"), true, true, false);
1077 admin.enableTable(table);
1078
1079 HBaseFsck hbck = doFsck(conf, false);
1080 assertErrors(hbck, new ERROR_CODE[] {
1081 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1082
1083 assertEquals(0, hbck.getOverlapGroups(table).size());
1084
1085
1086 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1087 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1088
1089
1090 assertNoErrors(doFsck(conf,false));
1091 assertEquals(ROWKEYS.length, countRows());
1092 } finally {
1093 cleanupTable(table);
1094 }
1095 }
1096
1097
1098
1099
1100 @Test (timeout=180000)
1101 public void testNotInMetaHole() throws Exception {
1102 TableName table =
1103 TableName.valueOf("tableNotInMetaHole");
1104 try {
1105 setupTable(table);
1106 assertEquals(ROWKEYS.length, countRows());
1107
1108
1109 admin.disableTable(table);
1110 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1111 Bytes.toBytes("C"), false, true, false);
1112 admin.enableTable(table);
1113
1114 HBaseFsck hbck = doFsck(conf, false);
1115 assertErrors(hbck, new ERROR_CODE[] {
1116 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1117
1118 assertEquals(0, hbck.getOverlapGroups(table).size());
1119
1120
1121 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1122 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1123
1124
1125 assertNoErrors(doFsck(conf,false));
1126 assertEquals(ROWKEYS.length, countRows());
1127 } finally {
1128 cleanupTable(table);
1129 }
1130 }
1131
1132
1133
1134
1135
1136 @Test (timeout=180000)
1137 public void testNotInHdfs() throws Exception {
1138 TableName table =
1139 TableName.valueOf("tableNotInHdfs");
1140 try {
1141 setupTable(table);
1142 assertEquals(ROWKEYS.length, countRows());
1143
1144
1145 admin.flush(table);
1146
1147
1148 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1149 Bytes.toBytes("C"), false, false, true);
1150
1151 HBaseFsck hbck = doFsck(conf, false);
1152 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1153
1154 assertEquals(0, hbck.getOverlapGroups(table).size());
1155
1156
1157 doFsck(conf, true);
1158
1159
1160 assertNoErrors(doFsck(conf,false));
1161 assertEquals(ROWKEYS.length - 2, countRows());
1162 } finally {
1163 cleanupTable(table);
1164 }
1165 }
1166
1167
1168
1169
1170
1171 @Test (timeout=180000)
1172 public void testNoHdfsTable() throws Exception {
1173 TableName table = TableName.valueOf("NoHdfsTable");
1174 setupTable(table);
1175 assertEquals(ROWKEYS.length, countRows());
1176
1177
1178 admin.flush(table);
1179
1180
1181 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1182 Bytes.toBytes("A"), false, false, true);
1183 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1184 Bytes.toBytes("B"), false, false, true);
1185 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1186 Bytes.toBytes("C"), false, false, true);
1187 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1188 Bytes.toBytes(""), false, false, true);
1189
1190
1191 deleteTableDir(table);
1192
1193 HBaseFsck hbck = doFsck(conf, false);
1194 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1195 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1196 ERROR_CODE.NOT_IN_HDFS,});
1197
1198 assertEquals(0, hbck.getOverlapGroups(table).size());
1199
1200
1201 doFsck(conf, true);
1202
1203
1204 assertNoErrors(doFsck(conf,false));
1205 assertFalse("Table " + table + " should have been deleted", admin.tableExists(table));
1206 }
1207
1208 public void deleteTableDir(TableName table) throws IOException {
1209 Path rootDir = FSUtils.getRootDir(conf);
1210 FileSystem fs = rootDir.getFileSystem(conf);
1211 Path p = FSUtils.getTableDir(rootDir, table);
1212 HBaseFsck.debugLsr(conf, p);
1213 boolean success = fs.delete(p, true);
1214 LOG.info("Deleted " + p + " sucessfully? " + success);
1215 }
1216
1217
1218
1219
1220 @Test (timeout=180000)
1221 public void testNoVersionFile() throws Exception {
1222
1223 Path rootDir = FSUtils.getRootDir(conf);
1224 FileSystem fs = rootDir.getFileSystem(conf);
1225 Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1226 fs.delete(versionFile, true);
1227
1228
1229 HBaseFsck hbck = doFsck(conf, false);
1230 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1231
1232 doFsck(conf, true);
1233
1234
1235 assertNoErrors(doFsck(conf, false));
1236 }
1237
1238
1239
1240
1241 @Test (timeout=180000)
1242 public void testRegionShouldNotBeDeployed() throws Exception {
1243 TableName table =
1244 TableName.valueOf("tableRegionShouldNotBeDeployed");
1245 try {
1246 LOG.info("Starting testRegionShouldNotBeDeployed.");
1247 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1248 assertTrue(cluster.waitForActiveAndReadyMaster());
1249
1250
1251 byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1252 Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1253 HTableDescriptor htdDisabled = new HTableDescriptor(table);
1254 htdDisabled.addFamily(new HColumnDescriptor(FAM));
1255
1256
1257 FSTableDescriptors fstd = new FSTableDescriptors(conf);
1258 fstd.createTableDescriptor(htdDisabled);
1259 List<HRegionInfo> disabledRegions =
1260 TEST_UTIL.createMultiRegionsInMeta(conf, htdDisabled, SPLIT_KEYS);
1261
1262
1263 HRegionServer hrs = cluster.getRegionServer(0);
1264
1265
1266 admin.disableTable(table);
1267 admin.enableTable(table);
1268
1269
1270 admin.disableTable(table);
1271 HRegionInfo region = disabledRegions.remove(0);
1272 byte[] regionName = region.getRegionName();
1273
1274
1275 assertTrue(cluster.getServerWith(regionName) == -1);
1276
1277
1278
1279
1280
1281 HRegion r = HRegion.openHRegion(
1282 region, htdDisabled, hrs.getWAL(region), conf);
1283 hrs.addToOnlineRegions(r);
1284
1285 HBaseFsck hbck = doFsck(conf, false);
1286 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1287
1288
1289 doFsck(conf, true);
1290
1291
1292 assertNoErrors(doFsck(conf, false));
1293 } finally {
1294 admin.enableTable(table);
1295 cleanupTable(table);
1296 }
1297 }
1298
1299
1300
1301
1302 @Test (timeout=180000)
1303 public void testFixByTable() throws Exception {
1304 TableName table1 =
1305 TableName.valueOf("testFixByTable1");
1306 TableName table2 =
1307 TableName.valueOf("testFixByTable2");
1308 try {
1309 setupTable(table1);
1310
1311 admin.flush(table1);
1312
1313 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1314 Bytes.toBytes("C"), false, false, true);
1315
1316 setupTable(table2);
1317
1318 admin.flush(table2);
1319
1320 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1321 Bytes.toBytes("C"), false, false, true);
1322
1323 HBaseFsck hbck = doFsck(conf, false);
1324 assertErrors(hbck, new ERROR_CODE[] {
1325 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1326
1327
1328 doFsck(conf, true, table1);
1329
1330 assertNoErrors(doFsck(conf, false, table1));
1331
1332 assertErrors(doFsck(conf, false, table2),
1333 new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1334
1335
1336 doFsck(conf, true, table2);
1337
1338 assertNoErrors(doFsck(conf, false));
1339 assertEquals(ROWKEYS.length - 2, countRows());
1340 } finally {
1341 cleanupTable(table1);
1342 cleanupTable(table2);
1343 }
1344 }
1345
1346
1347
1348 @Test (timeout=180000)
1349 public void testLingeringSplitParent() throws Exception {
1350 TableName table =
1351 TableName.valueOf("testLingeringSplitParent");
1352 Table meta = null;
1353 try {
1354 setupTable(table);
1355 assertEquals(ROWKEYS.length, countRows());
1356
1357
1358 admin.flush(table);
1359 HRegionLocation location = tbl.getRegionLocation("B");
1360
1361
1362 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1363 Bytes.toBytes("C"), true, true, false);
1364
1365
1366 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1367 HRegionInfo hri = location.getRegionInfo();
1368
1369 HRegionInfo a = new HRegionInfo(tbl.getName(),
1370 Bytes.toBytes("B"), Bytes.toBytes("BM"));
1371 HRegionInfo b = new HRegionInfo(tbl.getName(),
1372 Bytes.toBytes("BM"), Bytes.toBytes("C"));
1373
1374 hri.setOffline(true);
1375 hri.setSplit(true);
1376
1377 MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
1378 meta.close();
1379 admin.flush(TableName.META_TABLE_NAME);
1380
1381 HBaseFsck hbck = doFsck(conf, false);
1382 assertErrors(hbck, new ERROR_CODE[] {
1383 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1384
1385
1386 hbck = doFsck(conf, true);
1387 assertErrors(hbck, new ERROR_CODE[] {
1388 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1389 assertFalse(hbck.shouldRerun());
1390 hbck = doFsck(conf, false);
1391 assertErrors(hbck, new ERROR_CODE[] {
1392 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1393
1394
1395 hbck = new HBaseFsck(conf, hbfsckExecutorService);
1396 hbck.connect();
1397 hbck.setDisplayFullReport();
1398 hbck.setTimeLag(0);
1399 hbck.setFixSplitParents(true);
1400 hbck.onlineHbck();
1401 assertTrue(hbck.shouldRerun());
1402 hbck.close();
1403
1404 Get get = new Get(hri.getRegionName());
1405 Result result = meta.get(get);
1406 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1407 HConstants.SPLITA_QUALIFIER).isEmpty());
1408 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1409 HConstants.SPLITB_QUALIFIER).isEmpty());
1410 admin.flush(TableName.META_TABLE_NAME);
1411
1412
1413 doFsck(conf, true);
1414
1415
1416 assertNoErrors(doFsck(conf, false));
1417 assertEquals(ROWKEYS.length, countRows());
1418 } finally {
1419 cleanupTable(table);
1420 IOUtils.closeQuietly(meta);
1421 }
1422 }
1423
1424
1425
1426
1427
1428 @Test (timeout=180000)
1429 public void testValidLingeringSplitParent() throws Exception {
1430 TableName table =
1431 TableName.valueOf("testLingeringSplitParent");
1432 Table meta = null;
1433 try {
1434 setupTable(table);
1435 assertEquals(ROWKEYS.length, countRows());
1436
1437
1438 admin.flush(table);
1439 HRegionLocation location = tbl.getRegionLocation(Bytes.toBytes("B"));
1440
1441 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1442 HRegionInfo hri = location.getRegionInfo();
1443
1444
1445 byte[] regionName = location.getRegionInfo().getRegionName();
1446 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1447 TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
1448
1449
1450
1451
1452 HBaseFsck hbck = doFsck(
1453 conf, true, true, false, false, false, true, true, true, false, false, false, null);
1454 assertErrors(hbck, new ERROR_CODE[] {});
1455
1456
1457 Get get = new Get(hri.getRegionName());
1458 Result result = meta.get(get);
1459 assertNotNull(result);
1460 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
1461
1462 assertEquals(ROWKEYS.length, countRows());
1463
1464
1465 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1466 assertNoErrors(doFsck(conf, false));
1467 } finally {
1468 cleanupTable(table);
1469 IOUtils.closeQuietly(meta);
1470 }
1471 }
1472
1473
1474
1475
1476
1477 @Test(timeout=75000)
1478 public void testSplitDaughtersNotInMeta() throws Exception {
1479 TableName table = TableName.valueOf("testSplitdaughtersNotInMeta");
1480 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1481 try {
1482 setupTable(table);
1483 assertEquals(ROWKEYS.length, countRows());
1484
1485
1486 admin.flush(table);
1487 HRegionLocation location = tbl.getRegionLocation(Bytes.toBytes("B"));
1488
1489 HRegionInfo hri = location.getRegionInfo();
1490
1491
1492 byte[] regionName = location.getRegionInfo().getRegionName();
1493 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1494 TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
1495
1496 PairOfSameType<HRegionInfo> daughters =
1497 MetaTableAccessor.getDaughterRegions(meta.get(new Get(regionName)));
1498
1499
1500 Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1501 undeployRegion(connection, hris.get(daughters.getFirst()), daughters.getFirst());
1502 undeployRegion(connection, hris.get(daughters.getSecond()), daughters.getSecond());
1503
1504 List<Delete> deletes = new ArrayList<>();
1505 deletes.add(new Delete(daughters.getFirst().getRegionName()));
1506 deletes.add(new Delete(daughters.getSecond().getRegionName()));
1507 meta.delete(deletes);
1508
1509
1510 RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster().
1511 getAssignmentManager().getRegionStates();
1512 regionStates.deleteRegion(daughters.getFirst());
1513 regionStates.deleteRegion(daughters.getSecond());
1514
1515 HBaseFsck hbck = doFsck(conf, false);
1516 assertErrors(hbck,
1517 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1518 ERROR_CODE.HOLE_IN_REGION_CHAIN });
1519
1520
1521 hbck = doFsck(
1522 conf, true, true, false, false, false, false, false, false, false, false, false, null);
1523 assertErrors(hbck,
1524 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1525 ERROR_CODE.HOLE_IN_REGION_CHAIN });
1526
1527
1528 Get get = new Get(hri.getRegionName());
1529 Result result = meta.get(get);
1530 assertNotNull(result);
1531 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
1532
1533 assertEquals(ROWKEYS.length, countRows());
1534
1535
1536 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1537 assertNoErrors(doFsck(conf, false));
1538 } finally {
1539 meta.close();
1540 cleanupTable(table);
1541 }
1542 }
1543
1544
1545
1546
1547
1548 @Test(timeout=120000)
1549 public void testMissingFirstRegion() throws Exception {
1550 TableName table = TableName.valueOf("testMissingFirstRegion");
1551 try {
1552 setupTable(table);
1553 assertEquals(ROWKEYS.length, countRows());
1554
1555
1556 admin.disableTable(table);
1557 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1558 true, true);
1559 admin.enableTable(table);
1560
1561 HBaseFsck hbck = doFsck(conf, false);
1562 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1563
1564 doFsck(conf, true);
1565
1566 assertNoErrors(doFsck(conf, false));
1567 } finally {
1568 cleanupTable(table);
1569 }
1570 }
1571
1572
1573
1574
1575
1576 @Test(timeout=120000)
1577 public void testRegionDeployedNotInHdfs() throws Exception {
1578 TableName table =
1579 TableName.valueOf("testSingleRegionDeployedNotInHdfs");
1580 try {
1581 setupTable(table);
1582 admin.flush(table);
1583
1584
1585 deleteRegion(conf, tbl.getTableDescriptor(),
1586 HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
1587 false, true);
1588
1589 HBaseFsck hbck = doFsck(conf, false);
1590 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
1591
1592 doFsck(conf, true);
1593
1594 assertNoErrors(doFsck(conf, false));
1595 } finally {
1596 cleanupTable(table);
1597 }
1598 }
1599
1600
1601
1602
1603
1604 @Test(timeout=120000)
1605 public void testMissingLastRegion() throws Exception {
1606 TableName table =
1607 TableName.valueOf("testMissingLastRegion");
1608 try {
1609 setupTable(table);
1610 assertEquals(ROWKEYS.length, countRows());
1611
1612
1613 admin.disableTable(table);
1614 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1615 true, true);
1616 admin.enableTable(table);
1617
1618 HBaseFsck hbck = doFsck(conf, false);
1619 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1620
1621 doFsck(conf, true);
1622
1623 assertNoErrors(doFsck(conf, false));
1624 } finally {
1625 cleanupTable(table);
1626 }
1627 }
1628
1629
1630
1631
1632 @Test (timeout=180000)
1633 public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1634 TableName table =
1635 TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1636 try {
1637 setupTable(table);
1638 assertEquals(ROWKEYS.length, countRows());
1639
1640
1641 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1642 Bytes.toBytes("B"), true, false, false, false);
1643
1644
1645 HBaseFsck hbck = doFsck(conf, false);
1646 assertErrors(hbck, new ERROR_CODE[] {
1647 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1648
1649
1650 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1651 fsck.connect();
1652 fsck.setDisplayFullReport();
1653 fsck.setTimeLag(0);
1654 fsck.setCheckHdfs(false);
1655 fsck.onlineHbck();
1656 assertErrors(fsck, new ERROR_CODE[] {
1657 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1658 fsck.close();
1659
1660
1661 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1662 fsck.connect();
1663 fsck.setDisplayFullReport();
1664 fsck.setTimeLag(0);
1665 fsck.setCheckHdfs(false);
1666 fsck.setFixAssignments(true);
1667 fsck.onlineHbck();
1668 assertTrue(fsck.shouldRerun());
1669 fsck.onlineHbck();
1670 assertNoErrors(fsck);
1671
1672 assertEquals(ROWKEYS.length, countRows());
1673
1674 fsck.close();
1675 } finally {
1676 cleanupTable(table);
1677 }
1678 }
1679
1680
1681
1682
1683
1684
1685 @Test (timeout=180000)
1686 public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1687 TableName table =
1688 TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
1689 try {
1690 setupTable(table);
1691 assertEquals(ROWKEYS.length, countRows());
1692
1693
1694 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1695 Bytes.toBytes("B"), false, true, false, false);
1696
1697
1698 HBaseFsck hbck = doFsck(conf, false);
1699 assertErrors(hbck,
1700 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1701
1702
1703 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1704 fsck.connect();
1705 fsck.setDisplayFullReport();
1706 fsck.setTimeLag(0);
1707 fsck.setCheckHdfs(false);
1708 fsck.onlineHbck();
1709 assertErrors(fsck,
1710 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1711 fsck.close();
1712
1713
1714 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1715 fsck.connect();
1716 fsck.setDisplayFullReport();
1717 fsck.setTimeLag(0);
1718 fsck.setCheckHdfs(false);
1719 fsck.setFixAssignments(true);
1720 fsck.setFixMeta(true);
1721 fsck.onlineHbck();
1722 assertFalse(fsck.shouldRerun());
1723 assertErrors(fsck,
1724 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1725 fsck.close();
1726
1727
1728 fsck = doFsck(conf, true);
1729 assertTrue(fsck.shouldRerun());
1730 fsck = doFsck(conf, true);
1731 assertNoErrors(fsck);
1732 } finally {
1733 cleanupTable(table);
1734 }
1735 }
1736
1737
1738
1739
1740
1741 @Test (timeout=180000)
1742 public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1743 TableName table =
1744 TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
1745 try {
1746 setupTable(table);
1747 assertEquals(ROWKEYS.length, countRows());
1748
1749
1750 admin.disableTable(table);
1751 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1752 Bytes.toBytes("B"), true, true, false, true);
1753 admin.enableTable(table);
1754
1755 HRegionInfo hriOverlap =
1756 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
1757 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1758 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1759 .waitForAssignment(hriOverlap);
1760 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1761 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1762
1763 HBaseFsck hbck = doFsck(conf, false);
1764 assertErrors(hbck, new ERROR_CODE[] {
1765 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1766 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1767
1768
1769 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1770 fsck.connect();
1771 fsck.setDisplayFullReport();
1772 fsck.setTimeLag(0);
1773 fsck.setCheckHdfs(false);
1774 fsck.onlineHbck();
1775 assertErrors(fsck, new ERROR_CODE[] {
1776 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1777 fsck.close();
1778
1779
1780 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1781 fsck.connect();
1782 fsck.setDisplayFullReport();
1783 fsck.setTimeLag(0);
1784 fsck.setCheckHdfs(false);
1785 fsck.setFixHdfsHoles(true);
1786 fsck.setFixHdfsOverlaps(true);
1787 fsck.setFixHdfsOrphans(true);
1788 fsck.onlineHbck();
1789 assertFalse(fsck.shouldRerun());
1790 assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN});
1791 fsck.close();
1792 } finally {
1793 if (admin.isTableDisabled(table)) {
1794 admin.enableTable(table);
1795 }
1796 cleanupTable(table);
1797 }
1798 }
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808 Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
1809 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1810 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1811 Path famDir = new Path(regionDir, FAM_STR);
1812
1813
1814 while (true) {
1815 FileStatus[] hfFss = fs.listStatus(famDir);
1816 if (hfFss.length == 0) {
1817 continue;
1818 }
1819 for (FileStatus hfs : hfFss) {
1820 if (!hfs.isDirectory()) {
1821 return hfs.getPath();
1822 }
1823 }
1824 }
1825 }
1826
1827
1828
1829
1830 @Test(timeout=180000)
1831 public void testQuarantineCorruptHFile() throws Exception {
1832 TableName table = TableName.valueOf(name.getMethodName());
1833 try {
1834 setupTable(table);
1835 assertEquals(ROWKEYS.length, countRows());
1836 admin.flush(table);
1837
1838 FileSystem fs = FileSystem.get(conf);
1839 Path hfile = getFlushedHFile(fs, table);
1840
1841
1842 admin.disableTable(table);
1843
1844
1845 Path corrupt = new Path(hfile.getParent(), "deadbeef");
1846 TestHFile.truncateFile(fs, hfile, corrupt);
1847 LOG.info("Created corrupted file " + corrupt);
1848 HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
1849
1850
1851 HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
1852 assertEquals(res.getRetCode(), 0);
1853 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1854 assertEquals(hfcc.getHFilesChecked(), 5);
1855 assertEquals(hfcc.getCorrupted().size(), 1);
1856 assertEquals(hfcc.getFailures().size(), 0);
1857 assertEquals(hfcc.getQuarantined().size(), 1);
1858 assertEquals(hfcc.getMissing().size(), 0);
1859
1860
1861 admin.enableTable(table);
1862 } finally {
1863 cleanupTable(table);
1864 }
1865 }
1866
1867
1868
1869
1870 private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
1871 int corrupt, int fail, int quar, int missing) throws Exception {
1872 try {
1873 setupTable(table);
1874 assertEquals(ROWKEYS.length, countRows());
1875 admin.flush(table);
1876
1877
1878 admin.disableTable(table);
1879
1880 String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
1881 table.getNameAsString()};
1882 HBaseFsck res = hbck.exec(hbfsckExecutorService, args);
1883
1884 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1885 assertEquals(hfcc.getHFilesChecked(), check);
1886 assertEquals(hfcc.getCorrupted().size(), corrupt);
1887 assertEquals(hfcc.getFailures().size(), fail);
1888 assertEquals(hfcc.getQuarantined().size(), quar);
1889 assertEquals(hfcc.getMissing().size(), missing);
1890
1891
1892 admin.enableTableAsync(table);
1893 while (!admin.isTableEnabled(table)) {
1894 try {
1895 Thread.sleep(250);
1896 } catch (InterruptedException e) {
1897 e.printStackTrace();
1898 fail("Interrupted when trying to enable table " + table);
1899 }
1900 }
1901 } finally {
1902 cleanupTable(table);
1903 }
1904 }
1905
1906
1907
1908
1909
1910 @Test(timeout=180000)
1911 public void testQuarantineMissingHFile() throws Exception {
1912 TableName table = TableName.valueOf(name.getMethodName());
1913
1914
1915 final FileSystem fs = FileSystem.get(conf);
1916 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
1917 @Override
1918 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1919 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1920 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1921 @Override
1922 protected void checkHFile(Path p) throws IOException {
1923 if (attemptedFirstHFile.compareAndSet(false, true)) {
1924 assertTrue(fs.delete(p, true));
1925 }
1926 super.checkHFile(p);
1927 }
1928 };
1929 }
1930 };
1931 doQuarantineTest(table, hbck, 4, 0, 0, 0, 1);
1932 hbck.close();
1933 }
1934
1935
1936
1937
1938
1939
1940
1941 @Ignore @Test(timeout=180000)
1942 public void testQuarantineMissingFamdir() throws Exception {
1943 TableName table = TableName.valueOf(name.getMethodName());
1944
1945 final FileSystem fs = FileSystem.get(conf);
1946 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
1947 @Override
1948 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1949 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1950 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1951 @Override
1952 protected void checkColFamDir(Path p) throws IOException {
1953 if (attemptedFirstHFile.compareAndSet(false, true)) {
1954 assertTrue(fs.delete(p, true));
1955 }
1956 super.checkColFamDir(p);
1957 }
1958 };
1959 }
1960 };
1961 doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1962 hbck.close();
1963 }
1964
1965
1966
1967
1968
1969 @Test(timeout=180000)
1970 public void testQuarantineMissingRegionDir() throws Exception {
1971 TableName table = TableName.valueOf(name.getMethodName());
1972
1973 final FileSystem fs = FileSystem.get(conf);
1974 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
1975 @Override
1976 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
1977 throws IOException {
1978 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1979 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1980 @Override
1981 protected void checkRegionDir(Path p) throws IOException {
1982 if (attemptedFirstHFile.compareAndSet(false, true)) {
1983 assertTrue(fs.delete(p, true));
1984 }
1985 super.checkRegionDir(p);
1986 }
1987 };
1988 }
1989 };
1990 doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1991 hbck.close();
1992 }
1993
1994
1995
1996
1997 @Test (timeout=180000)
1998 public void testLingeringReferenceFile() throws Exception {
1999 TableName table =
2000 TableName.valueOf("testLingeringReferenceFile");
2001 try {
2002 setupTable(table);
2003 assertEquals(ROWKEYS.length, countRows());
2004
2005
2006 FileSystem fs = FileSystem.get(conf);
2007 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2008 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2009 Path famDir = new Path(regionDir, FAM_STR);
2010 Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
2011 fs.create(fakeReferenceFile);
2012
2013 HBaseFsck hbck = doFsck(conf, false);
2014 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
2015
2016 doFsck(conf, true);
2017
2018 assertNoErrors(doFsck(conf, false));
2019 } finally {
2020 cleanupTable(table);
2021 }
2022 }
2023
2024
2025
2026
2027 @Test (timeout=180000)
2028 public void testMissingRegionInfoQualifier() throws Exception {
2029 Connection connection = ConnectionFactory.createConnection(conf);
2030 TableName table = TableName.valueOf("testMissingRegionInfoQualifier");
2031 try {
2032 setupTable(table);
2033
2034
2035 final List<Delete> deletes = new LinkedList<Delete>();
2036 Table meta = connection.getTable(TableName.META_TABLE_NAME, hbfsckExecutorService);
2037 MetaScanner.metaScan(connection, new MetaScanner.MetaScannerVisitor() {
2038
2039 @Override
2040 public boolean processRow(Result rowResult) throws IOException {
2041 HRegionInfo hri = MetaTableAccessor.getHRegionInfo(rowResult);
2042 if (hri != null && !hri.getTable().isSystemTable()) {
2043 Delete delete = new Delete(rowResult.getRow());
2044 delete.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2045 deletes.add(delete);
2046 }
2047 return true;
2048 }
2049
2050 @Override
2051 public void close() throws IOException {
2052 }
2053 });
2054 meta.delete(deletes);
2055
2056
2057 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2058 HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
2059 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2060 HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
2061 meta.close();
2062
2063 HBaseFsck hbck = doFsck(conf, false);
2064 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2065
2066
2067 hbck = doFsck(conf, true);
2068
2069
2070 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2071 } finally {
2072 cleanupTable(table);
2073 }
2074 connection.close();
2075 }
2076
2077
2078
2079
2080
2081 @Test (timeout=180000)
2082 public void testErrorReporter() throws Exception {
2083 try {
2084 MockErrorReporter.calledCount = 0;
2085 doFsck(conf, false);
2086 assertEquals(MockErrorReporter.calledCount, 0);
2087
2088 conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
2089 doFsck(conf, false);
2090 assertTrue(MockErrorReporter.calledCount > 20);
2091 } finally {
2092 conf.set("hbasefsck.errorreporter",
2093 PrintingErrorReporter.class.getName());
2094 MockErrorReporter.calledCount = 0;
2095 }
2096 }
2097
2098 static class MockErrorReporter implements ErrorReporter {
2099 static int calledCount = 0;
2100
2101 @Override
2102 public void clear() {
2103 calledCount++;
2104 }
2105
2106 @Override
2107 public void report(String message) {
2108 calledCount++;
2109 }
2110
2111 @Override
2112 public void reportError(String message) {
2113 calledCount++;
2114 }
2115
2116 @Override
2117 public void reportError(ERROR_CODE errorCode, String message) {
2118 calledCount++;
2119 }
2120
2121 @Override
2122 public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2123 calledCount++;
2124 }
2125
2126 @Override
2127 public void reportError(ERROR_CODE errorCode,
2128 String message, TableInfo table, HbckInfo info) {
2129 calledCount++;
2130 }
2131
2132 @Override
2133 public void reportError(ERROR_CODE errorCode, String message,
2134 TableInfo table, HbckInfo info1, HbckInfo info2) {
2135 calledCount++;
2136 }
2137
2138 @Override
2139 public int summarize() {
2140 return ++calledCount;
2141 }
2142
2143 @Override
2144 public void detail(String details) {
2145 calledCount++;
2146 }
2147
2148 @Override
2149 public ArrayList<ERROR_CODE> getErrorList() {
2150 calledCount++;
2151 return new ArrayList<ERROR_CODE>();
2152 }
2153
2154 @Override
2155 public void progress() {
2156 calledCount++;
2157 }
2158
2159 @Override
2160 public void print(String message) {
2161 calledCount++;
2162 }
2163
2164 @Override
2165 public void resetErrors() {
2166 calledCount++;
2167 }
2168
2169 @Override
2170 public boolean tableHasErrors(TableInfo table) {
2171 calledCount++;
2172 return false;
2173 }
2174 }
2175
2176 @Test(timeout=180000)
2177 public void testCheckTableLocks() throws Exception {
2178 IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
2179 EnvironmentEdgeManager.injectEdge(edge);
2180
2181 HBaseFsck hbck = doFsck(conf, false);
2182 assertNoErrors(hbck);
2183
2184 ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
2185
2186
2187 final TableLockManager tableLockManager = TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
2188 TableLock writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2189 "testCheckTableLocks");
2190 writeLock.acquire();
2191 hbck = doFsck(conf, false);
2192 assertNoErrors(hbck);
2193
2194 edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2195 TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));
2196
2197 hbck = doFsck(conf, false);
2198 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2199
2200 final CountDownLatch latch = new CountDownLatch(1);
2201 new Thread() {
2202 @Override
2203 public void run() {
2204 TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2205 "testCheckTableLocks");
2206 try {
2207 latch.countDown();
2208 readLock.acquire();
2209 } catch (IOException ex) {
2210 fail();
2211 } catch (IllegalStateException ex) {
2212 return;
2213 }
2214 fail("should not have come here");
2215 };
2216 }.start();
2217
2218 latch.await();
2219 Threads.sleep(300);
2220
2221 hbck = doFsck(conf, false);
2222 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2223
2224 edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2225 TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));
2226
2227 hbck = doFsck(conf, false);
2228 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK});
2229
2230 conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);
2231
2232 Threads.sleep(10);
2233 hbck = doFsck(conf, true);
2234
2235 hbck = doFsck(conf, false);
2236 assertNoErrors(hbck);
2237
2238
2239 writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2240 "should acquire without blocking");
2241 writeLock.acquire();
2242 writeLock.release();
2243 }
2244
2245
2246
2247
2248 @Test
2249 public void testOrphanedTableZNode() throws Exception {
2250 TableName table = TableName.valueOf("testOrphanedZKTableEntry");
2251
2252 try {
2253 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getTableStateManager()
2254 .setTableState(table, ZooKeeperProtos.Table.State.ENABLING);
2255
2256 try {
2257 setupTable(table);
2258 Assert.fail(
2259 "Create table should fail when its ZNode has already existed with ENABLING state.");
2260 } catch(TableExistsException t) {
2261
2262 }
2263
2264 try {
2265 cleanupTable(table);
2266 } catch (IOException e) {
2267
2268
2269 }
2270
2271 HBaseFsck hbck = doFsck(conf, false);
2272 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
2273
2274
2275 hbck = doFsck(conf, true);
2276
2277
2278 hbck = doFsck(conf, false);
2279 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
2280
2281 setupTable(table);
2282 } finally {
2283
2284
2285 try {
2286 cleanupTable(table);
2287 } catch (IOException e) {
2288
2289
2290 }
2291 }
2292 }
2293
2294 @Test (timeout=180000)
2295 public void testMetaOffline() throws Exception {
2296
2297 HBaseFsck hbck = doFsck(conf, false);
2298 assertNoErrors(hbck);
2299 deleteMetaRegion(conf, true, false, false);
2300 hbck = doFsck(conf, false);
2301
2302
2303 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2304 hbck = doFsck(conf, true);
2305 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2306 hbck = doFsck(conf, false);
2307 assertNoErrors(hbck);
2308 }
2309
2310 private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
2311 boolean regionInfoOnly) throws IOException, InterruptedException {
2312 HRegionLocation metaLocation = connection.getRegionLocator(TableName.META_TABLE_NAME)
2313 .getRegionLocation(HConstants.EMPTY_START_ROW);
2314 ServerName hsa = metaLocation.getServerName();
2315 HRegionInfo hri = metaLocation.getRegionInfo();
2316 if (unassign) {
2317 LOG.info("Undeploying meta region " + hri + " from server " + hsa);
2318 try (Connection unmanagedConnection = ConnectionFactory.createConnection(conf)) {
2319 undeployRegion(unmanagedConnection, hsa, hri);
2320 }
2321 }
2322
2323 if (regionInfoOnly) {
2324 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
2325 Path rootDir = FSUtils.getRootDir(conf);
2326 FileSystem fs = rootDir.getFileSystem(conf);
2327 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2328 hri.getEncodedName());
2329 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
2330 fs.delete(hriPath, true);
2331 }
2332
2333 if (hdfs) {
2334 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
2335 Path rootDir = FSUtils.getRootDir(conf);
2336 FileSystem fs = rootDir.getFileSystem(conf);
2337 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2338 hri.getEncodedName());
2339 HBaseFsck.debugLsr(conf, p);
2340 boolean success = fs.delete(p, true);
2341 LOG.info("Deleted " + p + " sucessfully? " + success);
2342 HBaseFsck.debugLsr(conf, p);
2343 }
2344 }
2345
2346 @Test (timeout=180000)
2347 public void testTableWithNoRegions() throws Exception {
2348
2349
2350 TableName table =
2351 TableName.valueOf(name.getMethodName());
2352 try {
2353
2354 HTableDescriptor desc = new HTableDescriptor(table);
2355 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
2356 desc.addFamily(hcd);
2357 admin.createTable(desc);
2358 tbl = (HTable) connection.getTable(table, tableExecutorService);
2359
2360
2361 deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
2362 HConstants.EMPTY_END_ROW, false, false, true);
2363
2364 HBaseFsck hbck = doFsck(conf, false);
2365 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
2366
2367 doFsck(conf, true);
2368
2369
2370 doFsck(conf, true);
2371
2372
2373 assertNoErrors(doFsck(conf, false));
2374 } finally {
2375 cleanupTable(table);
2376 }
2377
2378 }
2379
2380 @Test (timeout=180000)
2381 public void testHbckAfterRegionMerge() throws Exception {
2382 TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
2383 Table meta = null;
2384 try {
2385
2386 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
2387 setupTable(table);
2388 assertEquals(ROWKEYS.length, countRows());
2389
2390
2391 admin.flush(table);
2392 HRegionInfo region1 = tbl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
2393 HRegionInfo region2 = tbl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
2394
2395 int regionCountBeforeMerge = tbl.getRegionLocations().size();
2396
2397 assertNotEquals(region1, region2);
2398
2399
2400 admin.mergeRegions(region1.getEncodedNameAsBytes(),
2401 region2.getEncodedNameAsBytes(), false);
2402
2403
2404 long timeout = System.currentTimeMillis() + 30 * 1000;
2405 while (true) {
2406 if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
2407 break;
2408 } else if (System.currentTimeMillis() > timeout) {
2409 fail("Time out waiting on region " + region1.getEncodedName()
2410 + " and " + region2.getEncodedName() + " be merged");
2411 }
2412 Thread.sleep(10);
2413 }
2414
2415 assertEquals(ROWKEYS.length, countRows());
2416
2417 HBaseFsck hbck = doFsck(conf, false);
2418 assertNoErrors(hbck);
2419
2420 } finally {
2421 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
2422 cleanupTable(table);
2423 IOUtils.closeQuietly(meta);
2424 }
2425 }
2426
2427 @Test (timeout = 180000)
2428 public void testRegionBoundariesCheck() throws Exception {
2429 HBaseFsck hbck = doFsck(conf, false);
2430 assertNoErrors(hbck);
2431 try {
2432 hbck.checkRegionBoundaries();
2433 } catch (IllegalArgumentException e) {
2434 if (e.getMessage().endsWith("not a valid DFS filename.")) {
2435 fail("Table directory path is not valid." + e.getMessage());
2436 }
2437 }
2438 }
2439
2440 @org.junit.Rule
2441 public TestName name = new TestName();
2442
2443 @Test (timeout=180000)
2444 public void testReadOnlyProperty() throws Exception {
2445 HBaseFsck hbck = doFsck(conf, false);
2446 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2447 hbck.shouldIgnorePreCheckPermission());
2448
2449 hbck = doFsck(conf, true);
2450 Assert.assertEquals("shouldIgnorePreCheckPermission", false,
2451 hbck.shouldIgnorePreCheckPermission());
2452
2453 hbck = doFsck(conf, true);
2454 hbck.setIgnorePreCheckPermission(true);
2455 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2456 hbck.shouldIgnorePreCheckPermission());
2457 }
2458
2459 @Test (timeout=180000)
2460 public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
2461 TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
2462 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
2463 try {
2464 HTableDescriptor desc = new HTableDescriptor(table);
2465 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
2466 admin.createTable(desc);
2467 tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
2468 for (int i = 0; i < 5; i++) {
2469 Put p1 = new Put(("r" + i).getBytes());
2470 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
2471 tbl.put(p1);
2472 }
2473 admin.flush(desc.getTableName());
2474 List<HRegion> regions = cluster.getRegions(desc.getTableName());
2475 int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
2476 HRegionServer regionServer = cluster.getRegionServer(serverWith);
2477 cluster.getServerWith(regions.get(0).getRegionName());
2478 SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
2479 st.prepare();
2480 st.stepsBeforePONR(regionServer, regionServer, false);
2481 AssignmentManager am = cluster.getMaster().getAssignmentManager();
2482 Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
2483 for (RegionState state : regionsInTransition.values()) {
2484 am.regionOffline(state.getRegion());
2485 }
2486 ZKAssign.deleteNodeFailSilent(regionServer.getZooKeeper(), regions.get(0).getRegionInfo());
2487 Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
2488 regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
2489 am.assign(regionsMap);
2490 am.waitForAssignment(regions.get(0).getRegionInfo());
2491 HBaseFsck hbck = doFsck(conf, false);
2492 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
2493 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
2494
2495 assertEquals(0, hbck.getOverlapGroups(table).size());
2496
2497
2498 assertErrors(
2499 doFsck(
2500 conf, false, true, false, false, false, false, false, false, false, false, false, null),
2501 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
2502 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
2503
2504
2505 assertNoErrors(doFsck(conf, false));
2506 assertEquals(5, countRows());
2507 } finally {
2508 if (tbl != null) {
2509 tbl.close();
2510 tbl = null;
2511 }
2512 cleanupTable(table);
2513 }
2514 }
2515
2516 }