1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.util;
20
21 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24 import static org.junit.Assert.assertEquals;
25 import static org.junit.Assert.assertFalse;
26 import static org.junit.Assert.assertNotEquals;
27 import static org.junit.Assert.assertNotNull;
28 import static org.junit.Assert.assertTrue;
29 import static org.junit.Assert.fail;
30
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.Collection;
34 import java.util.HashMap;
35 import java.util.LinkedList;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.concurrent.Callable;
39 import java.util.concurrent.CountDownLatch;
40 import java.util.concurrent.ExecutorService;
41 import java.util.concurrent.Executors;
42 import java.util.concurrent.Future;
43 import java.util.concurrent.ScheduledThreadPoolExecutor;
44 import java.util.concurrent.SynchronousQueue;
45 import java.util.concurrent.ThreadPoolExecutor;
46 import java.util.concurrent.TimeUnit;
47 import java.util.concurrent.atomic.AtomicBoolean;
48
49 import org.apache.commons.io.IOUtils;
50 import org.apache.commons.logging.Log;
51 import org.apache.commons.logging.LogFactory;
52 import org.apache.hadoop.conf.Configuration;
53 import org.apache.hadoop.fs.FileStatus;
54 import org.apache.hadoop.fs.FileSystem;
55 import org.apache.hadoop.fs.Path;
56 import org.apache.hadoop.hbase.ClusterStatus;
57 import org.apache.hadoop.hbase.HBaseTestingUtility;
58 import org.apache.hadoop.hbase.HColumnDescriptor;
59 import org.apache.hadoop.hbase.HConstants;
60 import org.apache.hadoop.hbase.HRegionInfo;
61 import org.apache.hadoop.hbase.HRegionLocation;
62 import org.apache.hadoop.hbase.HTableDescriptor;
63 import org.apache.hadoop.hbase.TableExistsException;
64 import org.apache.hadoop.hbase.testclassification.LargeTests;
65 import org.apache.hadoop.hbase.MiniHBaseCluster;
66 import org.apache.hadoop.hbase.ServerName;
67 import org.apache.hadoop.hbase.TableName;
68 import org.apache.hadoop.hbase.MetaTableAccessor;
69 import org.apache.hadoop.hbase.client.Admin;
70 import org.apache.hadoop.hbase.client.ClusterConnection;
71 import org.apache.hadoop.hbase.client.Connection;
72 import org.apache.hadoop.hbase.client.ConnectionFactory;
73 import org.apache.hadoop.hbase.client.Delete;
74 import org.apache.hadoop.hbase.client.Durability;
75 import org.apache.hadoop.hbase.client.Get;
76 import org.apache.hadoop.hbase.client.HBaseAdmin;
77 import org.apache.hadoop.hbase.client.HConnection;
78 import org.apache.hadoop.hbase.client.HTable;
79 import org.apache.hadoop.hbase.client.MetaScanner;
80 import org.apache.hadoop.hbase.client.Put;
81 import org.apache.hadoop.hbase.client.Result;
82 import org.apache.hadoop.hbase.client.ResultScanner;
83 import org.apache.hadoop.hbase.client.Scan;
84 import org.apache.hadoop.hbase.client.Table;
85 import org.apache.hadoop.hbase.io.hfile.TestHFile;
86 import org.apache.hadoop.hbase.master.AssignmentManager;
87 import org.apache.hadoop.hbase.master.HMaster;
88 import org.apache.hadoop.hbase.master.RegionState;
89 import org.apache.hadoop.hbase.master.RegionStates;
90 import org.apache.hadoop.hbase.master.TableLockManager;
91 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
92 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
93 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
94 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
95 import org.apache.hadoop.hbase.regionserver.HRegion;
96 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
97 import org.apache.hadoop.hbase.regionserver.HRegionServer;
98 import org.apache.hadoop.hbase.regionserver.SplitTransaction;
99 import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
100 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
101 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
102 import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
103 import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
104 import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
105 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
106 import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
107 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
108 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
109 import org.apache.zookeeper.KeeperException;
110 import org.junit.AfterClass;
111 import org.junit.Assert;
112 import org.junit.BeforeClass;
113 import org.junit.Ignore;
114 import org.junit.Test;
115 import org.junit.experimental.categories.Category;
116 import org.junit.rules.TestName;
117
118 import com.google.common.collect.Multimap;
119
120
121
122
123 @Category(LargeTests.class)
124 public class TestHBaseFsck {
125 static final int POOL_SIZE = 7;
126
127 final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
128 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
129 private final static Configuration conf = TEST_UTIL.getConfiguration();
130 private final static String FAM_STR = "fam";
131 private final static byte[] FAM = Bytes.toBytes(FAM_STR);
132 private final static int REGION_ONLINE_TIMEOUT = 800;
133 private static RegionStates regionStates;
134 private static ExecutorService tableExecutorService;
135 private static ScheduledThreadPoolExecutor hbfsckExecutorService;
136 private static ClusterConnection connection;
137 private static Admin admin;
138
139
140 private HTable tbl;
141 private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
142 Bytes.toBytes("B"), Bytes.toBytes("C") };
143
144 private final static byte[][] ROWKEYS= new byte[][] {
145 Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
146 Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
147
148 @BeforeClass
149 public static void setUpBeforeClass() throws Exception {
150 conf.setInt("hbase.regionserver.handler.count", 2);
151 conf.setInt("hbase.regionserver.metahandler.count", 2);
152
153 conf.setInt("hbase.htable.threads.max", POOL_SIZE);
154 conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
155 conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
156 conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
157 TEST_UTIL.startMiniCluster(3);
158
159 tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
160 new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
161
162 hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
163
164 AssignmentManager assignmentManager =
165 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
166 regionStates = assignmentManager.getRegionStates();
167
168 connection = (ClusterConnection) TEST_UTIL.getConnection();
169
170 admin = connection.getAdmin();
171 admin.setBalancerRunning(false, true);
172 }
173
174 @AfterClass
175 public static void tearDownAfterClass() throws Exception {
176 tableExecutorService.shutdown();
177 hbfsckExecutorService.shutdown();
178 admin.close();
179 TEST_UTIL.shutdownMiniCluster();
180 }
181
182 @Test (timeout=180000)
183 public void testHBaseFsck() throws Exception {
184 assertNoErrors(doFsck(conf, false));
185 TableName table = TableName.valueOf("tableBadMetaAssign");
186 TEST_UTIL.createTable(table, FAM);
187
188
189 assertNoErrors(doFsck(conf, false));
190
191
192
193 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
194 Scan scan = new Scan();
195 scan.setStartRow(Bytes.toBytes(table+",,"));
196 ResultScanner scanner = meta.getScanner(scan);
197 HRegionInfo hri = null;
198
199 Result res = scanner.next();
200 ServerName currServer =
201 ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
202 HConstants.SERVER_QUALIFIER));
203 long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
204 HConstants.STARTCODE_QUALIFIER));
205
206 for (JVMClusterUtil.RegionServerThread rs :
207 TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
208
209 ServerName sn = rs.getRegionServer().getServerName();
210
211
212 if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
213 startCode != sn.getStartcode()) {
214 Put put = new Put(res.getRow());
215 put.setDurability(Durability.SKIP_WAL);
216 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
217 Bytes.toBytes(sn.getHostAndPort()));
218 put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
219 Bytes.toBytes(sn.getStartcode()));
220 meta.put(put);
221 hri = MetaTableAccessor.getHRegionInfo(res);
222 break;
223 }
224 }
225
226
227 assertErrors(doFsck(conf, true), new ERROR_CODE[]{
228 ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
229
230 TEST_UTIL.getHBaseCluster().getMaster()
231 .getAssignmentManager().waitForAssignment(hri);
232
233
234 assertNoErrors(doFsck(conf, false));
235
236
237 Table t = connection.getTable(table, tableExecutorService);
238 ResultScanner s = t.getScanner(new Scan());
239 s.close();
240 t.close();
241
242 scanner.close();
243 meta.close();
244 }
245
246 @Test(timeout=180000)
247 public void testFixAssignmentsWhenMETAinTransition() throws Exception {
248 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
249 admin.closeRegion(cluster.getServerHoldingMeta(), HRegionInfo.FIRST_META_REGIONINFO);
250 regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
251 new MetaTableLocator().deleteMetaLocation(cluster.getMaster().getZooKeeper());
252 assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
253 HBaseFsck hbck = doFsck(conf, true);
254 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
255 ERROR_CODE.NULL_META_REGION });
256 assertNoErrors(doFsck(conf, false));
257 }
258
259
260
261
262 private HRegionInfo createRegion(final HTableDescriptor
263 htd, byte[] startKey, byte[] endKey)
264 throws IOException {
265 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
266 HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
267 MetaTableAccessor.addRegionToMeta(meta, hri);
268 meta.close();
269 return hri;
270 }
271
272
273
274
275 private void dumpMeta(TableName tableName) throws IOException {
276 List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
277 for (byte[] row : metaRows) {
278 LOG.info(Bytes.toString(row));
279 }
280 }
281
282
283
284
285
286 private void undeployRegion(Connection conn, ServerName sn,
287 HRegionInfo hri) throws IOException, InterruptedException {
288 try {
289 HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) conn, sn, hri);
290 if (!hri.isMetaTable()) {
291 admin.offline(hri.getRegionName());
292 }
293 } catch (IOException ioe) {
294 LOG.warn("Got exception when attempting to offline region "
295 + Bytes.toString(hri.getRegionName()), ioe);
296 }
297 }
298
299
300
301
302
303
304 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
305 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
306 boolean hdfs) throws IOException, InterruptedException {
307 deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
308 }
309
310
311
312
313
314
315
316
317 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
318 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
319 boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
320 LOG.info("** Before delete:");
321 dumpMeta(htd.getTableName());
322
323 List<HRegionLocation> locations = tbl.getAllRegionLocations();
324 for (HRegionLocation location : locations) {
325 HRegionInfo hri = location.getRegionInfo();
326 ServerName hsa = location.getServerName();
327 if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
328 && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
329
330 LOG.info("RegionName: " +hri.getRegionNameAsString());
331 byte[] deleteRow = hri.getRegionName();
332
333 if (unassign) {
334 LOG.info("Undeploying region " + hri + " from server " + hsa);
335 undeployRegion(connection, hsa, hri);
336 }
337
338 if (regionInfoOnly) {
339 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
340 Path rootDir = FSUtils.getRootDir(conf);
341 FileSystem fs = rootDir.getFileSystem(conf);
342 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
343 hri.getEncodedName());
344 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
345 fs.delete(hriPath, true);
346 }
347
348 if (hdfs) {
349 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
350 Path rootDir = FSUtils.getRootDir(conf);
351 FileSystem fs = rootDir.getFileSystem(conf);
352 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
353 hri.getEncodedName());
354 HBaseFsck.debugLsr(conf, p);
355 boolean success = fs.delete(p, true);
356 LOG.info("Deleted " + p + " sucessfully? " + success);
357 HBaseFsck.debugLsr(conf, p);
358 }
359
360 if (metaRow) {
361 try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
362 Delete delete = new Delete(deleteRow);
363 meta.delete(delete);
364 }
365 }
366 }
367 LOG.info(hri.toString() + hsa.toString());
368 }
369
370 TEST_UTIL.getMetaTableRows(htd.getTableName());
371 LOG.info("*** After delete:");
372 dumpMeta(htd.getTableName());
373 }
374
375
376
377
378
379
380
381
382
383
384 void setupTable(TableName tablename) throws Exception {
385 setupTableWithRegionReplica(tablename, 1);
386 }
387
388
389
390
391
392
393
394
395
396
397 void setupTableWithRegionReplica(TableName tablename, int replicaCount) throws Exception {
398 HTableDescriptor desc = new HTableDescriptor(tablename);
399 desc.setRegionReplication(replicaCount);
400 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
401 desc.addFamily(hcd);
402 admin.createTable(desc, SPLITS);
403 tbl = (HTable) connection.getTable(tablename, tableExecutorService);
404 List<Put> puts = new ArrayList<Put>();
405 for (byte[] row : ROWKEYS) {
406 Put p = new Put(row);
407 p.add(FAM, Bytes.toBytes("val"), row);
408 puts.add(p);
409 }
410 tbl.put(puts);
411 tbl.flushCommits();
412 }
413
414
415
416
417 int countRows() throws IOException {
418 Scan s = new Scan();
419 ResultScanner rs = tbl.getScanner(s);
420 int i = 0;
421 while(rs.next() !=null) {
422 i++;
423 }
424 return i;
425 }
426
427
428
429
430
431
432
433 void cleanupTable(TableName tablename) throws IOException {
434 if (tbl != null) {
435 tbl.close();
436 tbl = null;
437 }
438
439 ((ClusterConnection) connection).clearRegionCache();
440 TEST_UTIL.deleteTable(tablename);
441
442 }
443
444
445
446
447 @Test (timeout=180000)
448 public void testHBaseFsckClean() throws Exception {
449 assertNoErrors(doFsck(conf, false));
450 TableName table = TableName.valueOf("tableClean");
451 try {
452 HBaseFsck hbck = doFsck(conf, false);
453 assertNoErrors(hbck);
454
455 setupTable(table);
456 assertEquals(ROWKEYS.length, countRows());
457
458
459 hbck = doFsck(conf, false);
460 assertNoErrors(hbck);
461 assertEquals(0, hbck.getOverlapGroups(table).size());
462 assertEquals(ROWKEYS.length, countRows());
463 } finally {
464 cleanupTable(table);
465 }
466 }
467
468
469
470
471 @Test (timeout=180000)
472 public void testHbckThreadpooling() throws Exception {
473 TableName table =
474 TableName.valueOf("tableDupeStartKey");
475 try {
476
477 setupTable(table);
478
479
480 Configuration newconf = new Configuration(conf);
481 newconf.setInt("hbasefsck.numthreads", 1);
482 assertNoErrors(doFsck(newconf, false));
483
484
485 } finally {
486 cleanupTable(table);
487 }
488 }
489
490 @Test (timeout=180000)
491 public void testHbckFixOrphanTable() throws Exception {
492 TableName table = TableName.valueOf("tableInfo");
493 FileSystem fs = null;
494 Path tableinfo = null;
495 try {
496 setupTable(table);
497
498 Path hbaseTableDir = FSUtils.getTableDir(
499 FSUtils.getRootDir(conf), table);
500 fs = hbaseTableDir.getFileSystem(conf);
501 FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
502 tableinfo = status.getPath();
503 fs.rename(tableinfo, new Path("/.tableinfo"));
504
505
506 HBaseFsck hbck = doFsck(conf, false);
507 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
508
509
510 hbck = doFsck(conf, true);
511 assertNoErrors(hbck);
512 status = null;
513 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
514 assertNotNull(status);
515
516 HTableDescriptor htd = admin.getTableDescriptor(table);
517 htd.setValue("NOT_DEFAULT", "true");
518 admin.disableTable(table);
519 admin.modifyTable(table, htd);
520 admin.enableTable(table);
521 fs.delete(status.getPath(), true);
522
523
524 htd = admin.getTableDescriptor(table);
525 hbck = doFsck(conf, true);
526 assertNoErrors(hbck);
527 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
528 assertNotNull(status);
529 htd = admin.getTableDescriptor(table);
530 assertEquals(htd.getValue("NOT_DEFAULT"), "true");
531 } finally {
532 fs.rename(new Path("/.tableinfo"), tableinfo);
533 cleanupTable(table);
534 }
535 }
536
537
538
539
540
541
542 @Test (timeout=180000)
543 public void testParallelHbck() throws Exception {
544 final ExecutorService service;
545 final Future<HBaseFsck> hbck1,hbck2;
546
547 class RunHbck implements Callable<HBaseFsck>{
548 boolean fail = true;
549 @Override
550 public HBaseFsck call(){
551 try{
552 return doFsck(conf, false);
553 } catch(Exception e){
554 if (e.getMessage().contains("Duplicate hbck")) {
555 fail = false;
556 }
557 }
558
559 if (fail) fail();
560 return null;
561 }
562 }
563 service = Executors.newFixedThreadPool(2);
564 hbck1 = service.submit(new RunHbck());
565 hbck2 = service.submit(new RunHbck());
566 service.shutdown();
567
568 service.awaitTermination(15, TimeUnit.SECONDS);
569 HBaseFsck h1 = hbck1.get();
570 HBaseFsck h2 = hbck2.get();
571
572 assert(h1 == null || h2 == null);
573 if (h1 != null) {
574 assert(h1.getRetCode() >= 0);
575 }
576 if (h2 != null) {
577 assert(h2.getRetCode() >= 0);
578 }
579 }
580
581
582
583
584
585 @Test (timeout=180000)
586 public void testDupeStartKey() throws Exception {
587 TableName table =
588 TableName.valueOf("tableDupeStartKey");
589 try {
590 setupTable(table);
591 assertNoErrors(doFsck(conf, false));
592 assertEquals(ROWKEYS.length, countRows());
593
594
595 HRegionInfo hriDupe =
596 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("A2"));
597 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
598 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
599 .waitForAssignment(hriDupe);
600 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
601 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
602
603 HBaseFsck hbck = doFsck(conf, false);
604 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
605 ERROR_CODE.DUPE_STARTKEYS});
606 assertEquals(2, hbck.getOverlapGroups(table).size());
607 assertEquals(ROWKEYS.length, countRows());
608
609
610 doFsck(conf,true);
611
612
613 HBaseFsck hbck2 = doFsck(conf,false);
614 assertNoErrors(hbck2);
615 assertEquals(0, hbck2.getOverlapGroups(table).size());
616 assertEquals(ROWKEYS.length, countRows());
617 } finally {
618 cleanupTable(table);
619 }
620 }
621
622
623
624
625
626 @Test (timeout=180000)
627 public void testHbckWithRegionReplica() throws Exception {
628 TableName table =
629 TableName.valueOf("tableWithReplica");
630 try {
631 setupTableWithRegionReplica(table, 2);
632 assertNoErrors(doFsck(conf, false));
633 assertEquals(ROWKEYS.length, countRows());
634 } finally {
635 cleanupTable(table);
636 }
637 }
638
639
640
641
642 Map<ServerName, List<String>> getDeployedHRIs(final HBaseAdmin admin) throws IOException {
643 ClusterStatus status = admin.getClusterStatus();
644 Collection<ServerName> regionServers = status.getServers();
645 Map<ServerName, List<String>> mm =
646 new HashMap<ServerName, List<String>>();
647 for (ServerName hsi : regionServers) {
648 AdminProtos.AdminService.BlockingInterface server = ((HConnection) connection).getAdmin(hsi);
649
650
651 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
652 List<String> regionNames = new ArrayList<String>();
653 for (HRegionInfo hri : regions) {
654 regionNames.add(hri.getRegionNameAsString());
655 }
656 mm.put(hsi, regionNames);
657 }
658 return mm;
659 }
660
661
662
663
664 ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
665 for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
666 if (e.getValue().contains(hri.getRegionNameAsString())) {
667 return e.getKey();
668 }
669 }
670 return null;
671 }
672
673
674
675
676
677 @Test (timeout=180000)
678 public void testDupeRegion() throws Exception {
679 TableName table =
680 TableName.valueOf("tableDupeRegion");
681 try {
682 setupTable(table);
683 assertNoErrors(doFsck(conf, false));
684 assertEquals(ROWKEYS.length, countRows());
685
686
687 HRegionInfo hriDupe =
688 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"));
689
690 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
691 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
692 .waitForAssignment(hriDupe);
693 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
694 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
695
696
697
698
699
700 while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriDupe) == null) {
701 Thread.sleep(250);
702 }
703
704 LOG.debug("Finished assignment of dupe region");
705
706
707 HBaseFsck hbck = doFsck(conf, false);
708 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
709 ERROR_CODE.DUPE_STARTKEYS});
710 assertEquals(2, hbck.getOverlapGroups(table).size());
711 assertEquals(ROWKEYS.length, countRows());
712
713
714 doFsck(conf,true);
715
716
717 HBaseFsck hbck2 = doFsck(conf,false);
718 assertNoErrors(hbck2);
719 assertEquals(0, hbck2.getOverlapGroups(table).size());
720 assertEquals(ROWKEYS.length, countRows());
721 } finally {
722 cleanupTable(table);
723 }
724 }
725
726
727
728
729 @Test (timeout=180000)
730 public void testDegenerateRegions() throws Exception {
731 TableName table = TableName.valueOf("tableDegenerateRegions");
732 try {
733 setupTable(table);
734 assertNoErrors(doFsck(conf,false));
735 assertEquals(ROWKEYS.length, countRows());
736
737
738 HRegionInfo hriDupe =
739 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
740 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
741 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
742 .waitForAssignment(hriDupe);
743 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
744 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
745
746 HBaseFsck hbck = doFsck(conf,false);
747 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION, ERROR_CODE.DUPE_STARTKEYS,
748 ERROR_CODE.DUPE_STARTKEYS });
749 assertEquals(2, hbck.getOverlapGroups(table).size());
750 assertEquals(ROWKEYS.length, countRows());
751
752
753 doFsck(conf,true);
754
755
756 HBaseFsck hbck2 = doFsck(conf,false);
757 assertNoErrors(hbck2);
758 assertEquals(0, hbck2.getOverlapGroups(table).size());
759 assertEquals(ROWKEYS.length, countRows());
760 } finally {
761 cleanupTable(table);
762 }
763 }
764
765
766
767
768
769 @Test (timeout=180000)
770 public void testContainedRegionOverlap() throws Exception {
771 TableName table =
772 TableName.valueOf("tableContainedRegionOverlap");
773 try {
774 setupTable(table);
775 assertEquals(ROWKEYS.length, countRows());
776
777
778 HRegionInfo hriOverlap =
779 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
780 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
781 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
782 .waitForAssignment(hriOverlap);
783 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
784 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
785
786 HBaseFsck hbck = doFsck(conf, false);
787 assertErrors(hbck, new ERROR_CODE[] {
788 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
789 assertEquals(2, hbck.getOverlapGroups(table).size());
790 assertEquals(ROWKEYS.length, countRows());
791
792
793 doFsck(conf, true);
794
795
796 HBaseFsck hbck2 = doFsck(conf,false);
797 assertNoErrors(hbck2);
798 assertEquals(0, hbck2.getOverlapGroups(table).size());
799 assertEquals(ROWKEYS.length, countRows());
800 } finally {
801 cleanupTable(table);
802 }
803 }
804
805
806
807
808
809
810
811 @Test (timeout=180000)
812 public void testSidelineOverlapRegion() throws Exception {
813 TableName table =
814 TableName.valueOf("testSidelineOverlapRegion");
815 try {
816 setupTable(table);
817 assertEquals(ROWKEYS.length, countRows());
818
819
820 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
821 HMaster master = cluster.getMaster();
822 HRegionInfo hriOverlap1 =
823 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("AB"));
824 master.assignRegion(hriOverlap1);
825 master.getAssignmentManager().waitForAssignment(hriOverlap1);
826 HRegionInfo hriOverlap2 =
827 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("AB"), Bytes.toBytes("B"));
828 master.assignRegion(hriOverlap2);
829 master.getAssignmentManager().waitForAssignment(hriOverlap2);
830
831 HBaseFsck hbck = doFsck(conf, false);
832 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
833 ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
834 assertEquals(3, hbck.getOverlapGroups(table).size());
835 assertEquals(ROWKEYS.length, countRows());
836
837
838 Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
839 ServerName serverName = null;
840 byte[] regionName = null;
841 for (HbckInfo hbi: overlapGroups.values()) {
842 if ("A".equals(Bytes.toString(hbi.getStartKey()))
843 && "B".equals(Bytes.toString(hbi.getEndKey()))) {
844 regionName = hbi.getRegionName();
845
846
847 int k = cluster.getServerWith(regionName);
848 for (int i = 0; i < 3; i++) {
849 if (i != k) {
850 HRegionServer rs = cluster.getRegionServer(i);
851 serverName = rs.getServerName();
852 break;
853 }
854 }
855
856 HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) connection,
857 cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
858 admin.offline(regionName);
859 break;
860 }
861 }
862
863 assertNotNull(regionName);
864 assertNotNull(serverName);
865 try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
866 Put put = new Put(regionName);
867 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
868 Bytes.toBytes(serverName.getHostAndPort()));
869 meta.put(put);
870 }
871
872
873 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
874 fsck.connect();
875 fsck.setDisplayFullReport();
876 fsck.setTimeLag(0);
877 fsck.setFixAssignments(true);
878 fsck.setFixMeta(true);
879 fsck.setFixHdfsHoles(true);
880 fsck.setFixHdfsOverlaps(true);
881 fsck.setFixHdfsOrphans(true);
882 fsck.setFixVersionFile(true);
883 fsck.setSidelineBigOverlaps(true);
884 fsck.setMaxMerge(2);
885 fsck.onlineHbck();
886 fsck.close();
887
888
889
890 HBaseFsck hbck2 = doFsck(conf,false);
891 assertNoErrors(hbck2);
892 assertEquals(0, hbck2.getOverlapGroups(table).size());
893 assertTrue(ROWKEYS.length > countRows());
894 } finally {
895 cleanupTable(table);
896 }
897 }
898
899
900
901
902
903 @Test (timeout=180000)
904 public void testOverlapAndOrphan() throws Exception {
905 TableName table =
906 TableName.valueOf("tableOverlapAndOrphan");
907 try {
908 setupTable(table);
909 assertEquals(ROWKEYS.length, countRows());
910
911
912 admin.disableTable(table);
913 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
914 Bytes.toBytes("B"), true, true, false, true);
915 admin.enableTable(table);
916
917 HRegionInfo hriOverlap =
918 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
919 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
920 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
921 .waitForAssignment(hriOverlap);
922 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
923 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
924
925 HBaseFsck hbck = doFsck(conf, false);
926 assertErrors(hbck, new ERROR_CODE[] {
927 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
928 ERROR_CODE.HOLE_IN_REGION_CHAIN});
929
930
931 doFsck(conf, true);
932
933
934 HBaseFsck hbck2 = doFsck(conf,false);
935 assertNoErrors(hbck2);
936 assertEquals(0, hbck2.getOverlapGroups(table).size());
937 assertEquals(ROWKEYS.length, countRows());
938 } finally {
939 cleanupTable(table);
940 }
941 }
942
943
944
945
946
947
948 @Test (timeout=180000)
949 public void testCoveredStartKey() throws Exception {
950 TableName table =
951 TableName.valueOf("tableCoveredStartKey");
952 try {
953 setupTable(table);
954 assertEquals(ROWKEYS.length, countRows());
955
956
957 HRegionInfo hriOverlap =
958 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
959 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
960 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
961 .waitForAssignment(hriOverlap);
962 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
963 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
964
965 HBaseFsck hbck = doFsck(conf, false);
966 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
967 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
968 assertEquals(3, hbck.getOverlapGroups(table).size());
969 assertEquals(ROWKEYS.length, countRows());
970
971
972 doFsck(conf, true);
973
974
975 HBaseFsck hbck2 = doFsck(conf, false);
976 assertErrors(hbck2, new ERROR_CODE[0]);
977 assertEquals(0, hbck2.getOverlapGroups(table).size());
978 assertEquals(ROWKEYS.length, countRows());
979 } finally {
980 cleanupTable(table);
981 }
982 }
983
984
985
986
987
988 @Test (timeout=180000)
989 public void testRegionHole() throws Exception {
990 TableName table =
991 TableName.valueOf("tableRegionHole");
992 try {
993 setupTable(table);
994 assertEquals(ROWKEYS.length, countRows());
995
996
997 admin.disableTable(table);
998 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
999 Bytes.toBytes("C"), true, true, true);
1000 admin.enableTable(table);
1001
1002 HBaseFsck hbck = doFsck(conf, false);
1003 assertErrors(hbck, new ERROR_CODE[] {
1004 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1005
1006 assertEquals(0, hbck.getOverlapGroups(table).size());
1007
1008
1009 doFsck(conf, true);
1010
1011
1012 assertNoErrors(doFsck(conf,false));
1013 assertEquals(ROWKEYS.length - 2 , countRows());
1014 } finally {
1015 cleanupTable(table);
1016 }
1017 }
1018
1019
1020
1021
1022
1023 @Test (timeout=180000)
1024 public void testHDFSRegioninfoMissing() throws Exception {
1025 TableName table = TableName.valueOf("tableHDFSRegioninfoMissing");
1026 try {
1027 setupTable(table);
1028 assertEquals(ROWKEYS.length, countRows());
1029
1030
1031 admin.disableTable(table);
1032 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1033 Bytes.toBytes("C"), true, true, false, true);
1034 admin.enableTable(table);
1035
1036 HBaseFsck hbck = doFsck(conf, false);
1037 assertErrors(hbck, new ERROR_CODE[] {
1038 ERROR_CODE.ORPHAN_HDFS_REGION,
1039 ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1040 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1041
1042 assertEquals(0, hbck.getOverlapGroups(table).size());
1043
1044
1045 doFsck(conf, true);
1046
1047
1048 assertNoErrors(doFsck(conf, false));
1049 assertEquals(ROWKEYS.length, countRows());
1050 } finally {
1051 cleanupTable(table);
1052 }
1053 }
1054
1055
1056
1057
1058
1059 @Test (timeout=180000)
1060 public void testNotInMetaOrDeployedHole() throws Exception {
1061 TableName table =
1062 TableName.valueOf("tableNotInMetaOrDeployedHole");
1063 try {
1064 setupTable(table);
1065 assertEquals(ROWKEYS.length, countRows());
1066
1067
1068 admin.disableTable(table);
1069 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1070 Bytes.toBytes("C"), true, true, false);
1071 admin.enableTable(table);
1072
1073 HBaseFsck hbck = doFsck(conf, false);
1074 assertErrors(hbck, new ERROR_CODE[] {
1075 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1076
1077 assertEquals(0, hbck.getOverlapGroups(table).size());
1078
1079
1080 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1081 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1082
1083
1084 assertNoErrors(doFsck(conf,false));
1085 assertEquals(ROWKEYS.length, countRows());
1086 } finally {
1087 cleanupTable(table);
1088 }
1089 }
1090
1091
1092
1093
1094 @Test (timeout=180000)
1095 public void testNotInMetaHole() throws Exception {
1096 TableName table =
1097 TableName.valueOf("tableNotInMetaHole");
1098 try {
1099 setupTable(table);
1100 assertEquals(ROWKEYS.length, countRows());
1101
1102
1103 admin.disableTable(table);
1104 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1105 Bytes.toBytes("C"), false, true, false);
1106 admin.enableTable(table);
1107
1108 HBaseFsck hbck = doFsck(conf, false);
1109 assertErrors(hbck, new ERROR_CODE[] {
1110 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1111
1112 assertEquals(0, hbck.getOverlapGroups(table).size());
1113
1114
1115 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1116 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1117
1118
1119 assertNoErrors(doFsck(conf,false));
1120 assertEquals(ROWKEYS.length, countRows());
1121 } finally {
1122 cleanupTable(table);
1123 }
1124 }
1125
1126
1127
1128
1129
1130 @Test (timeout=180000)
1131 public void testNotInHdfs() throws Exception {
1132 TableName table =
1133 TableName.valueOf("tableNotInHdfs");
1134 try {
1135 setupTable(table);
1136 assertEquals(ROWKEYS.length, countRows());
1137
1138
1139 admin.flush(table);
1140
1141
1142 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1143 Bytes.toBytes("C"), false, false, true);
1144
1145 HBaseFsck hbck = doFsck(conf, false);
1146 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1147
1148 assertEquals(0, hbck.getOverlapGroups(table).size());
1149
1150
1151 doFsck(conf, true);
1152
1153
1154 assertNoErrors(doFsck(conf,false));
1155 assertEquals(ROWKEYS.length - 2, countRows());
1156 } finally {
1157 cleanupTable(table);
1158 }
1159 }
1160
1161
1162
1163
1164
1165 @Test (timeout=180000)
1166 public void testNoHdfsTable() throws Exception {
1167 TableName table = TableName.valueOf("NoHdfsTable");
1168 setupTable(table);
1169 assertEquals(ROWKEYS.length, countRows());
1170
1171
1172 admin.flush(table);
1173
1174
1175 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1176 Bytes.toBytes("A"), false, false, true);
1177 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1178 Bytes.toBytes("B"), false, false, true);
1179 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1180 Bytes.toBytes("C"), false, false, true);
1181 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1182 Bytes.toBytes(""), false, false, true);
1183
1184
1185 deleteTableDir(table);
1186
1187 HBaseFsck hbck = doFsck(conf, false);
1188 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1189 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1190 ERROR_CODE.NOT_IN_HDFS,});
1191
1192 assertEquals(0, hbck.getOverlapGroups(table).size());
1193
1194
1195 doFsck(conf, true);
1196
1197
1198 assertNoErrors(doFsck(conf,false));
1199 assertFalse("Table " + table + " should have been deleted", admin.tableExists(table));
1200 }
1201
1202 public void deleteTableDir(TableName table) throws IOException {
1203 Path rootDir = FSUtils.getRootDir(conf);
1204 FileSystem fs = rootDir.getFileSystem(conf);
1205 Path p = FSUtils.getTableDir(rootDir, table);
1206 HBaseFsck.debugLsr(conf, p);
1207 boolean success = fs.delete(p, true);
1208 LOG.info("Deleted " + p + " sucessfully? " + success);
1209 }
1210
1211
1212
1213
1214 @Test (timeout=180000)
1215 public void testNoVersionFile() throws Exception {
1216
1217 Path rootDir = FSUtils.getRootDir(conf);
1218 FileSystem fs = rootDir.getFileSystem(conf);
1219 Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1220 fs.delete(versionFile, true);
1221
1222
1223 HBaseFsck hbck = doFsck(conf, false);
1224 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1225
1226 doFsck(conf, true);
1227
1228
1229 assertNoErrors(doFsck(conf, false));
1230 }
1231
1232
1233
1234
1235 @Test (timeout=180000)
1236 public void testRegionShouldNotBeDeployed() throws Exception {
1237 TableName table =
1238 TableName.valueOf("tableRegionShouldNotBeDeployed");
1239 try {
1240 LOG.info("Starting testRegionShouldNotBeDeployed.");
1241 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1242 assertTrue(cluster.waitForActiveAndReadyMaster());
1243
1244
1245 byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1246 Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1247 HTableDescriptor htdDisabled = new HTableDescriptor(table);
1248 htdDisabled.addFamily(new HColumnDescriptor(FAM));
1249
1250
1251 FSTableDescriptors fstd = new FSTableDescriptors(conf);
1252 fstd.createTableDescriptor(htdDisabled);
1253 List<HRegionInfo> disabledRegions =
1254 TEST_UTIL.createMultiRegionsInMeta(conf, htdDisabled, SPLIT_KEYS);
1255
1256
1257 HRegionServer hrs = cluster.getRegionServer(0);
1258
1259
1260 admin.disableTable(table);
1261 admin.enableTable(table);
1262
1263
1264 admin.disableTable(table);
1265 HRegionInfo region = disabledRegions.remove(0);
1266 byte[] regionName = region.getRegionName();
1267
1268
1269 assertTrue(cluster.getServerWith(regionName) == -1);
1270
1271
1272
1273
1274
1275 HRegion r = HRegion.openHRegion(
1276 region, htdDisabled, hrs.getWAL(region), conf);
1277 hrs.addToOnlineRegions(r);
1278
1279 HBaseFsck hbck = doFsck(conf, false);
1280 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1281
1282
1283 doFsck(conf, true);
1284
1285
1286 assertNoErrors(doFsck(conf, false));
1287 } finally {
1288 admin.enableTable(table);
1289 cleanupTable(table);
1290 }
1291 }
1292
1293
1294
1295
1296 @Test (timeout=180000)
1297 public void testFixByTable() throws Exception {
1298 TableName table1 =
1299 TableName.valueOf("testFixByTable1");
1300 TableName table2 =
1301 TableName.valueOf("testFixByTable2");
1302 try {
1303 setupTable(table1);
1304
1305 admin.flush(table1);
1306
1307 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1308 Bytes.toBytes("C"), false, false, true);
1309
1310 setupTable(table2);
1311
1312 admin.flush(table2);
1313
1314 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1315 Bytes.toBytes("C"), false, false, true);
1316
1317 HBaseFsck hbck = doFsck(conf, false);
1318 assertErrors(hbck, new ERROR_CODE[] {
1319 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1320
1321
1322 doFsck(conf, true, table1);
1323
1324 assertNoErrors(doFsck(conf, false, table1));
1325
1326 assertErrors(doFsck(conf, false, table2),
1327 new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1328
1329
1330 doFsck(conf, true, table2);
1331
1332 assertNoErrors(doFsck(conf, false));
1333 assertEquals(ROWKEYS.length - 2, countRows());
1334 } finally {
1335 cleanupTable(table1);
1336 cleanupTable(table2);
1337 }
1338 }
1339
1340
1341
1342 @Test (timeout=180000)
1343 public void testLingeringSplitParent() throws Exception {
1344 TableName table =
1345 TableName.valueOf("testLingeringSplitParent");
1346 Table meta = null;
1347 try {
1348 setupTable(table);
1349 assertEquals(ROWKEYS.length, countRows());
1350
1351
1352 admin.flush(table);
1353 HRegionLocation location = tbl.getRegionLocation("B");
1354
1355
1356 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1357 Bytes.toBytes("C"), true, true, false);
1358
1359
1360 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1361 HRegionInfo hri = location.getRegionInfo();
1362
1363 HRegionInfo a = new HRegionInfo(tbl.getName(),
1364 Bytes.toBytes("B"), Bytes.toBytes("BM"));
1365 HRegionInfo b = new HRegionInfo(tbl.getName(),
1366 Bytes.toBytes("BM"), Bytes.toBytes("C"));
1367
1368 hri.setOffline(true);
1369 hri.setSplit(true);
1370
1371 MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
1372 meta.close();
1373 admin.flush(TableName.META_TABLE_NAME);
1374
1375 HBaseFsck hbck = doFsck(conf, false);
1376 assertErrors(hbck, new ERROR_CODE[] {
1377 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1378
1379
1380 hbck = doFsck(conf, true);
1381 assertErrors(hbck, new ERROR_CODE[] {
1382 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1383 assertFalse(hbck.shouldRerun());
1384 hbck = doFsck(conf, false);
1385 assertErrors(hbck, new ERROR_CODE[] {
1386 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1387
1388
1389 hbck = new HBaseFsck(conf, hbfsckExecutorService);
1390 hbck.connect();
1391 hbck.setDisplayFullReport();
1392 hbck.setTimeLag(0);
1393 hbck.setFixSplitParents(true);
1394 hbck.onlineHbck();
1395 assertTrue(hbck.shouldRerun());
1396 hbck.close();
1397
1398 Get get = new Get(hri.getRegionName());
1399 Result result = meta.get(get);
1400 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1401 HConstants.SPLITA_QUALIFIER).isEmpty());
1402 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1403 HConstants.SPLITB_QUALIFIER).isEmpty());
1404 admin.flush(TableName.META_TABLE_NAME);
1405
1406
1407 doFsck(conf, true);
1408
1409
1410 assertNoErrors(doFsck(conf, false));
1411 assertEquals(ROWKEYS.length, countRows());
1412 } finally {
1413 cleanupTable(table);
1414 IOUtils.closeQuietly(meta);
1415 }
1416 }
1417
1418
1419
1420
1421
1422 @Test (timeout=180000)
1423 public void testValidLingeringSplitParent() throws Exception {
1424 TableName table =
1425 TableName.valueOf("testLingeringSplitParent");
1426 Table meta = null;
1427 try {
1428 setupTable(table);
1429 assertEquals(ROWKEYS.length, countRows());
1430
1431
1432 admin.flush(table);
1433 HRegionLocation location = tbl.getRegionLocation(Bytes.toBytes("B"));
1434
1435 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1436 HRegionInfo hri = location.getRegionInfo();
1437
1438
1439 byte[] regionName = location.getRegionInfo().getRegionName();
1440 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1441 TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
1442
1443
1444
1445
1446 HBaseFsck hbck = doFsck(
1447 conf, true, true, false, false, false, true, true, true, false, false, false, null);
1448 assertErrors(hbck, new ERROR_CODE[] {});
1449
1450
1451 Get get = new Get(hri.getRegionName());
1452 Result result = meta.get(get);
1453 assertNotNull(result);
1454 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
1455
1456 assertEquals(ROWKEYS.length, countRows());
1457
1458
1459 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1460 assertNoErrors(doFsck(conf, false));
1461 } finally {
1462 cleanupTable(table);
1463 IOUtils.closeQuietly(meta);
1464 }
1465 }
1466
1467
1468
1469
1470
1471 @Test(timeout=75000)
1472 public void testSplitDaughtersNotInMeta() throws Exception {
1473 TableName table = TableName.valueOf("testSplitdaughtersNotInMeta");
1474 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1475 try {
1476 setupTable(table);
1477 assertEquals(ROWKEYS.length, countRows());
1478
1479
1480 admin.flush(table);
1481 HRegionLocation location = tbl.getRegionLocation(Bytes.toBytes("B"));
1482
1483 HRegionInfo hri = location.getRegionInfo();
1484
1485
1486 byte[] regionName = location.getRegionInfo().getRegionName();
1487 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1488 TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
1489
1490 PairOfSameType<HRegionInfo> daughters =
1491 MetaTableAccessor.getDaughterRegions(meta.get(new Get(regionName)));
1492
1493
1494 Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1495 undeployRegion(connection, hris.get(daughters.getFirst()), daughters.getFirst());
1496 undeployRegion(connection, hris.get(daughters.getSecond()), daughters.getSecond());
1497
1498 List<Delete> deletes = new ArrayList<>();
1499 deletes.add(new Delete(daughters.getFirst().getRegionName()));
1500 deletes.add(new Delete(daughters.getSecond().getRegionName()));
1501 meta.delete(deletes);
1502
1503
1504 RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster().
1505 getAssignmentManager().getRegionStates();
1506 regionStates.deleteRegion(daughters.getFirst());
1507 regionStates.deleteRegion(daughters.getSecond());
1508
1509 HBaseFsck hbck = doFsck(conf, false);
1510 assertErrors(hbck,
1511 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1512 ERROR_CODE.HOLE_IN_REGION_CHAIN });
1513
1514
1515 hbck = doFsck(
1516 conf, true, true, false, false, false, false, false, false, false, false, false, null);
1517 assertErrors(hbck,
1518 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1519 ERROR_CODE.HOLE_IN_REGION_CHAIN });
1520
1521
1522 Get get = new Get(hri.getRegionName());
1523 Result result = meta.get(get);
1524 assertNotNull(result);
1525 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
1526
1527 assertEquals(ROWKEYS.length, countRows());
1528
1529
1530 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1531 assertNoErrors(doFsck(conf, false));
1532 } finally {
1533 meta.close();
1534 cleanupTable(table);
1535 }
1536 }
1537
1538
1539
1540
1541
1542 @Test(timeout=120000)
1543 public void testMissingFirstRegion() throws Exception {
1544 TableName table = TableName.valueOf("testMissingFirstRegion");
1545 try {
1546 setupTable(table);
1547 assertEquals(ROWKEYS.length, countRows());
1548
1549
1550 admin.disableTable(table);
1551 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1552 true, true);
1553 admin.enableTable(table);
1554
1555 HBaseFsck hbck = doFsck(conf, false);
1556 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1557
1558 doFsck(conf, true);
1559
1560 assertNoErrors(doFsck(conf, false));
1561 } finally {
1562 cleanupTable(table);
1563 }
1564 }
1565
1566
1567
1568
1569
1570 @Test(timeout=120000)
1571 public void testRegionDeployedNotInHdfs() throws Exception {
1572 TableName table =
1573 TableName.valueOf("testSingleRegionDeployedNotInHdfs");
1574 try {
1575 setupTable(table);
1576 admin.flush(table);
1577
1578
1579 deleteRegion(conf, tbl.getTableDescriptor(),
1580 HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
1581 false, true);
1582
1583 HBaseFsck hbck = doFsck(conf, false);
1584 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
1585
1586 doFsck(conf, true);
1587
1588 assertNoErrors(doFsck(conf, false));
1589 } finally {
1590 cleanupTable(table);
1591 }
1592 }
1593
1594
1595
1596
1597
1598 @Test(timeout=120000)
1599 public void testMissingLastRegion() throws Exception {
1600 TableName table =
1601 TableName.valueOf("testMissingLastRegion");
1602 try {
1603 setupTable(table);
1604 assertEquals(ROWKEYS.length, countRows());
1605
1606
1607 admin.disableTable(table);
1608 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1609 true, true);
1610 admin.enableTable(table);
1611
1612 HBaseFsck hbck = doFsck(conf, false);
1613 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1614
1615 doFsck(conf, true);
1616
1617 assertNoErrors(doFsck(conf, false));
1618 } finally {
1619 cleanupTable(table);
1620 }
1621 }
1622
1623
1624
1625
1626 @Test (timeout=180000)
1627 public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1628 TableName table =
1629 TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1630 try {
1631 setupTable(table);
1632 assertEquals(ROWKEYS.length, countRows());
1633
1634
1635 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1636 Bytes.toBytes("B"), true, false, false, false);
1637
1638
1639 HBaseFsck hbck = doFsck(conf, false);
1640 assertErrors(hbck, new ERROR_CODE[] {
1641 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1642
1643
1644 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1645 fsck.connect();
1646 fsck.setDisplayFullReport();
1647 fsck.setTimeLag(0);
1648 fsck.setCheckHdfs(false);
1649 fsck.onlineHbck();
1650 assertErrors(fsck, new ERROR_CODE[] {
1651 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1652 fsck.close();
1653
1654
1655 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1656 fsck.connect();
1657 fsck.setDisplayFullReport();
1658 fsck.setTimeLag(0);
1659 fsck.setCheckHdfs(false);
1660 fsck.setFixAssignments(true);
1661 fsck.onlineHbck();
1662 assertTrue(fsck.shouldRerun());
1663 fsck.onlineHbck();
1664 assertNoErrors(fsck);
1665
1666 assertEquals(ROWKEYS.length, countRows());
1667
1668 fsck.close();
1669 } finally {
1670 cleanupTable(table);
1671 }
1672 }
1673
1674
1675
1676
1677
1678
1679 @Test (timeout=180000)
1680 public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1681 TableName table =
1682 TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
1683 try {
1684 setupTable(table);
1685 assertEquals(ROWKEYS.length, countRows());
1686
1687
1688 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1689 Bytes.toBytes("B"), false, true, false, false);
1690
1691
1692 HBaseFsck hbck = doFsck(conf, false);
1693 assertErrors(hbck,
1694 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1695
1696
1697 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1698 fsck.connect();
1699 fsck.setDisplayFullReport();
1700 fsck.setTimeLag(0);
1701 fsck.setCheckHdfs(false);
1702 fsck.onlineHbck();
1703 assertErrors(fsck,
1704 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1705 fsck.close();
1706
1707
1708 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1709 fsck.connect();
1710 fsck.setDisplayFullReport();
1711 fsck.setTimeLag(0);
1712 fsck.setCheckHdfs(false);
1713 fsck.setFixAssignments(true);
1714 fsck.setFixMeta(true);
1715 fsck.onlineHbck();
1716 assertFalse(fsck.shouldRerun());
1717 assertErrors(fsck,
1718 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1719 fsck.close();
1720
1721
1722 fsck = doFsck(conf, true);
1723 assertTrue(fsck.shouldRerun());
1724 fsck = doFsck(conf, true);
1725 assertNoErrors(fsck);
1726 } finally {
1727 cleanupTable(table);
1728 }
1729 }
1730
1731
1732
1733
1734
1735 @Test (timeout=180000)
1736 public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1737 TableName table =
1738 TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
1739 try {
1740 setupTable(table);
1741 assertEquals(ROWKEYS.length, countRows());
1742
1743
1744 admin.disableTable(table);
1745 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1746 Bytes.toBytes("B"), true, true, false, true);
1747 admin.enableTable(table);
1748
1749 HRegionInfo hriOverlap =
1750 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
1751 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1752 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1753 .waitForAssignment(hriOverlap);
1754 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1755 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1756
1757 HBaseFsck hbck = doFsck(conf, false);
1758 assertErrors(hbck, new ERROR_CODE[] {
1759 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1760 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1761
1762
1763 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1764 fsck.connect();
1765 fsck.setDisplayFullReport();
1766 fsck.setTimeLag(0);
1767 fsck.setCheckHdfs(false);
1768 fsck.onlineHbck();
1769 assertErrors(fsck, new ERROR_CODE[] {
1770 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1771 fsck.close();
1772
1773
1774 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1775 fsck.connect();
1776 fsck.setDisplayFullReport();
1777 fsck.setTimeLag(0);
1778 fsck.setCheckHdfs(false);
1779 fsck.setFixHdfsHoles(true);
1780 fsck.setFixHdfsOverlaps(true);
1781 fsck.setFixHdfsOrphans(true);
1782 fsck.onlineHbck();
1783 assertFalse(fsck.shouldRerun());
1784 assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN});
1785 fsck.close();
1786 } finally {
1787 if (admin.isTableDisabled(table)) {
1788 admin.enableTable(table);
1789 }
1790 cleanupTable(table);
1791 }
1792 }
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802 Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
1803 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1804 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1805 Path famDir = new Path(regionDir, FAM_STR);
1806
1807
1808 while (true) {
1809 FileStatus[] hfFss = fs.listStatus(famDir);
1810 if (hfFss.length == 0) {
1811 continue;
1812 }
1813 for (FileStatus hfs : hfFss) {
1814 if (!hfs.isDirectory()) {
1815 return hfs.getPath();
1816 }
1817 }
1818 }
1819 }
1820
1821
1822
1823
1824 @Test(timeout=180000)
1825 public void testQuarantineCorruptHFile() throws Exception {
1826 TableName table = TableName.valueOf(name.getMethodName());
1827 try {
1828 setupTable(table);
1829 assertEquals(ROWKEYS.length, countRows());
1830 admin.flush(table);
1831
1832 FileSystem fs = FileSystem.get(conf);
1833 Path hfile = getFlushedHFile(fs, table);
1834
1835
1836 admin.disableTable(table);
1837
1838
1839 Path corrupt = new Path(hfile.getParent(), "deadbeef");
1840 TestHFile.truncateFile(fs, hfile, corrupt);
1841 LOG.info("Created corrupted file " + corrupt);
1842 HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
1843
1844
1845 HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
1846 assertEquals(res.getRetCode(), 0);
1847 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1848 assertEquals(hfcc.getHFilesChecked(), 5);
1849 assertEquals(hfcc.getCorrupted().size(), 1);
1850 assertEquals(hfcc.getFailures().size(), 0);
1851 assertEquals(hfcc.getQuarantined().size(), 1);
1852 assertEquals(hfcc.getMissing().size(), 0);
1853
1854
1855 admin.enableTable(table);
1856 } finally {
1857 cleanupTable(table);
1858 }
1859 }
1860
1861
1862
1863
1864 private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
1865 int corrupt, int fail, int quar, int missing) throws Exception {
1866 try {
1867 setupTable(table);
1868 assertEquals(ROWKEYS.length, countRows());
1869 admin.flush(table);
1870
1871
1872 admin.disableTable(table);
1873
1874 String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
1875 table.getNameAsString()};
1876 HBaseFsck res = hbck.exec(hbfsckExecutorService, args);
1877
1878 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1879 assertEquals(hfcc.getHFilesChecked(), check);
1880 assertEquals(hfcc.getCorrupted().size(), corrupt);
1881 assertEquals(hfcc.getFailures().size(), fail);
1882 assertEquals(hfcc.getQuarantined().size(), quar);
1883 assertEquals(hfcc.getMissing().size(), missing);
1884
1885
1886 admin.enableTableAsync(table);
1887 while (!admin.isTableEnabled(table)) {
1888 try {
1889 Thread.sleep(250);
1890 } catch (InterruptedException e) {
1891 e.printStackTrace();
1892 fail("Interrupted when trying to enable table " + table);
1893 }
1894 }
1895 } finally {
1896 cleanupTable(table);
1897 }
1898 }
1899
1900
1901
1902
1903
1904 @Test(timeout=180000)
1905 public void testQuarantineMissingHFile() throws Exception {
1906 TableName table = TableName.valueOf(name.getMethodName());
1907
1908
1909 final FileSystem fs = FileSystem.get(conf);
1910 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
1911 @Override
1912 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1913 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1914 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1915 @Override
1916 protected void checkHFile(Path p) throws IOException {
1917 if (attemptedFirstHFile.compareAndSet(false, true)) {
1918 assertTrue(fs.delete(p, true));
1919 }
1920 super.checkHFile(p);
1921 }
1922 };
1923 }
1924 };
1925 doQuarantineTest(table, hbck, 4, 0, 0, 0, 1);
1926 hbck.close();
1927 }
1928
1929
1930
1931
1932
1933
1934
1935 @Ignore @Test(timeout=180000)
1936 public void testQuarantineMissingFamdir() throws Exception {
1937 TableName table = TableName.valueOf(name.getMethodName());
1938
1939 final FileSystem fs = FileSystem.get(conf);
1940 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
1941 @Override
1942 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1943 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1944 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1945 @Override
1946 protected void checkColFamDir(Path p) throws IOException {
1947 if (attemptedFirstHFile.compareAndSet(false, true)) {
1948 assertTrue(fs.delete(p, true));
1949 }
1950 super.checkColFamDir(p);
1951 }
1952 };
1953 }
1954 };
1955 doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1956 hbck.close();
1957 }
1958
1959
1960
1961
1962
1963 @Test(timeout=180000)
1964 public void testQuarantineMissingRegionDir() throws Exception {
1965 TableName table = TableName.valueOf(name.getMethodName());
1966
1967 final FileSystem fs = FileSystem.get(conf);
1968 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
1969 @Override
1970 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
1971 throws IOException {
1972 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1973 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1974 @Override
1975 protected void checkRegionDir(Path p) throws IOException {
1976 if (attemptedFirstHFile.compareAndSet(false, true)) {
1977 assertTrue(fs.delete(p, true));
1978 }
1979 super.checkRegionDir(p);
1980 }
1981 };
1982 }
1983 };
1984 doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1985 hbck.close();
1986 }
1987
1988
1989
1990
1991 @Test (timeout=180000)
1992 public void testLingeringReferenceFile() throws Exception {
1993 TableName table =
1994 TableName.valueOf("testLingeringReferenceFile");
1995 try {
1996 setupTable(table);
1997 assertEquals(ROWKEYS.length, countRows());
1998
1999
2000 FileSystem fs = FileSystem.get(conf);
2001 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2002 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2003 Path famDir = new Path(regionDir, FAM_STR);
2004 Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
2005 fs.create(fakeReferenceFile);
2006
2007 HBaseFsck hbck = doFsck(conf, false);
2008 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
2009
2010 doFsck(conf, true);
2011
2012 assertNoErrors(doFsck(conf, false));
2013 } finally {
2014 cleanupTable(table);
2015 }
2016 }
2017
2018
2019
2020
2021 @Test (timeout=180000)
2022 public void testMissingRegionInfoQualifier() throws Exception {
2023 Connection connection = ConnectionFactory.createConnection(conf);
2024 TableName table = TableName.valueOf("testMissingRegionInfoQualifier");
2025 try {
2026 setupTable(table);
2027
2028
2029 final List<Delete> deletes = new LinkedList<Delete>();
2030 Table meta = connection.getTable(TableName.META_TABLE_NAME, hbfsckExecutorService);
2031 MetaScanner.metaScan(connection, new MetaScanner.MetaScannerVisitor() {
2032
2033 @Override
2034 public boolean processRow(Result rowResult) throws IOException {
2035 HRegionInfo hri = MetaTableAccessor.getHRegionInfo(rowResult);
2036 if (hri != null && !hri.getTable().isSystemTable()) {
2037 Delete delete = new Delete(rowResult.getRow());
2038 delete.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2039 deletes.add(delete);
2040 }
2041 return true;
2042 }
2043
2044 @Override
2045 public void close() throws IOException {
2046 }
2047 });
2048 meta.delete(deletes);
2049
2050
2051 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2052 HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
2053 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2054 HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
2055 meta.close();
2056
2057 HBaseFsck hbck = doFsck(conf, false);
2058 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2059
2060
2061 hbck = doFsck(conf, true);
2062
2063
2064 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2065 } finally {
2066 cleanupTable(table);
2067 }
2068 connection.close();
2069 }
2070
2071
2072
2073
2074
2075 @Test (timeout=180000)
2076 public void testErrorReporter() throws Exception {
2077 try {
2078 MockErrorReporter.calledCount = 0;
2079 doFsck(conf, false);
2080 assertEquals(MockErrorReporter.calledCount, 0);
2081
2082 conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
2083 doFsck(conf, false);
2084 assertTrue(MockErrorReporter.calledCount > 20);
2085 } finally {
2086 conf.set("hbasefsck.errorreporter",
2087 PrintingErrorReporter.class.getName());
2088 MockErrorReporter.calledCount = 0;
2089 }
2090 }
2091
2092 static class MockErrorReporter implements ErrorReporter {
2093 static int calledCount = 0;
2094
2095 @Override
2096 public void clear() {
2097 calledCount++;
2098 }
2099
2100 @Override
2101 public void report(String message) {
2102 calledCount++;
2103 }
2104
2105 @Override
2106 public void reportError(String message) {
2107 calledCount++;
2108 }
2109
2110 @Override
2111 public void reportError(ERROR_CODE errorCode, String message) {
2112 calledCount++;
2113 }
2114
2115 @Override
2116 public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2117 calledCount++;
2118 }
2119
2120 @Override
2121 public void reportError(ERROR_CODE errorCode,
2122 String message, TableInfo table, HbckInfo info) {
2123 calledCount++;
2124 }
2125
2126 @Override
2127 public void reportError(ERROR_CODE errorCode, String message,
2128 TableInfo table, HbckInfo info1, HbckInfo info2) {
2129 calledCount++;
2130 }
2131
2132 @Override
2133 public int summarize() {
2134 return ++calledCount;
2135 }
2136
2137 @Override
2138 public void detail(String details) {
2139 calledCount++;
2140 }
2141
2142 @Override
2143 public ArrayList<ERROR_CODE> getErrorList() {
2144 calledCount++;
2145 return new ArrayList<ERROR_CODE>();
2146 }
2147
2148 @Override
2149 public void progress() {
2150 calledCount++;
2151 }
2152
2153 @Override
2154 public void print(String message) {
2155 calledCount++;
2156 }
2157
2158 @Override
2159 public void resetErrors() {
2160 calledCount++;
2161 }
2162
2163 @Override
2164 public boolean tableHasErrors(TableInfo table) {
2165 calledCount++;
2166 return false;
2167 }
2168 }
2169
2170 @Test(timeout=180000)
2171 public void testCheckTableLocks() throws Exception {
2172 IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
2173 EnvironmentEdgeManager.injectEdge(edge);
2174
2175 HBaseFsck hbck = doFsck(conf, false);
2176 assertNoErrors(hbck);
2177
2178 ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
2179
2180
2181 final TableLockManager tableLockManager = TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
2182 TableLock writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2183 "testCheckTableLocks");
2184 writeLock.acquire();
2185 hbck = doFsck(conf, false);
2186 assertNoErrors(hbck);
2187
2188 edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2189 TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));
2190
2191 hbck = doFsck(conf, false);
2192 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2193
2194 final CountDownLatch latch = new CountDownLatch(1);
2195 new Thread() {
2196 @Override
2197 public void run() {
2198 TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2199 "testCheckTableLocks");
2200 try {
2201 latch.countDown();
2202 readLock.acquire();
2203 } catch (IOException ex) {
2204 fail();
2205 } catch (IllegalStateException ex) {
2206 return;
2207 }
2208 fail("should not have come here");
2209 };
2210 }.start();
2211
2212 latch.await();
2213 Threads.sleep(300);
2214
2215 hbck = doFsck(conf, false);
2216 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2217
2218 edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2219 TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));
2220
2221 hbck = doFsck(conf, false);
2222 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK});
2223
2224 conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);
2225
2226 Threads.sleep(10);
2227 hbck = doFsck(conf, true);
2228
2229 hbck = doFsck(conf, false);
2230 assertNoErrors(hbck);
2231
2232
2233 writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2234 "should acquire without blocking");
2235 writeLock.acquire();
2236 writeLock.release();
2237 }
2238
2239
2240
2241
2242 @Test
2243 public void testOrphanedTableZNode() throws Exception {
2244 TableName table = TableName.valueOf("testOrphanedZKTableEntry");
2245
2246 try {
2247 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getTableStateManager()
2248 .setTableState(table, ZooKeeperProtos.Table.State.ENABLING);
2249
2250 try {
2251 setupTable(table);
2252 Assert.fail(
2253 "Create table should fail when its ZNode has already existed with ENABLING state.");
2254 } catch(TableExistsException t) {
2255
2256 }
2257
2258 try {
2259 cleanupTable(table);
2260 } catch (IOException e) {
2261
2262
2263 }
2264
2265 HBaseFsck hbck = doFsck(conf, false);
2266 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
2267
2268
2269 hbck = doFsck(conf, true);
2270
2271
2272 hbck = doFsck(conf, false);
2273 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
2274
2275 setupTable(table);
2276 } finally {
2277
2278
2279 try {
2280 cleanupTable(table);
2281 } catch (IOException e) {
2282
2283
2284 }
2285 }
2286 }
2287
2288 @Test (timeout=180000)
2289 public void testMetaOffline() throws Exception {
2290
2291 HBaseFsck hbck = doFsck(conf, false);
2292 assertNoErrors(hbck);
2293 deleteMetaRegion(conf, true, false, false);
2294 hbck = doFsck(conf, false);
2295
2296
2297 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2298 hbck = doFsck(conf, true);
2299 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2300 hbck = doFsck(conf, false);
2301 assertNoErrors(hbck);
2302 }
2303
2304 private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
2305 boolean regionInfoOnly) throws IOException, InterruptedException {
2306 HRegionLocation metaLocation = connection.getRegionLocator(TableName.META_TABLE_NAME)
2307 .getRegionLocation(HConstants.EMPTY_START_ROW);
2308 ServerName hsa = metaLocation.getServerName();
2309 HRegionInfo hri = metaLocation.getRegionInfo();
2310 if (unassign) {
2311 LOG.info("Undeploying meta region " + hri + " from server " + hsa);
2312 try (Connection unmanagedConnection = ConnectionFactory.createConnection(conf)) {
2313 undeployRegion(unmanagedConnection, hsa, hri);
2314 }
2315 }
2316
2317 if (regionInfoOnly) {
2318 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
2319 Path rootDir = FSUtils.getRootDir(conf);
2320 FileSystem fs = rootDir.getFileSystem(conf);
2321 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2322 hri.getEncodedName());
2323 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
2324 fs.delete(hriPath, true);
2325 }
2326
2327 if (hdfs) {
2328 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
2329 Path rootDir = FSUtils.getRootDir(conf);
2330 FileSystem fs = rootDir.getFileSystem(conf);
2331 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2332 hri.getEncodedName());
2333 HBaseFsck.debugLsr(conf, p);
2334 boolean success = fs.delete(p, true);
2335 LOG.info("Deleted " + p + " sucessfully? " + success);
2336 HBaseFsck.debugLsr(conf, p);
2337 }
2338 }
2339
2340 @Test (timeout=180000)
2341 public void testTableWithNoRegions() throws Exception {
2342
2343
2344 TableName table =
2345 TableName.valueOf(name.getMethodName());
2346 try {
2347
2348 HTableDescriptor desc = new HTableDescriptor(table);
2349 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
2350 desc.addFamily(hcd);
2351 admin.createTable(desc);
2352 tbl = (HTable) connection.getTable(table, tableExecutorService);
2353
2354
2355 deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
2356 HConstants.EMPTY_END_ROW, false, false, true);
2357
2358 HBaseFsck hbck = doFsck(conf, false);
2359 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
2360
2361 doFsck(conf, true);
2362
2363
2364 doFsck(conf, true);
2365
2366
2367 assertNoErrors(doFsck(conf, false));
2368 } finally {
2369 cleanupTable(table);
2370 }
2371
2372 }
2373
2374 @Test (timeout=180000)
2375 public void testHbckAfterRegionMerge() throws Exception {
2376 TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
2377 Table meta = null;
2378 try {
2379
2380 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
2381 setupTable(table);
2382 assertEquals(ROWKEYS.length, countRows());
2383
2384
2385 admin.flush(table);
2386 HRegionInfo region1 = tbl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
2387 HRegionInfo region2 = tbl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
2388
2389 int regionCountBeforeMerge = tbl.getRegionLocations().size();
2390
2391 assertNotEquals(region1, region2);
2392
2393
2394 admin.mergeRegions(region1.getEncodedNameAsBytes(),
2395 region2.getEncodedNameAsBytes(), false);
2396
2397
2398 long timeout = System.currentTimeMillis() + 30 * 1000;
2399 while (true) {
2400 if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
2401 break;
2402 } else if (System.currentTimeMillis() > timeout) {
2403 fail("Time out waiting on region " + region1.getEncodedName()
2404 + " and " + region2.getEncodedName() + " be merged");
2405 }
2406 Thread.sleep(10);
2407 }
2408
2409 assertEquals(ROWKEYS.length, countRows());
2410
2411 HBaseFsck hbck = doFsck(conf, false);
2412 assertNoErrors(hbck);
2413
2414 } finally {
2415 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
2416 cleanupTable(table);
2417 IOUtils.closeQuietly(meta);
2418 }
2419 }
2420
2421 @Test (timeout = 180000)
2422 public void testRegionBoundariesCheck() throws Exception {
2423 HBaseFsck hbck = doFsck(conf, false);
2424 assertNoErrors(hbck);
2425 try {
2426 hbck.checkRegionBoundaries();
2427 } catch (IllegalArgumentException e) {
2428 if (e.getMessage().endsWith("not a valid DFS filename.")) {
2429 fail("Table directory path is not valid." + e.getMessage());
2430 }
2431 }
2432 }
2433
/**
 * JUnit rule giving access to the name of the currently running test method; tests call
 * {@code name.getMethodName()} to derive a table name from it.
 */
@org.junit.Rule
public TestName name = new TestName();
2436
2437 @Test (timeout=180000)
2438 public void testReadOnlyProperty() throws Exception {
2439 HBaseFsck hbck = doFsck(conf, false);
2440 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2441 hbck.shouldIgnorePreCheckPermission());
2442
2443 hbck = doFsck(conf, true);
2444 Assert.assertEquals("shouldIgnorePreCheckPermission", false,
2445 hbck.shouldIgnorePreCheckPermission());
2446
2447 hbck = doFsck(conf, true);
2448 hbck.setIgnorePreCheckPermission(true);
2449 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2450 hbck.shouldIgnorePreCheckPermission());
2451 }
2452
2453 @Test (timeout=180000)
2454 public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
2455 TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
2456 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
2457 try {
2458 HTableDescriptor desc = new HTableDescriptor(table);
2459 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
2460 admin.createTable(desc);
2461 tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
2462 for (int i = 0; i < 5; i++) {
2463 Put p1 = new Put(("r" + i).getBytes());
2464 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
2465 tbl.put(p1);
2466 }
2467 admin.flush(desc.getTableName());
2468 List<HRegion> regions = cluster.getRegions(desc.getTableName());
2469 int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
2470 HRegionServer regionServer = cluster.getRegionServer(serverWith);
2471 cluster.getServerWith(regions.get(0).getRegionName());
2472 SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
2473 st.prepare();
2474 st.stepsBeforePONR(regionServer, regionServer, false);
2475 AssignmentManager am = cluster.getMaster().getAssignmentManager();
2476 Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
2477 for (RegionState state : regionsInTransition.values()) {
2478 am.regionOffline(state.getRegion());
2479 }
2480 ZKAssign.deleteNodeFailSilent(regionServer.getZooKeeper(), regions.get(0).getRegionInfo());
2481 Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
2482 regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
2483 am.assign(regionsMap);
2484 am.waitForAssignment(regions.get(0).getRegionInfo());
2485 HBaseFsck hbck = doFsck(conf, false);
2486 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
2487 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
2488
2489 assertEquals(0, hbck.getOverlapGroups(table).size());
2490
2491
2492 assertErrors(
2493 doFsck(
2494 conf, false, true, false, false, false, false, false, false, false, false, false, null),
2495 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
2496 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
2497
2498
2499 assertNoErrors(doFsck(conf, false));
2500 assertEquals(5, countRows());
2501 } finally {
2502 if (tbl != null) {
2503 tbl.close();
2504 tbl = null;
2505 }
2506 cleanupTable(table);
2507 }
2508 }
2509
2510 }