/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 package org.apache.hadoop.hbase.util;
20
21 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24 import static org.junit.Assert.assertEquals;
25 import static org.junit.Assert.assertFalse;
26 import static org.junit.Assert.assertNotEquals;
27 import static org.junit.Assert.assertNotNull;
28 import static org.junit.Assert.assertTrue;
29 import static org.junit.Assert.fail;
30
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.Collection;
34 import java.util.HashMap;
35 import java.util.LinkedList;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.Map.Entry;
39 import java.util.concurrent.Callable;
40 import java.util.concurrent.CountDownLatch;
41 import java.util.concurrent.ExecutorService;
42 import java.util.concurrent.Executors;
43 import java.util.concurrent.Future;
44 import java.util.concurrent.ScheduledThreadPoolExecutor;
45 import java.util.concurrent.SynchronousQueue;
46 import java.util.concurrent.ThreadPoolExecutor;
47 import java.util.concurrent.TimeUnit;
48 import java.util.concurrent.atomic.AtomicBoolean;
49
50 import org.apache.commons.io.IOUtils;
51 import org.apache.commons.logging.Log;
52 import org.apache.commons.logging.LogFactory;
53 import org.apache.hadoop.conf.Configuration;
54 import org.apache.hadoop.fs.FileStatus;
55 import org.apache.hadoop.fs.FileSystem;
56 import org.apache.hadoop.fs.Path;
57 import org.apache.hadoop.hbase.ClusterStatus;
58 import org.apache.hadoop.hbase.HBaseTestingUtility;
59 import org.apache.hadoop.hbase.HColumnDescriptor;
60 import org.apache.hadoop.hbase.HConstants;
61 import org.apache.hadoop.hbase.HRegionInfo;
62 import org.apache.hadoop.hbase.HRegionLocation;
63 import org.apache.hadoop.hbase.HTableDescriptor;
64 import org.apache.hadoop.hbase.TableExistsException;
66 import org.apache.hadoop.hbase.MiniHBaseCluster;
67 import org.apache.hadoop.hbase.ServerName;
68 import org.apache.hadoop.hbase.TableName;
69 import org.apache.hadoop.hbase.catalog.MetaEditor;
70 import org.apache.hadoop.hbase.client.Delete;
71 import org.apache.hadoop.hbase.client.Durability;
72 import org.apache.hadoop.hbase.client.Get;
73 import org.apache.hadoop.hbase.client.HBaseAdmin;
74 import org.apache.hadoop.hbase.client.HConnection;
75 import org.apache.hadoop.hbase.client.HConnectionManager;
76 import org.apache.hadoop.hbase.client.HTable;
77 import org.apache.hadoop.hbase.client.MetaScanner;
78 import org.apache.hadoop.hbase.client.Put;
79 import org.apache.hadoop.hbase.client.Result;
80 import org.apache.hadoop.hbase.client.ResultScanner;
81 import org.apache.hadoop.hbase.client.Scan;
82 import org.apache.hadoop.hbase.io.hfile.TestHFile;
83 import org.apache.hadoop.hbase.master.AssignmentManager;
84 import org.apache.hadoop.hbase.master.HMaster;
85 import org.apache.hadoop.hbase.master.RegionState;
86 import org.apache.hadoop.hbase.master.RegionStates;
87 import org.apache.hadoop.hbase.master.TableLockManager;
88 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
89 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
90 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
91 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
92 import org.apache.hadoop.hbase.regionserver.HRegion;
93 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
94 import org.apache.hadoop.hbase.regionserver.HRegionServer;
95 import org.apache.hadoop.hbase.regionserver.SplitTransaction;
96 import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
97 import org.apache.hadoop.hbase.testclassification.LargeTests;
98 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
99 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
100 import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
101 import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
102 import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
103 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
104 import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
105 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
106 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
107 import org.apache.zookeeper.KeeperException;
108 import org.junit.AfterClass;
109 import org.junit.Assert;
110 import org.junit.Before;
111 import org.junit.BeforeClass;
112 import org.junit.Ignore;
113 import org.junit.Test;
114 import org.junit.experimental.categories.Category;
115 import org.junit.rules.TestName;
116
117 import com.google.common.collect.Multimap;
118
119
120
121
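/**
 * Tests HBaseFsck's ability to detect inconsistencies between hbase:meta,
 * region deployment, and the data on HDFS, and to repair them where a fix
 * option exists.
 */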
122 @Category(LargeTests.class)
123 public class TestHBaseFsck {
124 final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
125 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
126 private final static Configuration conf = TEST_UTIL.getConfiguration();
127 private final static String FAM_STR = "fam";
128 private final static byte[] FAM = Bytes.toBytes(FAM_STR);
129 private final static int REGION_ONLINE_TIMEOUT = 800;
130 private static RegionStates regionStates;
131 private static ExecutorService executorService;
132
133
134 private HTable tbl;
135 private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
136 Bytes.toBytes("B"), Bytes.toBytes("C") };
137
138 private final static byte[][] ROWKEYS= new byte[][] {
139 Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
140 Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
141
142 @SuppressWarnings("deprecation")
143 @BeforeClass
144 public static void setUpBeforeClass() throws Exception {
145 TEST_UTIL.getConfiguration().setInt("hbase.regionserver.handler.count", 2);
146 TEST_UTIL.getConfiguration().setInt("hbase.regionserver.metahandler.count", 2);
147 TEST_UTIL.getConfiguration().setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
148 TEST_UTIL.startMiniCluster(3);
149 TEST_UTIL.setHDFSClientRetry(0);
150
151 executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
152 new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
153
154 AssignmentManager assignmentManager =
155 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
156 regionStates = assignmentManager.getRegionStates();
157 TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
158 }
159
160 @AfterClass
161 public static void tearDownAfterClass() throws Exception {
162 TEST_UTIL.shutdownMiniCluster();
163 }
164
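/**
 * Rewrites a region's server/startcode columns in hbase:meta to point at the
 * wrong regionserver, then verifies that hbck reports SERVER_DOES_NOT_MATCH_META
 * and that running it with fix enabled gets the region reassigned.
 */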
165 @Test
166 public void testHBaseFsck() throws Exception {
167 assertNoErrors(doFsck(conf, false));
168 String table = "tableBadMetaAssign";
169 TEST_UTIL.createTable(Bytes.toBytes(table), FAM);
170
171
172 assertNoErrors(doFsck(conf, false));
173
174
175
176 HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
177 Scan scan = new Scan();
178 scan.setStartRow(Bytes.toBytes(table+",,"));
179 ResultScanner scanner = meta.getScanner(scan);
180 HRegionInfo hri = null;
181
182 Result res = scanner.next();
183 ServerName currServer =
184 ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
185 HConstants.SERVER_QUALIFIER));
186 long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
187 HConstants.STARTCODE_QUALIFIER));
188
189 for (JVMClusterUtil.RegionServerThread rs :
190 TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
191
192 ServerName sn = rs.getRegionServer().getServerName();
193
194
195 if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
196 startCode != sn.getStartcode()) {
197 Put put = new Put(res.getRow());
198 put.setDurability(Durability.SKIP_WAL);
199 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
200 Bytes.toBytes(sn.getHostAndPort()));
201 put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
202 Bytes.toBytes(sn.getStartcode()));
203 meta.put(put);
204 hri = HRegionInfo.getHRegionInfo(res);
205 break;
206 }
207 }
208
209
210 assertErrors(doFsck(conf, true), new ERROR_CODE[]{
211 ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
212
213 TEST_UTIL.getHBaseCluster().getMaster()
214 .getAssignmentManager().waitForAssignment(hri);
215
216
217 assertNoErrors(doFsck(conf, false));
218
219
220 HTable t = new HTable(conf, Bytes.toBytes(table), executorService);
221 ResultScanner s = t.getScanner(new Scan());
222 s.close();
223 t.close();
224
225 scanner.close();
226 meta.close();
227 }
228
229 @Test(timeout=180000)
230 public void testFixAssignmentsWhenMETAinTransition() throws Exception {
231 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
232 HBaseAdmin admin = null;
233 try {
234 admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
235 admin.closeRegion(cluster.getServerHoldingMeta(),
236 HRegionInfo.FIRST_META_REGIONINFO);
237 } finally {
238 if (admin != null) {
239 admin.close();
240 }
241 }
242 regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
243 MetaRegionTracker.deleteMetaLocation(cluster.getMaster().getZooKeeper());
244 assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
245 HBaseFsck hbck = doFsck(conf, true);
246 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
247 ERROR_CODE.NULL_META_REGION });
248 assertNoErrors(doFsck(conf, false));
249 }
250
251
252
253
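/**
 * Creates a fresh region for the given table and key range and registers it in
 * hbase:meta. No HDFS directory is created and the region is not assigned.
 */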
254 private HRegionInfo createRegion(Configuration conf, final HTableDescriptor
255 htd, byte[] startKey, byte[] endKey)
256 throws IOException {
257 HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
258 HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
259 MetaEditor.addRegionToMeta(meta, hri);
260 meta.close();
261 return hri;
262 }
263
264
265
266
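/**
 * Logs the hbase:meta row keys for the given table; useful when debugging the
 * state mutations these tests perform.
 */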
267 private void dumpMeta(TableName tableName) throws IOException {
268 List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
269 for (byte[] row : metaRows) {
270 LOG.info(Bytes.toString(row));
271 }
272 }
273
274
275
276
277
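/**
 * Closes the given region on its regionserver and offlines it in the master,
 * logging (rather than rethrowing) any IOException.
 */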
278 private void undeployRegion(HBaseAdmin admin, ServerName sn,
279 HRegionInfo hri) throws IOException, InterruptedException {
280 try {
281 HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
282 if (!hri.isMetaTable()) {
283 admin.offline(hri.getRegionName());
284 }
285 } catch (IOException ioe) {
286 LOG.warn("Got exception when attempting to offline region "
287 + Bytes.toString(hri.getRegionName()), ioe);
288 }
289 }
290
291
292
293
294
295
296 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
297 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
298 boolean hdfs) throws IOException, InterruptedException {
299 deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
300 }
301
302
303
304
305
306
307
308
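/**
 * Deletes the region whose start/end keys match the given range. Depending on
 * the flags this unassigns the region, removes its row from hbase:meta, and
 * deletes either the whole region directory or just its .regioninfo file in HDFS.
 */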
309 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
310 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
311 boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
312 LOG.info("** Before delete:");
313 dumpMeta(htd.getTableName());
314
315 Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
316 for (Entry<HRegionInfo, ServerName> e: hris.entrySet()) {
317 HRegionInfo hri = e.getKey();
318 ServerName hsa = e.getValue();
319 if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
320 && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
321
322 LOG.info("RegionName: " +hri.getRegionNameAsString());
323 byte[] deleteRow = hri.getRegionName();
324
325 if (unassign) {
326 LOG.info("Undeploying region " + hri + " from server " + hsa);
327 undeployRegion(new HBaseAdmin(conf), hsa, hri);
328 }
329
330 if (regionInfoOnly) {
331 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
332 Path rootDir = FSUtils.getRootDir(conf);
333 FileSystem fs = rootDir.getFileSystem(conf);
334 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
335 hri.getEncodedName());
336 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
337 fs.delete(hriPath, true);
338 }
339
340 if (hdfs) {
341 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
342 Path rootDir = FSUtils.getRootDir(conf);
343 FileSystem fs = rootDir.getFileSystem(conf);
344 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
345 hri.getEncodedName());
346 HBaseFsck.debugLsr(conf, p);
347 boolean success = fs.delete(p, true);
348 LOG.info("Deleted " + p + " sucessfully? " + success);
349 HBaseFsck.debugLsr(conf, p);
350 }
351
352 if (metaRow) {
353 HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
354 Delete delete = new Delete(deleteRow);
355 meta.delete(delete);
356 }
357 }
358 LOG.info(hri.toString() + hsa.toString());
359 }
360
361 TEST_UTIL.getMetaTableRows(htd.getTableName());
362 LOG.info("*** After delete:");
363 dumpMeta(htd.getTableName());
364 }
365
366
367
368
369
370
371
372
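/**
 * Creates a table pre-split on SPLITS with a single column family and loads
 * one row per entry in ROWKEYS. The resulting HTable is kept in tbl.
 */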
373 HTable setupTable(TableName tablename) throws Exception {
374 HTableDescriptor desc = new HTableDescriptor(tablename);
375 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
376 desc.addFamily(hcd);
377 TEST_UTIL.getHBaseAdmin().createTable(desc, SPLITS);
378 tbl = new HTable(TEST_UTIL.getConfiguration(), tablename, executorService);
379
380 List<Put> puts = new ArrayList<Put>();
381 for (byte[] row : ROWKEYS) {
382 Put p = new Put(row);
383 p.add(FAM, Bytes.toBytes("val"), row);
384 puts.add(p);
385 }
386 tbl.put(puts);
387 tbl.flushCommits();
388 return tbl;
389 }
390
391
392
393
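/** Counts the rows currently visible in tbl via a full scan. */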
394 int countRows() throws IOException {
395 Scan s = new Scan();
396 ResultScanner rs = tbl.getScanner(s);
397 int i = 0;
while (rs.next() != null) {
399 i++;
400 }
401 return i;
402 }
403
404
405
406
407
408
409
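/**
 * Disables the given table (waiting up to 30 seconds for the disable to take
 * effect) and then deletes it.
 */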
410 void deleteTable(TableName tablename) throws IOException {
411 HBaseAdmin admin = new HBaseAdmin(conf);
412 admin.getConnection().clearRegionCache();
413 if (admin.isTableEnabled(tablename)) {
414 admin.disableTableAsync(tablename);
415 }
416 long totalWait = 0;
417 long maxWait = 30*1000;
418 long sleepTime = 250;
419 while (!admin.isTableDisabled(tablename)) {
420 try {
421 Thread.sleep(sleepTime);
422 totalWait += sleepTime;
423 if (totalWait >= maxWait) {
424 fail("Waited too long for table to be disabled + " + tablename);
425 }
426 } catch (InterruptedException e) {
427 e.printStackTrace();
428 fail("Interrupted when trying to disable table " + tablename);
429 }
430 }
431 admin.deleteTable(tablename);
432 }
433
434
435
436
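/** Sanity check: a freshly created, consistent table yields no hbck errors. */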
437 @Test
438 public void testHBaseFsckClean() throws Exception {
439 assertNoErrors(doFsck(conf, false));
440 TableName table = TableName.valueOf("tableClean");
441 try {
442 HBaseFsck hbck = doFsck(conf, false);
443 assertNoErrors(hbck);
444
445 setupTable(table);
446 assertEquals(ROWKEYS.length, countRows());
447
448
449 hbck = doFsck(conf, false);
450 assertNoErrors(hbck);
451 assertEquals(0, hbck.getOverlapGroups(table).size());
452 assertEquals(ROWKEYS.length, countRows());
453 } finally {
454 deleteTable(table);
455 }
456 }
457
458
459
460
461 @Test
462 public void testHbckThreadpooling() throws Exception {
463 TableName table =
464 TableName.valueOf("tableDupeStartKey");
465 try {
466
467 setupTable(table);
468
469
470 Configuration newconf = new Configuration(conf);
471 newconf.setInt("hbasefsck.numthreads", 1);
472 assertNoErrors(doFsck(newconf, false));
473
474
475 } finally {
476 deleteTable(table);
477 }
478 }
479
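/**
 * Moves the table's .tableinfo file aside, verifies hbck reports
 * NO_TABLEINFO_FILE, and checks that running hbck with fix enabled restores a
 * descriptor that still carries a previously set non-default attribute.
 */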
480 @Test
481 public void testHbckFixOrphanTable() throws Exception {
482 TableName table = TableName.valueOf("tableInfo");
483 FileSystem fs = null;
484 Path tableinfo = null;
485 try {
486 setupTable(table);
487 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
488
489 Path hbaseTableDir = FSUtils.getTableDir(
490 FSUtils.getRootDir(conf), table);
491 fs = hbaseTableDir.getFileSystem(conf);
492 FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
493 tableinfo = status.getPath();
494 fs.rename(tableinfo, new Path("/.tableinfo"));
495
496
497 HBaseFsck hbck = doFsck(conf, false);
498 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
499
500
501 hbck = doFsck(conf, true);
502 assertNoErrors(hbck);
503 status = null;
504 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
505 assertNotNull(status);
506
507 HTableDescriptor htd = admin.getTableDescriptor(table);
508 htd.setValue("NOT_DEFAULT", "true");
509 admin.disableTable(table);
510 admin.modifyTable(table, htd);
511 admin.enableTable(table);
512 fs.delete(status.getPath(), true);
513
514
515 htd = admin.getTableDescriptor(table);
516 hbck = doFsck(conf, true);
517 assertNoErrors(hbck);
518 status = null;
519 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
520 assertNotNull(status);
521 htd = admin.getTableDescriptor(table);
522 assertEquals(htd.getValue("NOT_DEFAULT"), "true");
523 } finally {
524 fs.rename(new Path("/.tableinfo"), tableinfo);
525 deleteTable(table);
526 }
527 }
528
529
530
531
532
533
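/**
 * Runs two hbck instances concurrently; only one should complete, and the
 * other is expected to fail with a "Duplicate hbck" exception.
 */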
534 @Test
535 public void testParallelHbck() throws Exception {
536 final ExecutorService service;
537 final Future<HBaseFsck> hbck1,hbck2;
538
539 class RunHbck implements Callable<HBaseFsck>{
540 boolean fail = true;
541 @Override
542 public HBaseFsck call(){
543 try{
544 return doFsck(conf, false);
545 } catch(Exception e){
546 if (e.getMessage().contains("Duplicate hbck")) {
547 fail = false;
548 } else {
549 LOG.fatal("hbck failed.", e);
550 }
551 }
552
553 if (fail) fail();
554 return null;
555 }
556 }
557 service = Executors.newFixedThreadPool(2);
558 hbck1 = service.submit(new RunHbck());
559 hbck2 = service.submit(new RunHbck());
560 service.shutdown();
561
562 service.awaitTermination(15, TimeUnit.SECONDS);
563 HBaseFsck h1 = hbck1.get();
564 HBaseFsck h2 = hbck2.get();
565
566 assert(h1 == null || h2 == null);
567 if (h1 != null) {
568 assert(h1.getRetCode() >= 0);
569 }
570 if (h2 != null) {
571 assert(h2.getRetCode() >= 0);
572 }
573 }
574
575
576
577
578
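/**
 * Creates and assigns an extra region that shares a start key with an existing
 * region, verifies hbck reports DUPE_STARTKEYS, and checks that the fix run
 * removes the overlap without losing rows.
 */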
579 @Test
580 public void testDupeStartKey() throws Exception {
581 TableName table =
582 TableName.valueOf("tableDupeStartKey");
583 try {
584 setupTable(table);
585 assertNoErrors(doFsck(conf, false));
586 assertEquals(ROWKEYS.length, countRows());
587
588
589 HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
590 Bytes.toBytes("A"), Bytes.toBytes("A2"));
591 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
592 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
593 .waitForAssignment(hriDupe);
594 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
595 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
596
597 HBaseFsck hbck = doFsck(conf, false);
598 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
599 ERROR_CODE.DUPE_STARTKEYS});
600 assertEquals(2, hbck.getOverlapGroups(table).size());
601 assertEquals(ROWKEYS.length, countRows());
602
603
604 doFsck(conf,true);
605
606
607 HBaseFsck hbck2 = doFsck(conf,false);
608 assertNoErrors(hbck2);
609 assertEquals(0, hbck2.getOverlapGroups(table).size());
610 assertEquals(ROWKEYS.length, countRows());
611 } finally {
612 deleteTable(table);
613 }
614 }
615
616
617
618
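/** Returns, per regionserver, the names of the regions it is currently serving. */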
619 Map<ServerName, List<String>> getDeployedHRIs(
620 final HBaseAdmin admin) throws IOException {
621 ClusterStatus status = admin.getClusterStatus();
622 Collection<ServerName> regionServers = status.getServers();
623 Map<ServerName, List<String>> mm =
624 new HashMap<ServerName, List<String>>();
625 HConnection connection = admin.getConnection();
626 for (ServerName hsi : regionServers) {
627 AdminProtos.AdminService.BlockingInterface server = connection.getAdmin(hsi);
628
629
630 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
631 List<String> regionNames = new ArrayList<String>();
632 for (HRegionInfo hri : regions) {
633 regionNames.add(hri.getRegionNameAsString());
634 }
635 mm.put(hsi, regionNames);
636 }
637 return mm;
638 }
639
640
641
642
643 ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
644 for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
645 if (e.getValue().contains(hri.getRegionNameAsString())) {
646 return e.getKey();
647 }
648 }
649 return null;
650 }
651
652
653
654
655
656 @Test
657 public void testDupeRegion() throws Exception {
658 TableName table =
659 TableName.valueOf("tableDupeRegion");
660 try {
661 setupTable(table);
662 assertNoErrors(doFsck(conf, false));
663 assertEquals(ROWKEYS.length, countRows());
664
665
666 HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
667 Bytes.toBytes("A"), Bytes.toBytes("B"));
668
669 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
670 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
671 .waitForAssignment(hriDupe);
672 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
673 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
674
675
676
677
678
679 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
680 while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
681 Thread.sleep(250);
682 }
683
684 LOG.debug("Finished assignment of dupe region");
685
686
687 HBaseFsck hbck = doFsck(conf, false);
688 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
689 ERROR_CODE.DUPE_STARTKEYS});
690 assertEquals(2, hbck.getOverlapGroups(table).size());
691 assertEquals(ROWKEYS.length, countRows());
692
693
694 doFsck(conf,true);
695
696
697 HBaseFsck hbck2 = doFsck(conf,false);
698 assertNoErrors(hbck2);
699 assertEquals(0, hbck2.getOverlapGroups(table).size());
700 assertEquals(ROWKEYS.length, countRows());
701 } finally {
702 deleteTable(table);
703 }
704 }
705
706
707
708
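/**
 * Creates a degenerate region (start key equals end key) inside the table and
 * verifies hbck reports DEGENERATE_REGION plus the resulting DUPE_STARTKEYS,
 * then repairs it.
 */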
709 @Test
710 public void testDegenerateRegions() throws Exception {
711 TableName table =
712 TableName.valueOf("tableDegenerateRegions");
713 try {
714 setupTable(table);
715 assertNoErrors(doFsck(conf,false));
716 assertEquals(ROWKEYS.length, countRows());
717
718
719 HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
720 Bytes.toBytes("B"), Bytes.toBytes("B"));
721 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
722 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
723 .waitForAssignment(hriDupe);
724 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
725 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
726
727 HBaseFsck hbck = doFsck(conf,false);
728 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION,
729 ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS});
730 assertEquals(2, hbck.getOverlapGroups(table).size());
731 assertEquals(ROWKEYS.length, countRows());
732
733
734 doFsck(conf,true);
735
736
737 HBaseFsck hbck2 = doFsck(conf,false);
738 assertNoErrors(hbck2);
739 assertEquals(0, hbck2.getOverlapGroups(table).size());
740 assertEquals(ROWKEYS.length, countRows());
741 } finally {
742 deleteTable(table);
743 }
744 }
745
746
747
748
749
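/**
 * Creates a region that is entirely contained within an existing region and
 * verifies hbck reports OVERLAP_IN_REGION_CHAIN and can repair it.
 */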
750 @Test
751 public void testContainedRegionOverlap() throws Exception {
752 TableName table =
753 TableName.valueOf("tableContainedRegionOverlap");
754 try {
755 setupTable(table);
756 assertEquals(ROWKEYS.length, countRows());
757
758
759 HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
760 Bytes.toBytes("A2"), Bytes.toBytes("B"));
761 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
762 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
763 .waitForAssignment(hriOverlap);
764 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
765 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
766
767 HBaseFsck hbck = doFsck(conf, false);
768 assertErrors(hbck, new ERROR_CODE[] {
769 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
770 assertEquals(2, hbck.getOverlapGroups(table).size());
771 assertEquals(ROWKEYS.length, countRows());
772
773
774 doFsck(conf, true);
775
776
777 HBaseFsck hbck2 = doFsck(conf,false);
778 assertNoErrors(hbck2);
779 assertEquals(0, hbck2.getOverlapGroups(table).size());
780 assertEquals(ROWKEYS.length, countRows());
781 } finally {
782 deleteTable(table);
783 }
784 }
785
786
787
788
789
790
791
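/**
 * Builds overlapping regions, points one of them at a different server in
 * hbase:meta, and verifies that hbck with sidelineBigOverlaps and a small
 * maxMerge resolves the overlap by sidelining data, so fewer rows remain.
 */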
792 @Test
793 public void testSidelineOverlapRegion() throws Exception {
794 TableName table =
795 TableName.valueOf("testSidelineOverlapRegion");
796 try {
797 setupTable(table);
798 assertEquals(ROWKEYS.length, countRows());
799
800
801 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
802 HMaster master = cluster.getMaster();
803 HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(),
804 Bytes.toBytes("A"), Bytes.toBytes("AB"));
805 master.assignRegion(hriOverlap1);
806 master.getAssignmentManager().waitForAssignment(hriOverlap1);
807 HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(),
808 Bytes.toBytes("AB"), Bytes.toBytes("B"));
809 master.assignRegion(hriOverlap2);
810 master.getAssignmentManager().waitForAssignment(hriOverlap2);
811
812 HBaseFsck hbck = doFsck(conf, false);
813 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
814 ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
815 assertEquals(3, hbck.getOverlapGroups(table).size());
816 assertEquals(ROWKEYS.length, countRows());
817
818
819 Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
820 ServerName serverName = null;
821 byte[] regionName = null;
822 for (HbckInfo hbi: overlapGroups.values()) {
823 if ("A".equals(Bytes.toString(hbi.getStartKey()))
824 && "B".equals(Bytes.toString(hbi.getEndKey()))) {
825 regionName = hbi.getRegionName();
826
827
828 int k = cluster.getServerWith(regionName);
829 for (int i = 0; i < 3; i++) {
830 if (i != k) {
831 HRegionServer rs = cluster.getRegionServer(i);
832 serverName = rs.getServerName();
833 break;
834 }
835 }
836
837 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
838 HBaseFsckRepair.closeRegionSilentlyAndWait(admin,
839 cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
840 admin.offline(regionName);
841 break;
842 }
843 }
844
845 assertNotNull(regionName);
846 assertNotNull(serverName);
847 HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
848 Put put = new Put(regionName);
849 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
850 Bytes.toBytes(serverName.getHostAndPort()));
851 meta.put(put);
852
853
854 HBaseFsck fsck = new HBaseFsck(conf);
855 fsck.connect();
856 fsck.setDisplayFullReport();
857 fsck.setTimeLag(0);
858 fsck.setFixAssignments(true);
859 fsck.setFixMeta(true);
860 fsck.setFixHdfsHoles(true);
861 fsck.setFixHdfsOverlaps(true);
862 fsck.setFixHdfsOrphans(true);
863 fsck.setFixVersionFile(true);
864 fsck.setSidelineBigOverlaps(true);
865 fsck.setMaxMerge(2);
866 fsck.onlineHbck();
867
868
869
870 HBaseFsck hbck2 = doFsck(conf,false);
871 assertNoErrors(hbck2);
872 assertEquals(0, hbck2.getOverlapGroups(table).size());
873 assertTrue(ROWKEYS.length > countRows());
874 } finally {
875 deleteTable(table);
876 }
877 }
878
879
880
881
882
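/**
 * Combines an orphaned HDFS region (missing .regioninfo) with an overlapping
 * region and verifies hbck reports ORPHAN_HDFS_REGION, NOT_IN_META_OR_DEPLOYED
 * and HOLE_IN_REGION_CHAIN, then repairs all of them.
 */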
883 @Test
884 public void testOverlapAndOrphan() throws Exception {
885 TableName table =
886 TableName.valueOf("tableOverlapAndOrphan");
887 try {
888 setupTable(table);
889 assertEquals(ROWKEYS.length, countRows());
890
891
892 TEST_UTIL.getHBaseAdmin().disableTable(table);
893 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
894 Bytes.toBytes("B"), true, true, false, true);
895 TEST_UTIL.getHBaseAdmin().enableTable(table);
896
897 HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
898 Bytes.toBytes("A2"), Bytes.toBytes("B"));
899 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
900 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
901 .waitForAssignment(hriOverlap);
902 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
903 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
904
905 HBaseFsck hbck = doFsck(conf, false);
906 assertErrors(hbck, new ERROR_CODE[] {
907 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
908 ERROR_CODE.HOLE_IN_REGION_CHAIN});
909
910
911 doFsck(conf, true);
912
913
914 HBaseFsck hbck2 = doFsck(conf,false);
915 assertNoErrors(hbck2);
916 assertEquals(0, hbck2.getOverlapGroups(table).size());
917 assertEquals(ROWKEYS.length, countRows());
918 } finally {
919 deleteTable(table);
920 }
921 }
922
923
924
925
926
927
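/**
 * Creates a region that spans an existing region boundary (covering another
 * region's start key) and verifies hbck reports the overlaps and repairs them.
 */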
928 @Test
929 public void testCoveredStartKey() throws Exception {
930 TableName table =
931 TableName.valueOf("tableCoveredStartKey");
932 try {
933 setupTable(table);
934 assertEquals(ROWKEYS.length, countRows());
935
936
937 HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
938 Bytes.toBytes("A2"), Bytes.toBytes("B2"));
939 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
940 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
941 .waitForAssignment(hriOverlap);
942 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
943 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
944
945 HBaseFsck hbck = doFsck(conf, false);
946 assertErrors(hbck, new ERROR_CODE[] {
947 ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
948 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
949 assertEquals(3, hbck.getOverlapGroups(table).size());
950 assertEquals(ROWKEYS.length, countRows());
951
952
953 doFsck(conf, true);
954
955
956 HBaseFsck hbck2 = doFsck(conf, false);
957 assertErrors(hbck2, new ERROR_CODE[0]);
958 assertEquals(0, hbck2.getOverlapGroups(table).size());
959 assertEquals(ROWKEYS.length, countRows());
960 } finally {
961 deleteTable(table);
962 }
963 }
964
965
966
967
968
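/**
 * Deletes a region from META, HDFS and the regionservers to create a hole in
 * the region chain, then verifies hbck reports HOLE_IN_REGION_CHAIN and plugs
 * the hole (the deleted rows stay gone).
 */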
969 @Test
970 public void testRegionHole() throws Exception {
971 TableName table =
972 TableName.valueOf("tableRegionHole");
973 try {
974 setupTable(table);
975 assertEquals(ROWKEYS.length, countRows());
976
977
978 TEST_UTIL.getHBaseAdmin().disableTable(table);
979 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
980 Bytes.toBytes("C"), true, true, true);
981 TEST_UTIL.getHBaseAdmin().enableTable(table);
982
983 HBaseFsck hbck = doFsck(conf, false);
984 assertErrors(hbck, new ERROR_CODE[] {
985 ERROR_CODE.HOLE_IN_REGION_CHAIN});
986
987 assertEquals(0, hbck.getOverlapGroups(table).size());
988
989
990 doFsck(conf, true);
991
992
993 assertNoErrors(doFsck(conf,false));
994 assertEquals(ROWKEYS.length - 2 , countRows());
995 } finally {
996 deleteTable(table);
997 }
998 }
999
1000
1001
1002
1003
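/**
 * Deletes a region's .regioninfo file (keeping its data) and removes it from
 * META, then verifies hbck reports an orphan HDFS region and a hole and can
 * rebuild the region, preserving all rows.
 */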
1004 @Test
1005 public void testHDFSRegioninfoMissing() throws Exception {
1006 TableName table =
1007 TableName.valueOf("tableHDFSRegioininfoMissing");
1008 try {
1009 setupTable(table);
1010 assertEquals(ROWKEYS.length, countRows());
1011
1012
1013 TEST_UTIL.getHBaseAdmin().disableTable(table);
1014 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1015 Bytes.toBytes("C"), true, true, false, true);
1016 TEST_UTIL.getHBaseAdmin().enableTable(table);
1017
1018 HBaseFsck hbck = doFsck(conf, false);
1019 assertErrors(hbck, new ERROR_CODE[] {
1020 ERROR_CODE.ORPHAN_HDFS_REGION,
1021 ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1022 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1023
1024 assertEquals(0, hbck.getOverlapGroups(table).size());
1025
1026
1027 doFsck(conf, true);
1028
1029
1030 assertNoErrors(doFsck(conf, false));
1031 assertEquals(ROWKEYS.length, countRows());
1032 } finally {
1033 deleteTable(table);
1034 }
1035 }
1036
1037
1038
1039
1040
1041 @Test
1042 public void testNotInMetaOrDeployedHole() throws Exception {
1043 TableName table =
1044 TableName.valueOf("tableNotInMetaOrDeployedHole");
1045 try {
1046 setupTable(table);
1047 assertEquals(ROWKEYS.length, countRows());
1048
1049
1050 TEST_UTIL.getHBaseAdmin().disableTable(table);
1051 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1052 Bytes.toBytes("C"), true, true, false);
1053 TEST_UTIL.getHBaseAdmin().enableTable(table);
1054
1055 HBaseFsck hbck = doFsck(conf, false);
1056 assertErrors(hbck, new ERROR_CODE[] {
1057 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1058
1059 assertEquals(0, hbck.getOverlapGroups(table).size());
1060
1061
1062 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1063 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1064
1065
1066 assertNoErrors(doFsck(conf,false));
1067 assertEquals(ROWKEYS.length, countRows());
1068 } finally {
1069 deleteTable(table);
1070 }
1071 }
1072
1073
1074
1075
1076 @Test
1077 public void testNotInMetaHole() throws Exception {
1078 TableName table =
1079 TableName.valueOf("tableNotInMetaHole");
1080 try {
1081 setupTable(table);
1082 assertEquals(ROWKEYS.length, countRows());
1083
1084
1085 TEST_UTIL.getHBaseAdmin().disableTable(table);
1086 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1087 Bytes.toBytes("C"), false, true, false);
1088 TEST_UTIL.getHBaseAdmin().enableTable(table);
1089
1090 HBaseFsck hbck = doFsck(conf, false);
1091 assertErrors(hbck, new ERROR_CODE[] {
1092 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1093
1094 assertEquals(0, hbck.getOverlapGroups(table).size());
1095
1096
1097 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1098 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1099
1100
1101 assertNoErrors(doFsck(conf,false));
1102 assertEquals(ROWKEYS.length, countRows());
1103 } finally {
1104 deleteTable(table);
1105 }
1106 }
1107
1108
1109
1110
1111
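/**
 * Removes a region's HDFS data while leaving it in META and deployed, then
 * verifies hbck reports NOT_IN_HDFS and that fixing it leaves the table
 * consistent minus the lost rows.
 */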
1112 @Test
1113 public void testNotInHdfs() throws Exception {
1114 TableName table =
1115 TableName.valueOf("tableNotInHdfs");
1116 try {
1117 setupTable(table);
1118 assertEquals(ROWKEYS.length, countRows());
1119
1120
1121 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1122
1123
1124 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1125 Bytes.toBytes("C"), false, false, true);
1126
1127 HBaseFsck hbck = doFsck(conf, false);
1128 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1129
1130 assertEquals(0, hbck.getOverlapGroups(table).size());
1131
1132
1133 doFsck(conf, true);
1134
1135
1136 assertNoErrors(doFsck(conf,false));
1137 assertEquals(ROWKEYS.length - 2, countRows());
1138 } finally {
1139 deleteTable(table);
1140 }
1141 }
1142
1143
1144
1145
1146
1147 @Test
1148 public void testNoHdfsTable() throws Exception {
1149 TableName table = TableName.valueOf("NoHdfsTable");
1150 setupTable(table);
1151 assertEquals(ROWKEYS.length, countRows());
1152
1153
1154 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1155
1156
1157 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1158 Bytes.toBytes("A"), false, false, true);
1159 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1160 Bytes.toBytes("B"), false, false, true);
1161 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1162 Bytes.toBytes("C"), false, false, true);
1163 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1164 Bytes.toBytes(""), false, false, true);
1165
1166
1167 deleteTableDir(table);
1168
1169 HBaseFsck hbck = doFsck(conf, false);
1170 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1171 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1172 ERROR_CODE.NOT_IN_HDFS,});
1173
1174 assertEquals(0, hbck.getOverlapGroups(table).size());
1175
1176
1177 doFsck(conf, true);
1178
1179
1180 assertNoErrors(doFsck(conf,false));
1181 assertFalse("Table "+ table + " should have been deleted",
1182 TEST_UTIL.getHBaseAdmin().tableExists(table));
1183 }
1184
1185 public void deleteTableDir(TableName table) throws IOException {
1186 Path rootDir = FSUtils.getRootDir(conf);
1187 FileSystem fs = rootDir.getFileSystem(conf);
1188 Path p = FSUtils.getTableDir(rootDir, table);
1189 HBaseFsck.debugLsr(conf, p);
1190 boolean success = fs.delete(p, true);
1191 LOG.info("Deleted " + p + " sucessfully? " + success);
1192 }
1193
1194
1195
1196
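/**
 * Deletes the cluster's hbase.version file and verifies hbck reports
 * NO_VERSION_FILE and recreates it when run with fix enabled.
 */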
1197 @Test
1198 public void testNoVersionFile() throws Exception {
1199
1200 Path rootDir = FSUtils.getRootDir(conf);
1201 FileSystem fs = rootDir.getFileSystem(conf);
1202 Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1203 fs.delete(versionFile, true);
1204
1205
1206 HBaseFsck hbck = doFsck(conf, false);
1207 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1208
1209 doFsck(conf, true);
1210
1211
1212 assertNoErrors(doFsck(conf, false));
1213 }
1214
1215
1216
1217
1218 @Test
1219 public void testRegionShouldNotBeDeployed() throws Exception {
1220 TableName table =
1221 TableName.valueOf("tableRegionShouldNotBeDeployed");
1222 try {
1223 LOG.info("Starting testRegionShouldNotBeDeployed.");
1224 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1225 assertTrue(cluster.waitForActiveAndReadyMaster());
1226
1227
1228 byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1229 Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1230 HTableDescriptor htdDisabled = new HTableDescriptor(table);
1231 htdDisabled.addFamily(new HColumnDescriptor(FAM));
1232
1233
1234 FSTableDescriptors fstd = new FSTableDescriptors(conf);
1235 fstd.createTableDescriptor(htdDisabled);
1236 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
1237 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
1238
1239
1240 HRegionServer hrs = cluster.getRegionServer(0);
1241
1242
1243 TEST_UTIL.getHBaseAdmin().disableTable(table);
1244 TEST_UTIL.getHBaseAdmin().enableTable(table);
1245
1246
1247 TEST_UTIL.getHBaseAdmin().disableTable(table);
1248 HRegionInfo region = disabledRegions.remove(0);
1249 byte[] regionName = region.getRegionName();
1250
1251
1252 assertTrue(cluster.getServerWith(regionName) == -1);
1253
1254
1255
1256
1257
1258 HRegion r = HRegion.openHRegion(
1259 region, htdDisabled, hrs.getWAL(region), conf);
1260 hrs.addToOnlineRegions(r);
1261
1262 HBaseFsck hbck = doFsck(conf, false);
1263 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1264
1265
1266 doFsck(conf, true);
1267
1268
1269 assertNoErrors(doFsck(conf, false));
1270 } finally {
1271 TEST_UTIL.getHBaseAdmin().enableTable(table);
1272 deleteTable(table);
1273 }
1274 }
1275
1276
1277
1278
1279 @Test
1280 public void testFixByTable() throws Exception {
1281 TableName table1 =
1282 TableName.valueOf("testFixByTable1");
1283 TableName table2 =
1284 TableName.valueOf("testFixByTable2");
1285 try {
1286 setupTable(table1);
1287
1288 TEST_UTIL.getHBaseAdmin().flush(table1.getName());
1289
1290 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1291 Bytes.toBytes("C"), false, false, true);
1292
1293 setupTable(table2);
1294
1295 TEST_UTIL.getHBaseAdmin().flush(table2.getName());
1296
1297 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1298 Bytes.toBytes("C"), false, false, true);
1299
1300 HBaseFsck hbck = doFsck(conf, false);
1301 assertErrors(hbck, new ERROR_CODE[] {
1302 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1303
1304
1305 doFsck(conf, true, table1);
1306
1307 assertNoErrors(doFsck(conf, false, table1));
1308
1309 assertErrors(doFsck(conf, false, table2),
1310 new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1311
1312
1313 doFsck(conf, true, table2);
1314
1315 assertNoErrors(doFsck(conf, false));
1316 assertEquals(ROWKEYS.length - 2, countRows());
1317 } finally {
1318 deleteTable(table1);
1319 deleteTable(table2);
1320 }
1321 }
1322
1323
1324
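/**
 * Manufactures a lingering split parent: the original region is removed, then
 * re-added to META marked split/offline with two fabricated daughters.
 * Verifies the regular fix options leave it alone and that only
 * fixSplitParents resets the parent so the hole can then be repaired.
 */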
1325 @Test
1326 public void testLingeringSplitParent() throws Exception {
1327 TableName table =
1328 TableName.valueOf("testLingeringSplitParent");
1329 HTable meta = null;
1330 try {
1331 setupTable(table);
1332 assertEquals(ROWKEYS.length, countRows());
1333
1334
1335 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1336 HRegionLocation location = tbl.getRegionLocation("B");
1337
1338
1339 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1340 Bytes.toBytes("C"), true, true, false);
1341
1342
1343 meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
1344 HRegionInfo hri = location.getRegionInfo();
1345
1346 HRegionInfo a = new HRegionInfo(tbl.getName(),
1347 Bytes.toBytes("B"), Bytes.toBytes("BM"));
1348 HRegionInfo b = new HRegionInfo(tbl.getName(),
1349 Bytes.toBytes("BM"), Bytes.toBytes("C"));
1350
1351 hri.setOffline(true);
1352 hri.setSplit(true);
1353
1354 MetaEditor.addRegionToMeta(meta, hri, a, b);
1355 meta.flushCommits();
1356 TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1357
1358 HBaseFsck hbck = doFsck(conf, false);
1359 assertErrors(hbck, new ERROR_CODE[] {
1360 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1361
1362
1363 hbck = doFsck(conf, true);
1364 assertErrors(hbck, new ERROR_CODE[] {
1365 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1366 assertFalse(hbck.shouldRerun());
1367 hbck = doFsck(conf, false);
1368 assertErrors(hbck, new ERROR_CODE[] {
1369 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1370
1371
1372 hbck = new HBaseFsck(conf);
1373 hbck.connect();
1374 hbck.setDisplayFullReport();
1375 hbck.setTimeLag(0);
1376 hbck.setFixSplitParents(true);
1377 hbck.onlineHbck();
1378 assertTrue(hbck.shouldRerun());
1379
1380 Get get = new Get(hri.getRegionName());
1381 Result result = meta.get(get);
1382 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1383 HConstants.SPLITA_QUALIFIER).isEmpty());
1384 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1385 HConstants.SPLITB_QUALIFIER).isEmpty());
1386 TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1387
1388
1389 doFsck(conf, true);
1390
1391
1392 assertNoErrors(doFsck(conf, false));
1393 assertEquals(ROWKEYS.length, countRows());
1394 } finally {
1395 deleteTable(table);
1396 IOUtils.closeQuietly(meta);
1397 }
1398 }
1399
1400
1401
1402
1403
1404 @Test
1405 public void testValidLingeringSplitParent() throws Exception {
1406 TableName table =
1407 TableName.valueOf("testLingeringSplitParent");
1408 HTable meta = null;
1409 try {
1410 setupTable(table);
1411 assertEquals(ROWKEYS.length, countRows());
1412
1413
1414 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1415 HRegionLocation location = tbl.getRegionLocation("B");
1416
1417 meta = new HTable(conf, TableName.META_TABLE_NAME);
1418 HRegionInfo hri = location.getRegionInfo();
1419
1420
1421 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1422 byte[] regionName = location.getRegionInfo().getRegionName();
1423 admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1424 TestEndToEndSplitTransaction.blockUntilRegionSplit(
1425 TEST_UTIL.getConfiguration(), 60000, regionName, true);
1426
1427
1428
1429
1430 HBaseFsck hbck = doFsck(
1431 conf, true, true, false, false, false, true, true, true, false, false, false, null);
1432 assertErrors(hbck, new ERROR_CODE[] {});
1433
1434
1435 Get get = new Get(hri.getRegionName());
1436 Result result = meta.get(get);
1437 assertNotNull(result);
1438 assertNotNull(HRegionInfo.getHRegionInfo(result));
1439
1440 assertEquals(ROWKEYS.length, countRows());
1441
1442
1443 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1444 assertNoErrors(doFsck(conf, false));
1445 } finally {
1446 deleteTable(table);
1447 IOUtils.closeQuietly(meta);
1448 }
1449 }
1450
1451
1452
1453
1454
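/**
 * Splits a region, undeploys the daughters and deletes their META rows, then
 * verifies hbck can repair the resulting inconsistency while keeping the valid
 * split parent and all of the table's rows.
 */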
1455 @Test(timeout=75000)
1456 public void testSplitDaughtersNotInMeta() throws Exception {
1457 TableName table =
1458 TableName.valueOf("testSplitdaughtersNotInMeta");
1459 HTable meta = null;
1460 try {
1461 setupTable(table);
1462 assertEquals(ROWKEYS.length, countRows());
1463
1464
1465 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1466 HRegionLocation location = tbl.getRegionLocation("B");
1467
1468 meta = new HTable(conf, TableName.META_TABLE_NAME);
1469 HRegionInfo hri = location.getRegionInfo();
1470
1471
1472 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1473 byte[] regionName = location.getRegionInfo().getRegionName();
1474 admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1475 TestEndToEndSplitTransaction.blockUntilRegionSplit(
1476 TEST_UTIL.getConfiguration(), 60000, regionName, true);
1477
1478 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(meta.get(new Get(regionName)));
1479
1480
1481 Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1482 undeployRegion(admin, hris.get(daughters.getFirst()), daughters.getFirst());
1483 undeployRegion(admin, hris.get(daughters.getSecond()), daughters.getSecond());
1484
1485 meta.delete(new Delete(daughters.getFirst().getRegionName()));
1486 meta.delete(new Delete(daughters.getSecond().getRegionName()));
1487 meta.flushCommits();
1488
1489 HBaseFsck hbck = doFsck(conf, false);
1490 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1491 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1492
1493
1494 hbck = doFsck(
1495 conf, true, true, false, false, false, false, false, false, false, false, false, null);
1496 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1497 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1498
1499
1500 Get get = new Get(hri.getRegionName());
1501 Result result = meta.get(get);
1502 assertNotNull(result);
1503 assertNotNull(HRegionInfo.getHRegionInfo(result));
1504
1505 assertEquals(ROWKEYS.length, countRows());
1506
1507
1508 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1509 assertNoErrors(doFsck(conf, false));
1510 } finally {
1511 deleteTable(table);
1512 IOUtils.closeQuietly(meta);
1513 }
1514 }
1515
1516
1517
1518
1519
1520 @Test(timeout=120000)
1521 public void testMissingFirstRegion() throws Exception {
1522 TableName table =
1523 TableName.valueOf("testMissingFirstRegion");
1524 try {
1525 setupTable(table);
1526 assertEquals(ROWKEYS.length, countRows());
1527
1528
1529 TEST_UTIL.getHBaseAdmin().disableTable(table);
1530 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1531 true, true);
1532 TEST_UTIL.getHBaseAdmin().enableTable(table);
1533
1534 HBaseFsck hbck = doFsck(conf, false);
1535 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1536
1537 doFsck(conf, true);
1538
1539 assertNoErrors(doFsck(conf, false));
1540 } finally {
1541 deleteTable(table);
1542 }
1543 }
1544
1545
1546
1547
1548
1549 @Test(timeout=120000)
1550 public void testRegionDeployedNotInHdfs() throws Exception {
1551 TableName table =
1552 TableName.valueOf("testSingleRegionDeployedNotInHdfs");
1553 try {
1554 setupTable(table);
1555 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1556
1557
1558 deleteRegion(conf, tbl.getTableDescriptor(),
1559 HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
1560 false, true);
1561
1562 HBaseFsck hbck = doFsck(conf, false);
1563 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
1564
1565 doFsck(conf, true);
1566
1567 assertNoErrors(doFsck(conf, false));
1568 } finally {
1569 deleteTable(table);
1570 }
1571 }
1572
1573
1574
1575
1576
1577 @Test(timeout=120000)
1578 public void testMissingLastRegion() throws Exception {
1579 TableName table =
1580 TableName.valueOf("testMissingLastRegion");
1581 try {
1582 setupTable(table);
1583 assertEquals(ROWKEYS.length, countRows());
1584
1585
1586 TEST_UTIL.getHBaseAdmin().disableTable(table);
1587 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1588 true, true);
1589 TEST_UTIL.getHBaseAdmin().enableTable(table);
1590
1591 HBaseFsck hbck = doFsck(conf, false);
1592 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1593
1594 doFsck(conf, true);
1595
1596 assertNoErrors(doFsck(conf, false));
1597 } finally {
1598 deleteTable(table);
1599 }
1600 }
1601
1602
1603
1604
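/**
 * Verifies that fixAssignments still works when HDFS checking is disabled: an
 * unassigned region is reported as NOT_DEPLOYED and gets reassigned even with
 * checkHdfs(false).
 */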
1605 @Test
1606 public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1607 TableName table =
1608 TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1609 try {
1610 setupTable(table);
1611 assertEquals(ROWKEYS.length, countRows());
1612
1613
1614 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1615 Bytes.toBytes("B"), true, false, false, false);
1616
1617
1618 HBaseFsck hbck = doFsck(conf, false);
1619 assertErrors(hbck, new ERROR_CODE[] {
1620 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1621
1622
1623 HBaseFsck fsck = new HBaseFsck(conf);
1624 fsck.connect();
1625 fsck.setDisplayFullReport();
1626 fsck.setTimeLag(0);
1627 fsck.setCheckHdfs(false);
1628 fsck.onlineHbck();
1629 assertErrors(fsck, new ERROR_CODE[] {
1630 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1631
1632
1633 fsck = new HBaseFsck(conf);
1634 fsck.connect();
1635 fsck.setDisplayFullReport();
1636 fsck.setTimeLag(0);
1637 fsck.setCheckHdfs(false);
1638 fsck.setFixAssignments(true);
1639 fsck.onlineHbck();
1640 assertTrue(fsck.shouldRerun());
1641 fsck.onlineHbck();
1642 assertNoErrors(fsck);
1643
1644 assertEquals(ROWKEYS.length, countRows());
1645 } finally {
1646 deleteTable(table);
1647 }
1648 }
1649
1650
1651
1652
1653
1654
1655 @Test
1656 public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1657 TableName table =
1658 TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
1659 try {
1660 setupTable(table);
1661 assertEquals(ROWKEYS.length, countRows());
1662
1663
1664 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1665 Bytes.toBytes("B"), false, true, false, false);
1666
1667
1668 HBaseFsck hbck = doFsck(conf, false);
1669 assertErrors(hbck, new ERROR_CODE[] {
1670 ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1671
1672
1673 HBaseFsck fsck = new HBaseFsck(conf);
1674 fsck.connect();
1675 fsck.setDisplayFullReport();
1676 fsck.setTimeLag(0);
1677 fsck.setCheckHdfs(false);
1678 fsck.onlineHbck();
1679 assertErrors(fsck, new ERROR_CODE[] {
1680 ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1681
1682
1683 fsck = new HBaseFsck(conf);
1684 fsck.connect();
1685 fsck.setDisplayFullReport();
1686 fsck.setTimeLag(0);
1687 fsck.setCheckHdfs(false);
1688 fsck.setFixAssignments(true);
1689 fsck.setFixMeta(true);
1690 fsck.onlineHbck();
1691 assertFalse(fsck.shouldRerun());
1692 assertErrors(fsck, new ERROR_CODE[] {
1693 ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1694
1695
1696 fsck = doFsck(conf, true);
1697 assertTrue(fsck.shouldRerun());
1698 fsck = doFsck(conf, true);
1699 assertNoErrors(fsck);
1700 } finally {
1701 deleteTable(table);
1702 }
1703 }
1704
1705
1706
1707
1708
1709 @Test
1710 public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1711 TableName table =
1712 TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
1713 try {
1714 setupTable(table);
1715 assertEquals(ROWKEYS.length, countRows());
1716
1717
1718 TEST_UTIL.getHBaseAdmin().disableTable(table);
1719 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1720 Bytes.toBytes("B"), true, true, false, true);
1721 TEST_UTIL.getHBaseAdmin().enableTable(table);
1722
1723 HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
1724 Bytes.toBytes("A2"), Bytes.toBytes("B"));
1725 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1726 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1727 .waitForAssignment(hriOverlap);
1728 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1729 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1730
1731 HBaseFsck hbck = doFsck(conf, false);
1732 assertErrors(hbck, new ERROR_CODE[] {
1733 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1734 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1735
1736
1737 HBaseFsck fsck = new HBaseFsck(conf);
1738 fsck.connect();
1739 fsck.setDisplayFullReport();
1740 fsck.setTimeLag(0);
1741 fsck.setCheckHdfs(false);
1742 fsck.onlineHbck();
1743 assertErrors(fsck, new ERROR_CODE[] {
1744 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1745
1746
1747 fsck = new HBaseFsck(conf);
1748 fsck.connect();
1749 fsck.setDisplayFullReport();
1750 fsck.setTimeLag(0);
1751 fsck.setCheckHdfs(false);
1752 fsck.setFixHdfsHoles(true);
1753 fsck.setFixHdfsOverlaps(true);
1754 fsck.setFixHdfsOrphans(true);
1755 fsck.onlineHbck();
1756 assertFalse(fsck.shouldRerun());
1757 assertErrors(fsck, new ERROR_CODE[] {
1758 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1759 } finally {
1760 if (TEST_UTIL.getHBaseAdmin().isTableDisabled(table)) {
1761 TEST_UTIL.getHBaseAdmin().enableTable(table);
1762 }
1763 deleteTable(table);
1764 }
1765 }
1766
1767
1768
1769
1770
1771
1772
1773
1774
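/**
 * Returns the path of a flushed HFile from the first region of the given
 * table, polling the family directory until one shows up.
 */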
1775 Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
1776 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1777 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1778 Path famDir = new Path(regionDir, FAM_STR);
1779
1780
1781 while (true) {
1782 FileStatus[] hfFss = fs.listStatus(famDir);
1783 if (hfFss.length == 0) {
1784 continue;
1785 }
1786 for (FileStatus hfs : hfFss) {
1787 if (!hfs.isDir()) {
1788 return hfs.getPath();
1789 }
1790 }
1791 }
1792 }
1793
1794
1795
1796
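/**
 * Adds a truncated copy of a flushed HFile to a family directory and verifies
 * hbck's quarantine mode detects and sidelines exactly one corrupt file while
 * the remaining files check out clean.
 */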
1797 @Test(timeout=180000)
1798 public void testQuarantineCorruptHFile() throws Exception {
1799 TableName table = TableName.valueOf(name.getMethodName());
1800 try {
1801 setupTable(table);
1802 assertEquals(ROWKEYS.length, countRows());
1803 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1804
1805 FileSystem fs = FileSystem.get(conf);
1806 Path hfile = getFlushedHFile(fs, table);
1807
1808
1809 TEST_UTIL.getHBaseAdmin().disableTable(table);
1810
1811
1812 Path corrupt = new Path(hfile.getParent(), "deadbeef");
1813 TestHFile.truncateFile(fs, hfile, corrupt);
1814 LOG.info("Created corrupted file " + corrupt);
1815 HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
1816
1817
1818 HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
1819 assertEquals(res.getRetCode(), 0);
1820 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1821 assertEquals(hfcc.getHFilesChecked(), 5);
1822 assertEquals(hfcc.getCorrupted().size(), 1);
1823 assertEquals(hfcc.getFailures().size(), 0);
1824 assertEquals(hfcc.getQuarantined().size(), 1);
1825 assertEquals(hfcc.getMissing().size(), 0);
1826
1827
1828 TEST_UTIL.getHBaseAdmin().enableTable(table);
1829 } finally {
1830 deleteTable(table);
1831 }
1832 }
1833
1834
1835
1836
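/**
 * Runs hbck with -sidelineCorruptHFiles -repairHoles against the given table
 * and asserts the expected counts of checked, corrupted, failed, quarantined
 * and missing HFiles reported by the HFileCorruptionChecker.
 */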
1837 private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
1838 int corrupt, int fail, int quar, int missing) throws Exception {
1839 try {
1840 setupTable(table);
1841 assertEquals(ROWKEYS.length, countRows());
1842 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1843
1844
1845 TEST_UTIL.getHBaseAdmin().disableTable(table);
1846
1847 String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
1848 table.getNameAsString()};
1849 ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1850 HBaseFsck res = hbck.exec(exec, args);
1851
1852 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1853 assertEquals(hfcc.getHFilesChecked(), check);
1854 assertEquals(hfcc.getCorrupted().size(), corrupt);
1855 assertEquals(hfcc.getFailures().size(), fail);
1856 assertEquals(hfcc.getQuarantined().size(), quar);
1857 assertEquals(hfcc.getMissing().size(), missing);
1858
1859
1860 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1861 admin.enableTableAsync(table);
1862 while (!admin.isTableEnabled(table)) {
1863 try {
1864 Thread.sleep(250);
1865 } catch (InterruptedException e) {
1866 e.printStackTrace();
1867 fail("Interrupted when trying to enable table " + table);
1868 }
1869 }
1870 } finally {
1871 deleteTable(table);
1872 }
1873 }
1874
1875
1876
1877
1878
  @Test(timeout=180000)
  public void testQuarantineMissingHFile() throws Exception {
    TableName table = TableName.valueOf(name.getMethodName());
    ExecutorService exec = new ScheduledThreadPoolExecutor(10);

    // Inject a corruption checker that deletes the first HFile it visits before checking it.
    final FileSystem fs = FileSystem.get(conf);
    HBaseFsck hbck = new HBaseFsck(conf, exec) {
      @Override
      public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
          throws IOException {
        return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
          AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
          @Override
          protected void checkHFile(Path p) throws IOException {
            if (attemptedFirstHFile.compareAndSet(false, true)) {
              assertTrue(fs.delete(p, true)); // make the first HFile go missing
            }
            super.checkHFile(p);
          }
        };
      }
    };
    doQuarantineTest(table, hbck, 4, 0, 0, 0, 1);
  }

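  /**
   * Same as above, but the injected checker deletes an entire column family directory before
   * checking it. Currently ignored.
   */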
  @Ignore @Test(timeout=180000)
  public void testQuarantineMissingFamdir() throws Exception {
    TableName table = TableName.valueOf(name.getMethodName());
    ExecutorService exec = new ScheduledThreadPoolExecutor(10);

    // Inject a corruption checker that deletes the first column family directory it visits.
    final FileSystem fs = FileSystem.get(conf);
    HBaseFsck hbck = new HBaseFsck(conf, exec) {
      @Override
      public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
          throws IOException {
        return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
          AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
          @Override
          protected void checkColFamDir(Path p) throws IOException {
            if (attemptedFirstHFile.compareAndSet(false, true)) {
              assertTrue(fs.delete(p, true)); // make the family directory go missing
            }
            super.checkColFamDir(p);
          }
        };
      }
    };
    doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
  }

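  /**
   * Same as above, but the injected checker deletes an entire region directory before
   * checking it.
   */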
  @Test(timeout=180000)
  public void testQuarantineMissingRegionDir() throws Exception {
    TableName table = TableName.valueOf(name.getMethodName());
    ExecutorService exec = new ScheduledThreadPoolExecutor(10);

    // Inject a corruption checker that deletes the first region directory it visits.
    final FileSystem fs = FileSystem.get(conf);
    HBaseFsck hbck = new HBaseFsck(conf, exec) {
      @Override
      public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
          throws IOException {
        return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
          AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
          @Override
          protected void checkRegionDir(Path p) throws IOException {
            if (attemptedFirstHFile.compareAndSet(false, true)) {
              assertTrue(fs.delete(p, true)); // make the region directory go missing
            }
            super.checkRegionDir(p);
          }
        };
      }
    };
    doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
  }

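  /**
   * This creates a fake reference file in a column family directory and verifies that hbck
   * reports LINGERING_REFERENCE_HFILE and that running it in fix mode clears the error.
   */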
  @Test
  public void testLingeringReferenceFile() throws Exception {
    TableName table = TableName.valueOf("testLingeringReferenceFile");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Drop a fake reference file into the first region's family directory.
      FileSystem fs = FileSystem.get(conf);
      Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
      Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
      Path famDir = new Path(regionDir, FAM_STR);
      Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
      fs.create(fakeReferenceFile);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });

      // Fix the lingering reference and verify no errors remain.
      doFsck(conf, true);
      assertNoErrors(doFsck(conf, false));
    } finally {
      deleteTable(table);
    }
  }

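  /**
   * This deletes the regioninfo qualifier from every user-table row in hbase:meta and adds a
   * row carrying only server/startcode cells, then verifies hbck reports EMPTY_META_CELL and
   * that running in fix mode no longer reports it.
   */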
  @Test
  public void testMissingRegionInfoQualifier() throws Exception {
    TableName table = TableName.valueOf("testMissingRegionInfoQualifier");
    try {
      setupTable(table);

      // Delete the regioninfo cell from every user-table row in hbase:meta.
      final List<Delete> deletes = new LinkedList<Delete>();
      HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
      MetaScanner.metaScan(conf, new MetaScanner.MetaScannerVisitor() {

        @Override
        public boolean processRow(Result rowResult) throws IOException {
          HRegionInfo hri = MetaScanner.getHRegionInfo(rowResult);
          if (hri != null && !hri.getTable().isSystemTable()) {
            Delete delete = new Delete(rowResult.getRow());
            delete.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
            deletes.add(delete);
          }
          return true;
        }

        @Override
        public void close() throws IOException {
        }
      });
      meta.delete(deletes);

      // Add a meta row that has only server and startcode cells, but no regioninfo.
      meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
          HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
      meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
          HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
      meta.close();

      HBaseFsck hbck = doFsck(conf, false);
      assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));

      // Run in fix mode and verify EMPTY_META_CELL is no longer reported.
      hbck = doFsck(conf, true);
      assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
    } finally {
      deleteTable(table);
    }
  }

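  /**
   * This verifies that the error reporter is pluggable: with the default reporter the mock is
   * never invoked, and once hbasefsck.errorreporter points at MockErrorReporter its callbacks
   * are counted.
   */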
  @Test
  public void testErrorReporter() throws Exception {
    try {
      MockErrorReporter.calledCount = 0;
      doFsck(conf, false);
      assertEquals(0, MockErrorReporter.calledCount);

      conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
      doFsck(conf, false);
      assertTrue(MockErrorReporter.calledCount > 20);
    } finally {
      conf.set("hbasefsck.errorreporter", PrintingErrorReporter.class.getName());
      MockErrorReporter.calledCount = 0;
    }
  }

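  /** ErrorReporter stub that simply counts every callback it receives. */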
  static class MockErrorReporter implements ErrorReporter {
    static int calledCount = 0;

    @Override
    public void clear() {
      calledCount++;
    }

    @Override
    public void report(String message) {
      calledCount++;
    }

    @Override
    public void reportError(String message) {
      calledCount++;
    }

    @Override
    public void reportError(ERROR_CODE errorCode, String message) {
      calledCount++;
    }

    @Override
    public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
      calledCount++;
    }

    @Override
    public void reportError(ERROR_CODE errorCode,
        String message, TableInfo table, HbckInfo info) {
      calledCount++;
    }

    @Override
    public void reportError(ERROR_CODE errorCode, String message,
        TableInfo table, HbckInfo info1, HbckInfo info2) {
      calledCount++;
    }

    @Override
    public int summarize() {
      return ++calledCount;
    }

    @Override
    public void detail(String details) {
      calledCount++;
    }

    @Override
    public ArrayList<ERROR_CODE> getErrorList() {
      calledCount++;
      return new ArrayList<ERROR_CODE>();
    }

    @Override
    public void progress() {
      calledCount++;
    }

    @Override
    public void print(String message) {
      calledCount++;
    }

    @Override
    public void resetErrors() {
      calledCount++;
    }

    @Override
    public boolean tableHasErrors(TableInfo table) {
      calledCount++;
      return false;
    }
  }

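  /**
   * This acquires table locks with an injected clock, advances time past the expiration
   * timeout, and verifies that hbck reports EXPIRED_TABLE_LOCK for each expired lock and that
   * fix mode reaps them so a new write lock can be acquired without blocking.
   */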
  @Test(timeout=180000)
  public void testCheckTableLocks() throws Exception {
    IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
    EnvironmentEdgeManager.injectEdge(edge);

    HBaseFsck hbck = doFsck(conf, false);
    assertNoErrors(hbck);

    ServerName mockName = ServerName.valueOf("localhost", 60000, 1);

    // Take a write lock on a table the cluster does not know about.
    final TableLockManager tableLockManager =
        TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
    TableLock writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
        "testCheckTableLocks");
    writeLock.acquire();
    hbck = doFsck(conf, false);
    assertNoErrors(hbck); // the lock is fresh, so nothing to report yet

    // Advance time past the lock expiration timeout.
    edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
        TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));

    hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});

    // Queue a second lock request that blocks behind the expired write lock.
    final CountDownLatch latch = new CountDownLatch(1);
    new Thread() {
      @Override
      public void run() {
        TableLock blockedLock = tableLockManager.writeLock(TableName.valueOf("foo"),
            "testCheckTableLocks");
        try {
          latch.countDown();
          blockedLock.acquire();
        } catch (IOException ex) {
          fail();
        } catch (IllegalStateException ex) {
          return; // expected: the lock is reaped while this thread waits on it
        }
        fail("should not have come here");
      }
    }.start();

    latch.await();
    Threads.sleep(300); // give the second lock attempt time to enter the lock queue

    hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});

    // Expire the second lock as well; now both show up.
    edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
        TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));

    hbck = doFsck(conf, false);
    assertErrors(hbck,
        new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK});

    // Lower the expiration timeout so the fix run reaps both expired locks.
    conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);

    Threads.sleep(10);
    hbck = doFsck(conf, true);

    hbck = doFsck(conf, false);
    assertNoErrors(hbck);

    // With the expired locks reaped, a fresh write lock should acquire without blocking.
    writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
        "should acquire without blocking");
    writeLock.acquire();
    writeLock.release();
  }

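  /**
   * This leaves an ENABLING table znode behind for a table that was never created and
   * verifies that hbck reports ORPHANED_ZK_TABLE_ENTRY, fixes it, and that the table can be
   * created afterwards.
   */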
  @Test
  public void testOrphanedTableZNode() throws Exception {
    TableName table = TableName.valueOf("testOrphanedZKTableEntry");

    try {
      // Leave an ENABLING znode behind for a table that does not exist yet.
      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getZKTable().
          setEnablingTable(table);

      try {
        setupTable(table);
        fail(
            "Create table should fail when its ZNode has already existed with ENABLING state.");
      } catch (TableExistsException t) {
        // expected
      }

      try {
        deleteTable(table);
      } catch (IOException e) {
        // The table was never fully created, so the delete may fail; ignore.
      }

      HBaseFsck hbck = doFsck(conf, false);
      assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));

      // Fix the orphaned znode.
      hbck = doFsck(conf, true);

      // The error should be gone, and the table can now be created.
      hbck = doFsck(conf, false);
      assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));

      setupTable(table);
    } finally {
      // Clean up whichever state the test left behind.
      try {
        deleteTable(table);
      } catch (IOException e) {
        // ignore
      }
    }
  }

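  /**
   * This unassigns the hbase:meta region and verifies that hbck reports NO_META_REGION and
   * can bring meta back online in fix mode.
   */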
  @Test
  public void testMetaOffline() throws Exception {
    // Start clean, then take hbase:meta offline by unassigning it.
    HBaseFsck hbck = doFsck(conf, false);
    assertNoErrors(hbck);
    deleteMetaRegion(conf, true, false, false);
    hbck = doFsck(conf, false);

    // With meta unassigned, hbck reports NO_META_REGION plus a generic UNKNOWN error.
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
    hbck = doFsck(conf, true);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
    hbck = doFsck(conf, false);
    assertNoErrors(hbck);
  }

  private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
      boolean regionInfoOnly) throws IOException, InterruptedException {
    HConnection connection = HConnectionManager.getConnection(conf);
    HRegionLocation metaLocation = connection.locateRegion(TableName.META_TABLE_NAME,
        HConstants.EMPTY_START_ROW);
    ServerName hsa = metaLocation.getServerName();
    HRegionInfo hri = metaLocation.getRegionInfo();
    if (unassign) {
      LOG.info("Undeploying meta region " + hri + " from server " + hsa);
      undeployRegion(new HBaseAdmin(conf), hsa, hri);
    }

    if (regionInfoOnly) {
      LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
      Path rootDir = FSUtils.getRootDir(conf);
      FileSystem fs = rootDir.getFileSystem(conf);
      Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
          hri.getEncodedName());
      Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
      fs.delete(hriPath, true);
    }

    if (hdfs) {
      LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
      Path rootDir = FSUtils.getRootDir(conf);
      FileSystem fs = rootDir.getFileSystem(conf);
      Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
          hri.getEncodedName());
      HBaseFsck.debugLsr(conf, p);
      boolean success = fs.delete(p, true);
      LOG.info("Deleted " + p + " successfully? " + success);
      HBaseFsck.debugLsr(conf, p);
    }
  }

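  /**
   * This creates a single-region table, removes its region directory from HDFS, and verifies
   * that hbck reports NOT_IN_HDFS and that fix runs repair the table.
   */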
  @Test
  public void testTableWithNoRegions() throws Exception {
    // Covers the case where a table ends up with no region directories in HDFS.
    TableName table = TableName.valueOf(name.getMethodName());
    try {
      // Create a table with a single region.
      HTableDescriptor desc = new HTableDescriptor(table);
      HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
      desc.addFamily(hcd);
      TEST_UTIL.getHBaseAdmin().createTable(desc);
      tbl = new HTable(TEST_UTIL.getConfiguration(), table, executorService);

      // Remove the region's directory from HDFS.
      deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
          HConstants.EMPTY_END_ROW, false, false, true);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });

      // Fix the inconsistency.
      doFsck(conf, true);
      doFsck(conf, true);

      // Verify everything is repaired.
      assertNoErrors(doFsck(conf, false));
    } finally {
      deleteTable(table);
    }
  }

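  /**
   * This merges two regions of a table and verifies that hbck reports no inconsistencies
   * afterwards, with the CatalogJanitor disabled so the merged parent is not cleaned up
   * during the check.
   */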
  @Test
  public void testHbckAfterRegionMerge() throws Exception {
    TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
    HTable meta = null;
    try {
      // Disable the CatalogJanitor so the merged parent is not cleaned up mid-test.
      TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Flush so the regions have files on disk, then pick two distinct regions to merge.
      TEST_UTIL.getHBaseAdmin().flush(table.getName());
      HRegionInfo region1 = tbl.getRegionLocation("A").getRegionInfo();
      HRegionInfo region2 = tbl.getRegionLocation("B").getRegionInfo();

      int regionCountBeforeMerge = tbl.getRegionLocations().size();

      assertNotEquals(region1, region2);

      // Merge the two regions.
      HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
      admin.mergeRegions(region1.getEncodedNameAsBytes(),
          region2.getEncodedNameAsBytes(), false);

      // Wait (up to 30 seconds) for the merge to complete.
      long timeout = System.currentTimeMillis() + 30 * 1000;
      while (true) {
        if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
          break;
        } else if (System.currentTimeMillis() > timeout) {
          fail("Time out waiting on region " + region1.getEncodedName()
              + " and " + region2.getEncodedName() + " to be merged");
        }
        Thread.sleep(10);
      }

      assertEquals(ROWKEYS.length, countRows());

      HBaseFsck hbck = doFsck(conf, false);
      assertNoErrors(hbck);
    } finally {
      TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
      deleteTable(table);
      IOUtils.closeQuietly(meta);
    }
  }

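  /**
   * This runs the region boundaries check directly and fails only if it rejects the table
   * directory path as an invalid DFS filename.
   */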
  @Test
  public void testRegionBoundariesCheck() throws Exception {
    HBaseFsck hbck = doFsck(conf, false);
    assertNoErrors(hbck);
    try {
      hbck.checkRegionBoundaries();
    } catch (IllegalArgumentException e) {
      if (e.getMessage().endsWith("not a valid DFS filename.")) {
        fail("Table directory path is not valid: " + e.getMessage());
      }
    }
  }

  @org.junit.Rule
  public TestName name = new TestName();

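  /**
   * This verifies that read-only runs skip the permission pre-check by default, that fix runs
   * do not, and that the flag can be overridden explicitly.
   */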
  @Test
  public void testReadOnlyProperty() throws Exception {
    HBaseFsck hbck = doFsck(conf, false);
    Assert.assertEquals("shouldIgnorePreCheckPermission", true,
        hbck.shouldIgnorePreCheckPermission());

    hbck = doFsck(conf, true);
    Assert.assertEquals("shouldIgnorePreCheckPermission", false,
        hbck.shouldIgnorePreCheckPermission());

    hbck = doFsck(conf, true);
    hbck.setIgnorePreCheckPermission(true);
    Assert.assertEquals("shouldIgnorePreCheckPermission", true,
        hbck.shouldIgnorePreCheckPermission());
  }

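  /** Resets any injected environment edge (see testCheckTableLocks) before each test. */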
  @Before
  public void setUp() {
    EnvironmentEdgeManager.reset();
  }

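  /**
   * This drives a split up to (but not past) the point of no return, rolls the parent region
   * back into service, and verifies that hbck first reports the leftover daughter directories
   * as NOT_IN_META_OR_DEPLOYED and then cleans them up without losing rows.
   */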
  @Test (timeout=180000)
  public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
    TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    try {
      HTableDescriptor desc = new HTableDescriptor(table);
      desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
      TEST_UTIL.getHBaseAdmin().createTable(desc);
      tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
      for (int i = 0; i < 5; i++) {
        Put p1 = new Put(("r" + i).getBytes());
        p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
        tbl.put(p1);
      }
      TEST_UTIL.getHBaseAdmin().flush(desc.getTableName().toString());
      List<HRegion> regions = cluster.getRegions(desc.getTableName());
      int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
      HRegionServer regionServer = cluster.getRegionServer(serverWith);

      // Run the split only up to the point of no return, leaving daughter dirs on disk.
      SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
      st.prepare();
      st.stepsBeforePONR(regionServer, regionServer, false);

      // Roll the parent region back into service.
      AssignmentManager am = cluster.getMaster().getAssignmentManager();
      Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
      for (RegionState state : regionsInTransition.values()) {
        am.regionOffline(state.getRegion());
      }
      ZKAssign.deleteNodeFailSilent(regionServer.getZooKeeper(), regions.get(0).getRegionInfo());
      Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
      regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
      am.assign(regionsMap);
      am.waitForAssignment(regions.get(0).getRegionInfo());

      // The abandoned daughters show up as regions that are neither in meta nor deployed.
      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
          ERROR_CODE.NOT_IN_META_OR_DEPLOYED });

      // They are holes, not overlaps.
      assertEquals(0, hbck.getOverlapGroups(table).size());

      // The repair run still reports the two holes as it fixes them.
      assertErrors(
          doFsck(
              conf, false, true, false, false, false, false, false, false, false, false, false, null),
          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
              ERROR_CODE.NOT_IN_META_OR_DEPLOYED });

      // Everything should be clean afterwards, with no rows lost.
      assertNoErrors(doFsck(conf, false));
      assertEquals(5, countRows());
    } finally {
      if (tbl != null) {
        tbl.close();
        tbl = null;
      }
      deleteTable(table);
    }
  }
}