1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.util;
20
21 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24 import static org.junit.Assert.assertEquals;
25 import static org.junit.Assert.assertFalse;
26 import static org.junit.Assert.assertNotEquals;
27 import static org.junit.Assert.assertNotNull;
28 import static org.junit.Assert.assertTrue;
29 import static org.junit.Assert.fail;
30
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.Collection;
34 import java.util.HashMap;
35 import java.util.LinkedList;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.Map.Entry;
39 import java.util.concurrent.Callable;
40 import java.util.concurrent.CountDownLatch;
41 import java.util.concurrent.ExecutorService;
42 import java.util.concurrent.Executors;
43 import java.util.concurrent.Future;
44 import java.util.concurrent.ScheduledThreadPoolExecutor;
45 import java.util.concurrent.SynchronousQueue;
46 import java.util.concurrent.ThreadPoolExecutor;
47 import java.util.concurrent.TimeUnit;
48 import java.util.concurrent.atomic.AtomicBoolean;
49
50 import org.apache.commons.io.IOUtils;
51 import org.apache.commons.logging.Log;
52 import org.apache.commons.logging.LogFactory;
53 import org.apache.hadoop.conf.Configuration;
54 import org.apache.hadoop.fs.FileStatus;
55 import org.apache.hadoop.fs.FileSystem;
56 import org.apache.hadoop.fs.Path;
57 import org.apache.hadoop.hbase.ClusterStatus;
58 import org.apache.hadoop.hbase.HBaseTestingUtility;
59 import org.apache.hadoop.hbase.HColumnDescriptor;
60 import org.apache.hadoop.hbase.HConstants;
61 import org.apache.hadoop.hbase.HRegionInfo;
62 import org.apache.hadoop.hbase.HRegionLocation;
63 import org.apache.hadoop.hbase.HTableDescriptor;
64 import org.apache.hadoop.hbase.TableExistsException;
65 import org.apache.hadoop.hbase.testclassification.LargeTests;
66 import org.apache.hadoop.hbase.MiniHBaseCluster;
67 import org.apache.hadoop.hbase.ServerName;
68 import org.apache.hadoop.hbase.TableName;
69 import org.apache.hadoop.hbase.catalog.MetaEditor;
70 import org.apache.hadoop.hbase.client.Delete;
71 import org.apache.hadoop.hbase.client.Durability;
72 import org.apache.hadoop.hbase.client.Get;
73 import org.apache.hadoop.hbase.client.HBaseAdmin;
74 import org.apache.hadoop.hbase.client.HConnection;
75 import org.apache.hadoop.hbase.client.HConnectionManager;
76 import org.apache.hadoop.hbase.client.HTable;
77 import org.apache.hadoop.hbase.client.MetaScanner;
78 import org.apache.hadoop.hbase.client.Put;
79 import org.apache.hadoop.hbase.client.Result;
80 import org.apache.hadoop.hbase.client.ResultScanner;
81 import org.apache.hadoop.hbase.client.Scan;
82 import org.apache.hadoop.hbase.io.hfile.TestHFile;
83 import org.apache.hadoop.hbase.master.AssignmentManager;
84 import org.apache.hadoop.hbase.master.HMaster;
85 import org.apache.hadoop.hbase.master.RegionState;
86 import org.apache.hadoop.hbase.master.RegionStates;
87 import org.apache.hadoop.hbase.master.TableLockManager;
88 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
89 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
90 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
91 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
92 import org.apache.hadoop.hbase.regionserver.HRegion;
93 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
94 import org.apache.hadoop.hbase.regionserver.HRegionServer;
95 import org.apache.hadoop.hbase.regionserver.SplitTransaction;
96 import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
97 import org.apache.hadoop.hbase.testclassification.LargeTests;
98 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
99 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
100 import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
101 import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
102 import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
103 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
104 import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
105 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
106 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
107 import org.apache.zookeeper.KeeperException;
108 import org.junit.AfterClass;
109 import org.junit.Assert;
110 import org.junit.Before;
111 import org.junit.BeforeClass;
112 import org.junit.Ignore;
113 import org.junit.Test;
114 import org.junit.experimental.categories.Category;
115 import org.junit.rules.TestName;
116
117 import com.google.common.collect.Multimap;
118
119
120
121
122 @Category(LargeTests.class)
123 public class TestHBaseFsck {
/** Logger shared by all tests in this class. */
final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
/** Testing utility that owns the mini cluster started in setUpBeforeClass. */
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
/** Configuration handed out by TEST_UTIL; passed to doFsck and to client objects. */
private final static Configuration conf = TEST_UTIL.getConfiguration();
/** Name of the single column family used by the test tables. */
private final static String FAM_STR = "fam";
/** Byte form of FAM_STR. */
private final static byte[] FAM = Bytes.toBytes(FAM_STR);
/**
 * Timeout passed to TEST_UTIL.assertRegionOnServer; twice this value is used for
 * "hbase.hbck.close.timeout". Presumably milliseconds -- TODO confirm.
 */
private final static int REGION_ONLINE_TIMEOUT = 800;
/** Region states of the master's assignment manager; set in setUpBeforeClass. */
private static RegionStates regionStates;
/** Thread pool handed to every HTable created by these tests; set in setUpBeforeClass. */
private static ExecutorService executorService;


/** Table under test; assigned by setupTable and read by countRows and deleteRegion. */
private HTable tbl;
/** Split keys used when creating test tables, giving four regions: [,A) [A,B) [B,C) [C,). */
private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
    Bytes.toBytes("B"), Bytes.toBytes("C") };

/** Row keys loaded by setupTable: two rows fall into each of the four regions. */
private final static byte[][] ROWKEYS= new byte[][] {
    Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
    Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
141
142 @SuppressWarnings("deprecation")
143 @BeforeClass
144 public static void setUpBeforeClass() throws Exception {
145 TEST_UTIL.getConfiguration().setInt("hbase.regionserver.handler.count", 2);
146 TEST_UTIL.getConfiguration().setInt("hbase.regionserver.metahandler.count", 2);
147 TEST_UTIL.getConfiguration().setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
148 TEST_UTIL.startMiniCluster(3);
149 TEST_UTIL.setHDFSClientRetry(0);
150
151 executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
152 new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
153
154 AssignmentManager assignmentManager =
155 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
156 regionStates = assignmentManager.getRegionStates();
157 TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
158 }
159
160 @AfterClass
161 public static void tearDownAfterClass() throws Exception {
162 TEST_UTIL.shutdownMiniCluster();
163 }
164
165 @Test
166 public void testHBaseFsck() throws Exception {
167 assertNoErrors(doFsck(conf, false));
168 String table = "tableBadMetaAssign";
169 TEST_UTIL.createTable(Bytes.toBytes(table), FAM);
170
171
172 assertNoErrors(doFsck(conf, false));
173
174
175
176 HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
177 Scan scan = new Scan();
178 scan.setStartRow(Bytes.toBytes(table+",,"));
179 ResultScanner scanner = meta.getScanner(scan);
180 HRegionInfo hri = null;
181
182 Result res = scanner.next();
183 ServerName currServer =
184 ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
185 HConstants.SERVER_QUALIFIER));
186 long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
187 HConstants.STARTCODE_QUALIFIER));
188
189 for (JVMClusterUtil.RegionServerThread rs :
190 TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
191
192 ServerName sn = rs.getRegionServer().getServerName();
193
194
195 if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
196 startCode != sn.getStartcode()) {
197 Put put = new Put(res.getRow());
198 put.setDurability(Durability.SKIP_WAL);
199 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
200 Bytes.toBytes(sn.getHostAndPort()));
201 put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
202 Bytes.toBytes(sn.getStartcode()));
203 meta.put(put);
204 hri = HRegionInfo.getHRegionInfo(res);
205 break;
206 }
207 }
208
209
210 assertErrors(doFsck(conf, true), new ERROR_CODE[]{
211 ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
212
213 TEST_UTIL.getHBaseCluster().getMaster()
214 .getAssignmentManager().waitForAssignment(hri);
215
216
217 assertNoErrors(doFsck(conf, false));
218
219
220 HTable t = new HTable(conf, Bytes.toBytes(table), executorService);
221 ResultScanner s = t.getScanner(new Scan());
222 s.close();
223 t.close();
224
225 scanner.close();
226 meta.close();
227 }
228
229 @Test(timeout=180000)
230 public void testFixAssignmentsWhenMETAinTransition() throws Exception {
231 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
232 HBaseAdmin admin = null;
233 try {
234 admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
235 admin.closeRegion(cluster.getServerHoldingMeta(),
236 HRegionInfo.FIRST_META_REGIONINFO);
237 } finally {
238 if (admin != null) {
239 admin.close();
240 }
241 }
242 regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
243 MetaRegionTracker.deleteMetaLocation(cluster.getMaster().getZooKeeper());
244 assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
245 HBaseFsck hbck = doFsck(conf, true);
246 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
247 ERROR_CODE.NULL_META_REGION });
248 assertNoErrors(doFsck(conf, false));
249 }
250
251
252
253
254 private HRegionInfo createRegion(Configuration conf, final HTableDescriptor
255 htd, byte[] startKey, byte[] endKey)
256 throws IOException {
257 HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
258 HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
259 MetaEditor.addRegionToMeta(meta, hri);
260 meta.close();
261 return hri;
262 }
263
264
265
266
267 private void dumpMeta(TableName tableName) throws IOException {
268 List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
269 for (byte[] row : metaRows) {
270 LOG.info(Bytes.toString(row));
271 }
272 }
273
274
275
276
277
278 private void undeployRegion(HBaseAdmin admin, ServerName sn,
279 HRegionInfo hri) throws IOException, InterruptedException {
280 try {
281 HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
282 if (!hri.isMetaTable()) {
283 admin.offline(hri.getRegionName());
284 }
285 } catch (IOException ioe) {
286 LOG.warn("Got exception when attempting to offline region "
287 + Bytes.toString(hri.getRegionName()), ioe);
288 }
289 }
290
291
292
293
294
295
296 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
297 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
298 boolean hdfs) throws IOException, InterruptedException {
299 deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
300 }
301
302
303
304
305
306
307
308
309 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
310 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
311 boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
312 LOG.info("** Before delete:");
313 dumpMeta(htd.getTableName());
314
315 Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
316 for (Entry<HRegionInfo, ServerName> e: hris.entrySet()) {
317 HRegionInfo hri = e.getKey();
318 ServerName hsa = e.getValue();
319 if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
320 && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
321
322 LOG.info("RegionName: " +hri.getRegionNameAsString());
323 byte[] deleteRow = hri.getRegionName();
324
325 if (unassign) {
326 LOG.info("Undeploying region " + hri + " from server " + hsa);
327 undeployRegion(new HBaseAdmin(conf), hsa, hri);
328 }
329
330 if (regionInfoOnly) {
331 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
332 Path rootDir = FSUtils.getRootDir(conf);
333 FileSystem fs = rootDir.getFileSystem(conf);
334 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
335 hri.getEncodedName());
336 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
337 fs.delete(hriPath, true);
338 }
339
340 if (hdfs) {
341 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
342 Path rootDir = FSUtils.getRootDir(conf);
343 FileSystem fs = rootDir.getFileSystem(conf);
344 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
345 hri.getEncodedName());
346 HBaseFsck.debugLsr(conf, p);
347 boolean success = fs.delete(p, true);
348 LOG.info("Deleted " + p + " sucessfully? " + success);
349 HBaseFsck.debugLsr(conf, p);
350 }
351
352 if (metaRow) {
353 HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
354 Delete delete = new Delete(deleteRow);
355 meta.delete(delete);
356 }
357 }
358 LOG.info(hri.toString() + hsa.toString());
359 }
360
361 TEST_UTIL.getMetaTableRows(htd.getTableName());
362 LOG.info("*** After delete:");
363 dumpMeta(htd.getTableName());
364 }
365
366
367
368
369
370
371
372
373 HTable setupTable(TableName tablename) throws Exception {
374 HTableDescriptor desc = new HTableDescriptor(tablename);
375 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
376 desc.addFamily(hcd);
377 TEST_UTIL.getHBaseAdmin().createTable(desc, SPLITS);
378 tbl = new HTable(TEST_UTIL.getConfiguration(), tablename, executorService);
379
380 List<Put> puts = new ArrayList<Put>();
381 for (byte[] row : ROWKEYS) {
382 Put p = new Put(row);
383 p.add(FAM, Bytes.toBytes("val"), row);
384 puts.add(p);
385 }
386 tbl.put(puts);
387 tbl.flushCommits();
388 return tbl;
389 }
390
391
392
393
394 int countRows() throws IOException {
395 Scan s = new Scan();
396 ResultScanner rs = tbl.getScanner(s);
397 int i = 0;
398 while(rs.next() !=null) {
399 i++;
400 }
401 return i;
402 }
403
404
405
406
407 int countRows(byte[] start, byte[] end) throws IOException {
408 Scan s = new Scan(start, end);
409 ResultScanner rs = tbl.getScanner(s);
410 int i = 0;
411 while (rs.next() != null) {
412 i++;
413 }
414 return i;
415 }
416
417
418
419
420
421
422 void deleteTable(TableName tablename) throws IOException {
423 HBaseAdmin admin = new HBaseAdmin(conf);
424 admin.getConnection().clearRegionCache();
425 if (admin.isTableEnabled(tablename)) {
426 admin.disableTableAsync(tablename);
427 }
428 long totalWait = 0;
429 long maxWait = 30*1000;
430 long sleepTime = 250;
431 while (!admin.isTableDisabled(tablename)) {
432 try {
433 Thread.sleep(sleepTime);
434 totalWait += sleepTime;
435 if (totalWait >= maxWait) {
436 fail("Waited too long for table to be disabled + " + tablename);
437 }
438 } catch (InterruptedException e) {
439 e.printStackTrace();
440 fail("Interrupted when trying to disable table " + tablename);
441 }
442 }
443 admin.deleteTable(tablename);
444 }
445
446
447
448
449 @Test
450 public void testHBaseFsckClean() throws Exception {
451 assertNoErrors(doFsck(conf, false));
452 TableName table = TableName.valueOf("tableClean");
453 try {
454 HBaseFsck hbck = doFsck(conf, false);
455 assertNoErrors(hbck);
456
457 setupTable(table);
458 assertEquals(ROWKEYS.length, countRows());
459
460
461 hbck = doFsck(conf, false);
462 assertNoErrors(hbck);
463 assertEquals(0, hbck.getOverlapGroups(table).size());
464 assertEquals(ROWKEYS.length, countRows());
465 } finally {
466 deleteTable(table);
467 }
468 }
469
470
471
472
473 @Test
474 public void testHbckThreadpooling() throws Exception {
475 TableName table =
476 TableName.valueOf("tableDupeStartKey");
477 try {
478
479 setupTable(table);
480
481
482 Configuration newconf = new Configuration(conf);
483 newconf.setInt("hbasefsck.numthreads", 1);
484 assertNoErrors(doFsck(newconf, false));
485
486
487 } finally {
488 deleteTable(table);
489 }
490 }
491
492 @Test
493 public void testHbckFixOrphanTable() throws Exception {
494 TableName table = TableName.valueOf("tableInfo");
495 FileSystem fs = null;
496 Path tableinfo = null;
497 try {
498 setupTable(table);
499 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
500
501 Path hbaseTableDir = FSUtils.getTableDir(
502 FSUtils.getRootDir(conf), table);
503 fs = hbaseTableDir.getFileSystem(conf);
504 FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
505 tableinfo = status.getPath();
506 fs.rename(tableinfo, new Path("/.tableinfo"));
507
508
509 HBaseFsck hbck = doFsck(conf, false);
510 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
511
512
513 hbck = doFsck(conf, true);
514 assertNoErrors(hbck);
515 status = null;
516 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
517 assertNotNull(status);
518
519 HTableDescriptor htd = admin.getTableDescriptor(table);
520 htd.setValue("NOT_DEFAULT", "true");
521 admin.disableTable(table);
522 admin.modifyTable(table, htd);
523 admin.enableTable(table);
524 fs.delete(status.getPath(), true);
525
526
527 htd = admin.getTableDescriptor(table);
528 hbck = doFsck(conf, true);
529 assertNoErrors(hbck);
530 status = null;
531 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
532 assertNotNull(status);
533 htd = admin.getTableDescriptor(table);
534 assertEquals(htd.getValue("NOT_DEFAULT"), "true");
535 } finally {
536 fs.rename(new Path("/.tableinfo"), tableinfo);
537 deleteTable(table);
538 }
539 }
540
541
542
543
544
545
546 @Test
547 public void testParallelHbck() throws Exception {
548 final ExecutorService service;
549 final Future<HBaseFsck> hbck1,hbck2;
550
551 class RunHbck implements Callable<HBaseFsck>{
552 boolean fail = true;
553 @Override
554 public HBaseFsck call(){
555 try{
556 return doFsck(conf, false);
557 } catch(Exception e){
558 if (e.getMessage().contains("Duplicate hbck")) {
559 fail = false;
560 } else {
561 LOG.fatal("hbck failed.", e);
562 }
563 }
564
565 if (fail) fail();
566 return null;
567 }
568 }
569 service = Executors.newFixedThreadPool(2);
570 hbck1 = service.submit(new RunHbck());
571 hbck2 = service.submit(new RunHbck());
572 service.shutdown();
573
574 service.awaitTermination(15, TimeUnit.SECONDS);
575 HBaseFsck h1 = hbck1.get();
576 HBaseFsck h2 = hbck2.get();
577
578 assert(h1 == null || h2 == null);
579 if (h1 != null) {
580 assert(h1.getRetCode() >= 0);
581 }
582 if (h2 != null) {
583 assert(h2.getRetCode() >= 0);
584 }
585 }
586
587
588
589
590
591 @Test
592 public void testDupeStartKey() throws Exception {
593 TableName table =
594 TableName.valueOf("tableDupeStartKey");
595 try {
596 setupTable(table);
597 assertNoErrors(doFsck(conf, false));
598 assertEquals(ROWKEYS.length, countRows());
599
600
601 HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
602 Bytes.toBytes("A"), Bytes.toBytes("A2"));
603 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
604 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
605 .waitForAssignment(hriDupe);
606 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
607 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
608
609 HBaseFsck hbck = doFsck(conf, false);
610 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
611 ERROR_CODE.DUPE_STARTKEYS});
612 assertEquals(2, hbck.getOverlapGroups(table).size());
613 assertEquals(ROWKEYS.length, countRows());
614
615
616 doFsck(conf,true);
617
618
619 HBaseFsck hbck2 = doFsck(conf,false);
620 assertNoErrors(hbck2);
621 assertEquals(0, hbck2.getOverlapGroups(table).size());
622 assertEquals(ROWKEYS.length, countRows());
623 } finally {
624 deleteTable(table);
625 }
626 }
627
628
629
630
631 Map<ServerName, List<String>> getDeployedHRIs(
632 final HBaseAdmin admin) throws IOException {
633 ClusterStatus status = admin.getClusterStatus();
634 Collection<ServerName> regionServers = status.getServers();
635 Map<ServerName, List<String>> mm =
636 new HashMap<ServerName, List<String>>();
637 HConnection connection = admin.getConnection();
638 for (ServerName hsi : regionServers) {
639 AdminProtos.AdminService.BlockingInterface server = connection.getAdmin(hsi);
640
641
642 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
643 List<String> regionNames = new ArrayList<String>();
644 for (HRegionInfo hri : regions) {
645 regionNames.add(hri.getRegionNameAsString());
646 }
647 mm.put(hsi, regionNames);
648 }
649 return mm;
650 }
651
652
653
654
655 ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
656 for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
657 if (e.getValue().contains(hri.getRegionNameAsString())) {
658 return e.getKey();
659 }
660 }
661 return null;
662 }
663
664
665
666
667
668 @Test
669 public void testDupeRegion() throws Exception {
670 TableName table =
671 TableName.valueOf("tableDupeRegion");
672 try {
673 setupTable(table);
674 assertNoErrors(doFsck(conf, false));
675 assertEquals(ROWKEYS.length, countRows());
676
677
678 HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
679 Bytes.toBytes("A"), Bytes.toBytes("B"));
680
681 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
682 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
683 .waitForAssignment(hriDupe);
684 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
685 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
686
687
688
689
690
691 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
692 while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
693 Thread.sleep(250);
694 }
695
696 LOG.debug("Finished assignment of dupe region");
697
698
699 HBaseFsck hbck = doFsck(conf, false);
700 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
701 ERROR_CODE.DUPE_STARTKEYS});
702 assertEquals(2, hbck.getOverlapGroups(table).size());
703 assertEquals(ROWKEYS.length, countRows());
704
705
706 doFsck(conf,true);
707
708
709 HBaseFsck hbck2 = doFsck(conf,false);
710 assertNoErrors(hbck2);
711 assertEquals(0, hbck2.getOverlapGroups(table).size());
712 assertEquals(ROWKEYS.length, countRows());
713 } finally {
714 deleteTable(table);
715 }
716 }
717
718
719
720
721 @Test
722 public void testDegenerateRegions() throws Exception {
723 TableName table =
724 TableName.valueOf("tableDegenerateRegions");
725 try {
726 setupTable(table);
727 assertNoErrors(doFsck(conf,false));
728 assertEquals(ROWKEYS.length, countRows());
729
730
731 HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
732 Bytes.toBytes("B"), Bytes.toBytes("B"));
733 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
734 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
735 .waitForAssignment(hriDupe);
736 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
737 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
738
739 HBaseFsck hbck = doFsck(conf,false);
740 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION,
741 ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS});
742 assertEquals(2, hbck.getOverlapGroups(table).size());
743 assertEquals(ROWKEYS.length, countRows());
744
745
746 doFsck(conf,true);
747
748
749 HBaseFsck hbck2 = doFsck(conf,false);
750 assertNoErrors(hbck2);
751 assertEquals(0, hbck2.getOverlapGroups(table).size());
752 assertEquals(ROWKEYS.length, countRows());
753 } finally {
754 deleteTable(table);
755 }
756 }
757
758
759
760
761
762 @Test
763 public void testContainedRegionOverlap() throws Exception {
764 TableName table =
765 TableName.valueOf("tableContainedRegionOverlap");
766 try {
767 setupTable(table);
768 assertEquals(ROWKEYS.length, countRows());
769
770
771 HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
772 Bytes.toBytes("A2"), Bytes.toBytes("B"));
773 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
774 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
775 .waitForAssignment(hriOverlap);
776 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
777 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
778
779 HBaseFsck hbck = doFsck(conf, false);
780 assertErrors(hbck, new ERROR_CODE[] {
781 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
782 assertEquals(2, hbck.getOverlapGroups(table).size());
783 assertEquals(ROWKEYS.length, countRows());
784
785
786 doFsck(conf, true);
787
788
789 HBaseFsck hbck2 = doFsck(conf,false);
790 assertNoErrors(hbck2);
791 assertEquals(0, hbck2.getOverlapGroups(table).size());
792 assertEquals(ROWKEYS.length, countRows());
793 } finally {
794 deleteTable(table);
795 }
796 }
797
798
799
800
801
802
803
804 @Test
805 public void testSidelineOverlapRegion() throws Exception {
806 TableName table =
807 TableName.valueOf("testSidelineOverlapRegion");
808 try {
809 setupTable(table);
810 assertEquals(ROWKEYS.length, countRows());
811
812
813 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
814 HMaster master = cluster.getMaster();
815 HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(),
816 Bytes.toBytes("A"), Bytes.toBytes("AB"));
817 master.assignRegion(hriOverlap1);
818 master.getAssignmentManager().waitForAssignment(hriOverlap1);
819 HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(),
820 Bytes.toBytes("AB"), Bytes.toBytes("B"));
821 master.assignRegion(hriOverlap2);
822 master.getAssignmentManager().waitForAssignment(hriOverlap2);
823
824 HBaseFsck hbck = doFsck(conf, false);
825 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
826 ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
827 assertEquals(3, hbck.getOverlapGroups(table).size());
828 assertEquals(ROWKEYS.length, countRows());
829
830
831 Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
832 ServerName serverName = null;
833 byte[] regionName = null;
834 for (HbckInfo hbi: overlapGroups.values()) {
835 if ("A".equals(Bytes.toString(hbi.getStartKey()))
836 && "B".equals(Bytes.toString(hbi.getEndKey()))) {
837 regionName = hbi.getRegionName();
838
839
840 int k = cluster.getServerWith(regionName);
841 for (int i = 0; i < 3; i++) {
842 if (i != k) {
843 HRegionServer rs = cluster.getRegionServer(i);
844 serverName = rs.getServerName();
845 break;
846 }
847 }
848
849 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
850 HBaseFsckRepair.closeRegionSilentlyAndWait(admin,
851 cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
852 admin.offline(regionName);
853 break;
854 }
855 }
856
857 assertNotNull(regionName);
858 assertNotNull(serverName);
859 HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
860 Put put = new Put(regionName);
861 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
862 Bytes.toBytes(serverName.getHostAndPort()));
863 meta.put(put);
864
865
866 HBaseFsck fsck = new HBaseFsck(conf);
867 fsck.connect();
868 fsck.setDisplayFullReport();
869 fsck.setTimeLag(0);
870 fsck.setFixAssignments(true);
871 fsck.setFixMeta(true);
872 fsck.setFixHdfsHoles(true);
873 fsck.setFixHdfsOverlaps(true);
874 fsck.setFixHdfsOrphans(true);
875 fsck.setFixVersionFile(true);
876 fsck.setSidelineBigOverlaps(true);
877 fsck.setMaxMerge(2);
878 fsck.onlineHbck();
879
880
881
882 HBaseFsck hbck2 = doFsck(conf,false);
883 assertNoErrors(hbck2);
884 assertEquals(0, hbck2.getOverlapGroups(table).size());
885 assertTrue(ROWKEYS.length > countRows());
886 } finally {
887 deleteTable(table);
888 }
889 }
890
891
892
893
894
895 @Test
896 public void testOverlapAndOrphan() throws Exception {
897 TableName table =
898 TableName.valueOf("tableOverlapAndOrphan");
899 try {
900 setupTable(table);
901 assertEquals(ROWKEYS.length, countRows());
902
903
904 TEST_UTIL.getHBaseAdmin().disableTable(table);
905 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
906 Bytes.toBytes("B"), true, true, false, true);
907 TEST_UTIL.getHBaseAdmin().enableTable(table);
908
909 HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
910 Bytes.toBytes("A2"), Bytes.toBytes("B"));
911 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
912 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
913 .waitForAssignment(hriOverlap);
914 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
915 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
916
917 HBaseFsck hbck = doFsck(conf, false);
918 assertErrors(hbck, new ERROR_CODE[] {
919 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
920 ERROR_CODE.HOLE_IN_REGION_CHAIN});
921
922
923 doFsck(conf, true);
924
925
926 HBaseFsck hbck2 = doFsck(conf,false);
927 assertNoErrors(hbck2);
928 assertEquals(0, hbck2.getOverlapGroups(table).size());
929 assertEquals(ROWKEYS.length, countRows());
930 } finally {
931 deleteTable(table);
932 }
933 }
934
935
936
937
938
939
940 @Test
941 public void testCoveredStartKey() throws Exception {
942 TableName table =
943 TableName.valueOf("tableCoveredStartKey");
944 try {
945 setupTable(table);
946 assertEquals(ROWKEYS.length, countRows());
947
948
949 HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
950 Bytes.toBytes("A2"), Bytes.toBytes("B2"));
951 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
952 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
953 .waitForAssignment(hriOverlap);
954 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
955 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
956
957 HBaseFsck hbck = doFsck(conf, false);
958 assertErrors(hbck, new ERROR_CODE[] {
959 ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
960 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
961 assertEquals(3, hbck.getOverlapGroups(table).size());
962 assertEquals(ROWKEYS.length, countRows());
963
964
965 doFsck(conf, true);
966
967
968 HBaseFsck hbck2 = doFsck(conf, false);
969 assertErrors(hbck2, new ERROR_CODE[0]);
970 assertEquals(0, hbck2.getOverlapGroups(table).size());
971 assertEquals(ROWKEYS.length, countRows());
972 } finally {
973 deleteTable(table);
974 }
975 }
976
977
978
979
980
981 @Test
982 public void testRegionHole() throws Exception {
983 TableName table =
984 TableName.valueOf("tableRegionHole");
985 try {
986 setupTable(table);
987 assertEquals(ROWKEYS.length, countRows());
988
989
990 TEST_UTIL.getHBaseAdmin().disableTable(table);
991 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
992 Bytes.toBytes("C"), true, true, true);
993 TEST_UTIL.getHBaseAdmin().enableTable(table);
994
995 HBaseFsck hbck = doFsck(conf, false);
996 assertErrors(hbck, new ERROR_CODE[] {
997 ERROR_CODE.HOLE_IN_REGION_CHAIN});
998
999 assertEquals(0, hbck.getOverlapGroups(table).size());
1000
1001
1002 doFsck(conf, true);
1003
1004
1005 assertNoErrors(doFsck(conf,false));
1006 assertEquals(ROWKEYS.length - 2 , countRows());
1007 } finally {
1008 deleteTable(table);
1009 }
1010 }
1011
1012
1013
1014
1015
1016 @Test
1017 public void testHDFSRegioninfoMissing() throws Exception {
1018 TableName table =
1019 TableName.valueOf("tableHDFSRegioininfoMissing");
1020 try {
1021 setupTable(table);
1022 assertEquals(ROWKEYS.length, countRows());
1023
1024
1025 TEST_UTIL.getHBaseAdmin().disableTable(table);
1026 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1027 Bytes.toBytes("C"), true, true, false, true);
1028 TEST_UTIL.getHBaseAdmin().enableTable(table);
1029
1030 HBaseFsck hbck = doFsck(conf, false);
1031 assertErrors(hbck, new ERROR_CODE[] {
1032 ERROR_CODE.ORPHAN_HDFS_REGION,
1033 ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1034 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1035
1036 assertEquals(0, hbck.getOverlapGroups(table).size());
1037
1038
1039 doFsck(conf, true);
1040
1041
1042 assertNoErrors(doFsck(conf, false));
1043 assertEquals(ROWKEYS.length, countRows());
1044 } finally {
1045 deleteTable(table);
1046 }
1047 }
1048
1049
1050
1051
1052
1053
1054 @Test(timeout = 180000)
1055 public void testHDFSRegioninfoMissingAndCheckRegionBoundary() throws Exception {
1056 TableName table = TableName.valueOf("testHDFSRegioninfoMissingAndCheckRegionBoundary");
1057 try {
1058 setupTable(table);
1059 assertEquals(ROWKEYS.length, countRows());
1060
1061
1062 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1063 admin.disableTable(table);
1064 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
1065 true, false, true);
1066 admin.enableTable(table);
1067
1068 HBaseFsck hbck = doFsck(conf, false);
1069 assertErrors(hbck,
1070 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1071 HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
1072 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1073 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1074
1075 assertEquals(0, hbck.getOverlapGroups(table).size());
1076
1077
1078 doFsck(conf, true);
1079
1080
1081 assertNoErrors(doFsck(conf, false));
1082
1083
1084 for (int i = 0; i < ROWKEYS.length; i++) {
1085 if (i != ROWKEYS.length - 1) {
1086 assertEquals(1, countRows(ROWKEYS[i], ROWKEYS[i + 1]));
1087 } else {
1088 assertEquals(1, countRows(ROWKEYS[i], null));
1089 }
1090 }
1091
1092 } finally {
1093 deleteTable(table);
1094 }
1095 }
1096
1097
1098
1099
1100
1101 @Test
1102 public void testNotInMetaOrDeployedHole() throws Exception {
1103 TableName table =
1104 TableName.valueOf("tableNotInMetaOrDeployedHole");
1105 try {
1106 setupTable(table);
1107 assertEquals(ROWKEYS.length, countRows());
1108
1109
1110 TEST_UTIL.getHBaseAdmin().disableTable(table);
1111 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1112 Bytes.toBytes("C"), true, true, false);
1113 TEST_UTIL.getHBaseAdmin().enableTable(table);
1114
1115 HBaseFsck hbck = doFsck(conf, false);
1116 assertErrors(hbck, new ERROR_CODE[] {
1117 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1118
1119 assertEquals(0, hbck.getOverlapGroups(table).size());
1120
1121
1122 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1123 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1124
1125
1126 assertNoErrors(doFsck(conf,false));
1127 assertEquals(ROWKEYS.length, countRows());
1128 } finally {
1129 deleteTable(table);
1130 }
1131 }
1132
1133
1134
1135
1136 @Test
1137 public void testNotInMetaHole() throws Exception {
1138 TableName table =
1139 TableName.valueOf("tableNotInMetaHole");
1140 try {
1141 setupTable(table);
1142 assertEquals(ROWKEYS.length, countRows());
1143
1144
1145 TEST_UTIL.getHBaseAdmin().disableTable(table);
1146 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1147 Bytes.toBytes("C"), false, true, false);
1148 TEST_UTIL.getHBaseAdmin().enableTable(table);
1149
1150 HBaseFsck hbck = doFsck(conf, false);
1151 assertErrors(hbck, new ERROR_CODE[] {
1152 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1153
1154 assertEquals(0, hbck.getOverlapGroups(table).size());
1155
1156
1157 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1158 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1159
1160
1161 assertNoErrors(doFsck(conf,false));
1162 assertEquals(ROWKEYS.length, countRows());
1163 } finally {
1164 deleteTable(table);
1165 }
1166 }
1167
1168
1169
1170
1171
1172 @Test
1173 public void testNotInHdfs() throws Exception {
1174 TableName table =
1175 TableName.valueOf("tableNotInHdfs");
1176 try {
1177 setupTable(table);
1178 assertEquals(ROWKEYS.length, countRows());
1179
1180
1181 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1182
1183
1184 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1185 Bytes.toBytes("C"), false, false, true);
1186
1187 HBaseFsck hbck = doFsck(conf, false);
1188 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1189
1190 assertEquals(0, hbck.getOverlapGroups(table).size());
1191
1192
1193 doFsck(conf, true);
1194
1195
1196 assertNoErrors(doFsck(conf,false));
1197 assertEquals(ROWKEYS.length - 2, countRows());
1198 } finally {
1199 deleteTable(table);
1200 }
1201 }
1202
1203
1204
1205
1206
1207 @Test
1208 public void testNoHdfsTable() throws Exception {
1209 TableName table = TableName.valueOf("NoHdfsTable");
1210 setupTable(table);
1211 assertEquals(ROWKEYS.length, countRows());
1212
1213
1214 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1215
1216
1217 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1218 Bytes.toBytes("A"), false, false, true);
1219 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1220 Bytes.toBytes("B"), false, false, true);
1221 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1222 Bytes.toBytes("C"), false, false, true);
1223 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1224 Bytes.toBytes(""), false, false, true);
1225
1226
1227 deleteTableDir(table);
1228
1229 HBaseFsck hbck = doFsck(conf, false);
1230 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1231 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1232 ERROR_CODE.NOT_IN_HDFS,});
1233
1234 assertEquals(0, hbck.getOverlapGroups(table).size());
1235
1236
1237 doFsck(conf, true);
1238
1239
1240 assertNoErrors(doFsck(conf,false));
1241 assertFalse("Table "+ table + " should have been deleted",
1242 TEST_UTIL.getHBaseAdmin().tableExists(table));
1243 }
1244
1245 public void deleteTableDir(TableName table) throws IOException {
1246 Path rootDir = FSUtils.getRootDir(conf);
1247 FileSystem fs = rootDir.getFileSystem(conf);
1248 Path p = FSUtils.getTableDir(rootDir, table);
1249 HBaseFsck.debugLsr(conf, p);
1250 boolean success = fs.delete(p, true);
1251 LOG.info("Deleted " + p + " sucessfully? " + success);
1252 }
1253
1254
1255
1256
1257 @Test
1258 public void testNoVersionFile() throws Exception {
1259
1260 Path rootDir = FSUtils.getRootDir(conf);
1261 FileSystem fs = rootDir.getFileSystem(conf);
1262 Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1263 fs.delete(versionFile, true);
1264
1265
1266 HBaseFsck hbck = doFsck(conf, false);
1267 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1268
1269 doFsck(conf, true);
1270
1271
1272 assertNoErrors(doFsck(conf, false));
1273 }
1274
1275
1276
1277
1278 @Test
1279 public void testRegionShouldNotBeDeployed() throws Exception {
1280 TableName table =
1281 TableName.valueOf("tableRegionShouldNotBeDeployed");
1282 try {
1283 LOG.info("Starting testRegionShouldNotBeDeployed.");
1284 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1285 assertTrue(cluster.waitForActiveAndReadyMaster());
1286
1287
1288 byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1289 Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1290 HTableDescriptor htdDisabled = new HTableDescriptor(table);
1291 htdDisabled.addFamily(new HColumnDescriptor(FAM));
1292
1293
1294 FSTableDescriptors fstd = new FSTableDescriptors(conf);
1295 fstd.createTableDescriptor(htdDisabled);
1296 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
1297 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
1298
1299
1300 HRegionServer hrs = cluster.getRegionServer(0);
1301
1302
1303 TEST_UTIL.getHBaseAdmin().disableTable(table);
1304 TEST_UTIL.getHBaseAdmin().enableTable(table);
1305
1306
1307 TEST_UTIL.getHBaseAdmin().disableTable(table);
1308 HRegionInfo region = disabledRegions.remove(0);
1309 byte[] regionName = region.getRegionName();
1310
1311
1312 assertTrue(cluster.getServerWith(regionName) == -1);
1313
1314
1315
1316
1317
1318 HRegion r = HRegion.openHRegion(
1319 region, htdDisabled, hrs.getWAL(region), conf);
1320 hrs.addToOnlineRegions(r);
1321
1322 HBaseFsck hbck = doFsck(conf, false);
1323 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1324
1325
1326 doFsck(conf, true);
1327
1328
1329 assertNoErrors(doFsck(conf, false));
1330 } finally {
1331 TEST_UTIL.getHBaseAdmin().enableTable(table);
1332 deleteTable(table);
1333 }
1334 }
1335
1336
1337
1338
1339 @Test
1340 public void testFixByTable() throws Exception {
1341 TableName table1 =
1342 TableName.valueOf("testFixByTable1");
1343 TableName table2 =
1344 TableName.valueOf("testFixByTable2");
1345 try {
1346 setupTable(table1);
1347
1348 TEST_UTIL.getHBaseAdmin().flush(table1.getName());
1349
1350 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1351 Bytes.toBytes("C"), false, false, true);
1352
1353 setupTable(table2);
1354
1355 TEST_UTIL.getHBaseAdmin().flush(table2.getName());
1356
1357 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1358 Bytes.toBytes("C"), false, false, true);
1359
1360 HBaseFsck hbck = doFsck(conf, false);
1361 assertErrors(hbck, new ERROR_CODE[] {
1362 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1363
1364
1365 doFsck(conf, true, table1);
1366
1367 assertNoErrors(doFsck(conf, false, table1));
1368
1369 assertErrors(doFsck(conf, false, table2),
1370 new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1371
1372
1373 doFsck(conf, true, table2);
1374
1375 assertNoErrors(doFsck(conf, false));
1376 assertEquals(ROWKEYS.length - 2, countRows());
1377 } finally {
1378 deleteTable(table1);
1379 deleteTable(table2);
1380 }
1381 }
1382
1383
1384
1385 @Test
1386 public void testLingeringSplitParent() throws Exception {
1387 TableName table =
1388 TableName.valueOf("testLingeringSplitParent");
1389 HTable meta = null;
1390 try {
1391 setupTable(table);
1392 assertEquals(ROWKEYS.length, countRows());
1393
1394
1395 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1396 HRegionLocation location = tbl.getRegionLocation("B");
1397
1398
1399 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1400 Bytes.toBytes("C"), true, true, false);
1401
1402
1403 meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
1404 HRegionInfo hri = location.getRegionInfo();
1405
1406 HRegionInfo a = new HRegionInfo(tbl.getName(),
1407 Bytes.toBytes("B"), Bytes.toBytes("BM"));
1408 HRegionInfo b = new HRegionInfo(tbl.getName(),
1409 Bytes.toBytes("BM"), Bytes.toBytes("C"));
1410
1411 hri.setOffline(true);
1412 hri.setSplit(true);
1413
1414 MetaEditor.addRegionToMeta(meta, hri, a, b);
1415 meta.flushCommits();
1416 TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1417
1418 HBaseFsck hbck = doFsck(conf, false);
1419 assertErrors(hbck, new ERROR_CODE[] {
1420 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1421
1422
1423 hbck = doFsck(conf, true);
1424 assertErrors(hbck, new ERROR_CODE[] {
1425 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1426 assertFalse(hbck.shouldRerun());
1427 hbck = doFsck(conf, false);
1428 assertErrors(hbck, new ERROR_CODE[] {
1429 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1430
1431
1432 hbck = new HBaseFsck(conf);
1433 hbck.connect();
1434 hbck.setDisplayFullReport();
1435 hbck.setTimeLag(0);
1436 hbck.setFixSplitParents(true);
1437 hbck.onlineHbck();
1438 assertTrue(hbck.shouldRerun());
1439
1440 Get get = new Get(hri.getRegionName());
1441 Result result = meta.get(get);
1442 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1443 HConstants.SPLITA_QUALIFIER).isEmpty());
1444 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1445 HConstants.SPLITB_QUALIFIER).isEmpty());
1446 TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1447
1448
1449 doFsck(conf, true);
1450
1451
1452 assertNoErrors(doFsck(conf, false));
1453 assertEquals(ROWKEYS.length, countRows());
1454 } finally {
1455 deleteTable(table);
1456 IOUtils.closeQuietly(meta);
1457 }
1458 }
1459
1460
1461
1462
1463
1464 @Test
1465 public void testValidLingeringSplitParent() throws Exception {
1466 TableName table =
1467 TableName.valueOf("testLingeringSplitParent");
1468 HTable meta = null;
1469 try {
1470 setupTable(table);
1471 assertEquals(ROWKEYS.length, countRows());
1472
1473
1474 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1475 HRegionLocation location = tbl.getRegionLocation("B");
1476
1477 meta = new HTable(conf, TableName.META_TABLE_NAME);
1478 HRegionInfo hri = location.getRegionInfo();
1479
1480
1481 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1482 byte[] regionName = location.getRegionInfo().getRegionName();
1483 admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1484 TestEndToEndSplitTransaction.blockUntilRegionSplit(
1485 TEST_UTIL.getConfiguration(), 60000, regionName, true);
1486
1487
1488
1489
1490 HBaseFsck hbck = doFsck(
1491 conf, true, true, false, false, false, true, true, true, false, false, false, null);
1492 assertErrors(hbck, new ERROR_CODE[] {});
1493
1494
1495 Get get = new Get(hri.getRegionName());
1496 Result result = meta.get(get);
1497 assertNotNull(result);
1498 assertNotNull(HRegionInfo.getHRegionInfo(result));
1499
1500 assertEquals(ROWKEYS.length, countRows());
1501
1502
1503 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1504 assertNoErrors(doFsck(conf, false));
1505 } finally {
1506 deleteTable(table);
1507 IOUtils.closeQuietly(meta);
1508 }
1509 }
1510
1511
1512
1513
1514
1515 @Test(timeout=75000)
1516 public void testSplitDaughtersNotInMeta() throws Exception {
1517 TableName table =
1518 TableName.valueOf("testSplitdaughtersNotInMeta");
1519 HTable meta = null;
1520 try {
1521 setupTable(table);
1522 assertEquals(ROWKEYS.length, countRows());
1523
1524
1525 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1526 HRegionLocation location = tbl.getRegionLocation("B");
1527
1528 meta = new HTable(conf, TableName.META_TABLE_NAME);
1529 HRegionInfo hri = location.getRegionInfo();
1530
1531
1532 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1533 byte[] regionName = location.getRegionInfo().getRegionName();
1534 admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1535 TestEndToEndSplitTransaction.blockUntilRegionSplit(
1536 TEST_UTIL.getConfiguration(), 60000, regionName, true);
1537
1538 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(meta.get(new Get(regionName)));
1539
1540
1541 Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1542 undeployRegion(admin, hris.get(daughters.getFirst()), daughters.getFirst());
1543 undeployRegion(admin, hris.get(daughters.getSecond()), daughters.getSecond());
1544
1545 meta.delete(new Delete(daughters.getFirst().getRegionName()));
1546 meta.delete(new Delete(daughters.getSecond().getRegionName()));
1547 meta.flushCommits();
1548
1549 HBaseFsck hbck = doFsck(conf, false);
1550 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1551 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1552
1553
1554 hbck = doFsck(
1555 conf, true, true, false, false, false, false, false, false, false, false, false, null);
1556 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1557 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1558
1559
1560 Get get = new Get(hri.getRegionName());
1561 Result result = meta.get(get);
1562 assertNotNull(result);
1563 assertNotNull(HRegionInfo.getHRegionInfo(result));
1564
1565 assertEquals(ROWKEYS.length, countRows());
1566
1567
1568 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1569 assertNoErrors(doFsck(conf, false));
1570 } finally {
1571 deleteTable(table);
1572 IOUtils.closeQuietly(meta);
1573 }
1574 }
1575
1576
1577
1578
1579
1580 @Test(timeout=120000)
1581 public void testMissingFirstRegion() throws Exception {
1582 TableName table =
1583 TableName.valueOf("testMissingFirstRegion");
1584 try {
1585 setupTable(table);
1586 assertEquals(ROWKEYS.length, countRows());
1587
1588
1589 TEST_UTIL.getHBaseAdmin().disableTable(table);
1590 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1591 true, true);
1592 TEST_UTIL.getHBaseAdmin().enableTable(table);
1593
1594 HBaseFsck hbck = doFsck(conf, false);
1595 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1596
1597 doFsck(conf, true);
1598
1599 assertNoErrors(doFsck(conf, false));
1600 } finally {
1601 deleteTable(table);
1602 }
1603 }
1604
1605
1606
1607
1608
1609 @Test(timeout=120000)
1610 public void testRegionDeployedNotInHdfs() throws Exception {
1611 TableName table =
1612 TableName.valueOf("testSingleRegionDeployedNotInHdfs");
1613 try {
1614 setupTable(table);
1615 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1616
1617
1618 deleteRegion(conf, tbl.getTableDescriptor(),
1619 HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
1620 false, true);
1621
1622 HBaseFsck hbck = doFsck(conf, false);
1623 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
1624
1625 doFsck(conf, true);
1626
1627 assertNoErrors(doFsck(conf, false));
1628 } finally {
1629 deleteTable(table);
1630 }
1631 }
1632
1633
1634
1635
1636
1637 @Test(timeout=120000)
1638 public void testMissingLastRegion() throws Exception {
1639 TableName table =
1640 TableName.valueOf("testMissingLastRegion");
1641 try {
1642 setupTable(table);
1643 assertEquals(ROWKEYS.length, countRows());
1644
1645
1646 TEST_UTIL.getHBaseAdmin().disableTable(table);
1647 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1648 true, true);
1649 TEST_UTIL.getHBaseAdmin().enableTable(table);
1650
1651 HBaseFsck hbck = doFsck(conf, false);
1652 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1653
1654 doFsck(conf, true);
1655
1656 assertNoErrors(doFsck(conf, false));
1657 } finally {
1658 deleteTable(table);
1659 }
1660 }
1661
1662
1663
1664
1665 @Test
1666 public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1667 TableName table =
1668 TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1669 try {
1670 setupTable(table);
1671 assertEquals(ROWKEYS.length, countRows());
1672
1673
1674 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1675 Bytes.toBytes("B"), true, false, false, false);
1676
1677
1678 HBaseFsck hbck = doFsck(conf, false);
1679 assertErrors(hbck, new ERROR_CODE[] {
1680 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1681
1682
1683 HBaseFsck fsck = new HBaseFsck(conf);
1684 fsck.connect();
1685 fsck.setDisplayFullReport();
1686 fsck.setTimeLag(0);
1687 fsck.setCheckHdfs(false);
1688 fsck.onlineHbck();
1689 assertErrors(fsck, new ERROR_CODE[] {
1690 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1691
1692
1693 fsck = new HBaseFsck(conf);
1694 fsck.connect();
1695 fsck.setDisplayFullReport();
1696 fsck.setTimeLag(0);
1697 fsck.setCheckHdfs(false);
1698 fsck.setFixAssignments(true);
1699 fsck.onlineHbck();
1700 assertTrue(fsck.shouldRerun());
1701 fsck.onlineHbck();
1702 assertNoErrors(fsck);
1703
1704 assertEquals(ROWKEYS.length, countRows());
1705 } finally {
1706 deleteTable(table);
1707 }
1708 }
1709
1710
1711
1712
1713
1714
1715 @Test
1716 public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1717 TableName table =
1718 TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
1719 try {
1720 setupTable(table);
1721 assertEquals(ROWKEYS.length, countRows());
1722
1723
1724 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1725 Bytes.toBytes("B"), false, true, false, false);
1726
1727
1728 HBaseFsck hbck = doFsck(conf, false);
1729 assertErrors(hbck, new ERROR_CODE[] {
1730 ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1731
1732
1733 HBaseFsck fsck = new HBaseFsck(conf);
1734 fsck.connect();
1735 fsck.setDisplayFullReport();
1736 fsck.setTimeLag(0);
1737 fsck.setCheckHdfs(false);
1738 fsck.onlineHbck();
1739 assertErrors(fsck, new ERROR_CODE[] {
1740 ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1741
1742
1743 fsck = new HBaseFsck(conf);
1744 fsck.connect();
1745 fsck.setDisplayFullReport();
1746 fsck.setTimeLag(0);
1747 fsck.setCheckHdfs(false);
1748 fsck.setFixAssignments(true);
1749 fsck.setFixMeta(true);
1750 fsck.onlineHbck();
1751 assertFalse(fsck.shouldRerun());
1752 assertErrors(fsck, new ERROR_CODE[] {
1753 ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1754
1755
1756 fsck = doFsck(conf, true);
1757 assertTrue(fsck.shouldRerun());
1758 fsck = doFsck(conf, true);
1759 assertNoErrors(fsck);
1760 } finally {
1761 deleteTable(table);
1762 }
1763 }
1764
1765
1766
1767
1768
1769 @Test
1770 public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1771 TableName table =
1772 TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
1773 try {
1774 setupTable(table);
1775 assertEquals(ROWKEYS.length, countRows());
1776
1777
1778 TEST_UTIL.getHBaseAdmin().disableTable(table);
1779 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1780 Bytes.toBytes("B"), true, true, false, true);
1781 TEST_UTIL.getHBaseAdmin().enableTable(table);
1782
1783 HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
1784 Bytes.toBytes("A2"), Bytes.toBytes("B"));
1785 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1786 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1787 .waitForAssignment(hriOverlap);
1788 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1789 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1790
1791 HBaseFsck hbck = doFsck(conf, false);
1792 assertErrors(hbck, new ERROR_CODE[] {
1793 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1794 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1795
1796
1797 HBaseFsck fsck = new HBaseFsck(conf);
1798 fsck.connect();
1799 fsck.setDisplayFullReport();
1800 fsck.setTimeLag(0);
1801 fsck.setCheckHdfs(false);
1802 fsck.onlineHbck();
1803 assertErrors(fsck, new ERROR_CODE[] {
1804 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1805
1806
1807 fsck = new HBaseFsck(conf);
1808 fsck.connect();
1809 fsck.setDisplayFullReport();
1810 fsck.setTimeLag(0);
1811 fsck.setCheckHdfs(false);
1812 fsck.setFixHdfsHoles(true);
1813 fsck.setFixHdfsOverlaps(true);
1814 fsck.setFixHdfsOrphans(true);
1815 fsck.onlineHbck();
1816 assertFalse(fsck.shouldRerun());
1817 assertErrors(fsck, new ERROR_CODE[] {
1818 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1819 } finally {
1820 if (TEST_UTIL.getHBaseAdmin().isTableDisabled(table)) {
1821 TEST_UTIL.getHBaseAdmin().enableTable(table);
1822 }
1823 deleteTable(table);
1824 }
1825 }
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835 Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
1836 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1837 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1838 Path famDir = new Path(regionDir, FAM_STR);
1839
1840
1841 while (true) {
1842 FileStatus[] hfFss = fs.listStatus(famDir);
1843 if (hfFss.length == 0) {
1844 continue;
1845 }
1846 for (FileStatus hfs : hfFss) {
1847 if (!hfs.isDir()) {
1848 return hfs.getPath();
1849 }
1850 }
1851 }
1852 }
1853
1854
1855
1856
1857 @Test(timeout=180000)
1858 public void testQuarantineCorruptHFile() throws Exception {
1859 TableName table = TableName.valueOf(name.getMethodName());
1860 try {
1861 setupTable(table);
1862 assertEquals(ROWKEYS.length, countRows());
1863 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1864
1865 FileSystem fs = FileSystem.get(conf);
1866 Path hfile = getFlushedHFile(fs, table);
1867
1868
1869 TEST_UTIL.getHBaseAdmin().disableTable(table);
1870
1871
1872 Path corrupt = new Path(hfile.getParent(), "deadbeef");
1873 TestHFile.truncateFile(fs, hfile, corrupt);
1874 LOG.info("Created corrupted file " + corrupt);
1875 HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
1876
1877
1878 HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
1879 assertEquals(res.getRetCode(), 0);
1880 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1881 assertEquals(hfcc.getHFilesChecked(), 5);
1882 assertEquals(hfcc.getCorrupted().size(), 1);
1883 assertEquals(hfcc.getFailures().size(), 0);
1884 assertEquals(hfcc.getQuarantined().size(), 1);
1885 assertEquals(hfcc.getMissing().size(), 0);
1886
1887
1888 TEST_UTIL.getHBaseAdmin().enableTable(table);
1889 } finally {
1890 deleteTable(table);
1891 }
1892 }
1893
1894
1895
1896
1897 private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
1898 int corrupt, int fail, int quar, int missing) throws Exception {
1899 try {
1900 setupTable(table);
1901 assertEquals(ROWKEYS.length, countRows());
1902 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1903
1904
1905 TEST_UTIL.getHBaseAdmin().disableTable(table);
1906
1907 String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
1908 table.getNameAsString()};
1909 ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1910 HBaseFsck res = hbck.exec(exec, args);
1911
1912 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1913 assertEquals(hfcc.getHFilesChecked(), check);
1914 assertEquals(hfcc.getCorrupted().size(), corrupt);
1915 assertEquals(hfcc.getFailures().size(), fail);
1916 assertEquals(hfcc.getQuarantined().size(), quar);
1917 assertEquals(hfcc.getMissing().size(), missing);
1918
1919
1920 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1921 admin.enableTableAsync(table);
1922 while (!admin.isTableEnabled(table)) {
1923 try {
1924 Thread.sleep(250);
1925 } catch (InterruptedException e) {
1926 e.printStackTrace();
1927 fail("Interrupted when trying to enable table " + table);
1928 }
1929 }
1930 } finally {
1931 deleteTable(table);
1932 }
1933 }
1934
1935
1936
1937
1938
1939 @Test(timeout=180000)
1940 public void testQuarantineMissingHFile() throws Exception {
1941 TableName table = TableName.valueOf(name.getMethodName());
1942 ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1943
1944 final FileSystem fs = FileSystem.get(conf);
1945 HBaseFsck hbck = new HBaseFsck(conf, exec) {
1946 @Override
1947 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1948 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1949 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1950 @Override
1951 protected void checkHFile(Path p) throws IOException {
1952 if (attemptedFirstHFile.compareAndSet(false, true)) {
1953 assertTrue(fs.delete(p, true));
1954 }
1955 super.checkHFile(p);
1956 }
1957 };
1958 }
1959 };
1960 doQuarantineTest(table, hbck, 4, 0, 0, 0, 1);
1961 }
1962
1963
1964
1965
1966
1967
1968
1969 @Ignore @Test(timeout=180000)
1970 public void testQuarantineMissingFamdir() throws Exception {
1971 TableName table = TableName.valueOf(name.getMethodName());
1972 ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1973
1974 final FileSystem fs = FileSystem.get(conf);
1975 HBaseFsck hbck = new HBaseFsck(conf, exec) {
1976 @Override
1977 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1978 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1979 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1980 @Override
1981 protected void checkColFamDir(Path p) throws IOException {
1982 if (attemptedFirstHFile.compareAndSet(false, true)) {
1983 assertTrue(fs.delete(p, true));
1984 }
1985 super.checkColFamDir(p);
1986 }
1987 };
1988 }
1989 };
1990 doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1991 }
1992
1993
1994
1995
1996
1997 @Test(timeout=180000)
1998 public void testQuarantineMissingRegionDir() throws Exception {
1999 TableName table = TableName.valueOf(name.getMethodName());
2000 ExecutorService exec = new ScheduledThreadPoolExecutor(10);
2001
2002 final FileSystem fs = FileSystem.get(conf);
2003 HBaseFsck hbck = new HBaseFsck(conf, exec) {
2004 @Override
2005 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
2006 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
2007 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
2008 @Override
2009 protected void checkRegionDir(Path p) throws IOException {
2010 if (attemptedFirstHFile.compareAndSet(false, true)) {
2011 assertTrue(fs.delete(p, true));
2012 }
2013 super.checkRegionDir(p);
2014 }
2015 };
2016 }
2017 };
2018 doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
2019 }
2020
2021
2022
2023
2024 @Test
2025 public void testLingeringReferenceFile() throws Exception {
2026 TableName table =
2027 TableName.valueOf("testLingeringReferenceFile");
2028 try {
2029 setupTable(table);
2030 assertEquals(ROWKEYS.length, countRows());
2031
2032
2033 FileSystem fs = FileSystem.get(conf);
2034 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2035 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2036 Path famDir = new Path(regionDir, FAM_STR);
2037 Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
2038 fs.create(fakeReferenceFile);
2039
2040 HBaseFsck hbck = doFsck(conf, false);
2041 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
2042
2043 doFsck(conf, true);
2044
2045 assertNoErrors(doFsck(conf, false));
2046 } finally {
2047 deleteTable(table);
2048 }
2049 }
2050
2051
2052
2053
2054 @Test
2055 public void testMissingRegionInfoQualifier() throws Exception {
2056 TableName table =
2057 TableName.valueOf("testMissingRegionInfoQualifier");
2058 try {
2059 setupTable(table);
2060
2061
2062 final List<Delete> deletes = new LinkedList<Delete>();
2063 HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
2064 MetaScanner.metaScan(conf, new MetaScanner.MetaScannerVisitor() {
2065
2066 @Override
2067 public boolean processRow(Result rowResult) throws IOException {
2068 HRegionInfo hri = MetaScanner.getHRegionInfo(rowResult);
2069 if (hri != null && !hri.getTable().isSystemTable()) {
2070 Delete delete = new Delete(rowResult.getRow());
2071 delete.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2072 deletes.add(delete);
2073 }
2074 return true;
2075 }
2076
2077 @Override
2078 public void close() throws IOException {
2079 }
2080 });
2081 meta.delete(deletes);
2082
2083
2084 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2085 HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
2086 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2087 HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
2088 meta.close();
2089
2090 HBaseFsck hbck = doFsck(conf, false);
2091 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2092
2093
2094 hbck = doFsck(conf, true);
2095
2096
2097 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2098 } finally {
2099 deleteTable(table);
2100 }
2101 }
2102
2103
2104
2105
2106
2107
2108 @Test
2109 public void testErrorReporter() throws Exception {
2110 try {
2111 MockErrorReporter.calledCount = 0;
2112 doFsck(conf, false);
2113 assertEquals(MockErrorReporter.calledCount, 0);
2114
2115 conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
2116 doFsck(conf, false);
2117 assertTrue(MockErrorReporter.calledCount > 20);
2118 } finally {
2119 conf.set("hbasefsck.errorreporter",
2120 PrintingErrorReporter.class.getName());
2121 MockErrorReporter.calledCount = 0;
2122 }
2123 }
2124
2125 static class MockErrorReporter implements ErrorReporter {
2126 static int calledCount = 0;
2127
2128 @Override
2129 public void clear() {
2130 calledCount++;
2131 }
2132
2133 @Override
2134 public void report(String message) {
2135 calledCount++;
2136 }
2137
2138 @Override
2139 public void reportError(String message) {
2140 calledCount++;
2141 }
2142
2143 @Override
2144 public void reportError(ERROR_CODE errorCode, String message) {
2145 calledCount++;
2146 }
2147
2148 @Override
2149 public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2150 calledCount++;
2151 }
2152
2153 @Override
2154 public void reportError(ERROR_CODE errorCode,
2155 String message, TableInfo table, HbckInfo info) {
2156 calledCount++;
2157 }
2158
2159 @Override
2160 public void reportError(ERROR_CODE errorCode, String message,
2161 TableInfo table, HbckInfo info1, HbckInfo info2) {
2162 calledCount++;
2163 }
2164
2165 @Override
2166 public int summarize() {
2167 return ++calledCount;
2168 }
2169
2170 @Override
2171 public void detail(String details) {
2172 calledCount++;
2173 }
2174
2175 @Override
2176 public ArrayList<ERROR_CODE> getErrorList() {
2177 calledCount++;
2178 return new ArrayList<ERROR_CODE>();
2179 }
2180
2181 @Override
2182 public void progress() {
2183 calledCount++;
2184 }
2185
2186 @Override
2187 public void print(String message) {
2188 calledCount++;
2189 }
2190
2191 @Override
2192 public void resetErrors() {
2193 calledCount++;
2194 }
2195
2196 @Override
2197 public boolean tableHasErrors(TableInfo table) {
2198 calledCount++;
2199 return false;
2200 }
2201 }
2202
2203 @Test(timeout=180000)
2204 public void testCheckTableLocks() throws Exception {
2205 IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
2206 EnvironmentEdgeManager.injectEdge(edge);
2207
2208 HBaseFsck hbck = doFsck(conf, false);
2209 assertNoErrors(hbck);
2210
2211 ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
2212
2213
2214 final TableLockManager tableLockManager = TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
2215 TableLock writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2216 "testCheckTableLocks");
2217 writeLock.acquire();
2218 hbck = doFsck(conf, false);
2219 assertNoErrors(hbck);
2220
2221 edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2222 TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));
2223
2224 hbck = doFsck(conf, false);
2225 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2226
2227 final CountDownLatch latch = new CountDownLatch(1);
2228 new Thread() {
2229 @Override
2230 public void run() {
2231 TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2232 "testCheckTableLocks");
2233 try {
2234 latch.countDown();
2235 readLock.acquire();
2236 } catch (IOException ex) {
2237 fail();
2238 } catch (IllegalStateException ex) {
2239 return;
2240 }
2241 fail("should not have come here");
2242 };
2243 }.start();
2244
2245 latch.await();
2246 Threads.sleep(300);
2247
2248 hbck = doFsck(conf, false);
2249 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2250
2251 edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2252 TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));
2253
2254 hbck = doFsck(conf, false);
2255 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK});
2256
2257 conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);
2258
2259 Threads.sleep(10);
2260 hbck = doFsck(conf, true);
2261
2262 hbck = doFsck(conf, false);
2263 assertNoErrors(hbck);
2264
2265
2266 writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2267 "should acquire without blocking");
2268 writeLock.acquire();
2269 writeLock.release();
2270 }
2271
2272
2273
2274
2275 @Test
2276 public void testOrphanedTableZNode() throws Exception {
2277 TableName table = TableName.valueOf("testOrphanedZKTableEntry");
2278
2279 try {
2280 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getZKTable().
2281 setEnablingTable(table);
2282
2283 try {
2284 setupTable(table);
2285 Assert.fail(
2286 "Create table should fail when its ZNode has already existed with ENABLING state.");
2287 } catch(TableExistsException t) {
2288
2289 }
2290
2291 try {
2292 deleteTable(table);
2293 } catch (IOException e) {
2294
2295
2296 }
2297
2298 HBaseFsck hbck = doFsck(conf, false);
2299 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
2300
2301
2302 hbck = doFsck(conf, true);
2303
2304
2305 hbck = doFsck(conf, false);
2306 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
2307
2308 setupTable(table);
2309 } finally {
2310
2311
2312 try {
2313 deleteTable(table);
2314 } catch (IOException e) {
2315
2316
2317 }
2318 }
2319 }
2320
2321 @Test
2322 public void testMetaOffline() throws Exception {
2323
2324 HBaseFsck hbck = doFsck(conf, false);
2325 assertNoErrors(hbck);
2326 deleteMetaRegion(conf, true, false, false);
2327 hbck = doFsck(conf, false);
2328
2329
2330 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2331 hbck = doFsck(conf, true);
2332 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2333 hbck = doFsck(conf, false);
2334 assertNoErrors(hbck);
2335 }
2336
2337 private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
2338 boolean regionInfoOnly) throws IOException, InterruptedException {
2339 HConnection connection = HConnectionManager.getConnection(conf);
2340 HRegionLocation metaLocation = connection.locateRegion(TableName.META_TABLE_NAME,
2341 HConstants.EMPTY_START_ROW);
2342 ServerName hsa = metaLocation.getServerName();
2343 HRegionInfo hri = metaLocation.getRegionInfo();
2344 if (unassign) {
2345 LOG.info("Undeploying meta region " + hri + " from server " + hsa);
2346 undeployRegion(new HBaseAdmin(conf), hsa, hri);
2347 }
2348
2349 if (regionInfoOnly) {
2350 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
2351 Path rootDir = FSUtils.getRootDir(conf);
2352 FileSystem fs = rootDir.getFileSystem(conf);
2353 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2354 hri.getEncodedName());
2355 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
2356 fs.delete(hriPath, true);
2357 }
2358
2359 if (hdfs) {
2360 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
2361 Path rootDir = FSUtils.getRootDir(conf);
2362 FileSystem fs = rootDir.getFileSystem(conf);
2363 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2364 hri.getEncodedName());
2365 HBaseFsck.debugLsr(conf, p);
2366 boolean success = fs.delete(p, true);
2367 LOG.info("Deleted " + p + " sucessfully? " + success);
2368 HBaseFsck.debugLsr(conf, p);
2369 }
2370 }
2371
2372 @Test
2373 public void testTableWithNoRegions() throws Exception {
2374
2375
2376 TableName table =
2377 TableName.valueOf(name.getMethodName());
2378 try {
2379
2380 HTableDescriptor desc = new HTableDescriptor(table);
2381 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
2382 desc.addFamily(hcd);
2383 TEST_UTIL.getHBaseAdmin().createTable(desc);
2384 tbl = new HTable(TEST_UTIL.getConfiguration(), table, executorService);
2385
2386
2387 deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false,
2388 false, true);
2389
2390 HBaseFsck hbck = doFsck(conf, false);
2391 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
2392
2393 doFsck(conf, true);
2394
2395
2396 doFsck(conf, true);
2397
2398
2399 assertNoErrors(doFsck(conf, false));
2400 } finally {
2401 deleteTable(table);
2402 }
2403
2404 }
2405
2406 @Test
2407 public void testHbckAfterRegionMerge() throws Exception {
2408 TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
2409 HTable meta = null;
2410 try {
2411
2412 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
2413 setupTable(table);
2414 assertEquals(ROWKEYS.length, countRows());
2415
2416
2417 TEST_UTIL.getHBaseAdmin().flush(table.getName());
2418 HRegionInfo region1 = tbl.getRegionLocation("A").getRegionInfo();
2419 HRegionInfo region2 = tbl.getRegionLocation("B").getRegionInfo();
2420
2421 int regionCountBeforeMerge = tbl.getRegionLocations().size();
2422
2423 assertNotEquals(region1, region2);
2424
2425
2426 HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
2427 admin.mergeRegions(region1.getEncodedNameAsBytes(),
2428 region2.getEncodedNameAsBytes(), false);
2429
2430
2431 long timeout = System.currentTimeMillis() + 30 * 1000;
2432 while (true) {
2433 if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
2434 break;
2435 } else if (System.currentTimeMillis() > timeout) {
2436 fail("Time out waiting on region " + region1.getEncodedName()
2437 + " and " + region2.getEncodedName() + " be merged");
2438 }
2439 Thread.sleep(10);
2440 }
2441
2442 assertEquals(ROWKEYS.length, countRows());
2443
2444 HBaseFsck hbck = doFsck(conf, false);
2445 assertNoErrors(hbck);
2446
2447 } finally {
2448 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
2449 deleteTable(table);
2450 IOUtils.closeQuietly(meta);
2451 }
2452 }
2453
2454 @Test
2455 public void testRegionBoundariesCheck() throws Exception {
2456 HBaseFsck hbck = doFsck(conf, false);
2457 assertNoErrors(hbck);
2458 try {
2459 hbck.checkRegionBoundaries();
2460 } catch (IllegalArgumentException e) {
2461 if (e.getMessage().endsWith("not a valid DFS filename.")) {
2462 fail("Table directory path is not valid." + e.getMessage());
2463 }
2464 }
2465 }
2466
/**
 * JUnit rule exposing the name of the currently running test method; tests in this class
 * read it via {@code name.getMethodName()} to derive a per-test table name.
 */
@org.junit.Rule
public TestName name = new TestName();
2469
2470 @Test
2471 public void testReadOnlyProperty() throws Exception {
2472 HBaseFsck hbck = doFsck(conf, false);
2473 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2474 hbck.shouldIgnorePreCheckPermission());
2475
2476 hbck = doFsck(conf, true);
2477 Assert.assertEquals("shouldIgnorePreCheckPermission", false,
2478 hbck.shouldIgnorePreCheckPermission());
2479
2480 hbck = doFsck(conf, true);
2481 hbck.setIgnorePreCheckPermission(true);
2482 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2483 hbck.shouldIgnorePreCheckPermission());
2484 }
2485
/**
 * Runs before every test and calls {@code EnvironmentEdgeManager.reset()}, presumably so
 * that an edge injected by an earlier test (testCheckTableLocks injects one) is no longer
 * in effect.
 */
@Before
public void setUp() {
EnvironmentEdgeManager.reset();
}
2490
2491 @Test (timeout=180000)
2492 public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
2493 TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
2494 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
2495 try {
2496 HTableDescriptor desc = new HTableDescriptor(table);
2497 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
2498 TEST_UTIL.getHBaseAdmin().createTable(desc);
2499 tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
2500 for (int i = 0; i < 5; i++) {
2501 Put p1 = new Put(("r" + i).getBytes());
2502 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
2503 tbl.put(p1);
2504 }
2505 TEST_UTIL.getHBaseAdmin().flush(desc.getTableName().toString());
2506 List<HRegion> regions = cluster.getRegions(desc.getTableName());
2507 int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
2508 HRegionServer regionServer = cluster.getRegionServer(serverWith);
2509 cluster.getServerWith(regions.get(0).getRegionName());
2510 SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
2511 st.prepare();
2512 st.stepsBeforePONR(regionServer, regionServer, false);
2513 AssignmentManager am = cluster.getMaster().getAssignmentManager();
2514 Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
2515 for (RegionState state : regionsInTransition.values()) {
2516 am.regionOffline(state.getRegion());
2517 }
2518 ZKAssign.deleteNodeFailSilent(regionServer.getZooKeeper(), regions.get(0).getRegionInfo());
2519 Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
2520 regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
2521 am.assign(regionsMap);
2522 am.waitForAssignment(regions.get(0).getRegionInfo());
2523 HBaseFsck hbck = doFsck(conf, false);
2524 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
2525 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
2526
2527 assertEquals(0, hbck.getOverlapGroups(table).size());
2528
2529
2530 assertErrors(
2531 doFsck(
2532 conf, false, true, false, false, false, false, false, false, false, false, false, null),
2533 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
2534 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
2535
2536
2537 assertNoErrors(doFsck(conf, false));
2538 assertEquals(5, countRows());
2539 } finally {
2540 if (tbl != null) {
2541 tbl.close();
2542 tbl = null;
2543 }
2544 deleteTable(table);
2545 }
2546 }
2547 }