/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org.apache.hadoop.hbase.util;
19
20 import java.io.IOException;
21 import java.io.PrintWriter;
22 import java.io.StringWriter;
23 import java.net.URI;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Collection;
27 import java.util.Collections;
28 import java.util.Comparator;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.Iterator;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.Map.Entry;
35 import java.util.Set;
36 import java.util.SortedMap;
37 import java.util.SortedSet;
38 import java.util.TreeMap;
39 import java.util.TreeSet;
40 import java.util.concurrent.Callable;
41 import java.util.concurrent.ConcurrentSkipListMap;
42 import java.util.concurrent.ExecutionException;
43 import java.util.concurrent.ExecutorService;
44 import java.util.concurrent.Future;
45 import java.util.concurrent.ScheduledThreadPoolExecutor;
46 import java.util.concurrent.atomic.AtomicInteger;
47
48 import org.apache.commons.logging.Log;
49 import org.apache.commons.logging.LogFactory;
50 import org.apache.hadoop.classification.InterfaceAudience;
51 import org.apache.hadoop.classification.InterfaceStability;
52 import org.apache.hadoop.conf.Configuration;
53 import org.apache.hadoop.conf.Configured;
54 import org.apache.hadoop.fs.FileStatus;
55 import org.apache.hadoop.fs.FileSystem;
56 import org.apache.hadoop.fs.Path;
57 import org.apache.hadoop.fs.permission.FsAction;
58 import org.apache.hadoop.hbase.Abortable;
59 import org.apache.hadoop.hbase.ClusterStatus;
60 import org.apache.hadoop.hbase.TableName;
61 import org.apache.hadoop.hbase.HBaseConfiguration;
62 import org.apache.hadoop.hbase.HColumnDescriptor;
63 import org.apache.hadoop.hbase.HConstants;
64 import org.apache.hadoop.hbase.HRegionInfo;
65 import org.apache.hadoop.hbase.HRegionLocation;
66 import org.apache.hadoop.hbase.HTableDescriptor;
67 import org.apache.hadoop.hbase.KeyValue;
68 import org.apache.hadoop.hbase.MasterNotRunningException;
69 import org.apache.hadoop.hbase.ServerName;
70 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
71 import org.apache.hadoop.hbase.catalog.MetaEditor;
72 import org.apache.hadoop.hbase.client.Delete;
73 import org.apache.hadoop.hbase.client.Get;
74 import org.apache.hadoop.hbase.client.HBaseAdmin;
75 import org.apache.hadoop.hbase.client.HConnectable;
76 import org.apache.hadoop.hbase.client.HConnection;
77 import org.apache.hadoop.hbase.client.HConnectionManager;
78 import org.apache.hadoop.hbase.client.HTable;
79 import org.apache.hadoop.hbase.client.MetaScanner;
80 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
81 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
82 import org.apache.hadoop.hbase.client.Put;
83 import org.apache.hadoop.hbase.client.Result;
84 import org.apache.hadoop.hbase.client.RowMutations;
85 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
86 import org.apache.hadoop.hbase.io.hfile.HFile;
87 import org.apache.hadoop.hbase.master.MasterFileSystem;
88 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
89 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
90 import org.apache.hadoop.hbase.regionserver.HRegion;
91 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
92 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
93 import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
94 import org.apache.hadoop.hbase.security.User;
95 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
96 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
97 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
98 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
99 import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
100 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
101 import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
102 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
103 import org.apache.hadoop.security.AccessControlException;
104 import org.apache.hadoop.security.UserGroupInformation;
105 import org.apache.hadoop.util.ReflectionUtils;
106 import org.apache.hadoop.util.Tool;
107 import org.apache.hadoop.util.ToolRunner;
108 import org.apache.zookeeper.KeeperException;
109
110 import com.google.common.base.Joiner;
111 import com.google.common.base.Preconditions;
112 import com.google.common.collect.Lists;
113 import com.google.common.collect.Multimap;
114 import com.google.common.collect.TreeMultimap;
115 import com.google.protobuf.ServiceException;
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
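/**
 * HBaseFsck (hbck) is a tool for checking and repairing an HBase cluster. It
 * verifies two broad properties:
 * <ul>
 * <li>Region consistency: every region is listed exactly once in META, has a
 * matching directory in HDFS, and is deployed on the region server that META
 * says it is (unless its table is disabled).</li>
 * <li>Table integrity: the regions of each table form a single contiguous,
 * non-overlapping chain of start/end keys.</li>
 * </ul>
 * Running without any fix options only reports problems; the various fix*
 * options below enable repairs and should be used with care. A minimal
 * programmatic sketch (assuming a running cluster and a valid Configuration;
 * command-line use goes through {@link org.apache.hadoop.util.ToolRunner}):
 * <pre>
 * HBaseFsck fsck = new HBaseFsck(conf);
 * fsck.connect();
 * int ret = fsck.onlineHbck();
 * </pre>
 */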
162 @InterfaceAudience.Public
163 @InterfaceStability.Evolving
164 public class HBaseFsck extends Configured implements Tool {
165 public static final long DEFAULT_TIME_LAG = 60000;
166 public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
167 private static final int MAX_NUM_THREADS = 50;
168 private static boolean rsSupportsOffline = true;
169 private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
170 private static final int DEFAULT_MAX_MERGE = 5;
171 private static final String TO_BE_LOADED = "to_be_loaded";
172
173
174
175
176 private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
177 private ClusterStatus status;
178 private HConnection connection;
179 private HBaseAdmin admin;
180 private HTable meta;
181 protected ExecutorService executor;
182 private long startMillis = System.currentTimeMillis();
183 private HFileCorruptionChecker hfcc;
184 private int retcode = 0;
185
186
187
188
189 private static boolean details = false;
190 private long timelag = DEFAULT_TIME_LAG;
191 private boolean fixAssignments = false;
192 private boolean fixMeta = false;
193 private boolean checkHdfs = true;
194 private boolean fixHdfsHoles = false;
195 private boolean fixHdfsOverlaps = false;
196 private boolean fixHdfsOrphans = false;
197 private boolean fixTableOrphans = false;
198 private boolean fixVersionFile = false;
199 private boolean fixSplitParents = false;
200 private boolean fixReferenceFiles = false;
201 private boolean fixEmptyMetaCells = false;
202 private boolean fixTableLocks = false;
203
204
205
206 private Set<TableName> tablesIncluded = new HashSet<TableName>();
207 private int maxMerge = DEFAULT_MAX_MERGE;
208 private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
209 private boolean sidelineBigOverlaps = false;
210 private Path sidelineDir = null;
211
212 private boolean rerun = false;
213 private static boolean summary = false;
214 private boolean checkMetaOnly = false;
215 private boolean ignorePreCheckPermission = false;
216
217
218
219
220 final private ErrorReporter errors;
221 int fixes = 0;
222
223
224
225
226
227
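  /**
   * Master list of all regions hbck knows about, keyed by encoded region
   * name. Entries are merged from META, HDFS region directories, and the
   * deployment state reported by region servers, and are consulted when
   * detecting and fixing consistency problems.
   */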
228 private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
229 private TreeSet<TableName> disabledTables =
230 new TreeSet<TableName>();
231
232 private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
233
234
235
236
237
238
239
240
241
242
243
244 private SortedMap<TableName, TableInfo> tablesInfo =
245 new ConcurrentSkipListMap<TableName, TableInfo>();
246
247
248
249
250 private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
251
252 private Map<TableName, Set<String>> orphanTableDirs =
253 new HashMap<TableName, Set<String>>();
254
255
256
257
258
259
260
261
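  /**
   * Constructor that builds its own daemon thread pool for parallel work.
   *
   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */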
262 public HBaseFsck(Configuration conf) throws MasterNotRunningException,
263 ZooKeeperConnectionException, IOException, ClassNotFoundException {
264 super(conf);
265 errors = getErrorReporter(conf);
266
267 int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
268 executor = new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
269 }
270
271
272
273
274
275
276
277
278
279
280
281 public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
282 ZooKeeperConnectionException, IOException, ClassNotFoundException {
283 super(conf);
284 errors = getErrorReporter(getConf());
285 this.executor = exec;
286 }
287
288
289
290
291
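  /**
   * To repair region consistency, one must call connect() first: it creates
   * the HBaseAdmin, opens a handle on the META table, and caches the current
   * cluster status.
   */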
292 public void connect() throws IOException {
293 admin = new HBaseAdmin(getConf());
294 meta = new HTable(getConf(), TableName.META_TABLE_NAME);
295 status = admin.getClusterStatus();
296 connection = admin.getConnection();
297 }
298
299
300
301
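  /**
   * Prints a summary of the live and dead region servers, the master, and any
   * backup masters, then asks each live region server (in parallel) which
   * regions it is currently serving.
   */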
302 private void loadDeployedRegions() throws IOException, InterruptedException {
303
304 Collection<ServerName> regionServers = status.getServers();
305 errors.print("Number of live region servers: " + regionServers.size());
306 if (details) {
307 for (ServerName rsinfo: regionServers) {
308 errors.print(" " + rsinfo.getServerName());
309 }
310 }
311
312
313 Collection<ServerName> deadRegionServers = status.getDeadServerNames();
314 errors.print("Number of dead region servers: " + deadRegionServers.size());
315 if (details) {
316 for (ServerName name: deadRegionServers) {
317 errors.print(" " + name);
318 }
319 }
320
321
322 errors.print("Master: " + status.getMaster());
323
324
325 Collection<ServerName> backupMasters = status.getBackupMasters();
326 errors.print("Number of backup masters: " + backupMasters.size());
327 if (details) {
328 for (ServerName name: backupMasters) {
329 errors.print(" " + name);
330 }
331 }
332
333
334 processRegionServers(regionServers);
335 }
336
337
338
339
340 private void clearState() {
341
342 fixes = 0;
343 regionInfoMap.clear();
344 emptyRegionInfoQualifiers.clear();
345 disabledTables.clear();
346 errors.clear();
347 tablesInfo.clear();
348 orphanHdfsDirs.clear();
349 }
350
351
352
353
354
355
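  /**
   * Iterates the HDFS-side integrity repairs (orphan adoption, hole and
   * overlap fixing) until no more fixes are made or a configurable maximum
   * number of iterations is reached. Only runs when HDFS checking and at
   * least one of the HDFS fix options is enabled.
   */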
356 public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
357
358 if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
359 || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
361
362 int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
363 int curIter = 0;
364 do {
365 clearState();
366
367 restoreHdfsIntegrity();
368 curIter++;
369 } while (fixes > 0 && curIter <= maxIterations);
370
371
372
373 if (curIter > 2) {
374 if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
377 } else {
378 LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
379 }
380 }
381 }
382 }
383
384
385
386
387
388
389
390
391
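  /**
   * Contacts the master, the region servers, and META to build the region
   * consistency picture, then checks (and, with the appropriate fix options,
   * repairs) per-region consistency and per-table integrity. Returns the
   * number of errors found, or a negative value if META itself could not be
   * read or repaired.
   */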
392 public int onlineConsistencyRepair() throws IOException, KeeperException,
393 InterruptedException {
394 clearState();
395
396
397 loadDeployedRegions();
398
399 if (!recordMetaRegion()) {
400
401 errors.reportError("Fatal error: unable to get .META. region location. Exiting...");
402 return -2;
403 }
404
405 if (!checkMetaRegion()) {
406 String errorMsg = ".META. table is not consistent. ";
407 if (shouldFixAssignments()) {
408 errorMsg += "HBCK will try fixing it. Rerun once .META. is back to consistent state.";
409 } else {
410 errorMsg += "Run HBCK with proper fix options to fix .META. inconsistency.";
411 }
412 errors.reportError(errorMsg + " Exiting...");
413 return -2;
414 }
415
416 LOG.info("Loading regionsinfo from the .META. table");
417 boolean success = loadMetaEntries();
418 if (!success) return -1;
419
420
421 reportEmptyMetaCells();
422
423
424 if (shouldFixEmptyMetaCells()) {
425 fixEmptyMetaCells();
426 }
427
428
429 if (!checkMetaOnly) {
430 reportTablesInFlux();
431 }
432
433
434 if (shouldCheckHdfs()) {
435 loadHdfsRegionDirs();
436 loadHdfsRegionInfos();
437 }
438
439
440 loadDisabledTables();
441
442
443 fixOrphanTables();
444
445
446 checkAndFixConsistency();
447
448
449 checkIntegrity();
450 return errors.getErrorList().size();
451 }
452
453
454
455
456
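  /**
   * Runs the full online check/repair sequence: offline HDFS integrity repair
   * first, then online consistency repair with the balancer temporarily
   * disabled, followed by the lingering-reference-file and table-lock checks.
   * Returns the summarized error count.
   */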
457 public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException {
458
459 errors.print("Version: " + status.getHBaseVersion());
460 offlineHdfsIntegrityRepair();
461
462
463 boolean oldBalancer = admin.setBalancerRunning(false, true);
464 try {
465 onlineConsistencyRepair();
466 }
467 finally {
468 admin.setBalancerRunning(oldBalancer, false);
469 }
470
471 offlineReferenceFileRepair();
472
473 checkAndFixTableLocks();
474
475
476 printTableSummary(tablesInfo);
477 return errors.summarize();
478 }
479
480
481
482
483 private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
484 for (HbckInfo hi : orphanHdfsDirs) {
485 LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
486 adoptHdfsOrphan(hi);
487 }
488 }
489
490
491
492
493
494
495
496
497
498
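  /**
   * Orphaned regions are regions without a .regioninfo file in them. "Adopt"
   * such a region by scanning the first and last keys of each HFile to
   * estimate the region's key range, creating a fresh region directory with a
   * new .regioninfo, and merging the orphan's files into it. If the directory
   * holds no data it is sidelined instead.
   */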
499 private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
500 Path p = hi.getHdfsRegionDir();
501 FileSystem fs = p.getFileSystem(getConf());
502 FileStatus[] dirs = fs.listStatus(p);
503 if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
          p + ". This dir could probably be deleted.");
506 return ;
507 }
508
509 TableName tableName = hi.getTableName();
510 TableInfo tableInfo = tablesInfo.get(tableName);
511 Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
512 HTableDescriptor template = tableInfo.getHTD();
513
514
515 Pair<byte[],byte[]> orphanRegionRange = null;
516 for (FileStatus cf : dirs) {
517 String cfName= cf.getPath().getName();
518
519 if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
520
521 FileStatus[] hfiles = fs.listStatus(cf.getPath());
522 for (FileStatus hfile : hfiles) {
523 byte[] start, end;
524 HFile.Reader hf = null;
525 try {
526 CacheConfig cacheConf = new CacheConfig(getConf());
527 hf = HFile.createReader(fs, hfile.getPath(), cacheConf);
528 hf.loadFileInfo();
529 KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
530 start = startKv.getRow();
531 KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
532 end = endKv.getRow();
533 } catch (IOException ioe) {
534 LOG.warn("Problem reading orphan file " + hfile + ", skipping");
535 continue;
536 } catch (NullPointerException ioe) {
537 LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
538 continue;
539 } finally {
540 if (hf != null) {
541 hf.close();
542 }
543 }
544
545
546 if (orphanRegionRange == null) {
547
548 orphanRegionRange = new Pair<byte[], byte[]>(start, end);
549 } else {
550
551
552
553 if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
554 orphanRegionRange.setFirst(start);
555 }
556 if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
557 orphanRegionRange.setSecond(end);
558 }
559 }
560 }
561 }
562 if (orphanRegionRange == null) {
563 LOG.warn("No data in dir " + p + ", sidelining data");
564 fixes++;
565 sidelineRegionDir(fs, hi);
566 return;
567 }
568 LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
569 Bytes.toString(orphanRegionRange.getSecond()) + ")");
570
571
572 HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond());
573 LOG.info("Creating new region : " + hri);
574 HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
575 Path target = region.getRegionFileSystem().getRegionDir();
576
577
578 mergeRegionDirs(target, hi);
579 fixes++;
580 }
581
582
583
584
585
586
587
588
589
590 private int restoreHdfsIntegrity() throws IOException, InterruptedException {
591
592 LOG.info("Loading HBase regioninfo from HDFS...");
593 loadHdfsRegionDirs();
594
595 int errs = errors.getErrorList().size();
596
597 tablesInfo = loadHdfsRegionInfos();
598 checkHdfsIntegrity(false, false);
599
600 if (errors.getErrorList().size() == errs) {
601 LOG.info("No integrity errors. We are done with this phase. Glorious.");
602 return 0;
603 }
604
605 if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
606 adoptHdfsOrphans(orphanHdfsDirs);
607
608 }
609
610
611 if (shouldFixHdfsHoles()) {
612 clearState();
613 loadHdfsRegionDirs();
614 tablesInfo = loadHdfsRegionInfos();
615 tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
616 }
617
618
619 if (shouldFixHdfsOverlaps()) {
620
621 clearState();
622 loadHdfsRegionDirs();
623 tablesInfo = loadHdfsRegionInfos();
624 tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
625 }
626
627 return errors.getErrorList().size();
628 }
629
630
631
632
633
634
635
636
637
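  /**
   * Scans all store files under the HBase root directory looking for
   * lingering reference files, i.e. references whose referred-to file no
   * longer exists. Each such file is reported and, when reference file fixing
   * is enabled (shouldFixReferenceFiles()), sidelined so the region can open
   * cleanly.
   */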
638 private void offlineReferenceFileRepair() throws IOException {
639 Configuration conf = getConf();
640 Path hbaseRoot = FSUtils.getRootDir(conf);
641 FileSystem fs = hbaseRoot.getFileSystem(conf);
642 Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
643 for (Path path: allFiles.values()) {
644 boolean isReference = false;
645 try {
646 isReference = StoreFileInfo.isReference(path);
647 } catch (Throwable t) {
648
649
650
651
652 }
653 if (!isReference) continue;
654
655 Path referredToFile = StoreFileInfo.getReferredToFile(path);
656 if (fs.exists(referredToFile)) continue;
657
658
659 errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
660 "Found lingering reference file " + path);
661 if (!shouldFixReferenceFiles()) continue;
662
663
664 boolean success = false;
665 String pathStr = path.toString();
666
667
668
669
670
      // Walk back up a few path components so the sidelined copy keeps its
      // relative directory structure; searching from index - 1 makes each
      // iteration move past the previous separator instead of re-finding it.
      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 3; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }
675 if (index > 0) {
676 Path rootDir = getSidelineDir();
677 Path dst = new Path(rootDir, pathStr.substring(index));
678 fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
            + path + " to " + dst);
681 setShouldRerun();
682
683 success = fs.rename(path, dst);
684 }
685 if (!success) {
686 LOG.error("Failed to sideline reference file " + path);
687 }
688 }
689 }
690
691
692
693
694 private void reportEmptyMetaCells() {
695 errors.print("Number of empty REGIONINFO_QUALIFIER rows in .META.: " +
696 emptyRegionInfoQualifiers.size());
697 if (details) {
698 for (Result r: emptyRegionInfoQualifiers) {
699 errors.print(" " + r);
700 }
701 }
702 }
703
704
705
706
707 private void reportTablesInFlux() {
708 AtomicInteger numSkipped = new AtomicInteger(0);
709 HTableDescriptor[] allTables = getTables(numSkipped);
710 errors.print("Number of Tables: " + allTables.length);
711 if (details) {
712 if (numSkipped.get() > 0) {
713 errors.detail("Number of Tables in flux: " + numSkipped.get());
714 }
715 for (HTableDescriptor td : allTables) {
716 errors.detail(" Table: " + td.getTableName() + "\t" +
717 (td.isReadOnly() ? "ro" : "rw") + "\t" +
718 (td.isMetaRegion() ? "META" : " ") + "\t" +
719 " families: " + td.getFamilies().size());
720 }
721 }
722 }
723
724 public ErrorReporter getErrors() {
725 return errors;
726 }
727
728
729
730
731
732 private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
733 Path regionDir = hbi.getHdfsRegionDir();
734 if (regionDir == null) {
735 LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
736 return;
737 }
738
739 if (hbi.hdfsEntry.hri != null) {
740
741 return;
742 }
743
744 FileSystem fs = FileSystem.get(getConf());
745 HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
746 LOG.debug("HRegionInfo read: " + hri.toString());
747 hbi.hdfsEntry.hri = hri;
748 }
749
750
751
752
753
754 public static class RegionRepairException extends IOException {
755 private static final long serialVersionUID = 1L;
756 final IOException ioe;
757 public RegionRepairException(String s, IOException ioe) {
758 super(s);
759 this.ioe = ioe;
760 }
761 }
762
763
764
765
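  /**
   * Populates tablesInfo by reading the .regioninfo file of every known
   * region in parallel, then grouping regions by table and loading (or
   * recording as orphaned) each table's .tableinfo descriptor.
   */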
766 private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
767 throws IOException, InterruptedException {
768 tablesInfo.clear();
769
770 Collection<HbckInfo> hbckInfos = regionInfoMap.values();
771
772
773 List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
774 List<Future<Void>> hbiFutures;
775
776 for (HbckInfo hbi : hbckInfos) {
777 WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
778 hbis.add(work);
779 }
780
781
782 hbiFutures = executor.invokeAll(hbis);
783
784 for(int i=0; i<hbiFutures.size(); i++) {
785 WorkItemHdfsRegionInfo work = hbis.get(i);
786 Future<Void> f = hbiFutures.get(i);
787 try {
788 f.get();
789 } catch(ExecutionException e) {
790 LOG.warn("Failed to read .regioninfo file for region " +
791 work.hbi.getRegionNameAsString(), e.getCause());
792 }
793 }
794
795
796 for (HbckInfo hbi: hbckInfos) {
797
798 if (hbi.getHdfsHRI() == null) {
799
800 continue;
801 }
802
803
804
805 TableName tableName = hbi.getTableName();
806 if (tableName == null) {
807
808 LOG.warn("tableName was null for: " + hbi);
809 continue;
810 }
811
812 TableInfo modTInfo = tablesInfo.get(tableName);
813 if (modTInfo == null) {
814
815 modTInfo = new TableInfo(tableName);
816 Path hbaseRoot = FSUtils.getRootDir(getConf());
817 tablesInfo.put(tableName, modTInfo);
818 try {
819 HTableDescriptor htd =
820 FSTableDescriptors.getTableDescriptorFromFs(hbaseRoot.getFileSystem(getConf()),
821 hbaseRoot, tableName);
822 modTInfo.htds.add(htd);
823 } catch (IOException ioe) {
824 if (!orphanTableDirs.containsKey(tableName)) {
825 LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
826
827 errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
828 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
829 Set<String> columns = new HashSet<String>();
830 orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
831 }
832 }
833 }
834 if (!hbi.isSkipChecks()) {
835 modTInfo.addRegionInfo(hbi);
836 }
837 }
838
839 return tablesInfo;
840 }
841
842
843
844
845
846
847
848
849 private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
850 Path regionDir = hbi.getHdfsRegionDir();
851 FileSystem fs = regionDir.getFileSystem(getConf());
852 FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
853 for (FileStatus subdir : subDirs) {
854 String columnfamily = subdir.getPath().getName();
855 columns.add(columnfamily);
856 }
857 return columns;
858 }
859
860
861
862
863
864
865
866
867 private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
868 Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (String columnfamily : columns) {
      htd.addFamily(new HColumnDescriptor(columnfamily));
873 }
874 fstd.createTableDescriptor(htd, true);
875 return true;
876 }
877
878
879
880
881
882 public void fixEmptyMetaCells() throws IOException {
883 if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
884 LOG.info("Trying to fix empty REGIONINFO_QUALIFIER .META. rows.");
885 for (Result region : emptyRegionInfoQualifiers) {
886 deleteMetaRegion(region.getRow());
887 errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
888 }
889 emptyRegionInfoQualifiers.clear();
890 }
891 }
892
893
894
895
896
897
898
899
900
901
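  /**
   * To fix orphan tables (tables that have an HDFS directory but no
   * .tableinfo file): first try to restore the descriptor from the master's
   * cache; if that is not possible, fabricate a default descriptor from the
   * column family directories found on disk. A manual re-run of hbck is
   * recommended afterwards.
   */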
902 public void fixOrphanTables() throws IOException {
903 if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
904
905 List<TableName> tmpList = new ArrayList<TableName>();
906 tmpList.addAll(orphanTableDirs.keySet());
907 HTableDescriptor[] htds = getHTableDescriptors(tmpList);
908 Iterator<Entry<TableName, Set<String>>> iter =
909 orphanTableDirs.entrySet().iterator();
910 int j = 0;
911 int numFailedCase = 0;
912 FSTableDescriptors fstd = new FSTableDescriptors(getConf());
913 while (iter.hasNext()) {
914 Entry<TableName, Set<String>> entry =
915 (Entry<TableName, Set<String>>) iter.next();
916 TableName tableName = entry.getKey();
917 LOG.info("Trying to fix orphan table error: " + tableName);
918 if (j < htds.length) {
919 if (tableName.equals(htds[j].getTableName())) {
920 HTableDescriptor htd = htds[j];
921 LOG.info("fixing orphan table: " + tableName + " from cache");
922 fstd.createTableDescriptor(htd, true);
923 j++;
924 iter.remove();
925 }
926 } else {
927 if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
928 LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
            LOG.warn("Strongly recommend modifying the HTableDescriptor if necessary for: " + tableName);
930 iter.remove();
931 } else {
            LOG.error("Unable to create default .tableinfo for " + tableName + " because column family information is missing");
933 numFailedCase++;
934 }
935 }
936 fixes++;
937 }
938
939 if (orphanTableDirs.isEmpty()) {
940
941
942 setShouldRerun();
        LOG.warn("Strongly recommend manually re-running hbck after all orphanTableDirs have been fixed");
944 } else if (numFailedCase > 0) {
945 LOG.error("Failed to fix " + numFailedCase
946 + " OrphanTables with default .tableinfo files");
947 }
948
949 }
950
951 orphanTableDirs.clear();
952
953 }
954
955
956
957
958
959
960 private HRegion createNewMeta() throws IOException {
961 Path rootdir = FSUtils.getRootDir(getConf());
962 Configuration c = getConf();
963 HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
964 MasterFileSystem.setInfoFamilyCachingForMeta(false);
965 HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c,
966 HTableDescriptor.META_TABLEDESC);
967 MasterFileSystem.setInfoFamilyCachingForMeta(true);
968 return meta;
969 }
970
971
972
973
974
975
976
977 private ArrayList<Put> generatePuts(
978 SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
979 ArrayList<Put> puts = new ArrayList<Put>();
980 boolean hasProblems = false;
981 for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
982 TableName name = e.getKey();
983
984
985 if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
986 continue;
987 }
988
989 TableInfo ti = e.getValue();
990 for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
991 .entrySet()) {
992 Collection<HbckInfo> his = spl.getValue();
993 int sz = his.size();
994 if (sz != 1) {
995
996 LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
997 + " had " + sz + " regions instead of exactly 1." );
998 hasProblems = true;
999 continue;
1000 }
1001
1002
1003 HbckInfo hi = his.iterator().next();
1004 HRegionInfo hri = hi.getHdfsHRI();
1005 Put p = MetaEditor.makePutFromRegionInfo(hri);
1006 puts.add(p);
1007 }
1008 }
1009 return hasProblems ? null : puts;
1010 }
1011
1012
1013
1014
1015 private void suggestFixes(
1016 SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1017 for (TableInfo tInfo : tablesInfo.values()) {
1018 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1019 tInfo.checkRegionChain(handler);
1020 }
1021 }
1022
1023
1024
1025
1026
1027
1028
1029
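  /**
   * Rebuilds the META table from the regioninfo files found in HDFS. The
   * existing META is first sidelined into the hbck sideline directory; the
   * method refuses to proceed (returns false) while unresolved HDFS integrity
   * errors remain.
   */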
1030 public boolean rebuildMeta(boolean fix) throws IOException,
1031 InterruptedException {
1032
1033
1034
1035
1036
1037 LOG.info("Loading HBase regioninfo from HDFS...");
1038 loadHdfsRegionDirs();
1039
1040 int errs = errors.getErrorList().size();
1041 tablesInfo = loadHdfsRegionInfos();
1042 checkHdfsIntegrity(false, false);
1043
1044
1045 if (errors.getErrorList().size() != errs) {
1046
1047 while(true) {
1048 fixes = 0;
1049 suggestFixes(tablesInfo);
1050 errors.clear();
1051 loadHdfsRegionInfos();
1052 checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1053
1054 int errCount = errors.getErrorList().size();
1055
1056 if (fixes == 0) {
1057 if (errCount > 0) {
1058 return false;
1059 } else {
1060 break;
1061 }
1062 }
1063 }
1064 }
1065
1066
    LOG.info("HDFS regioninfos seem good. Sidelining old .META.");
1068 Path backupDir = sidelineOldMeta();
1069
1070 LOG.info("Creating new .META.");
1071 HRegion meta = createNewMeta();
1072
1073
1074 List<Put> puts = generatePuts(tablesInfo);
1075 if (puts == null) {
1076 LOG.fatal("Problem encountered when creating new .META. entries. " +
1077 "You may need to restore the previously sidelined .META.");
1078 return false;
1079 }
1080 meta.batchMutate(puts.toArray(new Put[0]));
1081 HRegion.closeHRegion(meta);
1082 LOG.info("Success! .META. table rebuilt.");
1083 LOG.info("Old .META. is moved into " + backupDir);
1084 return true;
1085 }
1086
1087 private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1088 boolean fixOverlaps) throws IOException {
1089 LOG.info("Checking HBase region split map from HDFS data...");
1090 for (TableInfo tInfo : tablesInfo.values()) {
1091 TableIntegrityErrorHandler handler;
1092 if (fixHoles || fixOverlaps) {
1093 handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1094 fixHoles, fixOverlaps);
1095 } else {
1096 handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1097 }
1098 if (!tInfo.checkRegionChain(handler)) {
1099
1100 errors.report("Found inconsistency in table " + tInfo.getName());
1101 }
1102 }
1103 return tablesInfo;
1104 }
1105
1106 private Path getSidelineDir() throws IOException {
1107 if (sidelineDir == null) {
1108 Path hbaseDir = FSUtils.getRootDir(getConf());
1109 Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1110 sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1111 + startMillis);
1112 }
1113 return sidelineDir;
1114 }
1115
1116
1117
1118
1119 Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1120 return sidelineRegionDir(fs, null, hi);
1121 }
1122
1123
1124
1125
1126
1127
1128
1129
1130
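  /**
   * Sidelines a region directory: its column family files are moved under the
   * hbck sideline directory (optionally below parentDir) and the original
   * region directory is then deleted. Returns the sideline location, or null
   * if the region directory no longer exists.
   */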
1131 Path sidelineRegionDir(FileSystem fs,
1132 String parentDir, HbckInfo hi) throws IOException {
1133 TableName tableName = hi.getTableName();
1134 Path regionDir = hi.getHdfsRegionDir();
1135
1136 if (!fs.exists(regionDir)) {
1137 LOG.warn("No previous " + regionDir + " exists. Continuing.");
1138 return null;
1139 }
1140
1141 Path rootDir = getSidelineDir();
1142 if (parentDir != null) {
1143 rootDir = new Path(rootDir, parentDir);
1144 }
1145 Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName);
1146 Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1147 fs.mkdirs(sidelineRegionDir);
1148 boolean success = false;
1149 FileStatus[] cfs = fs.listStatus(regionDir);
1150 if (cfs == null) {
1151 LOG.info("Region dir is empty: " + regionDir);
1152 } else {
1153 for (FileStatus cf : cfs) {
1154 Path src = cf.getPath();
1155 Path dst = new Path(sidelineRegionDir, src.getName());
1156 if (fs.isFile(src)) {
1157
1158 success = fs.rename(src, dst);
1159 if (!success) {
1160 String msg = "Unable to rename file " + src + " to " + dst;
1161 LOG.error(msg);
1162 throw new IOException(msg);
1163 }
1164 continue;
1165 }
1166
1167
1168 fs.mkdirs(dst);
1169
1170 LOG.info("Sidelining files from " + src + " into containing region " + dst);
1171
1172
1173
1174
1175 FileStatus[] hfiles = fs.listStatus(src);
1176 if (hfiles != null && hfiles.length > 0) {
1177 for (FileStatus hfile : hfiles) {
          success = fs.rename(hfile.getPath(), dst);
          if (!success) {
            String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1181 LOG.error(msg);
1182 throw new IOException(msg);
1183 }
1184 }
1185 }
1186 LOG.debug("Sideline directory contents:");
1187 debugLsr(sidelineRegionDir);
1188 }
1189 }
1190
1191 LOG.info("Removing old region dir: " + regionDir);
1192 success = fs.delete(regionDir, true);
1193 if (!success) {
1194 String msg = "Unable to delete dir " + regionDir;
1195 LOG.error(msg);
1196 throw new IOException(msg);
1197 }
1198 return sidelineRegionDir;
1199 }
1200
1201
1202
1203
1204 void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1205 Path backupHbaseDir) throws IOException {
1206 Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1207 if (fs.exists(tableDir)) {
1208 Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName);
1209 fs.mkdirs(backupTableDir.getParent());
1210 boolean success = fs.rename(tableDir, backupTableDir);
1211 if (!success) {
1212 throw new IOException("Failed to move " + tableName + " from "
1213 + tableDir + " to " + backupTableDir);
1214 }
1215 } else {
1216 LOG.info("No previous " + tableName + " exists. Continuing.");
1217 }
1218 }
1219
1220
1221
1222
1223 Path sidelineOldMeta() throws IOException {
1224
1225 Path hbaseDir = FSUtils.getRootDir(getConf());
1226 FileSystem fs = hbaseDir.getFileSystem(getConf());
1227 Path backupDir = getSidelineDir();
1228 fs.mkdirs(backupDir);
1229 try {
1230 sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1231 } catch (IOException e) {
1232 LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore "
1233 + "try to rename .META. in " + backupDir.getName() + " to "
1234 + hbaseDir.getName() + ".", e);
1235 throw e;
1236 }
1237 return backupDir;
1238 }
1239
1240
1241
1242
1243
1244
1245 private void loadDisabledTables()
1246 throws ZooKeeperConnectionException, IOException {
1247 HConnectionManager.execute(new HConnectable<Void>(getConf()) {
1248 @Override
1249 public Void connect(HConnection connection) throws IOException {
1250 ZooKeeperWatcher zkw = createZooKeeperWatcher();
1251 try {
1252 for (TableName tableName :
1253 ZKTableReadOnly.getDisabledOrDisablingTables(zkw)) {
1254 disabledTables.add(tableName);
1255 }
1256 } catch (KeeperException ke) {
1257 throw new IOException(ke);
1258 } finally {
1259 zkw.close();
1260 }
1261 return null;
1262 }
1263 });
1264 }
1265
1266
1267
1268
1269 private boolean isTableDisabled(HRegionInfo regionInfo) {
1270 return disabledTables.contains(regionInfo.getTableName());
1271 }
1272
1273
1274
1275
1276
1277 public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1278 Path rootDir = FSUtils.getRootDir(getConf());
1279 FileSystem fs = rootDir.getFileSystem(getConf());
1280
1281
1282 List<FileStatus> tableDirs = Lists.newArrayList();
1283
1284 boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1285
1286 List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1287 for (Path path : paths) {
1288 TableName tableName = FSUtils.getTableName(path);
1289 if ((!checkMetaOnly &&
1290 isTableIncluded(tableName)) ||
1291 tableName.equals(TableName.META_TABLE_NAME)) {
1292 tableDirs.add(fs.getFileStatus(path));
1293 }
1294 }
1295
1296
1297 if (!foundVersionFile) {
1298 errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1299 "Version file does not exist in root dir " + rootDir);
1300 if (shouldFixVersionFile()) {
1301 LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1302 + " file.");
1303 setShouldRerun();
1304 FSUtils.setVersion(fs, rootDir, getConf().getInt(
1305 HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1306 HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1307 HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1308 }
1309 }
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
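  /**
   * Records the location of the META region, as reported by the connection
   * and ZooKeeper, into regionInfoMap. Returns false if the location or any
   * of its attributes could not be determined.
   */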
1335 private boolean recordMetaRegion() throws IOException {
1336 HRegionLocation metaLocation = connection.locateRegion(
1337 TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW);
1338
1339
1340 if (metaLocation == null || metaLocation.getRegionInfo() == null ||
1341 metaLocation.getHostname() == null) {
1342 errors.reportError(ERROR_CODE.NULL_META_REGION,
1343 "META region or some of its attributes are null.");
1344 return false;
1345 }
1346 ServerName sn;
1347 try {
1348 sn = getMetaRegionServerName();
1349 } catch (KeeperException e) {
1350 throw new IOException(e);
1351 }
1352 MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
1353 HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1354 if (hbckInfo == null) {
1355 regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1356 } else {
1357 hbckInfo.metaEntry = m;
1358 }
1359 return true;
1360 }
1361
1362 private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1363 return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1364 @Override
1365 public void abort(String why, Throwable e) {
1366 LOG.error(why, e);
1367 System.exit(1);
1368 }
1369
1370 @Override
1371 public boolean isAborted() {
1372 return false;
1373 }
1374
1375 });
1376 }
1377
1378 private ServerName getMetaRegionServerName()
1379 throws IOException, KeeperException {
1380 ZooKeeperWatcher zkw = createZooKeeperWatcher();
1381 ServerName sn = null;
1382 try {
1383 sn = MetaRegionTracker.getMetaRegionLocation(zkw);
1384 } finally {
1385 zkw.close();
1386 }
1387 return sn;
1388 }
1389
1390
1391
1392
1393
1394
1395 void processRegionServers(Collection<ServerName> regionServerList)
1396 throws IOException, InterruptedException {
1397
1398 List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1399 List<Future<Void>> workFutures;
1400
1401
1402 for (ServerName rsinfo: regionServerList) {
1403 workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1404 }
1405
1406 workFutures = executor.invokeAll(workItems);
1407
1408 for(int i=0; i<workFutures.size(); i++) {
1409 WorkItemRegion item = workItems.get(i);
1410 Future<Void> f = workFutures.get(i);
1411 try {
1412 f.get();
1413 } catch(ExecutionException e) {
1414 LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1415 e.getCause());
1416 }
1417 }
1418 }
1419
1420
1421
1422
1423 private void checkAndFixConsistency()
1424 throws IOException, KeeperException, InterruptedException {
1425 for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1426 checkRegionConsistency(e.getKey(), e.getValue());
1427 }
1428 }
1429
1430 private void preCheckPermission() throws IOException, AccessControlException {
1431 if (shouldIgnorePreCheckPermission()) {
1432 return;
1433 }
1434
1435 Path hbaseDir = FSUtils.getRootDir(getConf());
1436 FileSystem fs = hbaseDir.getFileSystem(getConf());
1437 UserGroupInformation ugi = User.getCurrent().getUGI();
1438 FileStatus[] files = fs.listStatus(hbaseDir);
1439 for (FileStatus file : files) {
1440 try {
1441 FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1442 } catch (AccessControlException ace) {
        LOG.warn("Got AccessControlException during preCheckPermission", ace);
1444 errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1445 + " does not have write perms to " + file.getPath()
1446 + ". Please rerun hbck as hdfs user " + file.getOwner());
1447 throw new AccessControlException(ace);
1448 }
1449 }
1450 }
1451
1452
1453
1454
1455 private void deleteMetaRegion(HbckInfo hi) throws IOException {
1456 deleteMetaRegion(hi.metaEntry.getRegionName());
1457 }
1458
1459
1460
1461
1462 private void deleteMetaRegion(byte[] metaKey) throws IOException {
1463 Delete d = new Delete(metaKey);
1464 meta.delete(d);
1465 meta.flushCommits();
1466 LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1467 }
1468
1469
1470
1471
1472 private void resetSplitParent(HbckInfo hi) throws IOException {
1473 RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
1474 Delete d = new Delete(hi.metaEntry.getRegionName());
1475 d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1476 d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1477 mutations.add(d);
1478
1479 HRegionInfo hri = new HRegionInfo(hi.metaEntry);
1480 hri.setOffline(false);
1481 hri.setSplit(false);
1482 Put p = MetaEditor.makePutFromRegionInfo(hri);
1483 mutations.add(p);
1484
1485 meta.mutateRow(mutations);
1486 meta.flushCommits();
1487 LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
1488 }
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498 private void offline(byte[] regionName) throws IOException {
1499 String regionString = Bytes.toStringBinary(regionName);
1500 if (!rsSupportsOffline) {
      LOG.warn("Unassigning region " + regionString
          + " instead of using the offline method; you should"
          + " restart HMaster after these repairs");
1504 admin.unassign(regionName, true);
1505 return;
1506 }
1507
1508
1509 try {
1510 LOG.info("Offlining region " + regionString);
1511 admin.offline(regionName);
1512 } catch (IOException ioe) {
1513 String notFoundMsg = "java.lang.NoSuchMethodException: " +
1514 "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1515 if (ioe.getMessage().contains(notFoundMsg)) {
        LOG.warn("Unassigning region " + regionString
            + " instead of using the offline method; you should"
            + " restart HMaster after these repairs");
1519 rsSupportsOffline = false;
1520 admin.unassign(regionName, true);
1521 return;
1522 }
1523 throw ioe;
1524 }
1525 }
1526
1527 private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
1528 for (OnlineEntry rse : hi.deployedEntries) {
1529 LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
1530 try {
1531 HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
1532 offline(rse.hri.getRegionName());
1533 } catch (IOException ioe) {
1534 LOG.warn("Got exception when attempting to offline region "
1535 + Bytes.toString(rse.hri.getRegionName()), ioe);
1536 }
1537 }
1538 }
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552 private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
1553 if (hi.metaEntry == null && hi.hdfsEntry == null) {
1554 undeployRegions(hi);
1555 return;
1556 }
1557
1558
1559 Get get = new Get(hi.getRegionName());
1560 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1561 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1562 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1563 Result r = meta.get(get);
1564 ServerName serverName = HRegionInfo.getServerName(r);
1565 if (serverName == null) {
      errors.reportError("Unable to close region "
          + hi.getRegionNameAsString() + " because META does not "
          + "have a handle to reach it.");
1569 return;
1570 }
1571
1572 HRegionInfo hri = HRegionInfo.getHRegionInfo(r);
1573 if (hri == null) {
1574 LOG.warn("Unable to close region " + hi.getRegionNameAsString()
1575 + " because META had invalid or missing "
1576 + HConstants.CATALOG_FAMILY_STR + ":"
1577 + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
1578 + " qualifier value.");
1579 return;
1580 }
1581
1582
1583 HBaseFsckRepair.closeRegionSilentlyAndWait(admin, serverName, hri);
1584 }
1585
1586 private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
1587 KeeperException, InterruptedException {
1588
1589 if (shouldFixAssignments()) {
1590 errors.print(msg);
1591 undeployRegions(hbi);
1592 setShouldRerun();
1593 HRegionInfo hri = hbi.getHdfsHRI();
1594 if (hri == null) {
1595 hri = hbi.metaEntry;
1596 }
1597 HBaseFsckRepair.fixUnassigned(admin, hri);
1598 HBaseFsckRepair.waitUntilAssigned(admin, hri);
1599 }
1600 }
1601
1602
1603
1604
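  /**
   * Checks a single region for consistency across META, HDFS, and deployment,
   * and applies the enabled fixes (reassignment, META patching or deletion,
   * split parent reset) for the state it finds.
   */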
1605 private void checkRegionConsistency(final String key, final HbckInfo hbi)
1606 throws IOException, KeeperException, InterruptedException {
1607 String descriptiveName = hbi.toString();
1608
1609 boolean inMeta = hbi.metaEntry != null;
1610
1611 boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
1612 boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
1613 boolean isDeployed = !hbi.deployedOn.isEmpty();
1614 boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
1615 boolean deploymentMatchesMeta =
1616 hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
1617 hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
1618 boolean splitParent =
1619 (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
1620 boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
1621 boolean recentlyModified = inHdfs &&
1622 hbi.getModTime() + timelag > System.currentTimeMillis();
1623
1624
1625 if (hbi.containsOnlyHdfsEdits()) {
1626 return;
1627 }
1628 if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
1629 return;
1630 } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
          "table that is not deployed");
1633 return;
1634 } else if (recentlyModified) {
1635 LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
1636 return;
1637 }
1638
1639 else if (!inMeta && !inHdfs && !isDeployed) {
1640
1641 assert false : "Entry for region with no data";
1642 } else if (!inMeta && !inHdfs && isDeployed) {
1643 errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
1644 + descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
1645 "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1646 if (shouldFixAssignments()) {
1647 undeployRegions(hbi);
1648 }
1649
1650 } else if (!inMeta && inHdfs && !isDeployed) {
1651 errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
1652 + descriptiveName + " on HDFS, but not listed in META " +
1653 "or deployed on any region server");
1654
1655 if (shouldFixMeta()) {
1656 if (!hbi.isHdfsRegioninfoPresent()) {
1657 LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
1658 + " in table integrity repair phase if -fixHdfsOrphans was" +
1659 " used.");
1660 return;
1661 }
1662
1663 LOG.info("Patching .META. with .regioninfo: " + hbi.getHdfsHRI());
1664 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1665
1666 tryAssignmentRepair(hbi, "Trying to reassign region...");
1667 }
1668
1669 } else if (!inMeta && inHdfs && isDeployed) {
1670 errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
1671 + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1672 debugLsr(hbi.getHdfsRegionDir());
1673 if (shouldFixMeta()) {
1674 if (!hbi.isHdfsRegioninfoPresent()) {
1675 LOG.error("This should have been repaired in table integrity repair phase");
1676 return;
1677 }
1678
        LOG.info("Patching .META. with .regioninfo: " + hbi.getHdfsHRI());
1680 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1681
1682 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1683 }
1684
1685
1686 } else if (inMeta && inHdfs && !isDeployed && splitParent) {
1687
1688
1689 if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
1690
1691 HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
1692 HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
1693 if (infoA != null && infoB != null) {
1694
1695 hbi.setSkipChecks(true);
1696 return;
1697 }
1698 }
1699 errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
1700 + descriptiveName + " is a split parent in META, in HDFS, "
1701 + "and not deployed on any region server. This could be transient.");
1702 if (shouldFixSplitParents()) {
1703 setShouldRerun();
1704 resetSplitParent(hbi);
1705 }
1706 } else if (inMeta && !inHdfs && !isDeployed) {
1707 errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
1708 + descriptiveName + " found in META, but not in HDFS "
1709 + "or deployed on any region server.");
1710 if (shouldFixMeta()) {
1711 deleteMetaRegion(hbi);
1712 }
1713 } else if (inMeta && !inHdfs && isDeployed) {
1714 errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
1715 + " found in META, but not in HDFS, " +
1716 "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1717
1718
1719
1720 if (shouldFixAssignments()) {
1721 errors.print("Trying to fix unassigned region...");
1722 closeRegion(hbi);
1723 }
1724 if (shouldFixMeta()) {
1725
1726 deleteMetaRegion(hbi);
1727 }
1728 } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
1729 errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
1730 + " not deployed on any region server.");
1731 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1732 } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
1733 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
1734 "Region " + descriptiveName + " should not be deployed according " +
1735 "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1736 if (shouldFixAssignments()) {
1737 errors.print("Trying to close the region " + descriptiveName);
1738 setShouldRerun();
1739 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1740 }
1741 } else if (inMeta && inHdfs && isMultiplyDeployed) {
1742 errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
1743 + " is listed in META on region server " + hbi.metaEntry.regionServer
1744 + " but is multiply assigned to region servers " +
1745 Joiner.on(", ").join(hbi.deployedOn));
1746
1747 if (shouldFixAssignments()) {
1748 errors.print("Trying to fix assignment error...");
1749 setShouldRerun();
1750 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1751 }
1752 } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
1753 errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
1754 + descriptiveName + " listed in META on region server " +
1755 hbi.metaEntry.regionServer + " but found on region server " +
1756 hbi.deployedOn.get(0));
1757
1758 if (shouldFixAssignments()) {
1759 errors.print("Trying to fix assignment error...");
1760 setShouldRerun();
1761 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1762 HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1763 }
1764 } else {
1765 errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
1766 " is in an unforeseen state:" +
1767 " inMeta=" + inMeta +
1768 " inHdfs=" + inHdfs +
1769 " isDeployed=" + isDeployed +
1770 " isMultiplyDeployed=" + isMultiplyDeployed +
1771 " deploymentMatchesMeta=" + deploymentMatchesMeta +
1772 " shouldBeDeployed=" + shouldBeDeployed);
1773 }
1774 }
1775
1776
1777
1778
1779
1780
1781
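  /**
   * Rebuilds tablesInfo from the regions recorded in META (skipping offline
   * regions, regions without an assigned server, and recovered-edits-only
   * entries) and then checks each table's region chain for holes, overlaps,
   * and other integrity problems.
   */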
1782 SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
1783 tablesInfo = new TreeMap<TableName,TableInfo> ();
1784 List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
1785 LOG.debug("There are " + regionInfoMap.size() + " region info entries");
1786 for (HbckInfo hbi : regionInfoMap.values()) {
1787
1788 if (hbi.metaEntry == null) {
1789
1790 noHDFSRegionInfos.add(hbi);
1791 Path p = hbi.getHdfsRegionDir();
1792 if (p == null) {
1793 errors.report("No regioninfo in Meta or HDFS. " + hbi);
1794 }
1795
1796
1797 continue;
1798 }
1799 if (hbi.metaEntry.regionServer == null) {
1800 errors.detail("Skipping region because no region server: " + hbi);
1801 continue;
1802 }
1803 if (hbi.metaEntry.isOffline()) {
1804 errors.detail("Skipping region because it is offline: " + hbi);
1805 continue;
1806 }
1807 if (hbi.containsOnlyHdfsEdits()) {
        errors.detail("Skipping region because it only contains edits: " + hbi);
1809 continue;
1810 }
1811
1812
1813
1814
1815
1816
1817 if (hbi.deployedOn.size() == 0) continue;
1818
1819
1820 TableName tableName = hbi.metaEntry.getTableName();
1821 TableInfo modTInfo = tablesInfo.get(tableName);
1822 if (modTInfo == null) {
1823 modTInfo = new TableInfo(tableName);
1824 }
1825 for (ServerName server : hbi.deployedOn) {
1826 modTInfo.addServer(server);
1827 }
1828
1829 if (!hbi.isSkipChecks()) {
1830 modTInfo.addRegionInfo(hbi);
1831 }
1832
1833 tablesInfo.put(tableName, modTInfo);
1834 }
1835
1836 for (TableInfo tInfo : tablesInfo.values()) {
1837 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1838 if (!tInfo.checkRegionChain(handler)) {
1839 errors.report("Found inconsistency in table " + tInfo.getName());
1840 }
1841 }
1842 return tablesInfo;
1843 }
1844
1845
1846
1847
1848
1849 public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
1850 int fileMoves = 0;
1851
1852 LOG.debug("Contained region dir after close and pause");
1853 debugLsr(contained.getHdfsRegionDir());
1854
1855
1856 FileSystem fs = targetRegionDir.getFileSystem(getConf());
1857 FileStatus[] dirs = fs.listStatus(contained.getHdfsRegionDir());
1858
1859 if (dirs == null) {
1860 if (!fs.exists(contained.getHdfsRegionDir())) {
1861 LOG.warn("HDFS region dir " + contained.getHdfsRegionDir() + " already sidelined.");
1862 } else {
1863 sidelineRegionDir(fs, contained);
1864 }
1865 return fileMoves;
1866 }
1867
1868 for (FileStatus cf : dirs) {
1869 Path src = cf.getPath();
1870 Path dst = new Path(targetRegionDir, src.getName());
1871
1872 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
1873
1874 continue;
1875 }
1876
1877 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
1878
1879 continue;
1880 }
1881
1882 LOG.info("Moving files from " + src + " into containing region " + dst);
1883
1884
1885
1886
1887 for (FileStatus hfile : fs.listStatus(src)) {
1888 boolean success = fs.rename(hfile.getPath(), dst);
1889 if (success) {
1890 fileMoves++;
1891 }
1892 }
1893 LOG.debug("Sideline directory contents:");
1894 debugLsr(targetRegionDir);
1895 }
1896
1897
1898 sidelineRegionDir(fs, contained);
1899 LOG.info("Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
1900 getSidelineDir());
1901 debugLsr(contained.getHdfsRegionDir());
1902
1903 return fileMoves;
1904 }
1905
1906
1907
1908
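  /**
   * Maintains the per-table state hbck builds up while running: the servers
   * the table is deployed on, its descriptor(s), regions whose end key sorts
   * before their start key, sidelined regions, and the split calculator and
   * overlap groups used for integrity checking.
   */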
1909 public class TableInfo {
1910 TableName tableName;
1911 TreeSet <ServerName> deployedOn;
1912
1913
1914 final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
1915
1916
1917 final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
1918
1919
1920 final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
1921
1922
1923 final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
1924
1925
1926 final Multimap<byte[], HbckInfo> overlapGroups =
1927 TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
1928
1929 TableInfo(TableName name) {
1930 this.tableName = name;
1931 deployedOn = new TreeSet <ServerName>();
1932 }
1933
1934
1935
1936
1937 private HTableDescriptor getHTD() {
1938 if (htds.size() == 1) {
1939 return (HTableDescriptor)htds.toArray()[0];
1940 } else {
1941 LOG.error("None/Multiple table descriptors found for table '"
1942 + tableName + "' regions: " + htds);
1943 }
1944 return null;
1945 }
1946
1947 public void addRegionInfo(HbckInfo hir) {
1948 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
1949
1950 sc.add(hir);
1951 return;
1952 }
1953
1954
1955 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
1956 errors.reportError(
1957 ERROR_CODE.REGION_CYCLE,
1958 String.format("The endkey for this region comes before the "
1959 + "startkey, startkey=%s, endkey=%s",
1960 Bytes.toStringBinary(hir.getStartKey()),
1961 Bytes.toStringBinary(hir.getEndKey())), this, hir);
1962 backwards.add(hir);
1963 return;
1964 }
1965
1966
1967 sc.add(hir);
1968 }
1969
1970 public void addServer(ServerName server) {
1971 this.deployedOn.add(server);
1972 }
1973
1974 public TableName getName() {
1975 return tableName;
1976 }
1977
1978 public int getNumRegions() {
1979 return sc.getStarts().size() + backwards.size();
1980 }
1981
1982 private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
1983 ErrorReporter errors;
1984
1985 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
1986 this.errors = errors;
1987 setTableInfo(ti);
1988 }
1989
1990 @Override
1991 public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
1992 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
1993 "First region should start with an empty key. You need to "
1994 + " create a new region and regioninfo in HDFS to plug the hole.",
1995 getTableInfo(), hi);
1996 }
1997
1998 @Override
1999 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2000 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2001 "Last region should end with an empty key. You need to "
2002 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2003 }
2004
2005 @Override
2006 public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2007 errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2008 "Region has the same start and end key.", getTableInfo(), hi);
2009 }
2010
2011 @Override
2012 public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2013 byte[] key = r1.getStartKey();
2014
2015 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2016 "Multiple regions have the same startkey: "
2017 + Bytes.toStringBinary(key), getTableInfo(), r1);
2018 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2019 "Multiple regions have the same startkey: "
2020 + Bytes.toStringBinary(key), getTableInfo(), r2);
2021 }
2022
2023 @Override
2024 public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2025 errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2026 "There is an overlap in the region chain.",
2027 getTableInfo(), hi1, hi2);
2028 }
2029
2030 @Override
2031 public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2032 errors.reportError(
2033 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2034 "There is a hole in the region chain between "
2035 + Bytes.toStringBinary(holeStart) + " and "
2036 + Bytes.toStringBinary(holeStop)
2037 + ". You need to create a new .regioninfo and region "
2038 + "dir in hdfs to plug the hole.");
2039 }
2040 };
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
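    /**
     * Integrity error handler that repairs problems in addition to reporting them:
     * it plugs holes at the start, middle, or end of the region chain by creating new
     * empty regions in HDFS, and merges or sidelines overlapping regions.
     */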
2054 private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2055 Configuration conf;
2056
2057 boolean fixOverlaps = true;
2058
2059 HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2060 boolean fixHoles, boolean fixOverlaps) {
2061 super(ti, errors);
2062 this.conf = conf;
2063 this.fixOverlaps = fixOverlaps;
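        // Note: fixHoles is accepted here but not stored; this fixer always plugs holes it finds.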
2064
2065 }
2066
2067
2068
2069
2070
2071
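      /**
       * A hole at the start of the table: create an empty region covering the range from
       * the empty start row up to the first region's start key.
       */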
      @Override
      public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2073 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2074 "First region should start with an empty key. Creating a new " +
2075 "region and regioninfo in HDFS to plug the hole.",
2076 getTableInfo(), next);
2077 HTableDescriptor htd = getTableInfo().getHTD();
2078
2079 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2080 HConstants.EMPTY_START_ROW, next.getStartKey());
2081
2082
2083 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2084 LOG.info("Table region start key was not empty. Created new empty region: "
2085 + newRegion + " " +region);
2086 fixes++;
2087 }
2088
      @Override
      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2090 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2091 "Last region should end with an empty key. Creating a new "
2092 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2093 HTableDescriptor htd = getTableInfo().getHTD();
2094
2095 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2096 HConstants.EMPTY_START_ROW);
2097
2098 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2099 LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
2100 + " " + region);
2101 fixes++;
2102 }
2103
2104
2105
2106
2107
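      /**
       * A hole in the middle of the region chain: create a new empty region spanning it.
       */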
      @Override
      public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey)
          throws IOException {
2109 errors.reportError(
2110 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2111 "There is a hole in the region chain between "
2112 + Bytes.toStringBinary(holeStartKey) + " and "
2113 + Bytes.toStringBinary(holeStopKey)
2114 + ". Creating a new regioninfo and region "
2115 + "dir in hdfs to plug the hole.");
2116 HTableDescriptor htd = getTableInfo().getHTD();
2117 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2118 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
        LOG.info("Plugged hole by creating new empty region: " + newRegion + " " + region);
2120 fixes++;
2121 }
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
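      /**
       * Repair one group of overlapping regions: skip it if overlap fixing is disabled,
       * sideline the biggest members if the group exceeds maxMerge, otherwise merge the
       * whole group into a single region.
       */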
2132 @Override
2133 public void handleOverlapGroup(Collection<HbckInfo> overlap)
2134 throws IOException {
2135 Preconditions.checkNotNull(overlap);
        Preconditions.checkArgument(overlap.size() > 0);
2137
2138 if (!this.fixOverlaps) {
2139 LOG.warn("Not attempting to repair overlaps.");
2140 return;
2141 }
2142
2143 if (overlap.size() > maxMerge) {
2144 LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2145 "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2146 if (sidelineBigOverlaps) {
2147
2148 sidelineBigOverlaps(overlap);
2149 }
2150 return;
2151 }
2152
2153 mergeOverlaps(overlap);
2154 }
2155
2156 void mergeOverlaps(Collection<HbckInfo> overlap)
2157 throws IOException {
2158 LOG.info("== Merging regions into one region: "
2159 + Joiner.on(",").join(overlap));
2160
2161 Pair<byte[], byte[]> range = null;
2162 for (HbckInfo hi : overlap) {
2163 if (range == null) {
2164 range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2165 } else {
2166 if (RegionSplitCalculator.BYTES_COMPARATOR
2167 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2168 range.setFirst(hi.getStartKey());
2169 }
2170 if (RegionSplitCalculator.BYTES_COMPARATOR
2171 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2172 range.setSecond(hi.getEndKey());
2173 }
2174 }
2175
2176 LOG.debug("Closing region before moving data around: " + hi);
2177 LOG.debug("Contained region dir before close");
2178 debugLsr(hi.getHdfsRegionDir());
2179 try {
2180 LOG.info("Closing region: " + hi);
2181 closeRegion(hi);
2182 } catch (IOException ioe) {
2183 LOG.warn("Was unable to close region " + hi
2184 + ". Just continuing... ", ioe);
2185 } catch (InterruptedException e) {
2186 LOG.warn("Was unable to close region " + hi
2187 + ". Just continuing... ", e);
2188 }
2189
2190 try {
2191 LOG.info("Offlining region: " + hi);
2192 offline(hi.getRegionName());
2193 } catch (IOException ioe) {
2194 LOG.warn("Unable to offline region from master: " + hi
2195 + ". Just continuing... ", ioe);
2196 }
2197 }
2198
2199
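        // All overlapping regions are now closed and offline; create one container region
        // covering the combined key range and move their data into it.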
2200 HTableDescriptor htd = getTableInfo().getHTD();
2201
2202 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2203 range.getSecond());
2204 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2205 LOG.info("Created new empty container region: " +
2206 newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2207 debugLsr(region.getRegionFileSystem().getRegionDir());
2208
2209
        boolean didFix = false;
2211 Path target = region.getRegionFileSystem().getRegionDir();
2212 for (HbckInfo contained : overlap) {
2213 LOG.info("Merging " + contained + " into " + target );
2214 int merges = mergeRegionDirs(target, contained);
2215 if (merges > 0) {
2216 didFix = true;
2217 }
2218 }
2219 if (didFix) {
2220 fixes++;
2221 }
2222 }
2223
2224
2225
2226
2227
2228
2229
2230
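      /**
       * Sideline the largest regions of an oversized overlap group so that at most maxMerge
       * regions remain to merge. Sidelined region dirs must be bulk loaded back afterwards.
       */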
2231 void sidelineBigOverlaps(
2232 Collection<HbckInfo> bigOverlap) throws IOException {
2233 int overlapsToSideline = bigOverlap.size() - maxMerge;
2234 if (overlapsToSideline > maxOverlapsToSideline) {
2235 overlapsToSideline = maxOverlapsToSideline;
2236 }
2237 List<HbckInfo> regionsToSideline =
2238 RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2239 FileSystem fs = FileSystem.get(conf);
2240 for (HbckInfo regionToSideline: regionsToSideline) {
2241 try {
2242 LOG.info("Closing region: " + regionToSideline);
2243 closeRegion(regionToSideline);
2244 } catch (IOException ioe) {
2245 LOG.warn("Was unable to close region " + regionToSideline
2246 + ". Just continuing... ", ioe);
2247 } catch (InterruptedException e) {
2248 LOG.warn("Was unable to close region " + regionToSideline
2249 + ". Just continuing... ", e);
2250 }
2251
2252 try {
2253 LOG.info("Offlining region: " + regionToSideline);
2254 offline(regionToSideline.getRegionName());
2255 } catch (IOException ioe) {
2256 LOG.warn("Unable to offline region from master: " + regionToSideline
2257 + ". Just continuing... ", ioe);
2258 }
2259
        LOG.info("Sidelining big overlapped region: " + regionToSideline.toString());
2261 Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2262 if (sidelineRegionDir != null) {
2263 sidelinedRegions.put(sidelineRegionDir, regionToSideline);
          LOG.info("Sidelined big overlapped region "
              + regionToSideline.getRegionNameAsString()
              + " to " + sidelineRegionDir.toString());
2267 fixes++;
2268 }
2269 }
2270 }
2271 }
2272
2273
2274
2275
2276
2277
2278
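    /**
     * Check the region chain of this table for holes, overlaps, duplicate start keys and
     * degenerate regions, delegating each problem found to the given handler.
     * @return true if no new errors were reported for this table
     */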
2279 public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2280
2281
2282
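      // No need to check the region chain of a disabled table.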
2283 if (disabledTables.contains(this.tableName)) {
2284 return true;
2285 }
2286 int originalErrorsCount = errors.getErrorList().size();
2287 Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2288 SortedSet<byte[]> splits = sc.getSplits();
2289
2290 byte[] prevKey = null;
2291 byte[] problemKey = null;
2292 for (byte[] key : splits) {
2293 Collection<HbckInfo> ranges = regions.get(key);
2294 if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2295 for (HbckInfo rng : ranges) {
2296 handler.handleRegionStartKeyNotEmpty(rng);
2297 }
2298 }
2299
2300
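        // Check for degenerate ranges (start key equal to end key).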
2301 for (HbckInfo rng : ranges) {
2302
2303 byte[] endKey = rng.getEndKey();
2304 endKey = (endKey.length == 0) ? null : endKey;
2305 if (Bytes.equals(rng.getStartKey(),endKey)) {
2306 handler.handleDegenerateRegion(rng);
2307 }
2308 }
2309
2310 if (ranges.size() == 1) {
2311
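          // Exactly one region covers this range; any open problem group ends here.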
2312 if (problemKey != null) {
2313 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2314 }
2315 problemKey = null;
2316 } else if (ranges.size() > 1) {
2317
2318
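          // More than one region covers this range: record the overlap group and report
          // each conflicting pair.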
2319 if (problemKey == null) {
2320
2321 LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2322 problemKey = key;
2323 }
2324 overlapGroups.putAll(problemKey, ranges);
2325
2326
2327 ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2328
2329 for (HbckInfo r1 : ranges) {
2330 subRange.remove(r1);
2331 for (HbckInfo r2 : subRange) {
2332 if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
2333 handler.handleDuplicateStartKeys(r1,r2);
2334 } else {
2335
2336 handler.handleOverlapInRegionChain(r1, r2);
2337 }
2338 }
2339 }
2340
2341 } else if (ranges.size() == 0) {
2342 if (problemKey != null) {
2343 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2344 }
2345 problemKey = null;
2346
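          // No region covers the range from this key up to the next split point: a hole.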
2347 byte[] holeStopKey = sc.getSplits().higher(key);
2348
2349 if (holeStopKey != null) {
2350
2351 handler.handleHoleInRegionChain(key, holeStopKey);
2352 }
2353 }
2354 prevKey = key;
2355 }
2356
2357
2358
2359 if (prevKey != null) {
2360 handler.handleRegionEndKeyNotEmpty(prevKey);
2361 }
2362
2363 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2364 handler.handleOverlapGroup(overlap);
2365 }
2366
2367 if (details) {
2368
2369 errors.print("---- Table '" + this.tableName
2370 + "': region split map");
2371 dump(splits, regions);
2372 errors.print("---- Table '" + this.tableName
2373 + "': overlap groups");
2374 dumpOverlapProblems(overlapGroups);
2375 errors.print("There are " + overlapGroups.keySet().size()
2376 + " overlap groups with " + overlapGroups.size()
2377 + " overlapping regions");
2378 }
2379 if (!sidelinedRegions.isEmpty()) {
2380 LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2381 errors.print("---- Table '" + this.tableName
2382 + "': sidelined big overlapped regions");
2383 dumpSidelinedRegions(sidelinedRegions);
2384 }
2385 return errors.getErrorList().size() == originalErrorsCount;
2386 }
2387
2388
2389
2390
2391
2392
2393
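    /**
     * Dump the region split map in a human readable form, for debugging.
     */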
2394 void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2395
2396 StringBuilder sb = new StringBuilder();
2397 for (byte[] k : splits) {
2398 sb.setLength(0);
2399 sb.append(Bytes.toStringBinary(k) + ":\t");
2400 for (HbckInfo r : regions.get(k)) {
2401 sb.append("[ "+ r.toString() + ", "
2402 + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2403 }
2404 errors.print(sb.toString());
2405 }
2406 }
2407 }
2408
2409 public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2410
2411
2412 for (byte[] k : regions.keySet()) {
2413 errors.print(Bytes.toStringBinary(k) + ":");
2414 for (HbckInfo r : regions.get(k)) {
2415 errors.print("[ " + r.toString() + ", "
2416 + Bytes.toStringBinary(r.getEndKey()) + "]");
2417 }
2418 errors.print("----");
2419 }
2420 }
2421
2422 public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2423 for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
2424 TableName tableName = entry.getValue().getTableName();
2425 Path path = entry.getKey();
2426 errors.print("This sidelined region dir should be bulk loaded: "
2427 + path.toString());
2428 errors.print("Bulk load command looks like: "
2429 + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
2430 + path.toUri().getPath() + " "+ tableName);
2431 }
2432 }
2433
2434 public Multimap<byte[], HbckInfo> getOverlapGroups(
2435 TableName table) {
2436 TableInfo ti = tablesInfo.get(table);
2437 return ti.overlapGroups;
2438 }
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
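  /**
   * Return the descriptors of user tables whose first region's .META. entry has not been
   * modified within the configured timelag; tables modified more recently are counted in
   * numSkipped instead.
   */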
2449 HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2450 List<TableName> tableNames = new ArrayList<TableName>();
2451 long now = System.currentTimeMillis();
2452
2453 for (HbckInfo hbi : regionInfoMap.values()) {
2454 MetaEntry info = hbi.metaEntry;
2455
2456
2457
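      // A region with an empty start key is the first region of its table; only pick
      // tables whose first region has been stable for at least the timelag window.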
2458 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2459 if (info.modTime + timelag < now) {
2460 tableNames.add(info.getTableName());
2461 } else {
2462 numSkipped.incrementAndGet();
2463 }
2464 }
2465 }
2466 return getHTableDescriptors(tableNames);
2467 }
2468
2469 HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
2470 HTableDescriptor[] htd = new HTableDescriptor[0];
2471 try {
2472 LOG.info("getHTableDescriptors == tableNames => " + tableNames);
2473 htd = new HBaseAdmin(getConf()).getTableDescriptorsByTableName(tableNames);
2474 } catch (IOException e) {
2475 LOG.debug("Exception getting table descriptors", e);
2476 }
2477 return htd;
2478 }
2479
2480
2481
2482
2483
2484
2485
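  /**
   * Get the HbckInfo registered under the given encoded region name, creating and
   * registering an empty one if the region has not been seen before.
   */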
2486 private synchronized HbckInfo getOrCreateInfo(String name) {
2487 HbckInfo hbi = regionInfoMap.get(name);
2488 if (hbi == null) {
2489 hbi = new HbckInfo(null);
2490 regionInfoMap.put(name, hbi);
2491 }
2492 return hbi;
2493 }
2494
2495 private void checkAndFixTableLocks() throws IOException {
2496 TableLockChecker checker = new TableLockChecker(createZooKeeperWatcher(), errors);
2497 checker.checkTableLocks();
2498
2499 if (this.fixTableLocks) {
2500 checker.fixExpiredTableLocks();
2501 }
2502 }
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
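  /**
   * Check that the .META. region is deployed on exactly one region server, optionally
   * (re)assigning it when -fixAssignments is enabled.
   * @return true if the .META. deployment is consistent
   */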
2513 boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
2514 List<HbckInfo> metaRegions = Lists.newArrayList();
2515 for (HbckInfo value : regionInfoMap.values()) {
2516 if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
2517 metaRegions.add(value);
2518 }
2519 }
2520
2521
2522
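    // There should be exactly one .META. entry; check how many servers it is deployed on.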
2523 HbckInfo metaHbckInfo = metaRegions.get(0);
2524 List<ServerName> servers = metaHbckInfo.deployedOn;
2525 if (servers.size() != 1) {
2526 if (servers.size() == 0) {
        errors.reportError(ERROR_CODE.NO_META_REGION,
            ".META. is not deployed on any region server.");
2528 if (shouldFixAssignments()) {
2529 errors.print("Trying to fix a problem with .META...");
2530 setShouldRerun();
2531
2532 HBaseFsckRepair.fixUnassigned(admin, metaHbckInfo.metaEntry);
2533 HBaseFsckRepair.waitUntilAssigned(admin, metaHbckInfo.metaEntry);
2534 }
2535 } else if (servers.size() > 1) {
        errors.reportError(ERROR_CODE.MULTI_META_REGION,
            ".META. is deployed on more than one region server.");
2538 if (shouldFixAssignments()) {
2539 errors.print("Trying to fix a problem with .META...");
2540 setShouldRerun();
2541
2542 HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
2543 }
2544 }
2545
2546 return false;
2547 }
2548
2549 return true;
2550 }
2551
2552
2553
2554
2555
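  /**
   * Scan .META. and record every region found there in regionInfoMap, along with the
   * server it is assigned to and the modification time of its row.
   */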
2556 boolean loadMetaEntries() throws IOException {
2557 MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
2558 int countRecord = 1;
2559
2560
      // Sorts cells by timestamp so the newest cell of a .META. row can be found below.
      final Comparator<KeyValue> comp = new Comparator<KeyValue>() {
        @Override
        public int compare(KeyValue k1, KeyValue k2) {
          long t1 = k1.getTimestamp();
          long t2 = k2.getTimestamp();
          // Compare explicitly rather than subtracting, which could overflow the int cast.
          return t1 < t2 ? -1 : (t1 == t2 ? 0 : 1);
        }
      };
2566
2567 public boolean processRow(Result result) throws IOException {
2568 try {
2569
2570
2571 long ts = Collections.max(result.list(), comp).getTimestamp();
2572 Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(result);
2573 if (pair == null || pair.getFirst() == null) {
2574 emptyRegionInfoQualifiers.add(result);
2575 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2576 "Empty REGIONINFO_QUALIFIER found in .META.");
2577 return true;
2578 }
2579 ServerName sn = null;
2580 if (pair.getSecond() != null) {
2581 sn = pair.getSecond();
2582 }
2583 HRegionInfo hri = pair.getFirst();
2584 if (!(isTableIncluded(hri.getTableName())
2585 || hri.isMetaRegion())) {
2586 return true;
2587 }
2588 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
2589 MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
2590 HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
2591 if (previous == null) {
2592 regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
2593 } else if (previous.metaEntry == null) {
2594 previous.metaEntry = m;
2595 } else {
            throw new IOException("Two entries in .META. are the same: " + previous);
2597 }
2598
2599
2600 if (countRecord % 100 == 0) {
2601 errors.progress();
2602 }
2603 countRecord++;
2604 return true;
2605 } catch (RuntimeException e) {
2606 LOG.error("Result=" + result);
2607 throw e;
2608 }
2609 }
2610 };
2611 if (!checkMetaOnly) {
2612
2613 MetaScanner.metaScan(getConf(), visitor);
2614 }
2615
2616 errors.print("");
2617 return true;
2618 }
2619
2620
2621
2622
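  /**
   * Region information read from a .META. row: the HRegionInfo plus the assigned server,
   * the row's modification time, and any daughter regions of a split.
   */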
2623 static class MetaEntry extends HRegionInfo {
2624 ServerName regionServer;
2625 long modTime;
2626 HRegionInfo splitA, splitB;
2627
2628 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
2629 this(rinfo, regionServer, modTime, null, null);
2630 }
2631
2632 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
2633 HRegionInfo splitA, HRegionInfo splitB) {
2634 super(rinfo);
2635 this.regionServer = regionServer;
2636 this.modTime = modTime;
2637 this.splitA = splitA;
2638 this.splitB = splitB;
2639 }
2640
    @Override
    public boolean equals(Object o) {
      if (!super.equals(o) || !(o instanceof MetaEntry)) {
        return false;
      }
      MetaEntry me = (MetaEntry) o;
      if (regionServer == null ? me.regionServer != null
          : !regionServer.equals(me.regionServer)) {
        return false;
      }
      return (modTime == me.modTime);
    }
2653
2654 @Override
2655 public int hashCode() {
2656 int hash = Arrays.hashCode(getRegionName());
2657 hash ^= getRegionId();
2658 hash ^= Arrays.hashCode(getStartKey());
2659 hash ^= Arrays.hashCode(getEndKey());
2660 hash ^= Boolean.valueOf(isOffline()).hashCode();
2661 hash ^= getTableName().hashCode();
2662 if (regionServer != null) {
2663 hash ^= regionServer.hashCode();
2664 }
2665 hash ^= modTime;
2666 return hash;
2667 }
2668 }
2669
2670
2671
2672
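  /**
   * Region information gathered from the region's directory in HDFS.
   */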
2673 static class HdfsEntry {
2674 HRegionInfo hri;
2675 Path hdfsRegionDir = null;
2676 long hdfsRegionDirModTime = 0;
2677 boolean hdfsRegioninfoFilePresent = false;
2678 boolean hdfsOnlyEdits = false;
2679 }
2680
2681
2682
2683
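  /**
   * Region information as reported by an online region server.
   */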
2684 static class OnlineEntry {
2685 HRegionInfo hri;
2686 ServerName hsa;
2687
    @Override
    public String toString() {
2689 return hsa.toString() + ";" + hri.getRegionNameAsString();
2690 }
2691 }
2692
2693
2694
2695
2696
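  /**
   * Aggregates everything hbck knows about one region, gathered from the three sources it
   * consults: .META., HDFS, and the online region servers.
   */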
2697 public static class HbckInfo implements KeyRange {
2698 private MetaEntry metaEntry = null;
2699 private HdfsEntry hdfsEntry = null;
2700 private List<OnlineEntry> deployedEntries = Lists.newArrayList();
2701 private List<ServerName> deployedOn = Lists.newArrayList();
2702 private boolean skipChecks = false;
2703
2704 HbckInfo(MetaEntry metaEntry) {
2705 this.metaEntry = metaEntry;
2706 }
2707
2708 public synchronized void addServer(HRegionInfo hri, ServerName server) {
2709 OnlineEntry rse = new OnlineEntry() ;
2710 rse.hri = hri;
2711 rse.hsa = server;
2712 this.deployedEntries.add(rse);
2713 this.deployedOn.add(server);
2714 }
2715
    @Override
    public synchronized String toString() {
2717 StringBuilder sb = new StringBuilder();
2718 sb.append("{ meta => ");
2719 sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
2720 sb.append( ", hdfs => " + getHdfsRegionDir());
2721 sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
2722 sb.append(" }");
2723 return sb.toString();
2724 }
2725
2726 @Override
2727 public byte[] getStartKey() {
2728 if (this.metaEntry != null) {
2729 return this.metaEntry.getStartKey();
2730 } else if (this.hdfsEntry != null) {
2731 return this.hdfsEntry.hri.getStartKey();
2732 } else {
2733 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2734 return null;
2735 }
2736 }
2737
2738 @Override
2739 public byte[] getEndKey() {
2740 if (this.metaEntry != null) {
2741 return this.metaEntry.getEndKey();
2742 } else if (this.hdfsEntry != null) {
2743 return this.hdfsEntry.hri.getEndKey();
2744 } else {
        LOG.error("Entry " + this + " has no meta or hdfs region end key.");
2746 return null;
2747 }
2748 }
2749
2750 public TableName getTableName() {
2751 if (this.metaEntry != null) {
2752 return this.metaEntry.getTableName();
2753 } else if (this.hdfsEntry != null) {
2754
2755
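      // An HDFS-only entry is only guaranteed to have a region dir path; derive the
      // table name from the parent directory.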
2756 Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
2757 return FSUtils.getTableName(tableDir);
2758 } else {
2759
2760
2761 return null;
2762 }
2763 }
2764
2765 public String getRegionNameAsString() {
2766 if (metaEntry != null) {
2767 return metaEntry.getRegionNameAsString();
2768 } else if (hdfsEntry != null) {
2769 if (hdfsEntry.hri != null) {
2770 return hdfsEntry.hri.getRegionNameAsString();
2771 }
2772 }
2773 return null;
2774 }
2775
2776 public byte[] getRegionName() {
2777 if (metaEntry != null) {
2778 return metaEntry.getRegionName();
2779 } else if (hdfsEntry != null) {
2780 return hdfsEntry.hri.getRegionName();
2781 } else {
2782 return null;
2783 }
2784 }
2785
2786 Path getHdfsRegionDir() {
2787 if (hdfsEntry == null) {
2788 return null;
2789 }
2790 return hdfsEntry.hdfsRegionDir;
2791 }
2792
2793 boolean containsOnlyHdfsEdits() {
2794 if (hdfsEntry == null) {
2795 return false;
2796 }
2797 return hdfsEntry.hdfsOnlyEdits;
2798 }
2799
2800 boolean isHdfsRegioninfoPresent() {
2801 if (hdfsEntry == null) {
2802 return false;
2803 }
2804 return hdfsEntry.hdfsRegioninfoFilePresent;
2805 }
2806
2807 long getModTime() {
2808 if (hdfsEntry == null) {
2809 return 0;
2810 }
2811 return hdfsEntry.hdfsRegionDirModTime;
2812 }
2813
2814 HRegionInfo getHdfsHRI() {
2815 if (hdfsEntry == null) {
2816 return null;
2817 }
2818 return hdfsEntry.hri;
2819 }
2820
2821 public void setSkipChecks(boolean skipChecks) {
2822 this.skipChecks = skipChecks;
2823 }
2824
2825 public boolean isSkipChecks() {
2826 return skipChecks;
2827 }
2828 }
2829
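  /**
   * Orders HbckInfo by table name, then start key, then end key (empty end keys are
   * compared as null), with the HDFS region id as a final tie breaker.
   */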
2830 final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
2831 @Override
2832 public int compare(HbckInfo l, HbckInfo r) {
2833 if (l == r) {
2834
2835 return 0;
2836 }
2837
2838 int tableCompare = l.getTableName().compareTo(r.getTableName());
2839 if (tableCompare != 0) {
2840 return tableCompare;
2841 }
2842
2843 int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2844 l.getStartKey(), r.getStartKey());
2845 if (startComparison != 0) {
2846 return startComparison;
2847 }
2848
2849
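      // Empty end keys (end of table) are represented as null for this comparison.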
2850 byte[] endKey = r.getEndKey();
2851 endKey = (endKey.length == 0) ? null : endKey;
2852 byte[] endKey2 = l.getEndKey();
2853 endKey2 = (endKey2.length == 0) ? null : endKey2;
2854 int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2855 endKey2, endKey);
2856
2857 if (endComparison != 0) {
2858 return endComparison;
2859 }
2860
2861
2862
2863 if (l.hdfsEntry == null && r.hdfsEntry == null) {
2864 return 0;
2865 }
2866 if (l.hdfsEntry == null && r.hdfsEntry != null) {
2867 return 1;
2868 }
2869
2870 if (r.hdfsEntry == null) {
2871 return -1;
2872 }
2873
      // Both have HDFS info: break the tie by region id (the region creation timestamp),
      // comparing explicitly to avoid int overflow.
      long lId = l.hdfsEntry.hri.getRegionId();
      long rId = r.hdfsEntry.hri.getRegionId();
      return lId < rId ? -1 : (lId == rId ? 0 : 1);
2875 }
2876 };
2877
2878
2879
2880
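  /**
   * Print a per-table summary: consistency status, number of regions, and the servers
   * each table is deployed on.
   */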
2881 private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
2882 StringBuilder sb = new StringBuilder();
2883 errors.print("Summary:");
2884 for (TableInfo tInfo : tablesInfo.values()) {
2885 if (errors.tableHasErrors(tInfo)) {
2886 errors.print("Table " + tInfo.getName() + " is inconsistent.");
2887 } else {
2888 errors.print(" " + tInfo.getName() + " is okay.");
2889 }
2890 errors.print(" Number of regions: " + tInfo.getNumRegions());
2891 sb.setLength(0);
2892 sb.append(" Deployed on: ");
2893 for (ServerName server : tInfo.deployedOn) {
2894 sb.append(" " + server.toString());
2895 }
2896 errors.print(sb.toString());
2897 }
2898 }
2899
2900 static ErrorReporter getErrorReporter(
2901 final Configuration conf) throws ClassNotFoundException {
2902 Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
2903 return (ErrorReporter)ReflectionUtils.newInstance(reporter, conf);
2904 }
2905
2906 public interface ErrorReporter {
2907 enum ERROR_CODE {
2908 UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
2909 NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
2910 MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
2911 FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
2912 HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
2913 ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
2914 WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK
2915 }
2916 void clear();
2917 void report(String message);
2918 void reportError(String message);
2919 void reportError(ERROR_CODE errorCode, String message);
2920 void reportError(ERROR_CODE errorCode, String message, TableInfo table);
2921 void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
2922 void reportError(
2923 ERROR_CODE errorCode,
2924 String message,
2925 TableInfo table,
2926 HbckInfo info1,
2927 HbckInfo info2
2928 );
2929 int summarize();
2930 void detail(String details);
2931 ArrayList<ERROR_CODE> getErrorList();
2932 void progress();
2933 void print(String message);
2934 void resetErrors();
2935 boolean tableHasErrors(TableInfo table);
2936 }
2937
2938 static class PrintingErrorReporter implements ErrorReporter {
2939 public int errorCount = 0;
2940 private int showProgress;
2941
2942 Set<TableInfo> errorTables = new HashSet<TableInfo>();
2943
2944
2945 private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
2946
2947 public void clear() {
2948 errorTables.clear();
2949 errorList.clear();
2950 errorCount = 0;
2951 }
2952
2953 public synchronized void reportError(ERROR_CODE errorCode, String message) {
2954 if (errorCode == ERROR_CODE.WRONG_USAGE) {
2955 System.err.println(message);
2956 return;
2957 }
2958
2959 errorList.add(errorCode);
2960 if (!summary) {
2961 System.out.println("ERROR: " + message);
2962 }
2963 errorCount++;
2964 showProgress = 0;
2965 }
2966
2967 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2968 errorTables.add(table);
2969 reportError(errorCode, message);
2970 }
2971
2972 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
2973 HbckInfo info) {
2974 errorTables.add(table);
2975 String reference = "(region " + info.getRegionNameAsString() + ")";
2976 reportError(errorCode, reference + " " + message);
2977 }
2978
2979 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
2980 HbckInfo info1, HbckInfo info2) {
2981 errorTables.add(table);
2982 String reference = "(regions " + info1.getRegionNameAsString()
2983 + " and " + info2.getRegionNameAsString() + ")";
2984 reportError(errorCode, reference + " " + message);
2985 }
2986
2987 public synchronized void reportError(String message) {
2988 reportError(ERROR_CODE.UNKNOWN, message);
2989 }
2990
2991
2992
2993
2994
2995
2996 public synchronized void report(String message) {
2997 if (! summary) {
2998 System.out.println("ERROR: " + message);
2999 }
3000 showProgress = 0;
3001 }
3002
3003 public synchronized int summarize() {
3004 System.out.println(Integer.toString(errorCount) +
3005 " inconsistencies detected.");
3006 if (errorCount == 0) {
3007 System.out.println("Status: OK");
3008 return 0;
3009 } else {
3010 System.out.println("Status: INCONSISTENT");
3011 return -1;
3012 }
3013 }
3014
3015 public ArrayList<ERROR_CODE> getErrorList() {
3016 return errorList;
3017 }
3018
3019 public synchronized void print(String message) {
3020 if (!summary) {
3021 System.out.println(message);
3022 }
3023 }
3024
3025 @Override
3026 public boolean tableHasErrors(TableInfo table) {
3027 return errorTables.contains(table);
3028 }
3029
3030 @Override
3031 public void resetErrors() {
3032 errorCount = 0;
3033 }
3034
3035 public synchronized void detail(String message) {
3036 if (details) {
3037 System.out.println(message);
3038 }
3039 showProgress = 0;
3040 }
3041
3042 public synchronized void progress() {
3043 if (showProgress++ == 10) {
3044 if (!summary) {
3045 System.out.print(".");
3046 }
3047 showProgress = 0;
3048 }
3049 }
3050 }
3051
3052
3053
3054
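  /**
   * Callable that contacts one region server and records every region it is serving.
   */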
3055 static class WorkItemRegion implements Callable<Void> {
3056 private HBaseFsck hbck;
3057 private ServerName rsinfo;
3058 private ErrorReporter errors;
3059 private HConnection connection;
3060
3061 WorkItemRegion(HBaseFsck hbck, ServerName info,
3062 ErrorReporter errors, HConnection connection) {
3063 this.hbck = hbck;
3064 this.rsinfo = info;
3065 this.errors = errors;
3066 this.connection = connection;
3067 }
3068
3069 @Override
3070 public synchronized Void call() throws IOException {
3071 errors.progress();
3072 try {
3073 BlockingInterface server = connection.getAdmin(rsinfo);
3074
3075
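        // Ask the region server for the regions it is currently serving.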
3076 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3077 regions = filterRegions(regions);
3078
3079 if (details) {
3080 errors.detail("RegionServer: " + rsinfo.getServerName() +
3081 " number of regions: " + regions.size());
3082 for (HRegionInfo rinfo: regions) {
3083 errors.detail(" " + rinfo.getRegionNameAsString() +
3084 " id: " + rinfo.getRegionId() +
3085 " encoded_name: " + rinfo.getEncodedName() +
3086 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3087 " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3088 }
3089 }
3090
3091
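        // Record each deployed region so it can later be matched against .META. and HDFS.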
3092 for (HRegionInfo r:regions) {
3093 HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3094 hbi.addServer(r, rsinfo);
3095 }
3096 } catch (IOException e) {
3097 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3098 " Unable to fetch region information. " + e);
3099 throw e;
3100 }
3101 return null;
3102 }
3103
3104 private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3105 List<HRegionInfo> ret = Lists.newArrayList();
3106 for (HRegionInfo hri : regions) {
3107 if (hri.isMetaTable() || (!hbck.checkMetaOnly
3108 && hbck.isTableIncluded(hri.getTableName()))) {
3109 ret.add(hri);
3110 }
3111 }
3112 return ret;
3113 }
3114 }
3115
3116
3117
3118
3119
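  /**
   * Callable that scans one table directory in HDFS and records the region directories
   * found under it.
   */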
3120 static class WorkItemHdfsDir implements Callable<Void> {
3121 private HBaseFsck hbck;
3122 private FileStatus tableDir;
3123 private ErrorReporter errors;
3124 private FileSystem fs;
3125
3126 WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3127 FileStatus status) {
3128 this.hbck = hbck;
3129 this.fs = fs;
3130 this.tableDir = status;
3131 this.errors = errors;
3132 }
3133
3134 @Override
3135 public synchronized Void call() throws IOException {
      try {
        // The original body of this method was missing here; what follows is a best-effort
        // reconstruction of the region directory scan it performs.
        // Level 2 of the layout: <hbase.rootdir>/<table>/<encoded region name>/
        FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
        for (FileStatus regionDir : regionDirs) {
          String encodedName = regionDir.getPath().getName();
          // Skip entries that are not region directories (e.g. .tableinfo, .tmp).
          if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
            continue;
          }

          LOG.debug("Loading region info from hdfs: " + regionDir.getPath());
          HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
          HdfsEntry he = new HdfsEntry();
          synchronized (hbi) {
            if (hbi.getHdfsRegionDir() != null) {
              errors.print("Directory " + encodedName + " duplicate??"
                  + hbi.getHdfsRegionDir());
            }

            he.hdfsRegionDir = regionDir.getPath();
            he.hdfsRegionDirModTime = regionDir.getModificationTime();
            Path regioninfoFile = new Path(he.hdfsRegionDir, HRegionFileSystem.REGION_INFO_FILE);
            he.hdfsRegioninfoFilePresent = fs.exists(regioninfoFile);
            // The .regioninfo file is parsed later; a missing file is reported at that point.

            // Flag region dirs that contain nothing but recovered edits (for example the
            // leftovers of a split); such dirs hold no store files.
            he.hdfsOnlyEdits = true;
            FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
            Path ePath = HLogUtil.getRegionDirRecoveredEditsDir(regionDir.getPath());
            for (FileStatus subDir : subDirs) {
              String sdName = subDir.getPath().getName();
              if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
                he.hdfsOnlyEdits = false;
                break;
              }
            }
            hbi.hdfsEntry = he;
          }
        }
      } catch (IOException e) {
        // Unable to list the table directory in HDFS.
        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
            + tableDir.getPath().getName()
            + " Unable to fetch region information. " + e);
        throw e;
      }
      return null;
    }
  }
3190
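  /**
   * Callable that loads the .regioninfo file of a single region directory in HDFS.
   */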
3191 static class WorkItemHdfsRegionInfo implements Callable<Void> {
3192 private HbckInfo hbi;
3193 private HBaseFsck hbck;
3194 private ErrorReporter errors;
3195
3196 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3197 this.hbi = hbi;
3198 this.hbck = hbck;
3199 this.errors = errors;
3200 }
3201
3202 @Override
3203 public synchronized Void call() throws IOException {
3204
3205 if (hbi.getHdfsHRI() == null) {
3206 try {
3207 hbck.loadHdfsRegioninfo(hbi);
3208 } catch (IOException ioe) {
3209 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3210 + hbi.getTableName() + " in hdfs dir "
3211 + hbi.getHdfsRegionDir()
3212 + "! It may be an invalid format or version file. Treating as "
3213 + "an orphaned regiondir.";
3214 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3215 try {
3216 hbck.debugLsr(hbi.getHdfsRegionDir());
3217 } catch (IOException ioe2) {
3218 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3219 throw ioe2;
3220 }
3221 hbck.orphanHdfsDirs.add(hbi);
3222 throw ioe;
3223 }
3224 }
3225 return null;
3226 }
3227 };
3228
3229
3230
3231
3232
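  /**
   * Display the full report of every region instead of only the summary.
   */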
3233 public void setDisplayFullReport() {
3234 details = true;
3235 }
3236
3237
3238
3239
3240
3241 void setSummary() {
3242 summary = true;
3243 }
3244
3245
3246
3247
3248
3249 void setCheckMetaOnly() {
3250 checkMetaOnly = true;
3251 }
3252
3253
3254
3255
3256
3257 public void setFixTableLocks(boolean shouldFix) {
3258 fixTableLocks = shouldFix;
3259 }
3260
3261
3262
3263
3264
3265
3266
3267 void setShouldRerun() {
3268 rerun = true;
3269 }
3270
3271 boolean shouldRerun() {
3272 return rerun;
3273 }
3274
3275
3276
3277
3278
3279 public void setFixAssignments(boolean shouldFix) {
3280 fixAssignments = shouldFix;
3281 }
3282
3283 boolean shouldFixAssignments() {
3284 return fixAssignments;
3285 }
3286
3287 public void setFixMeta(boolean shouldFix) {
3288 fixMeta = shouldFix;
3289 }
3290
3291 boolean shouldFixMeta() {
3292 return fixMeta;
3293 }
3294
3295 public void setFixEmptyMetaCells(boolean shouldFix) {
3296 fixEmptyMetaCells = shouldFix;
3297 }
3298
3299 boolean shouldFixEmptyMetaCells() {
3300 return fixEmptyMetaCells;
3301 }
3302
3303 public void setCheckHdfs(boolean checking) {
3304 checkHdfs = checking;
3305 }
3306
3307 boolean shouldCheckHdfs() {
3308 return checkHdfs;
3309 }
3310
3311 public void setFixHdfsHoles(boolean shouldFix) {
3312 fixHdfsHoles = shouldFix;
3313 }
3314
3315 boolean shouldFixHdfsHoles() {
3316 return fixHdfsHoles;
3317 }
3318
3319 public void setFixTableOrphans(boolean shouldFix) {
3320 fixTableOrphans = shouldFix;
3321 }
3322
3323 boolean shouldFixTableOrphans() {
3324 return fixTableOrphans;
3325 }
3326
3327 public void setFixHdfsOverlaps(boolean shouldFix) {
3328 fixHdfsOverlaps = shouldFix;
3329 }
3330
3331 boolean shouldFixHdfsOverlaps() {
3332 return fixHdfsOverlaps;
3333 }
3334
3335 public void setFixHdfsOrphans(boolean shouldFix) {
3336 fixHdfsOrphans = shouldFix;
3337 }
3338
3339 boolean shouldFixHdfsOrphans() {
3340 return fixHdfsOrphans;
3341 }
3342
3343 public void setFixVersionFile(boolean shouldFix) {
3344 fixVersionFile = shouldFix;
3345 }
3346
3347 public boolean shouldFixVersionFile() {
3348 return fixVersionFile;
3349 }
3350
3351 public void setSidelineBigOverlaps(boolean sbo) {
3352 this.sidelineBigOverlaps = sbo;
3353 }
3354
3355 public boolean shouldSidelineBigOverlaps() {
3356 return sidelineBigOverlaps;
3357 }
3358
3359 public void setFixSplitParents(boolean shouldFix) {
3360 fixSplitParents = shouldFix;
3361 }
3362
3363 boolean shouldFixSplitParents() {
3364 return fixSplitParents;
3365 }
3366
3367 public void setFixReferenceFiles(boolean shouldFix) {
3368 fixReferenceFiles = shouldFix;
3369 }
3370
3371 boolean shouldFixReferenceFiles() {
3372 return fixReferenceFiles;
3373 }
3374
3375 public boolean shouldIgnorePreCheckPermission() {
3376 return ignorePreCheckPermission;
3377 }
3378
3379 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3380 this.ignorePreCheckPermission = ignorePreCheckPermission;
3381 }
3382
3383
3384
3385
3386 public void setMaxMerge(int mm) {
3387 this.maxMerge = mm;
3388 }
3389
3390 public int getMaxMerge() {
3391 return maxMerge;
3392 }
3393
3394 public void setMaxOverlapsToSideline(int mo) {
3395 this.maxOverlapsToSideline = mo;
3396 }
3397
3398 public int getMaxOverlapsToSideline() {
3399 return maxOverlapsToSideline;
3400 }
3401
3402
3403
3404
3405
3406 boolean isTableIncluded(TableName table) {
3407 return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
3408 }
3409
3410 public void includeTable(TableName table) {
3411 tablesIncluded.add(table);
3412 }
3413
3414 Set<TableName> getIncludedTables() {
3415 return new HashSet<TableName>(tablesIncluded);
3416 }
3417
3418
3419
3420
3421
3422
3423 public void setTimeLag(long seconds) {
3424 timelag = seconds * 1000;
3425 }
3426
3427
3428
3429
3430
3431 public void setSidelineDir(String sidelineDir) {
3432 this.sidelineDir = new Path(sidelineDir);
3433 }
3434
3435 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
3436 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3437 }
3438
3439 public HFileCorruptionChecker getHFilecorruptionChecker() {
3440 return hfcc;
3441 }
3442
3443 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3444 this.hfcc = hfcc;
3445 }
3446
3447 public void setRetCode(int code) {
3448 this.retcode = code;
3449 }
3450
3451 public int getRetCode() {
3452 return retcode;
3453 }
3454
3455 protected HBaseFsck printUsageAndExit() {
3456 StringWriter sw = new StringWriter(2048);
3457 PrintWriter out = new PrintWriter(sw);
3458 out.println("Usage: fsck [opts] {only tables}");
3459 out.println(" where [opts] are:");
3460 out.println(" -help Display help options (this)");
3461 out.println(" -details Display full report of all regions.");
    out.println("   -timelag <timeInSeconds>  Process only regions that " +
        "have not experienced any metadata updates in the last " +
        "<timeInSeconds> seconds.");
3465 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
3466 " before checking if the fix worked if run with -fix");
3467 out.println(" -summary Print only summary of the tables and status.");
3468 out.println(" -metaonly Only check the state of the .META. table.");
3469 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
3470
3471 out.println("");
3472 out.println(" Metadata Repair options: (expert features, use with caution!)");
    out.println("   -fix              Try to fix region assignments. This is for backwards compatibility");
3474 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
3475 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
3476 out.println(" -noHdfsChecking Don't load/check region info from HDFS."
3477 + " Assumes META region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3478 out.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
3479 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
3480 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3481 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
3482 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
3483 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
    out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow sidelining big overlaps");
3485 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
3486 out.println(" -fixSplitParents Try to force offline split parents to be online.");
3487 out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
3488 out.println(" -fixReferenceFiles Try to offline lingering reference store files");
3489 out.println(" -fixEmptyMetaCells Try to fix .META. entries not referencing any region"
3490 + " (empty REGIONINFO_QUALIFIER rows)");
3491
3492 out.println("");
3493 out.println(" Datafile Repair options: (expert features, use with caution!)");
    out.println("   -checkCorruptHFiles     Check all HFiles by opening them to make sure they are valid");
    out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles. Implies -checkCorruptHFiles.");
3496
3497 out.println("");
3498 out.println(" Metadata Repair shortcuts");
3499 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
3500 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks");
3501 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3502
3503 out.println("");
3504 out.println(" Table lock options");
3505 out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
3506
3507 out.flush();
3508 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3509
3510 setRetCode(-2);
3511 return this;
3512 }
3513
3514
3515
3516
3517
3518
3519
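  /**
   * Main program: runs hbck as a Hadoop Tool against the cluster's default filesystem.
   */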
3520 public static void main(String[] args) throws Exception {
3521
3522 Configuration conf = HBaseConfiguration.create();
3523 Path hbasedir = FSUtils.getRootDir(conf);
3524 URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3525 FSUtils.setFsDefault(conf, new Path(defaultFs));
3526
3527 int ret = ToolRunner.run(new HBaseFsck(conf), args);
3528 System.exit(ret);
3529 }
3530
3531 @Override
3532 public int run(String[] args) throws Exception {
3533 exec(executor, args);
3534 return getRetCode();
3535 }
3536
3537 public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
3538 ServiceException, InterruptedException {
3539 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3540
3541 boolean checkCorruptHFiles = false;
3542 boolean sidelineCorruptHFiles = false;
3543
3544
3545 for (int i = 0; i < args.length; i++) {
3546 String cmd = args[i];
3547 if (cmd.equals("-help") || cmd.equals("-h")) {
3548 return printUsageAndExit();
3549 } else if (cmd.equals("-details")) {
3550 setDisplayFullReport();
3551 } else if (cmd.equals("-timelag")) {
3552 if (i == args.length - 1) {
3553 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3554 return printUsageAndExit();
3555 }
3556 try {
3557 long timelag = Long.parseLong(args[i+1]);
3558 setTimeLag(timelag);
3559 } catch (NumberFormatException e) {
3560 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3561 return printUsageAndExit();
3562 }
3563 i++;
3564 } else if (cmd.equals("-sleepBeforeRerun")) {
3565 if (i == args.length - 1) {
3566 errors.reportError(ERROR_CODE.WRONG_USAGE,
3567 "HBaseFsck: -sleepBeforeRerun needs a value.");
3568 return printUsageAndExit();
3569 }
3570 try {
3571 sleepBeforeRerun = Long.parseLong(args[i+1]);
3572 } catch (NumberFormatException e) {
3573 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3574 return printUsageAndExit();
3575 }
3576 i++;
3577 } else if (cmd.equals("-sidelineDir")) {
3578 if (i == args.length - 1) {
3579 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3580 return printUsageAndExit();
3581 }
3582 i++;
3583 setSidelineDir(args[i]);
3584 } else if (cmd.equals("-fix")) {
3585 errors.reportError(ERROR_CODE.WRONG_USAGE,
3586 "This option is deprecated, please use -fixAssignments instead.");
3587 setFixAssignments(true);
3588 } else if (cmd.equals("-fixAssignments")) {
3589 setFixAssignments(true);
3590 } else if (cmd.equals("-fixMeta")) {
3591 setFixMeta(true);
3592 } else if (cmd.equals("-noHdfsChecking")) {
3593 setCheckHdfs(false);
3594 } else if (cmd.equals("-fixHdfsHoles")) {
3595 setFixHdfsHoles(true);
3596 } else if (cmd.equals("-fixHdfsOrphans")) {
3597 setFixHdfsOrphans(true);
3598 } else if (cmd.equals("-fixTableOrphans")) {
3599 setFixTableOrphans(true);
3600 } else if (cmd.equals("-fixHdfsOverlaps")) {
3601 setFixHdfsOverlaps(true);
3602 } else if (cmd.equals("-fixVersionFile")) {
3603 setFixVersionFile(true);
3604 } else if (cmd.equals("-sidelineBigOverlaps")) {
3605 setSidelineBigOverlaps(true);
3606 } else if (cmd.equals("-fixSplitParents")) {
3607 setFixSplitParents(true);
3608 } else if (cmd.equals("-ignorePreCheckPermission")) {
3609 setIgnorePreCheckPermission(true);
3610 } else if (cmd.equals("-checkCorruptHFiles")) {
3611 checkCorruptHFiles = true;
3612 } else if (cmd.equals("-sidelineCorruptHFiles")) {
3613 sidelineCorruptHFiles = true;
3614 } else if (cmd.equals("-fixReferenceFiles")) {
3615 setFixReferenceFiles(true);
3616 } else if (cmd.equals("-fixEmptyMetaCells")) {
3617 setFixEmptyMetaCells(true);
3618 } else if (cmd.equals("-repair")) {
3619
3620
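        // -repair is a shortcut that turns on the full set of region consistency fixes below.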
3621 setFixHdfsHoles(true);
3622 setFixHdfsOrphans(true);
3623 setFixMeta(true);
3624 setFixAssignments(true);
3625 setFixHdfsOverlaps(true);
3626 setFixVersionFile(true);
3627 setSidelineBigOverlaps(true);
3628 setFixSplitParents(false);
3629 setCheckHdfs(true);
3630 setFixReferenceFiles(true);
3631 setFixTableLocks(true);
3632 } else if (cmd.equals("-repairHoles")) {
3633
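        // -repairHoles turns on only the fixes needed to plug holes; overlaps and HDFS
        // orphans are left alone.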
3634 setFixHdfsHoles(true);
3635 setFixHdfsOrphans(false);
3636 setFixMeta(true);
3637 setFixAssignments(true);
3638 setFixHdfsOverlaps(false);
3639 setSidelineBigOverlaps(false);
3640 setFixSplitParents(false);
3641 setCheckHdfs(true);
3642 } else if (cmd.equals("-maxOverlapsToSideline")) {
3643 if (i == args.length - 1) {
3644 errors.reportError(ERROR_CODE.WRONG_USAGE,
3645 "-maxOverlapsToSideline needs a numeric value argument.");
3646 return printUsageAndExit();
3647 }
3648 try {
3649 int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
3650 setMaxOverlapsToSideline(maxOverlapsToSideline);
3651 } catch (NumberFormatException e) {
3652 errors.reportError(ERROR_CODE.WRONG_USAGE,
3653 "-maxOverlapsToSideline needs a numeric value argument.");
3654 return printUsageAndExit();
3655 }
3656 i++;
3657 } else if (cmd.equals("-maxMerge")) {
3658 if (i == args.length - 1) {
3659 errors.reportError(ERROR_CODE.WRONG_USAGE,
3660 "-maxMerge needs a numeric value argument.");
3661 return printUsageAndExit();
3662 }
3663 try {
3664 int maxMerge = Integer.parseInt(args[i+1]);
3665 setMaxMerge(maxMerge);
3666 } catch (NumberFormatException e) {
3667 errors.reportError(ERROR_CODE.WRONG_USAGE,
3668 "-maxMerge needs a numeric value argument.");
3669 return printUsageAndExit();
3670 }
3671 i++;
3672 } else if (cmd.equals("-summary")) {
3673 setSummary();
3674 } else if (cmd.equals("-metaonly")) {
3675 setCheckMetaOnly();
3676 } else if (cmd.equals("-fixTableLocks")) {
3677 setFixTableLocks(true);
3678 } else if (cmd.startsWith("-")) {
3679 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
3680 return printUsageAndExit();
3681 } else {
3682 includeTable(TableName.valueOf(cmd));
3683 errors.print("Allow checking/fixes for table: " + cmd);
3684 }
3685 }
3686
3687
3688 try {
3689 preCheckPermission();
3690 } catch (AccessControlException ace) {
3691 Runtime.getRuntime().exit(-1);
3692 } catch (IOException ioe) {
3693 Runtime.getRuntime().exit(-1);
3694 }
3695
3696
3697 connect();
3698
3699
3700 if (checkCorruptHFiles || sidelineCorruptHFiles) {
3701 LOG.info("Checking all hfiles for corruption");
3702 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3703 setHFileCorruptionChecker(hfcc);
3704 Collection<TableName> tables = getIncludedTables();
3705 Collection<Path> tableDirs = new ArrayList<Path>();
3706 Path rootdir = FSUtils.getRootDir(getConf());
3707 if (tables.size() > 0) {
3708 for (TableName t : tables) {
3709 tableDirs.add(FSUtils.getTableDir(rootdir, t));
3710 }
3711 } else {
3712 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
3713 }
3714 hfcc.checkTables(tableDirs);
3715 hfcc.report(errors);
3716 }
3717
3718
3719 int code = onlineHbck();
3720 setRetCode(code);
3721
3722
3723
3724
3725 if (shouldRerun()) {
3726 try {
3727 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3728 Thread.sleep(sleepBeforeRerun);
3729 } catch (InterruptedException ie) {
3730 return this;
3731 }
3732
3733 setFixAssignments(false);
3734 setFixMeta(false);
3735 setFixHdfsHoles(false);
3736 setFixHdfsOverlaps(false);
3737 setFixVersionFile(false);
3738 setFixTableOrphans(false);
3739 errors.resetErrors();
3740 code = onlineHbck();
3741 setRetCode(code);
3742 }
3743 return this;
3744 }
3745
3746
3747
3748
3749 void debugLsr(Path p) throws IOException {
3750 debugLsr(getConf(), p, errors);
3751 }
3752
3753
3754
3755
3756 public static void debugLsr(Configuration conf,
3757 Path p) throws IOException {
3758 debugLsr(conf, p, new PrintingErrorReporter());
3759 }
3760
3761
3762
3763
3764 public static void debugLsr(Configuration conf,
3765 Path p, ErrorReporter errors) throws IOException {
3766 if (!LOG.isDebugEnabled() || p == null) {
3767 return;
3768 }
3769 FileSystem fs = p.getFileSystem(conf);
3770
3771 if (!fs.exists(p)) {
3772
3773 return;
3774 }
3775 errors.print(p.toString());
3776
3777 if (fs.isFile(p)) {
3778 return;
3779 }
3780
3781 if (fs.getFileStatus(p).isDir()) {
3782 FileStatus[] fss= fs.listStatus(p);
3783 for (FileStatus status : fss) {
3784 debugLsr(conf, status.getPath(), errors);
3785 }
3786 }
3787 }
3788 }