/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.AdminProtocol;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HConnectionManager.HConnectable;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.exceptions.MasterNotRunningException;
import org.apache.hadoop.hbase.exceptions.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.KeeperException;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.google.protobuf.ServiceException;

/**
 * HBaseFsck (hbck) checks and optionally repairs inconsistencies in an HBase
 * cluster.  It compares three sources of region information: the .META.
 * table, the regions reported as deployed by the region servers, and the
 * region directories found in HDFS.  With the appropriate -fix options it
 * repairs problems such as unassigned or multiply-assigned regions, missing
 * or orphaned region dirs, holes and overlaps in a table's region chain,
 * missing table descriptors, and lingering reference files.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HBaseFsck extends Configured implements Tool {
  public static final long DEFAULT_TIME_LAG = 60000;
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
  private static final int MAX_NUM_THREADS = 50;
  private static boolean rsSupportsOffline = true;
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  private static final int DEFAULT_MAX_MERGE = 5;
  private static final String TO_BE_LOADED = "to_be_loaded";

  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
  private ClusterStatus status;
  private HConnection connection;
  private HBaseAdmin admin;
  private HTable meta;
  protected ExecutorService executor;
  private long startMillis = System.currentTimeMillis();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;

  private static boolean details = false;
  private long timelag = DEFAULT_TIME_LAG;
  private boolean fixAssignments = false;
  private boolean fixMeta = false;
  private boolean checkHdfs = true;
  private boolean fixHdfsHoles = false;
  private boolean fixHdfsOverlaps = false;
  private boolean fixHdfsOrphans = false;
  private boolean fixTableOrphans = false;
  private boolean fixVersionFile = false;
  private boolean fixSplitParents = false;
  private boolean fixReferenceFiles = false;
  private boolean fixEmptyMetaCells = false;

  private Set<String> tablesIncluded = new HashSet<String>();
  private int maxMerge = DEFAULT_MAX_MERGE;
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
  private boolean sidelineBigOverlaps = false;
  private Path sidelineDir = null;

  private boolean rerun = false;
  private static boolean summary = false;
  private boolean checkMetaOnly = false;
  private boolean ignorePreCheckPermission = false;

  private final ErrorReporter errors;
  int fixes = 0;

  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
  private TreeSet<byte[]> disabledTables =
    new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);

  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();

  private SortedMap<String, TableInfo> tablesInfo = new ConcurrentSkipListMap<String, TableInfo>();

  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());

  private Map<String, Set<String>> orphanTableDirs = new HashMap<String, Set<String>>();

  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(conf);

    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
    executor = new ScheduledThreadPoolExecutor(numThreads,
        Threads.newDaemonThreadFactory("hbasefsck"));
  }

  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
  }

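  /**
   * Connects to the cluster so that online state can be checked and repaired:
   * creates the HBaseAdmin and .META. HTable handles and fetches the current
   * ClusterStatus.
   */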
  public void connect() throws IOException {
    admin = new HBaseAdmin(getConf());
    meta = new HTable(getConf(), HConstants.META_TABLE_NAME);
    status = admin.getClusterStatus();
    connection = admin.getConnection();
  }

  private void loadDeployedRegions() throws IOException, InterruptedException {
    Collection<ServerName> regionServers = status.getServers();
    errors.print("Number of live region servers: " + regionServers.size());
    if (details) {
      for (ServerName rsinfo: regionServers) {
        errors.print(" " + rsinfo.getServerName());
      }
    }

    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
    errors.print("Number of dead region servers: " + deadRegionServers.size());
    if (details) {
      for (ServerName name: deadRegionServers) {
        errors.print(" " + name);
      }
    }

    errors.print("Master: " + status.getMaster());

    Collection<ServerName> backupMasters = status.getBackupMasters();
    errors.print("Number of backup masters: " + backupMasters.size());
    if (details) {
      for (ServerName name: backupMasters) {
        errors.print(" " + name);
      }
    }

    processRegionServers(regionServers);
  }

  private void clearState() {
    fixes = 0;
    regionInfoMap.clear();
    emptyRegionInfoQualifiers.clear();
    disabledTables.clear();
    errors.clear();
    tablesInfo.clear();
    orphanHdfsDirs.clear();
  }

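  /**
   * Offline repair of the HDFS view of the cluster: iteratively checks the
   * table integrity rules (no orphan region dirs, holes, or overlaps) and
   * applies the enabled -fixHdfs* repairs, repeating up to
   * hbase.hbck.integrityrepair.iterations.max times or until no more fixes
   * are made.  HBase does not need to be online for this phase.
   */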
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");

      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
      int curIter = 0;
      do {
        clearState(); // clears hbck state and resets fixes to 0
        restoreHdfsIntegrity();
        curIter++;
      } while (fixes > 0 && curIter <= maxIterations);

      // Repairs should be done in the first pass and verified in the second;
      // more than two passes means repairs did not converge cleanly.
      if (curIter > 2) {
        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
        } else {
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
        }
      }
    }
  }

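  /**
   * Online consistency repair: requires the cluster to be up since it
   * contacts the master and the region servers.  Loads region state from
   * .META., from the region servers, and (optionally) from HDFS, then checks
   * and repairs per-region consistency and reports table integrity problems.
   *
   * @return the number of inconsistencies found (0 if none), or a negative
   *         value if a fatal error prevented the check from completing
   */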
  public int onlineConsistencyRepair() throws IOException, KeeperException,
      InterruptedException {
    clearState();

    LOG.info("Loading regioninfos from the .META. table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    // Check whether .META. itself is found exactly once and in the right place.
    if (!checkMetaRegion()) {
      errors.reportError("Encountered fatal error. Exiting...");
      return -2;
    }

    // Report rows in .META. with an empty REGIONINFO_QUALIFIER.
    reportEmptyMetaCells();

    // Remove those rows if requested.
    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    // Report tables that are currently in flux.
    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    // Get the regions deployed on the region servers.
    loadDeployedRegions();

    // Load region dirs and .regioninfo files from HDFS.
    if (shouldCheckHdfs()) {
      loadHdfsRegionDirs();
      loadHdfsRegionInfos();
    }

    // Get disabled tables from ZooKeeper.
    loadDisabledTables();

    // Fix orphan tables (region dirs without a .tableinfo).
    fixOrphanTables();

    // Check and fix per-region consistency.
    checkAndFixConsistency();

    // Check table integrity (report only).
    checkIntegrity();
    return errors.getErrorList().size();
  }

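  /**
   * Top-level online check: runs the offline HDFS integrity repair, then the
   * online consistency repair with the balancer temporarily disabled, then
   * the lingering reference file repair, and finally prints the table
   * summary.
   *
   * @return the summarized error count (0 on success)
   */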
  public int onlineHbck() throws IOException, KeeperException, InterruptedException,
      ServiceException {
    errors.print("Version: " + status.getHBaseVersion());
    offlineHdfsIntegrityRepair();

    boolean oldBalancer = admin.setBalancerRunning(false, true);
    try {
      onlineConsistencyRepair();
    }
    finally {
      admin.setBalancerRunning(oldBalancer, false);
    }

    offlineReferenceFileRepair();

    printTableSummary(tablesInfo);
    return errors.summarize();
  }

  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
    for (HbckInfo hi : orphanHdfsDirs) {
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
      adoptHdfsOrphan(hi);
    }
  }

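  /**
   * Orphaned regions are region dirs without a .regioninfo file.  We "adopt"
   * such a dir by scanning the HFiles in its column family dirs to find the
   * minimum and maximum row keys, creating a new region covering that range,
   * and moving the data into it.  The resulting region may overlap existing
   * regions; that is handled later by the overlap repair.
   */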
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
          p + ". This dir could probably be deleted.");
      return;
    }

    String tableName = Bytes.toString(hi.getTableName());
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    HTableDescriptor template = tableInfo.getHTD();

    // Find the min and max row keys across all hfiles in the region dir.
    Pair<byte[], byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
      // skip special dirs such as .tmp and splitlog
      if (cfName.startsWith(".") || cfName.equals("splitlog")) continue;

      FileStatus[] hfiles = fs.listStatus(cf.getPath());
      for (FileStatus hfile : hfiles) {
        byte[] start, end;
        HFile.Reader hf = null;
        try {
          CacheConfig cacheConf = new CacheConfig(getConf());
          hf = HFile.createReader(fs, hfile.getPath(), cacheConf);
          hf.loadFileInfo();
          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
          start = startKv.getRow();
          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
          end = endKv.getRow();
        } catch (IOException ioe) {
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
          continue;
        } catch (NullPointerException ioe) {
          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
          continue;
        } finally {
          if (hf != null) {
            hf.close();
          }
        }

        // Expand the range to include the range of this hfile.
        if (orphanRegionRange == null) {
          // first range
          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
        } else {
          // expand the range only if the hfile is wider
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
            orphanRegionRange.setFirst(start);
          }
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
            orphanRegionRange.setSecond(end);
          }
        }
      }
    }
    if (orphanRegionRange == null) {
      LOG.warn("No data in dir " + p + ", sidelining data");
      fixes++;
      sidelineRegionDir(fs, hi);
      return;
    }
    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
        Bytes.toString(orphanRegionRange.getSecond()) + ")");

    // Create a new region on hdfs covering the discovered range.
    HRegionInfo hri = new HRegionInfo(template.getName(), orphanRegionRange.getFirst(),
        orphanRegionRange.getSecond());
    LOG.info("Creating new region : " + hri);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
    Path target = region.getRegionFileSystem().getRegionDir();

    // Move the orphan data into the new region.
    mergeRegionDirs(target, hi);
    fixes++;
  }

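  /**
   * Determines whether there are table integrity errors in the HDFS view.  If
   * there are errors and the corresponding fix options are enabled, orphan
   * region dirs are adopted first, then holes and overlaps are repaired with
   * a reload of the HDFS state between phases.
   *
   * @return number of table integrity errors remaining
   */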
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs();

    int errs = errors.getErrorList().size();

    tablesInfo = loadHdfsRegionInfos();
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() == errs) {
      LOG.info("No integrity errors. We are done with this phase. Glorious.");
      return 0;
    }

    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
      adoptHdfsOrphans(orphanHdfsDirs);
    }

    if (shouldFixHdfsHoles()) {
      clearState();
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos();
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
    }

    if (shouldFixHdfsOverlaps()) {
      clearState();
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos();
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
    }

    return errors.getErrorList().size();
  }

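  /**
   * Scans all store files for lingering reference files: references whose
   * referred-to hfile no longer exists.  Such a file prevents the region from
   * opening; if -fixReferenceFiles is enabled the offending file is sidelined
   * under the hbck sideline directory, preserving its relative path.
   */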
  private void offlineReferenceFileRepair() throws IOException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
    for (Path path: allFiles.values()) {
      boolean isReference = false;
      try {
        isReference = StoreFileInfo.isReference(path);
      } catch (Throwable t) {
        // Ignore. Some files may not be store files at all, for example
        // files under the .oldlogs folder in .META.; a warning is already
        // logged by the reference check itself.
      }
      if (!isReference) continue;

      Path referredToFile = StoreFileInfo.getReferredToFile(path);
      if (fs.exists(referredToFile)) continue;  // good, expected

      // Found a lingering reference file.
      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
          "Found lingering reference file " + path);
      if (!shouldFixReferenceFiles()) continue;

      // Requested to fix it, so sideline the reference file.
      boolean success = false;
      String pathStr = path.toString();

      // A reference file path looks like
      // <rootdir>/table_name/region_id/family_name/referred_file.region_name.
      // Walk up three separators so the file is sidelined under a similar
      // table/region/family structure.
      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 3; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }
      if (index > 0) {
        Path rootDir = getSidelineDir();
        Path dst = new Path(rootDir, pathStr.substring(index));
        fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
            + path + " to " + dst);
        setShouldRerun();

        success = fs.rename(path, dst);
      }
      if (!success) {
        LOG.error("Failed to sideline reference file " + path);
      }
    }
  }

  private void reportEmptyMetaCells() {
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in .META.: " +
        emptyRegionInfoQualifiers.size());
    if (details) {
      for (Result r: emptyRegionInfoQualifiers) {
        errors.print(" " + r);
      }
    }
  }

  private void reportTablesInFlux() {
    AtomicInteger numSkipped = new AtomicInteger(0);
    HTableDescriptor[] allTables = getTables(numSkipped);
    errors.print("Number of Tables: " + allTables.length);
    if (details) {
      if (numSkipped.get() > 0) {
        errors.detail("Number of Tables in flux: " + numSkipped.get());
      }
      for (HTableDescriptor td : allTables) {
        String tableName = td.getNameAsString();
        errors.detail(" Table: " + tableName + "\t" +
            (td.isReadOnly() ? "ro" : "rw") + "\t" +
            (td.isMetaRegion() ? "META" : " ") + "\t" +
            " families: " + td.getFamilies().size());
      }
    }
  }

  public ErrorReporter getErrors() {
    return errors;
  }

  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    if (regionDir == null) {
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
      return;
    }

    if (hbi.hdfsEntry.hri != null) {
      return;
    }

    FileSystem fs = FileSystem.get(getConf());
    HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
    LOG.debug("HRegionInfo read: " + hri.toString());
    hbi.hdfsEntry.hri = hri;
  }

  public static class RegionRepairException extends IOException {
    private static final long serialVersionUID = 1L;
    final IOException ioe;
    public RegionRepairException(String s, IOException ioe) {
      super(s);
      this.ioe = ioe;
    }
  }

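  /**
   * Populates tablesInfo from the regions collected in regionInfoMap: reads
   * each region's .regioninfo file in parallel, then groups the regions by
   * table and loads (or records as orphan) each table's .tableinfo
   * descriptor.
   */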
  private SortedMap<String, TableInfo> loadHdfsRegionInfos()
      throws IOException, InterruptedException {
    tablesInfo.clear(); // regenerating the data
    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    // Parallelized read of .regioninfo files.
    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
    List<Future<Void>> hbiFutures;

    for (HbckInfo hbi : hbckInfos) {
      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
      hbis.add(work);
    }

    // Submit the work items and wait for them to complete.
    hbiFutures = executor.invokeAll(hbis);

    for (int i = 0; i < hbiFutures.size(); i++) {
      WorkItemHdfsRegionInfo work = hbis.get(i);
      Future<Void> f = hbiFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Failed to read .regioninfo file for region " +
            work.hbi.getRegionNameAsString(), e.getCause());
      }
    }

    // Serialized table info gathering.
    for (HbckInfo hbi: hbckInfos) {

      if (hbi.getHdfsHRI() == null) {
        // was an orphan
        continue;
      }

      // Get the table name from HDFS and populate the per-table info.
      String tableName = Bytes.toString(hbi.getTableName());
      if (tableName == null) {
        LOG.warn("tableName was null for: " + hbi);
        continue;
      }

      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        // only executed once per table
        modTInfo = new TableInfo(tableName);
        Path hbaseRoot = FSUtils.getRootDir(getConf());
        tablesInfo.put(tableName, modTInfo);
        try {
          HTableDescriptor htd =
              FSTableDescriptors.getTableDescriptor(hbaseRoot.getFileSystem(getConf()),
                  hbaseRoot, tableName);
          modTInfo.htds.add(htd);
        } catch (IOException ioe) {
          if (!orphanTableDirs.containsKey(tableName)) {
            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
            // only report once per table
            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
            Set<String> columns = new HashSet<String>();
            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
          }
        }
      }
      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }
    }

    return tablesInfo;
  }

  /**
   * Collects the names of the column family directories found under the
   * given region's HDFS dir.
   */
  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
    for (FileStatus subdir : subDirs) {
      String columnfamily = subdir.getPath().getName();
      columns.add(columnfamily);
    }
    return columns;
  }

  /**
   * Fabricates a minimal .tableinfo for a table whose descriptor is missing:
   * one default HColumnDescriptor per discovered column family.
   *
   * @return true if the descriptor could be written, false if no column
   *         families are known
   */
  private boolean fabricateTableInfo(String tableName, Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (String columnfamily : columns) {
      htd.addFamily(new HColumnDescriptor(columnfamily));
    }
    FSTableDescriptors.createTableDescriptor(htd, getConf(), true);
    return true;
  }

  /**
   * Deletes the .META. rows that have an empty REGIONINFO_QUALIFIER, if
   * -fixEmptyMetaCells is enabled.
   */
  public void fixEmptyMetaCells() throws IOException {
    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER .META. rows.");
      for (Result region : emptyRegionInfoQualifiers) {
        deleteMetaRegion(region.getRow());
        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
      }
      emptyRegionInfoQualifiers.clear();
    }
  }

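  /**
   * Fixes orphan tables (tables with region dirs but no .tableinfo) either by
   * re-creating the descriptor from the master's cached HTableDescriptor or,
   * failing that, by fabricating a default descriptor from the column family
   * directories found on disk.
   */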
  public void fixOrphanTables() throws IOException {
    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {

      Path hbaseRoot = FSUtils.getRootDir(getConf());
      List<String> tmpList = new ArrayList<String>();
      tmpList.addAll(orphanTableDirs.keySet());
      HTableDescriptor[] htds = getHTableDescriptors(tmpList);
      Iterator<Entry<String, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
      int j = 0;
      int numFailedCase = 0;
      while (iter.hasNext()) {
        Entry<String, Set<String>> entry = (Entry<String, Set<String>>) iter.next();
        String tableName = entry.getKey();
        LOG.info("Trying to fix orphan table error: " + tableName);
        if (j < htds.length) {
          if (tableName.equals(Bytes.toString(htds[j].getName()))) {
            HTableDescriptor htd = htds[j];
            LOG.info("fixing orphan table: " + tableName + " from cache");
            FSTableDescriptors.createTableDescriptor(
                hbaseRoot.getFileSystem(getConf()), hbaseRoot, htd, true);
            j++;
            iter.remove();
          }
        } else {
          if (fabricateTableInfo(tableName, entry.getValue())) {
            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
            LOG.warn("Strongly recommend modifying the HTableDescriptor if necessary for: "
                + tableName);
            iter.remove();
          } else {
            LOG.error("Unable to create default .tableinfo for " + tableName
                + " while missing column family information");
            numFailedCase++;
          }
        }
        fixes++;
      }

      if (orphanTableDirs.isEmpty()) {
        // All orphan table dirs were recovered; a re-run is needed to verify.
        setShouldRerun();
        LOG.warn("Strongly recommend re-running hbck manually after all orphan table dirs"
            + " have been fixed");
      } else if (numFailedCase > 0) {
        LOG.error("Failed to fix " + numFailedCase
            + " OrphanTables with default .tableinfo files");
      }

    }
    // cleanup the list
    orphanTableDirs.clear();
  }

  /**
   * Creates a new, empty .META. region on HDFS.  Info family caching is
   * disabled while the region is being created.
   *
   * @return an open .META. HRegion
   */
  private HRegion createNewMeta() throws IOException {
    Path rootdir = FSUtils.getRootDir(getConf());
    Configuration c = getConf();
    HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
    MasterFileSystem.setInfoFamilyCachingForMeta(false);
    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c,
        HTableDescriptor.META_TABLEDESC);
    MasterFileSystem.setInfoFamilyCachingForMeta(true);
    return meta;
  }

  /**
   * Generates a .META. Put for each region known from HDFS.  Returns null if
   * any table's region chain still has a start key mapped to more than one
   * region.
   */
  private ArrayList<Put> generatePuts(SortedMap<String, TableInfo> tablesInfo) throws IOException {
    ArrayList<Put> puts = new ArrayList<Put>();
    boolean hasProblems = false;
    for (Entry<String, TableInfo> e : tablesInfo.entrySet()) {
      String name = e.getKey();

      // skip .META. itself
      if (Bytes.compareTo(Bytes.toBytes(name), HConstants.META_TABLE_NAME) == 0) {
        continue;
      }

      TableInfo ti = e.getValue();
      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
          .entrySet()) {
        Collection<HbckInfo> his = spl.getValue();
        int sz = his.size();
        if (sz != 1) {
          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
              + " had " + sz + " regions instead of exactly 1." );
          hasProblems = true;
          continue;
        }

        // add the row directly to meta
        HbckInfo hi = his.iterator().next();
        HRegionInfo hri = hi.getHdfsHRI();
        Put p = MetaEditor.makePutFromRegionInfo(hri);
        puts.add(p);
      }
    }
    return hasProblems ? null : puts;
  }

  /**
   * Suggest fixes for each table's region chain problems (report only).
   */
  private void suggestFixes(SortedMap<String, TableInfo> tablesInfo) throws IOException {
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      tInfo.checkRegionChain(handler);
    }
  }

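  /**
   * Rebuilds the .META. table from the region metadata found in HDFS: checks
   * (and, while the -fixHdfs options allow, repairs) table integrity first,
   * then sidelines the existing .META. and writes a fresh one populated from
   * the HDFS .regioninfo files.
   *
   * @return true if .META. was successfully rebuilt, false if integrity
   *         problems prevented the rebuild
   */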
  public boolean rebuildMeta(boolean fix) throws IOException,
      InterruptedException {

    // Determine what's on HDFS.
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs();

    int errs = errors.getErrorList().size();
    tablesInfo = loadHdfsRegionInfos();
    checkHdfsIntegrity(false, false);

    // Make sure the HDFS view is consistent before rebuilding.
    if (errors.getErrorList().size() != errs) {
      // While in an error state, iterate until no more fixes are possible.
      while (true) {
        fixes = 0;
        suggestFixes(tablesInfo);
        errors.clear();
        loadHdfsRegionInfos();
        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());

        int errCount = errors.getErrorList().size();

        if (fixes == 0) {
          if (errCount > 0) {
            return false; // failed to fix problems
          } else {
            break; // no fixes and no problems -- safe to rebuild
          }
        }
      }
    }

    // We can rebuild; move the old .META. out of the way and start.
    LOG.info("HDFS regioninfos look good. Sidelining old .META.");
    Path backupDir = sidelineOldMeta();

    LOG.info("Creating new .META.");
    HRegion meta = createNewMeta();

    // populate meta
    List<Put> puts = generatePuts(tablesInfo);
    if (puts == null) {
      LOG.fatal("Problem encountered when creating new .META. entries. " +
          "You may need to restore the previously sidelined .META.");
      return false;
    }
    meta.put(puts.toArray(new Put[0]));
    HRegion.closeHRegion(meta);
    LOG.info("Success! .META. table rebuilt.");
    LOG.info("Old .META. is moved into " + backupDir);
    return true;
  }

  private SortedMap<String, TableInfo> checkHdfsIntegrity(boolean fixHoles,
      boolean fixOverlaps) throws IOException {
    LOG.info("Checking HBase region split map from HDFS data...");
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler;
      if (fixHoles || fixOverlaps) {
        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
            fixHoles, fixOverlaps);
      } else {
        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      }
      if (!tInfo.checkRegionChain(handler)) {
        errors.report("Found inconsistency in table " + tInfo.getName());
      }
    }
    return tablesInfo;
  }

  private Path getSidelineDir() throws IOException {
    if (sidelineDir == null) {
      Path hbaseDir = FSUtils.getRootDir(getConf());
      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
          + startMillis);
    }
    return sidelineDir;
  }

  /**
   * Sideline a region dir (instead of deleting it).
   */
  Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
    return sidelineRegionDir(fs, null, hi);
  }

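  /**
   * Sideline a region dir (instead of deleting it): moves its contents under
   * the hbck sideline directory and removes the original region dir.
   *
   * @param parentDir if not null, the region is sidelined under
   *        sidelineDir/parentDir/tableName/regionName so that related
   *        sidelined regions stay grouped together
   * @return the sideline dir the region was moved to, or null if the region
   *         dir no longer exists
   */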
  Path sidelineRegionDir(FileSystem fs,
      String parentDir, HbckInfo hi) throws IOException {
    String tableName = Bytes.toString(hi.getTableName());
    Path regionDir = hi.getHdfsRegionDir();

    if (!fs.exists(regionDir)) {
      LOG.warn("No previous " + regionDir + " exists. Continuing.");
      return null;
    }

    Path rootDir = getSidelineDir();
    if (parentDir != null) {
      rootDir = new Path(rootDir, parentDir);
    }
    Path sidelineTableDir = new Path(rootDir, tableName);
    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
    fs.mkdirs(sidelineRegionDir);
    boolean success = false;
    FileStatus[] cfs = fs.listStatus(regionDir);
    if (cfs == null) {
      LOG.info("Region dir is empty: " + regionDir);
    } else {
      for (FileStatus cf : cfs) {
        Path src = cf.getPath();
        Path dst = new Path(sidelineRegionDir, src.getName());
        if (fs.isFile(src)) {
          // simple file, just rename it
          success = fs.rename(src, dst);
          if (!success) {
            String msg = "Unable to rename file " + src + " to " + dst;
            LOG.error(msg);
            throw new IOException(msg);
          }
          continue;
        }

        // src is a directory: create the destination dir and move the files
        // one by one, since renaming a dir into an existing dir would nest
        // src under dst instead of merging the contents.
        fs.mkdirs(dst);

        LOG.info("Sidelining files from " + src + " into containing region " + dst);

        FileStatus[] hfiles = fs.listStatus(src);
        if (hfiles != null && hfiles.length > 0) {
          for (FileStatus hfile : hfiles) {
            success = fs.rename(hfile.getPath(), dst);
            if (!success) {
              String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
              LOG.error(msg);
              throw new IOException(msg);
            }
          }
        }
        LOG.debug("Sideline directory contents:");
        debugLsr(sidelineRegionDir);
      }
    }

    LOG.info("Removing old region dir: " + regionDir);
    success = fs.delete(regionDir, true);
    if (!success) {
      String msg = "Unable to delete dir " + regionDir;
      LOG.error(msg);
      throw new IOException(msg);
    }
    return sidelineRegionDir;
  }

  /**
   * Sideline an entire table by moving its dir into the backup dir.
   */
  void sidelineTable(FileSystem fs, byte[] table, Path hbaseDir,
      Path backupHbaseDir) throws IOException {
    String tableName = Bytes.toString(table);
    Path tableDir = new Path(hbaseDir, tableName);
    if (fs.exists(tableDir)) {
      Path backupTableDir = new Path(backupHbaseDir, tableName);
      boolean success = fs.rename(tableDir, backupTableDir);
      if (!success) {
        throw new IOException("Failed to move " + tableName + " from "
            + tableDir.getName() + " to " + backupTableDir.getName());
      }
    } else {
      LOG.info("No previous " + tableName + " exists. Continuing.");
    }
  }

  /**
   * Puts the current .META. aside into the sideline dir.
   *
   * @return Path to the backup of the original .META. directory
   */
  Path sidelineOldMeta() throws IOException {
    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    Path backupDir = getSidelineDir();
    fs.mkdirs(backupDir);
    try {
      sidelineTable(fs, HConstants.META_TABLE_NAME, hbaseDir, backupDir);
    } catch (IOException e) {
      LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore "
          + "try to rename .META. in " + backupDir.getName() + " to "
          + hbaseDir.getName() + ".", e);
      throw e; // throw original exception
    }
    return backupDir;
  }

  /**
   * Loads the list of disabled (or disabling) tables from ZooKeeper into the
   * local set.
   */
  private void loadDisabledTables()
      throws ZooKeeperConnectionException, IOException {
    HConnectionManager.execute(new HConnectable<Void>(getConf()) {
      @Override
      public Void connect(HConnection connection) throws IOException {
        ZooKeeperWatcher zkw = createZooKeeperWatcher();
        try {
          for (String tableName : ZKTableReadOnly.getDisabledOrDisablingTables(zkw)) {
            disabledTables.add(Bytes.toBytes(tableName));
          }
        } catch (KeeperException ke) {
          throw new IOException(ke);
        } finally {
          zkw.close();
        }
        return null;
      }
    });
  }

  /**
   * Checks whether the specified region's table is disabled.
   */
  private boolean isTableDisabled(HRegionInfo regionInfo) {
    return disabledTables.contains(regionInfo.getTableName());
  }

  /**
   * Scans the HDFS root dir for table directories and checks the hbase
   * version file, creating it if -fixVersionFile is enabled.
   */
  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
    Path rootDir = FSUtils.getRootDir(getConf());
    FileSystem fs = rootDir.getFileSystem(getConf());

    // list all tables from HDFS
    List<FileStatus> tableDirs = Lists.newArrayList();

    boolean foundVersionFile = false;
    FileStatus[] files = fs.listStatus(rootDir);
    for (FileStatus file : files) {
      String dirName = file.getPath().getName();
      if (dirName.equals(HConstants.VERSION_FILE_NAME)) {
        foundVersionFile = true;
      } else {
        if ((!checkMetaOnly && isTableIncluded(dirName)) ||
            dirName.equals(".META.")) {
          tableDirs.add(file);
        }
      }
    }

    // verify that the version file exists
    if (!foundVersionFile) {
      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
          "Version file does not exist in root dir " + rootDir);
      if (shouldFixVersionFile()) {
        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
            + " file.");
        setShouldRerun();
        FSUtils.setVersion(fs, rootDir, getConf().getInt(
            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
      }
    }
  }

  private boolean recordMetaRegion() throws IOException {
    HRegionLocation metaLocation = connection.locateRegion(
        HConstants.META_TABLE_NAME, HConstants.EMPTY_START_ROW);

    if (metaLocation == null || metaLocation.getRegionInfo() == null ||
        metaLocation.getHostname() == null) {
      errors.reportError(ERROR_CODE.NULL_META_REGION,
          "META region or some of its attributes are null.");
      return false;
    }
    ServerName sn;
    try {
      sn = getMetaRegionServerName();
    } catch (KeeperException e) {
      throw new IOException(e);
    }
    MetaEntry m =
        new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
    HbckInfo hbInfo = new HbckInfo(m);
    regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), hbInfo);
    return true;
  }

  private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
    return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
      @Override
      public void abort(String why, Throwable e) {
        LOG.error(why, e);
        System.exit(1);
      }

      @Override
      public boolean isAborted() {
        return false;
      }

    });
  }

  private ServerName getMetaRegionServerName()
      throws IOException, KeeperException {
    ZooKeeperWatcher zkw = createZooKeeperWatcher();
    ServerName sn = null;
    try {
      sn = MetaRegionTracker.getMetaRegionLocation(zkw);
    } finally {
      zkw.close();
    }
    return sn;
  }

  void processRegionServers(Collection<ServerName> regionServerList)
      throws IOException, InterruptedException {

    List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
    List<Future<Void>> workFutures;

    for (ServerName rsinfo: regionServerList) {
      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
    }

    workFutures = executor.invokeAll(workItems);

    for (int i = 0; i < workFutures.size(); i++) {
      WorkItemRegion item = workItems.get(i);
      Future<Void> f = workFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
            e.getCause());
      }
    }
  }

  private void checkAndFixConsistency()
      throws IOException, KeeperException, InterruptedException {
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
      checkRegionConsistency(e.getKey(), e.getValue());
    }
  }

  private void preCheckPermission() throws IOException, AccessControlException {
    if (shouldIgnorePreCheckPermission()) {
      return;
    }

    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    UserGroupInformation ugi = User.getCurrent().getUGI();
    FileStatus[] files = fs.listStatus(hbaseDir);
    for (FileStatus file : files) {
      try {
        FSUtils.checkAccess(ugi, file, FsAction.WRITE);
      } catch (AccessControlException ace) {
        LOG.warn("Got AccessControlException when preCheckPermission ", ace);
        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
            + " does not have write perms to " + file.getPath()
            + ". Please rerun hbck as hdfs user " + file.getOwner());
        throw new AccessControlException(ace);
      }
    }
  }

  private void deleteMetaRegion(HbckInfo hi) throws IOException {
    deleteMetaRegion(hi.metaEntry.getRegionName());
  }

  private void deleteMetaRegion(byte[] metaKey) throws IOException {
    Delete d = new Delete(metaKey);
    meta.delete(d);
    meta.flushCommits();
    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
  }

  private void resetSplitParent(HbckInfo hi) throws IOException {
    RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
    Delete d = new Delete(hi.metaEntry.getRegionName());
    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
    mutations.add(d);

    HRegionInfo hri = new HRegionInfo(hi.metaEntry);
    hri.setOffline(false);
    hri.setSplit(false);
    Put p = MetaEditor.makePutFromRegionInfo(hri);
    mutations.add(p);

    meta.mutateRow(mutations);
    meta.flushCommits();
    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
  }

  private void offline(byte[] regionName) throws IOException {
    String regionString = Bytes.toStringBinary(regionName);
    if (!rsSupportsOffline) {
      LOG.warn("Using unassign region " + regionString
          + " instead of using offline method, you should"
          + " restart HMaster after these repairs");
      admin.unassign(regionName, true);
      return;
    }

    try {
      LOG.info("Offlining region " + regionString);
      admin.offline(regionName);
    } catch (IOException ioe) {
      String notFoundMsg = "java.lang.NoSuchMethodException: " +
          "org.apache.hadoop.hbase.master.HMaster.offline([B)";
      if (ioe.getMessage().contains(notFoundMsg)) {
        LOG.warn("Using unassign region " + regionString
            + " instead of using offline method, you should"
            + " restart HMaster after these repairs");
        rsSupportsOffline = false;
        admin.unassign(regionName, true);
        return;
      }
      throw ioe;
    }
  }

  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
    for (OnlineEntry rse : hi.deployedEntries) {
      LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
      try {
        HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
        offline(rse.hri.getRegionName());
      } catch (IOException ioe) {
        LOG.warn("Got exception when attempting to offline region "
            + Bytes.toString(rse.hri.getRegionName()), ioe);
      }
    }
  }

  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
    if (hi.metaEntry == null && hi.hdfsEntry == null) {
      undeployRegions(hi);
      return;
    }

    Get get = new Get(hi.getRegionName());
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
    Result r = meta.get(get);
    ServerName serverName = HRegionInfo.getServerName(r);
    if (serverName == null) {
      errors.reportError("Unable to close region "
          + hi.getRegionNameAsString() + " because meta does not "
          + "have handle to reach it.");
      return;
    }

    HRegionInfo hri = HRegionInfo.getHRegionInfo(r);
    if (hri == null) {
      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
          + " because META had invalid or missing "
          + HConstants.CATALOG_FAMILY_STR + ":"
          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
          + " qualifier value.");
      return;
    }

    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, serverName, hri);
  }

  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
      KeeperException, InterruptedException {
    if (shouldFixAssignments()) {
      errors.print(msg);
      undeployRegions(hbi);
      setShouldRerun();
      HRegionInfo hri = hbi.getHdfsHRI();
      if (hri == null) {
        hri = hbi.metaEntry;
      }
      HBaseFsckRepair.fixUnassigned(admin, hri);
      HBaseFsckRepair.waitUntilAssigned(admin, hri);
    }
  }

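  /**
   * Checks a single region for consistency between its state in .META., its
   * presence in HDFS, and its deployment on region servers, and applies the
   * enabled repairs (fix assignments, patch or delete .META. rows, reset
   * split parents) for each inconsistent combination.
   */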
  private void checkRegionConsistency(final String key, final HbckInfo hbi)
      throws IOException, KeeperException, InterruptedException {
    String descriptiveName = hbi.toString();

    boolean inMeta = hbi.metaEntry != null;
    // If HDFS is not being checked, assume the region is on HDFS.
    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
    boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
    boolean isDeployed = !hbi.deployedOn.isEmpty();
    boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
    boolean deploymentMatchesMeta =
        hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
        hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
    boolean splitParent =
        (hbi.metaEntry == null) ? false : hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
    boolean recentlyModified = inHdfs &&
        hbi.getModTime() + timelag > System.currentTimeMillis();

    // ========== First the healthy cases =============
    if (hbi.containsOnlyHdfsEdits()) {
      return;
    }
    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
      return;
    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
          "table that is not deployed");
      return;
    } else if (recentlyModified) {
      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
      return;
    }
    // ========== Cases where the region is not in META =============
    else if (!inMeta && !inHdfs && !isDeployed) {
      // We shouldn't have a record of this region at all then!
      assert false : "Entry for region with no data";
    } else if (!inMeta && !inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
          + descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
          "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      if (shouldFixAssignments()) {
        undeployRegions(hbi);
      }

    } else if (!inMeta && inHdfs && !isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
          + descriptiveName + " on HDFS, but not listed in META " +
          "or deployed on any region server");
      // Restore region consistency of an adopted orphan.
      if (shouldFixMeta()) {
        if (!hbi.isHdfsRegioninfoPresent()) {
          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
              + " in table integrity repair phase if -fixHdfsOrphans was" +
              " used.");
          return;
        }

        LOG.info("Patching .META. with .regioninfo: " + hbi.getHdfsHRI());
        HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());

        tryAssignmentRepair(hbi, "Trying to reassign region...");
      }

    } else if (!inMeta && inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      debugLsr(hbi.getHdfsRegionDir());
      if (shouldFixMeta()) {
        if (!hbi.isHdfsRegioninfoPresent()) {
          LOG.error("This should have been repaired in table integrity repair phase");
          return;
        }

        LOG.info("Patching .META. with .regioninfo: " + hbi.getHdfsHRI());
        HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());

        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
      }

    // ========== Cases where the region is in META =============
    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
      // Check whether this is an actual error, or just the transient state
      // where the parent has not been cleaned up yet.
      if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
        // check that the split daughters are present
        HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
        HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
        if (infoA != null && infoB != null) {
          // the daughters are (or will be) processed; nothing to do here
          hbi.setSkipChecks(true);
          return;
        }
      }
      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
          + descriptiveName + " is a split parent in META, in HDFS, "
          + "and not deployed on any region server. This could be transient.");
      if (shouldFixSplitParents()) {
        setShouldRerun();
        resetSplitParent(hbi);
      }
    } else if (inMeta && !inHdfs && !isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
          + descriptiveName + " found in META, but not in HDFS "
          + "or deployed on any region server.");
      if (shouldFixMeta()) {
        deleteMetaRegion(hbi);
      }
    } else if (inMeta && !inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
          + " found in META, but not in HDFS, " +
          "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      // HDFS is treated as ground truth: unassign the region and remove its
      // row from META.
      if (shouldFixAssignments()) {
        errors.print("Trying to fix unassigned region...");
        closeRegion(hbi);
      }
      if (shouldFixMeta()) {
        deleteMetaRegion(hbi);
      }
    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
          + " not deployed on any region server.");
      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
          "Region " + descriptiveName + " should not be deployed according " +
          "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      if (shouldFixAssignments()) {
        errors.print("Trying to close the region " + descriptiveName);
        setShouldRerun();
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
      }
    } else if (inMeta && inHdfs && isMultiplyDeployed) {
      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
          + " is listed in META on region server " + hbi.metaEntry.regionServer
          + " but is multiply assigned to region servers " +
          Joiner.on(", ").join(hbi.deployedOn));
      if (shouldFixAssignments()) {
        errors.print("Trying to fix assignment error...");
        setShouldRerun();
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
      }
    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
          + descriptiveName + " listed in META on region server " +
          hbi.metaEntry.regionServer + " but found on region server " +
          hbi.deployedOn.get(0));
      if (shouldFixAssignments()) {
        errors.print("Trying to fix assignment error...");
        setShouldRerun();
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
      }
    } else {
      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
          " is in an unforeseen state:" +
          " inMeta=" + inMeta +
          " inHdfs=" + inHdfs +
          " isDeployed=" + isDeployed +
          " isMultiplyDeployed=" + isMultiplyDeployed +
          " deploymentMatchesMeta=" + deploymentMatchesMeta +
          " shouldBeDeployed=" + shouldBeDeployed);
    }
  }

  /**
   * Checks table integrity: goes over all usable regions, groups them by
   * table, and checks each table's region chain for missing, duplicated, or
   * overlapping pieces.  Report only; no fixes are applied here.
   */
  SortedMap<String, TableInfo> checkIntegrity() throws IOException {
    tablesInfo = new TreeMap<String,TableInfo> ();
    List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
    for (HbckInfo hbi : regionInfoMap.values()) {
      // Check only valid, working regions.
      if (hbi.metaEntry == null) {
        noHDFSRegionInfos.add(hbi);
        Path p = hbi.getHdfsRegionDir();
        if (p == null) {
          errors.report("No regioninfo in Meta or HDFS. " + hbi);
        }
        continue;
      }
      if (hbi.metaEntry.regionServer == null) {
        errors.detail("Skipping region because no region server: " + hbi);
        continue;
      }
      if (hbi.metaEntry.isOffline()) {
        errors.detail("Skipping region because it is offline: " + hbi);
        continue;
      }
      if (hbi.containsOnlyHdfsEdits()) {
        errors.detail("Skipping region because it only contains edits: " + hbi);
        continue;
      }

      // Skip regions that are not deployed on any server.
      if (hbi.deployedOn.size() == 0) continue;

      // We should be safe here.
      String tableName = hbi.metaEntry.getTableNameAsString();
      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        modTInfo = new TableInfo(tableName);
      }
      for (ServerName server : hbi.deployedOn) {
        modTInfo.addServer(server);
      }

      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }

      tablesInfo.put(tableName, modTInfo);
    }

    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      if (!tInfo.checkRegionChain(handler)) {
        errors.report("Found inconsistency in table " + tInfo.getName());
      }
    }
    return tablesInfo;
  }

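  /**
   * Merges the data of the "contained" region dir into the target region dir:
   * moves every store file into the target's corresponding column family dir
   * (skipping the .regioninfo file and old WAL dirs), then sidelines what is
   * left of the contained region dir.
   *
   * @return number of files moved
   */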
  public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
    int fileMoves = 0;

    LOG.debug("Contained region dir after close and pause");
    debugLsr(contained.getHdfsRegionDir());

    FileSystem fs = targetRegionDir.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(contained.getHdfsRegionDir());

    if (dirs == null) {
      if (!fs.exists(contained.getHdfsRegionDir())) {
        LOG.warn("HDFS region dir " + contained.getHdfsRegionDir() + " already sidelined.");
      } else {
        sidelineRegionDir(fs, contained);
      }
      return fileMoves;
    }

    for (FileStatus cf : dirs) {
      Path src = cf.getPath();
      Path dst = new Path(targetRegionDir, src.getName());

      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
        continue;
      }

      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
        continue;
      }

      LOG.info("Moving files from " + src + " into containing region " + dst);

      for (FileStatus hfile : fs.listStatus(src)) {
        boolean success = fs.rename(hfile.getPath(), dst);
        if (success) {
          fileMoves++;
        }
      }
      LOG.debug("Sideline directory contents:");
      debugLsr(targetRegionDir);
    }

    sidelineRegionDir(fs, contained);
    LOG.info("Sidelined region dir " + contained.getHdfsRegionDir() + " into " +
        getSidelineDir());
    debugLsr(contained.getHdfsRegionDir());

    return fileMoves;
  }

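  /**
   * Maintains information about one table while it is being checked: the
   * regions and servers that belong to it, its table descriptor(s), regions
   * with backwards start/end keys, sidelined regions, and the region split
   * calculator used to detect holes and overlaps.
   */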
1887 public class TableInfo {
1888 String tableName;
1889 TreeSet <ServerName> deployedOn;
1890
1891
1892 final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
1893
1894
1895 final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
1896
1897
1898 final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
1899
1900
1901 final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
1902
1903
1904 final Multimap<byte[], HbckInfo> overlapGroups =
1905 TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
1906
1907 TableInfo(String name) {
1908 this.tableName = name;
1909 deployedOn = new TreeSet <ServerName>();
1910 }
1911
1912
1913
1914
1915 private HTableDescriptor getHTD() {
1916 if (htds.size() == 1) {
1917 return (HTableDescriptor)htds.toArray()[0];
1918 } else {
1919 LOG.error("None/Multiple table descriptors found for table '"
1920 + tableName + "' regions: " + htds);
1921 }
1922 return null;
1923 }
1924
1925 public void addRegionInfo(HbckInfo hir) {
1926 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
1927
1928 sc.add(hir);
1929 return;
1930 }
1931
1932
1933 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
1934 errors.reportError(
1935 ERROR_CODE.REGION_CYCLE,
1936 String.format("The endkey for this region comes before the "
1937 + "startkey, startkey=%s, endkey=%s",
1938 Bytes.toStringBinary(hir.getStartKey()),
1939 Bytes.toStringBinary(hir.getEndKey())), this, hir);
1940 backwards.add(hir);
1941 return;
1942 }
1943
1944
1945 sc.add(hir);
1946 }
1947
1948 public void addServer(ServerName server) {
1949 this.deployedOn.add(server);
1950 }
1951
1952 public String getName() {
1953 return tableName;
1954 }
1955
1956 public int getNumRegions() {
1957 return sc.getStarts().size() + backwards.size();
1958 }
1959
1960 private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
1961 ErrorReporter errors;
1962
1963 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
1964 this.errors = errors;
1965 setTableInfo(ti);
1966 }
1967
1968 @Override
1969 public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
1970 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
1971 "First region should start with an empty key. You need to "
1972 + " create a new region and regioninfo in HDFS to plug the hole.",
1973 getTableInfo(), hi);
1974 }
1975
1976 @Override
1977 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
1978 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
1979 "Last region should end with an empty key. You need to "
1980 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
1981 }
1982
1983 @Override
1984 public void handleDegenerateRegion(HbckInfo hi) throws IOException{
1985 errors.reportError(ERROR_CODE.DEGENERATE_REGION,
1986 "Region has the same start and end key.", getTableInfo(), hi);
1987 }
1988
1989 @Override
1990 public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
1991 byte[] key = r1.getStartKey();
1992
1993 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
1994 "Multiple regions have the same startkey: "
1995 + Bytes.toStringBinary(key), getTableInfo(), r1);
1996 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
1997 "Multiple regions have the same startkey: "
1998 + Bytes.toStringBinary(key), getTableInfo(), r2);
1999 }
2000
2001 @Override
2002 public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2003 errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2004 "There is an overlap in the region chain.",
2005 getTableInfo(), hi1, hi2);
2006 }
2007
2008 @Override
2009 public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2010 errors.reportError(
2011 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2012 "There is a hole in the region chain between "
2013 + Bytes.toStringBinary(holeStart) + " and "
2014 + Bytes.toStringBinary(holeStop)
2015 + ". You need to create a new .regioninfo and region "
2016 + "dir in hdfs to plug the hole.");
2017 }
2018 };
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
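/**
 * Integrity error handler that repairs problems directly in HDFS: it plugs holes at the
 * start, end, or middle of the region chain by creating new empty regions, and merges or
 * sidelines overlapping regions when fixOverlaps is enabled.
 */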
2032 private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2033 Configuration conf;
2034
2035 boolean fixOverlaps = true;
2036
2037 HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2038 boolean fixHoles, boolean fixOverlaps) {
2039 super(ti, errors);
2040 this.conf = conf;
2041 this.fixOverlaps = fixOverlaps;
2042
2043 }
2044
2045
2046
2047
2048
2049
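/**
 * Plugs the hole at the head of the region chain by creating a new empty region in HDFS
 * that spans from the empty start key to the first region's start key.
 */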
2050 public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2051 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2052 "First region should start with an empty key. Creating a new " +
2053 "region and regioninfo in HDFS to plug the hole.",
2054 getTableInfo(), next);
2055 HTableDescriptor htd = getTableInfo().getHTD();
2056
2057 HRegionInfo newRegion = new HRegionInfo(htd.getName(),
2058 HConstants.EMPTY_START_ROW, next.getStartKey());
2059
2060
2061 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2062 LOG.info("Table region start key was not empty. Created new empty region: "
2063 + newRegion + " " + region);
2064 fixes++;
2065 }
2066
2067 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2068 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2069 "Last region should end with an empty key. Creating a new "
2070 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2071 HTableDescriptor htd = getTableInfo().getHTD();
2072
2073 HRegionInfo newRegion = new HRegionInfo(htd.getName(), curEndKey,
2074 HConstants.EMPTY_START_ROW);
2075
2076 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2077 LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
2078 + " " + region);
2079 fixes++;
2080 }
2081
2082
2083
2084
2085
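/**
 * Plugs a hole in the middle of the region chain by creating a new empty region in HDFS
 * covering [holeStartKey, holeStopKey).
 */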
2086 public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2087 errors.reportError(
2088 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2089 "There is a hole in the region chain between "
2090 + Bytes.toStringBinary(holeStartKey) + " and "
2091 + Bytes.toStringBinary(holeStopKey)
2092 + ". Creating a new regioninfo and region "
2093 + "dir in hdfs to plug the hole.");
2094 HTableDescriptor htd = getTableInfo().getHTD();
2095 HRegionInfo newRegion = new HRegionInfo(htd.getName(), holeStartKey, holeStopKey);
2096 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2097 LOG.info("Plugged hold by creating new empty region: "+ newRegion + " " +region);
2098 fixes++;
2099 }
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
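/**
 * Merges the given group of overlapping regions into a single region, or, when the group
 * exceeds maxMerge, skips the merge and optionally sidelines the biggest overlaps.
 */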
2110 @Override
2111 public void handleOverlapGroup(Collection<HbckInfo> overlap)
2112 throws IOException {
2113 Preconditions.checkNotNull(overlap);
2114 Preconditions.checkArgument(overlap.size() > 0);
2115
2116 if (!this.fixOverlaps) {
2117 LOG.warn("Not attempting to repair overlaps.");
2118 return;
2119 }
2120
2121 if (overlap.size() > maxMerge) {
2122 LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2123 "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2124 if (sidelineBigOverlaps) {
2125
2126 sidelineBigOverlaps(overlap);
2127 }
2128 return;
2129 }
2130
2131 mergeOverlaps(overlap);
2132 }
2133
2134 void mergeOverlaps(Collection<HbckInfo> overlap)
2135 throws IOException {
2136 LOG.info("== Merging regions into one region: "
2137 + Joiner.on(",").join(overlap));
2138
2139 Pair<byte[], byte[]> range = null;
2140 for (HbckInfo hi : overlap) {
2141 if (range == null) {
2142 range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2143 } else {
2144 if (RegionSplitCalculator.BYTES_COMPARATOR
2145 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2146 range.setFirst(hi.getStartKey());
2147 }
2148 if (RegionSplitCalculator.BYTES_COMPARATOR
2149 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2150 range.setSecond(hi.getEndKey());
2151 }
2152 }
2153
2154 LOG.debug("Closing region before moving data around: " + hi);
2155 LOG.debug("Contained region dir before close");
2156 debugLsr(hi.getHdfsRegionDir());
2157 try {
2158 LOG.info("Closing region: " + hi);
2159 closeRegion(hi);
2160 } catch (IOException ioe) {
2161 LOG.warn("Was unable to close region " + hi
2162 + ". Just continuing... ", ioe);
2163 } catch (InterruptedException e) {
2164 LOG.warn("Was unable to close region " + hi
2165 + ". Just continuing... ", e);
2166 }
2167
2168 try {
2169 LOG.info("Offlining region: " + hi);
2170 offline(hi.getRegionName());
2171 } catch (IOException ioe) {
2172 LOG.warn("Unable to offline region from master: " + hi
2173 + ". Just continuing... ", ioe);
2174 }
2175 }
2176
2177
2178 HTableDescriptor htd = getTableInfo().getHTD();
2179
2180 HRegionInfo newRegion = new HRegionInfo(htd.getName(), range.getFirst(),
2181 range.getSecond());
2182 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2183 LOG.info("Created new empty container region: " +
2184 newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2185 debugLsr(region.getRegionFileSystem().getRegionDir());
2186
2187
2188 boolean didFix = false;
2189 Path target = region.getRegionFileSystem().getRegionDir();
2190 for (HbckInfo contained : overlap) {
2191 LOG.info("Merging " + contained + " into " + target );
2192 int merges = mergeRegionDirs(target, contained);
2193 if (merges > 0) {
2194 didFix = true;
2195 }
2196 }
2197 if (didFix) {
2198 fixes++;
2199 }
2200 }
2201
2202
2203
2204
2205
2206
2207
2208
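/**
 * Sidelines the biggest-ranged regions from an overlap group that is too large to merge:
 * each selected region is closed, offlined, and its region dir moved to the sideline
 * directory for later bulk loading.
 */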
2209 void sidelineBigOverlaps(
2210 Collection<HbckInfo> bigOverlap) throws IOException {
2211 int overlapsToSideline = bigOverlap.size() - maxMerge;
2212 if (overlapsToSideline > maxOverlapsToSideline) {
2213 overlapsToSideline = maxOverlapsToSideline;
2214 }
2215 List<HbckInfo> regionsToSideline =
2216 RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2217 FileSystem fs = FileSystem.get(conf);
2218 for (HbckInfo regionToSideline: regionsToSideline) {
2219 try {
2220 LOG.info("Closing region: " + regionToSideline);
2221 closeRegion(regionToSideline);
2222 } catch (IOException ioe) {
2223 LOG.warn("Was unable to close region " + regionToSideline
2224 + ". Just continuing... ", ioe);
2225 } catch (InterruptedException e) {
2226 LOG.warn("Was unable to close region " + regionToSideline
2227 + ". Just continuing... ", e);
2228 }
2229
2230 try {
2231 LOG.info("Offlining region: " + regionToSideline);
2232 offline(regionToSideline.getRegionName());
2233 } catch (IOException ioe) {
2234 LOG.warn("Unable to offline region from master: " + regionToSideline
2235 + ". Just continuing... ", ioe);
2236 }
2237
2238 LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2239 Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2240 if (sidelineRegionDir != null) {
2241 sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2242 LOG.info("After sidelined big overlapped region: "
2243 + regionToSideline.getRegionNameAsString()
2244 + " to " + sidelineRegionDir.toString());
2245 fixes++;
2246 }
2247 }
2248 }
2249 }
2250
2251
2252
2253
2254
2255
2256
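/**
 * Checks the region chain of this table (sorted by start key) for integrity problems:
 * non-empty first start key or last end key, degenerate regions, duplicate start keys,
 * overlaps, and holes. Returns true when no new errors were reported or the table is disabled.
 */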
2257 public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2258
2259
2260
2261 if (disabledTables.contains(this.tableName.getBytes())) {
2262 return true;
2263 }
2264 int originalErrorsCount = errors.getErrorList().size();
2265 Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2266 SortedSet<byte[]> splits = sc.getSplits();
2267
2268 byte[] prevKey = null;
2269 byte[] problemKey = null;
2270 for (byte[] key : splits) {
2271 Collection<HbckInfo> ranges = regions.get(key);
2272 if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2273 for (HbckInfo rng : ranges) {
2274 handler.handleRegionStartKeyNotEmpty(rng);
2275 }
2276 }
2277
2278
2279 for (HbckInfo rng : ranges) {
2280
2281 byte[] endKey = rng.getEndKey();
2282 endKey = (endKey.length == 0) ? null : endKey;
2283 if (Bytes.equals(rng.getStartKey(), endKey)) {
2284 handler.handleDegenerateRegion(rng);
2285 }
2286 }
2287
2288 if (ranges.size() == 1) {
2289
2290 if (problemKey != null) {
2291 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2292 }
2293 problemKey = null;
2294 } else if (ranges.size() > 1) {
2295
2296
2297 if (problemKey == null) {
2298
2299 LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2300 problemKey = key;
2301 }
2302 overlapGroups.putAll(problemKey, ranges);
2303
2304
2305 ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2306
2307 for (HbckInfo r1 : ranges) {
2308 subRange.remove(r1);
2309 for (HbckInfo r2 : subRange) {
2310 if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey()) == 0) {
2311 handler.handleDuplicateStartKeys(r1, r2);
2312 } else {
2313
2314 handler.handleOverlapInRegionChain(r1, r2);
2315 }
2316 }
2317 }
2318
2319 } else if (ranges.size() == 0) {
2320 if (problemKey != null) {
2321 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2322 }
2323 problemKey = null;
2324
2325 byte[] holeStopKey = sc.getSplits().higher(key);
2326
2327 if (holeStopKey != null) {
2328
2329 handler.handleHoleInRegionChain(key, holeStopKey);
2330 }
2331 }
2332 prevKey = key;
2333 }
2334
2335
2336
2337 if (prevKey != null) {
2338 handler.handleRegionEndKeyNotEmpty(prevKey);
2339 }
2340
2341 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2342 handler.handleOverlapGroup(overlap);
2343 }
2344
2345 if (details) {
2346
2347 errors.print("---- Table '" + this.tableName
2348 + "': region split map");
2349 dump(splits, regions);
2350 errors.print("---- Table '" + this.tableName
2351 + "': overlap groups");
2352 dumpOverlapProblems(overlapGroups);
2353 errors.print("There are " + overlapGroups.keySet().size()
2354 + " overlap groups with " + overlapGroups.size()
2355 + " overlapping regions");
2356 }
2357 if (!sidelinedRegions.isEmpty()) {
2358 LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2359 errors.print("---- Table '" + this.tableName
2360 + "': sidelined big overlapped regions");
2361 dumpSidelinedRegions(sidelinedRegions);
2362 }
2363 return errors.getErrorList().size() == originalErrorsCount;
2364 }
2365
2366
2367
2368
2369
2370
2371
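/**
 * Dumps, for each split point, the regions covering it together with their end keys.
 * Used for -details output.
 */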
2372 void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2373
2374 StringBuilder sb = new StringBuilder();
2375 for (byte[] k : splits) {
2376 sb.setLength(0);
2377 sb.append(Bytes.toStringBinary(k) + ":\t");
2378 for (HbckInfo r : regions.get(k)) {
2379 sb.append("[ "+ r.toString() + ", "
2380 + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2381 }
2382 errors.print(sb.toString());
2383 }
2384 }
2385 }
2386
2387 public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2388
2389
2390 for (byte[] k : regions.keySet()) {
2391 errors.print(Bytes.toStringBinary(k) + ":");
2392 for (HbckInfo r : regions.get(k)) {
2393 errors.print("[ " + r.toString() + ", "
2394 + Bytes.toStringBinary(r.getEndKey()) + "]");
2395 }
2396 errors.print("----");
2397 }
2398 }
2399
2400 public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2401 for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
2402 String tableName = Bytes.toStringBinary(entry.getValue().getTableName());
2403 Path path = entry.getKey();
2404 errors.print("This sidelined region dir should be bulk loaded: "
2405 + path.toString());
2406 errors.print("Bulk load command looks like: "
2407 + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
2408 + path.toUri().getPath() + " "+ tableName);
2409 }
2410 }
2411
2412 public Multimap<byte[], HbckInfo> getOverlapGroups(
2413 String table) {
2414 TableInfo ti = tablesInfo.get(table);
2415 return ti.overlapGroups;
2416 }
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
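/**
 * Returns descriptors for the user tables whose first region was seen in .META.,
 * skipping tables whose meta entries were modified within the configured timelag
 * (the skipped count is accumulated in numSkipped).
 */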
2427 HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2428 List<String> tableNames = new ArrayList<String>();
2429 long now = System.currentTimeMillis();
2430
2431 for (HbckInfo hbi : regionInfoMap.values()) {
2432 MetaEntry info = hbi.metaEntry;
2433
2434
2435
2436 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2437 if (info.modTime + timelag < now) {
2438 tableNames.add(info.getTableNameAsString());
2439 } else {
2440 numSkipped.incrementAndGet();
2441 }
2442 }
2443 }
2444 return getHTableDescriptors(tableNames);
2445 }
2446
2447 HTableDescriptor[] getHTableDescriptors(List<String> tableNames) {
2448 HTableDescriptor[] htd = new HTableDescriptor[0];
2449 try {
2450 LOG.info("getHTableDescriptors == tableNames => " + tableNames);
2451 htd = new HBaseAdmin(getConf()).getTableDescriptors(tableNames);
2452 } catch (IOException e) {
2453 LOG.debug("Exception getting table descriptors", e);
2454 }
2455 return htd;
2456 }
2457
2458
2459
2460
2461
2462
2463
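/**
 * Gets the HbckInfo for the given encoded region name, creating and registering an
 * empty entry in regionInfoMap if one does not yet exist.
 */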
2464 private synchronized HbckInfo getOrCreateInfo(String name) {
2465 HbckInfo hbi = regionInfoMap.get(name);
2466 if (hbi == null) {
2467 hbi = new HbckInfo(null);
2468 regionInfoMap.put(name, hbi);
2469 }
2470 return hbi;
2471 }
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
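/**
 * Checks that exactly one .META. region is known. When -fixAssignments is enabled,
 * reassigns .META. if it is unassigned or multiply assigned. Returns true if .META.
 * is consistent.
 */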
2482 boolean checkMetaRegion()
2483 throws IOException, KeeperException, InterruptedException {
2484 List <HbckInfo> metaRegions = Lists.newArrayList();
2485 for (HbckInfo value : regionInfoMap.values()) {
2486 if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
2487 metaRegions.add(value);
2488 }
2489 }
2490
2491
2492 if (metaRegions.size() != 1) {
2493 HRegionLocation rootLocation = connection.locateRegion(
2494 HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
2495 HbckInfo root =
2496 regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
2497
2498
2499 if (metaRegions.size() == 0) {
2500 errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
2501 if (shouldFixAssignments()) {
2502 errors.print("Trying to fix a problem with .META...");
2503 setShouldRerun();
2504
2505 HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
2506 HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
2507 }
2508 }
2509
2510 else if (metaRegions.size() > 1) {
2511 errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
2512 if (shouldFixAssignments()) {
2513 errors.print("Trying to fix a problem with .META...");
2514 setShouldRerun();
2515
2516 List <ServerName> deployedOn = Lists.newArrayList();
2517 for (HbckInfo mRegion : metaRegions) {
2518 deployedOn.add(mRegion.metaEntry.regionServer);
2519 }
2520 HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
2521 }
2522 }
2523
2524 return false;
2525 }
2526
2527 return true;
2528 }
2529
2530
2531
2532
2533
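/**
 * Scans .META. and records an HbckInfo (region info, hosting server, modification time,
 * and split daughters) for every region of the included tables.
 */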
2534 boolean loadMetaEntries() throws IOException {
2535
2536
2537
2538 if (!recordMetaRegion()) {
2539
2540 errors.reportError("Fatal error: unable to get root region location. Exiting...");
2541 return false;
2542 }
2543
2544 MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
2545 int countRecord = 1;
2546
2547
2548 final Comparator<KeyValue> comp = new Comparator<KeyValue>() {
2549 public int compare(KeyValue k1, KeyValue k2) {
2550 return Long.signum(k1.getTimestamp() - k2.getTimestamp());
2551 }
2552 };
2553
2554 public boolean processRow(Result result) throws IOException {
2555 try {
2556
2557
2558 long ts = Collections.max(result.list(), comp).getTimestamp();
2559 Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(result);
2560 if (pair == null || pair.getFirst() == null) {
2561 emptyRegionInfoQualifiers.add(result);
2562 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2563 "Empty REGIONINFO_QUALIFIER found in .META.");
2564 return true;
2565 }
2566 ServerName sn = null;
2567 if (pair.getSecond() != null) {
2568 sn = pair.getSecond();
2569 }
2570 HRegionInfo hri = pair.getFirst();
2571 if (!(isTableIncluded(hri.getTableNameAsString())
2572 || hri.isMetaRegion())) {
2573 return true;
2574 }
2575 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
2576 MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
2577 HbckInfo hbInfo = new HbckInfo(m);
2578 HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo);
2579 if (previous != null) {
2580 throw new IOException("Two entries in .META. are the same: " + previous);
2581 }
2582
2583
2584 if (countRecord % 100 == 0) {
2585 errors.progress();
2586 }
2587 countRecord++;
2588 return true;
2589 } catch (RuntimeException e) {
2590 LOG.error("Result=" + result);
2591 throw e;
2592 }
2593 }
2594 };
2595 if (!checkMetaOnly) {
2596
2597 MetaScanner.metaScan(getConf(), visitor);
2598 }
2599
2600 errors.print("");
2601 return true;
2602 }
2603
2604
2605
2606
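/**
 * Stores the entry read from .META. for a region, along with the hosting region server,
 * the modification time of the row, and any split daughter regions.
 */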
2607 static class MetaEntry extends HRegionInfo {
2608 ServerName regionServer;
2609 long modTime;
2610 HRegionInfo splitA, splitB;
2611
2612 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
2613 this(rinfo, regionServer, modTime, null, null);
2614 }
2615
2616 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
2617 HRegionInfo splitA, HRegionInfo splitB) {
2618 super(rinfo);
2619 this.regionServer = regionServer;
2620 this.modTime = modTime;
2621 this.splitA = splitA;
2622 this.splitB = splitB;
2623 }
2624
2625 public boolean equals(Object o) {
2626 boolean superEq = super.equals(o);
2627 if (!superEq) {
2628 return superEq;
2629 }
2630
2631 MetaEntry me = (MetaEntry) o;
2632 if (!regionServer.equals(me.regionServer)) {
2633 return false;
2634 }
2635 return (modTime == me.modTime);
2636 }
2637
2638 @Override
2639 public int hashCode() {
2640 int hash = Arrays.hashCode(getRegionName());
2641 hash ^= getRegionId();
2642 hash ^= Arrays.hashCode(getStartKey());
2643 hash ^= Arrays.hashCode(getEndKey());
2644 hash ^= Boolean.valueOf(isOffline()).hashCode();
2645 hash ^= Arrays.hashCode(getTableName());
2646 if (regionServer != null) {
2647 hash ^= regionServer.hashCode();
2648 }
2649 hash ^= modTime;
2650 return hash;
2651 }
2652 }
2653
2654
2655
2656
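/**
 * Stores the state of a region as found in HDFS: its region dir, the dir's modification
 * time, and whether a .regioninfo file is present.
 */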
2657 static class HdfsEntry {
2658 HRegionInfo hri;
2659 Path hdfsRegionDir = null;
2660 long hdfsRegionDirModTime = 0;
2661 boolean hdfsRegioninfoFilePresent = false;
2662 boolean hdfsOnlyEdits = false;
2663 }
2664
2665
2666
2667
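/**
 * Stores a single region deployment as reported by an online region server.
 */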
2668 static class OnlineEntry {
2669 HRegionInfo hri;
2670 ServerName hsa;
2671
2672 public String toString() {
2673 return hsa.toString() + ";" + hri.getRegionNameAsString();
2674 }
2675 }
2676
2677
2678
2679
2680
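/**
 * Maintains information about a region gathered from .META., HDFS, and deployed
 * region servers; this is the unit of checking and repair.
 */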
2681 public static class HbckInfo implements KeyRange {
2682 private MetaEntry metaEntry = null;
2683 private HdfsEntry hdfsEntry = null;
2684 private List<OnlineEntry> deployedEntries = Lists.newArrayList();
2685 private List<ServerName> deployedOn = Lists.newArrayList();
2686 private boolean skipChecks = false;
2687
2688 HbckInfo(MetaEntry metaEntry) {
2689 this.metaEntry = metaEntry;
2690 }
2691
2692 public synchronized void addServer(HRegionInfo hri, ServerName server) {
2693 OnlineEntry rse = new OnlineEntry() ;
2694 rse.hri = hri;
2695 rse.hsa = server;
2696 this.deployedEntries.add(rse);
2697 this.deployedOn.add(server);
2698 }
2699
2700 public synchronized String toString() {
2701 StringBuilder sb = new StringBuilder();
2702 sb.append("{ meta => ");
2703 sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
2704 sb.append( ", hdfs => " + getHdfsRegionDir());
2705 sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
2706 sb.append(" }");
2707 return sb.toString();
2708 }
2709
2710 @Override
2711 public byte[] getStartKey() {
2712 if (this.metaEntry != null) {
2713 return this.metaEntry.getStartKey();
2714 } else if (this.hdfsEntry != null) {
2715 return this.hdfsEntry.hri.getStartKey();
2716 } else {
2717 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2718 return null;
2719 }
2720 }
2721
2722 @Override
2723 public byte[] getEndKey() {
2724 if (this.metaEntry != null) {
2725 return this.metaEntry.getEndKey();
2726 } else if (this.hdfsEntry != null) {
2727 return this.hdfsEntry.hri.getEndKey();
2728 } else {
2729 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2730 return null;
2731 }
2732 }
2733
2734 public byte[] getTableName() {
2735 if (this.metaEntry != null) {
2736 return this.metaEntry.getTableName();
2737 } else if (this.hdfsEntry != null) {
2738
2739
2740 Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
2741 return Bytes.toBytes(tableDir.getName());
2742 } else {
2743
2744
2745 return null;
2746 }
2747 }
2748
2749 public String getRegionNameAsString() {
2750 if (metaEntry != null) {
2751 return metaEntry.getRegionNameAsString();
2752 } else if (hdfsEntry != null) {
2753 if (hdfsEntry.hri != null) {
2754 return hdfsEntry.hri.getRegionNameAsString();
2755 }
2756 }
2757 return null;
2758 }
2759
2760 public byte[] getRegionName() {
2761 if (metaEntry != null) {
2762 return metaEntry.getRegionName();
2763 } else if (hdfsEntry != null) {
2764 return hdfsEntry.hri.getRegionName();
2765 } else {
2766 return null;
2767 }
2768 }
2769
2770 Path getHdfsRegionDir() {
2771 if (hdfsEntry == null) {
2772 return null;
2773 }
2774 return hdfsEntry.hdfsRegionDir;
2775 }
2776
2777 boolean containsOnlyHdfsEdits() {
2778 if (hdfsEntry == null) {
2779 return false;
2780 }
2781 return hdfsEntry.hdfsOnlyEdits;
2782 }
2783
2784 boolean isHdfsRegioninfoPresent() {
2785 if (hdfsEntry == null) {
2786 return false;
2787 }
2788 return hdfsEntry.hdfsRegioninfoFilePresent;
2789 }
2790
2791 long getModTime() {
2792 if (hdfsEntry == null) {
2793 return 0;
2794 }
2795 return hdfsEntry.hdfsRegionDirModTime;
2796 }
2797
2798 HRegionInfo getHdfsHRI() {
2799 if (hdfsEntry == null) {
2800 return null;
2801 }
2802 return hdfsEntry.hri;
2803 }
2804
2805 public void setSkipChecks(boolean skipChecks) {
2806 this.skipChecks = skipChecks;
2807 }
2808
2809 public boolean isSkipChecks() {
2810 return skipChecks;
2811 }
2812 }
2813
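/**
 * Orders HbckInfo entries by table name, then start key, then end key. When those tie,
 * entries backed by an HDFS region dir sort before meta-only entries, with region id as
 * the final tie-breaker.
 */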
2814 final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
2815 @Override
2816 public int compare(HbckInfo l, HbckInfo r) {
2817 if (l == r) {
2818
2819 return 0;
2820 }
2821
2822 int tableCompare = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2823 l.getTableName(), r.getTableName());
2824 if (tableCompare != 0) {
2825 return tableCompare;
2826 }
2827
2828 int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2829 l.getStartKey(), r.getStartKey());
2830 if (startComparison != 0) {
2831 return startComparison;
2832 }
2833
2834
2835 byte[] endKey = r.getEndKey();
2836 endKey = (endKey.length == 0) ? null : endKey;
2837 byte[] endKey2 = l.getEndKey();
2838 endKey2 = (endKey2.length == 0) ? null : endKey2;
2839 int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
2840 endKey2, endKey);
2841
2842 if (endComparison != 0) {
2843 return endComparison;
2844 }
2845
2846
2847
2848 if (l.hdfsEntry == null && r.hdfsEntry == null) {
2849 return 0;
2850 }
2851 if (l.hdfsEntry == null && r.hdfsEntry != null) {
2852 return 1;
2853 }
2854
2855 if (r.hdfsEntry == null) {
2856 return -1;
2857 }
2858
2859 return Long.signum(l.hdfsEntry.hri.getRegionId() - r.hdfsEntry.hri.getRegionId());
2860 }
2861 };
2862
2863
2864
2865
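/**
 * Prints a per-table summary: consistency status, number of regions, and the servers
 * each table is deployed on.
 */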
2866 private void printTableSummary(SortedMap<String, TableInfo> tablesInfo) {
2867 StringBuilder sb = new StringBuilder();
2868 errors.print("Summary:");
2869 for (TableInfo tInfo : tablesInfo.values()) {
2870 if (errors.tableHasErrors(tInfo)) {
2871 errors.print("Table " + tInfo.getName() + " is inconsistent.");
2872 } else {
2873 errors.print(" " + tInfo.getName() + " is okay.");
2874 }
2875 errors.print(" Number of regions: " + tInfo.getNumRegions());
2876 sb.setLength(0);
2877 sb.append(" Deployed on: ");
2878 for (ServerName server : tInfo.deployedOn) {
2879 sb.append(" " + server.toString());
2880 }
2881 errors.print(sb.toString());
2882 }
2883 }
2884
2885 static ErrorReporter getErrorReporter(
2886 final Configuration conf) throws ClassNotFoundException {
2887 Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
2888 return (ErrorReporter)ReflectionUtils.newInstance(reporter, conf);
2889 }
2890
2891 public interface ErrorReporter {
2892 public static enum ERROR_CODE {
2893 UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
2894 NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
2895 MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
2896 FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
2897 HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
2898 ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
2899 WRONG_USAGE, EMPTY_META_CELL
2900 }
2901 public void clear();
2902 public void report(String message);
2903 public void reportError(String message);
2904 public void reportError(ERROR_CODE errorCode, String message);
2905 public void reportError(ERROR_CODE errorCode, String message, TableInfo table);
2906 public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
2907 public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info1, HbckInfo info2);
2908 public int summarize();
2909 public void detail(String details);
2910 public ArrayList<ERROR_CODE> getErrorList();
2911 public void progress();
2912 public void print(String message);
2913 public void resetErrors();
2914 public boolean tableHasErrors(TableInfo table);
2915 }
2916
2917 static class PrintingErrorReporter implements ErrorReporter {
2918 public int errorCount = 0;
2919 private int showProgress;
2920
2921 Set<TableInfo> errorTables = new HashSet<TableInfo>();
2922
2923
2924 private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
2925
2926 public void clear() {
2927 errorTables.clear();
2928 errorList.clear();
2929 errorCount = 0;
2930 }
2931
2932 public synchronized void reportError(ERROR_CODE errorCode, String message) {
2933 if (errorCode == ERROR_CODE.WRONG_USAGE) {
2934 System.err.println(message);
2935 return;
2936 }
2937
2938 errorList.add(errorCode);
2939 if (!summary) {
2940 System.out.println("ERROR: " + message);
2941 }
2942 errorCount++;
2943 showProgress = 0;
2944 }
2945
2946 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2947 errorTables.add(table);
2948 reportError(errorCode, message);
2949 }
2950
2951 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
2952 HbckInfo info) {
2953 errorTables.add(table);
2954 String reference = "(region " + info.getRegionNameAsString() + ")";
2955 reportError(errorCode, reference + " " + message);
2956 }
2957
2958 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
2959 HbckInfo info1, HbckInfo info2) {
2960 errorTables.add(table);
2961 String reference = "(regions " + info1.getRegionNameAsString()
2962 + " and " + info2.getRegionNameAsString() + ")";
2963 reportError(errorCode, reference + " " + message);
2964 }
2965
2966 public synchronized void reportError(String message) {
2967 reportError(ERROR_CODE.UNKNOWN, message);
2968 }
2969
2970
2971
2972
2973
2974
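/**
 * Reports a message as an error line but without recording an error code or
 * incrementing the error count.
 */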
2975 public synchronized void report(String message) {
2976 if (!summary) {
2977 System.out.println("ERROR: " + message);
2978 }
2979 showProgress = 0;
2980 }
2981
2982 public synchronized int summarize() {
2983 System.out.println(Integer.toString(errorCount) +
2984 " inconsistencies detected.");
2985 if (errorCount == 0) {
2986 System.out.println("Status: OK");
2987 return 0;
2988 } else {
2989 System.out.println("Status: INCONSISTENT");
2990 return -1;
2991 }
2992 }
2993
2994 public ArrayList<ERROR_CODE> getErrorList() {
2995 return errorList;
2996 }
2997
2998 public synchronized void print(String message) {
2999 if (!summary) {
3000 System.out.println(message);
3001 }
3002 }
3003
3004 @Override
3005 public boolean tableHasErrors(TableInfo table) {
3006 return errorTables.contains(table);
3007 }
3008
3009 @Override
3010 public void resetErrors() {
3011 errorCount = 0;
3012 }
3013
3014 public synchronized void detail(String message) {
3015 if (details) {
3016 System.out.println(message);
3017 }
3018 showProgress = 0;
3019 }
3020
3021 public synchronized void progress() {
3022 if (showProgress++ == 10) {
3023 if (!summary) {
3024 System.out.print(".");
3025 }
3026 showProgress = 0;
3027 }
3028 }
3029 }
3030
3031
3032
3033
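/**
 * Callable that contacts a region server and records every region it is serving
 * into the fsck region map.
 */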
3034 static class WorkItemRegion implements Callable<Void> {
3035 private HBaseFsck hbck;
3036 private ServerName rsinfo;
3037 private ErrorReporter errors;
3038 private HConnection connection;
3039
3040 WorkItemRegion(HBaseFsck hbck, ServerName info,
3041 ErrorReporter errors, HConnection connection) {
3042 this.hbck = hbck;
3043 this.rsinfo = info;
3044 this.errors = errors;
3045 this.connection = connection;
3046 }
3047
3048 @Override
3049 public synchronized Void call() throws IOException {
3050 errors.progress();
3051 try {
3052 AdminProtocol server =
3053 connection.getAdmin(rsinfo);
3054
3055
3056 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3057 regions = filterRegions(regions);
3058
3059 if (details) {
3060 errors.detail("RegionServer: " + rsinfo.getServerName() +
3061 " number of regions: " + regions.size());
3062 for (HRegionInfo rinfo: regions) {
3063 errors.detail(" " + rinfo.getRegionNameAsString() +
3064 " id: " + rinfo.getRegionId() +
3065 " encoded_name: " + rinfo.getEncodedName() +
3066 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3067 " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3068 }
3069 }
3070
3071
3072 for (HRegionInfo r:regions) {
3073 HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3074 hbi.addServer(r, rsinfo);
3075 }
3076 } catch (IOException e) {
3077 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3078 " Unable to fetch region information. " + e);
3079 throw e;
3080 }
3081 return null;
3082 }
3083
3084 private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3085 List<HRegionInfo> ret = Lists.newArrayList();
3086 for (HRegionInfo hri : regions) {
3087 if (hri.isMetaTable() || (!hbck.checkMetaOnly
3088 && hbck.isTableIncluded(hri.getTableNameAsString()))) {
3089 ret.add(hri);
3090 }
3091 }
3092 return ret;
3093 }
3094 }
3095
3096
3097
3098
3099
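/**
 * Callable that examines a table directory in HDFS and collects its region directory
 * information, skipping dot-directories other than .META.
 */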
3100 static class WorkItemHdfsDir implements Callable<Void> {
3101 private HBaseFsck hbck;
3102 private FileStatus tableDir;
3103 private ErrorReporter errors;
3104 private FileSystem fs;
3105
3106 WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3107 FileStatus status) {
3108 this.hbck = hbck;
3109 this.fs = fs;
3110 this.tableDir = status;
3111 this.errors = errors;
3112 }
3113
3114 @Override
3115 public synchronized Void call() throws IOException {
3116 try {
3117 String tableName = tableDir.getPath().getName();
3118
3119 if (tableName.startsWith(".") &&
3120 !tableName.equals(Bytes.toString(HConstants.META_TABLE_NAME))) {
3121 return null;
3122 }
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
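/**
 * Callable that loads a region's .regioninfo file from HDFS, reporting the region as an
 * HDFS orphan when the file cannot be read.
 */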
3177 static class WorkItemHdfsRegionInfo implements Callable<Void> {
3178 private HbckInfo hbi;
3179 private HBaseFsck hbck;
3180 private ErrorReporter errors;
3181
3182 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3183 this.hbi = hbi;
3184 this.hbck = hbck;
3185 this.errors = errors;
3186 }
3187
3188 @Override
3189 public synchronized Void call() throws IOException {
3190
3191 if (hbi.getHdfsHRI() == null) {
3192 try {
3193 hbck.loadHdfsRegioninfo(hbi);
3194 } catch (IOException ioe) {
3195 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3196 + Bytes.toString(hbi.getTableName()) + " in hdfs dir "
3197 + hbi.getHdfsRegionDir()
3198 + "! It may be an invalid format or version file. Treating as "
3199 + "an orphaned regiondir.";
3200 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3201 try {
3202 hbck.debugLsr(hbi.getHdfsRegionDir());
3203 } catch (IOException ioe2) {
3204 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3205 throw ioe2;
3206 }
3207 hbck.orphanHdfsDirs.add(hbi);
3208 throw ioe;
3209 }
3210 }
3211 return null;
3212 }
3213 };
3214
3215
3216
3217
3218
3219 public void setDisplayFullReport() {
3220 details = true;
3221 }
3222
3223
3224
3225
3226
3227 void setSummary() {
3228 summary = true;
3229 }
3230
3231
3232
3233
3234
3235 void setCheckMetaOnly() {
3236 checkMetaOnly = true;
3237 }
3238
3239
3240
3241
3242
3243
3244
3245 void setShouldRerun() {
3246 rerun = true;
3247 }
3248
3249 boolean shouldRerun() {
3250 return rerun;
3251 }
3252
3253
3254
3255
3256
3257 public void setFixAssignments(boolean shouldFix) {
3258 fixAssignments = shouldFix;
3259 }
3260
3261 boolean shouldFixAssignments() {
3262 return fixAssignments;
3263 }
3264
3265 public void setFixMeta(boolean shouldFix) {
3266 fixMeta = shouldFix;
3267 }
3268
3269 boolean shouldFixMeta() {
3270 return fixMeta;
3271 }
3272
3273 public void setFixEmptyMetaCells(boolean shouldFix) {
3274 fixEmptyMetaCells = shouldFix;
3275 }
3276
3277 boolean shouldFixEmptyMetaCells() {
3278 return fixEmptyMetaCells;
3279 }
3280
3281 public void setCheckHdfs(boolean checking) {
3282 checkHdfs = checking;
3283 }
3284
3285 boolean shouldCheckHdfs() {
3286 return checkHdfs;
3287 }
3288
3289 public void setFixHdfsHoles(boolean shouldFix) {
3290 fixHdfsHoles = shouldFix;
3291 }
3292
3293 boolean shouldFixHdfsHoles() {
3294 return fixHdfsHoles;
3295 }
3296
3297 public void setFixTableOrphans(boolean shouldFix) {
3298 fixTableOrphans = shouldFix;
3299 }
3300
3301 boolean shouldFixTableOrphans() {
3302 return fixTableOrphans;
3303 }
3304
3305 public void setFixHdfsOverlaps(boolean shouldFix) {
3306 fixHdfsOverlaps = shouldFix;
3307 }
3308
3309 boolean shouldFixHdfsOverlaps() {
3310 return fixHdfsOverlaps;
3311 }
3312
3313 public void setFixHdfsOrphans(boolean shouldFix) {
3314 fixHdfsOrphans = shouldFix;
3315 }
3316
3317 boolean shouldFixHdfsOrphans() {
3318 return fixHdfsOrphans;
3319 }
3320
3321 public void setFixVersionFile(boolean shouldFix) {
3322 fixVersionFile = shouldFix;
3323 }
3324
3325 public boolean shouldFixVersionFile() {
3326 return fixVersionFile;
3327 }
3328
3329 public void setSidelineBigOverlaps(boolean sbo) {
3330 this.sidelineBigOverlaps = sbo;
3331 }
3332
3333 public boolean shouldSidelineBigOverlaps() {
3334 return sidelineBigOverlaps;
3335 }
3336
3337 public void setFixSplitParents(boolean shouldFix) {
3338 fixSplitParents = shouldFix;
3339 }
3340
3341 boolean shouldFixSplitParents() {
3342 return fixSplitParents;
3343 }
3344
3345 public void setFixReferenceFiles(boolean shouldFix) {
3346 fixReferenceFiles = shouldFix;
3347 }
3348
3349 boolean shouldFixReferenceFiles() {
3350 return fixReferenceFiles;
3351 }
3352
3353 public boolean shouldIgnorePreCheckPermission() {
3354 return ignorePreCheckPermission;
3355 }
3356
3357 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3358 this.ignorePreCheckPermission = ignorePreCheckPermission;
3359 }
3360
3361
3362
3363
3364 public void setMaxMerge(int mm) {
3365 this.maxMerge = mm;
3366 }
3367
3368 public int getMaxMerge() {
3369 return maxMerge;
3370 }
3371
3372 public void setMaxOverlapsToSideline(int mo) {
3373 this.maxOverlapsToSideline = mo;
3374 }
3375
3376 public int getMaxOverlapsToSideline() {
3377 return maxOverlapsToSideline;
3378 }
3379
3380
3381
3382
3383
3384 boolean isTableIncluded(String table) {
3385 return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
3386 }
3387
3388 public void includeTable(String table) {
3389 tablesIncluded.add(table);
3390 }
3391
3392 Set<String> getIncludedTables() {
3393 return new HashSet<String>(tablesIncluded);
3394 }
3395
3396
3397
3398
3399
3400
3401 public void setTimeLag(long seconds) {
3402 timelag = seconds * 1000;
3403 }
3404
3405
3406
3407
3408
3409 public void setSidelineDir(String sidelineDir) {
3410 this.sidelineDir = new Path(sidelineDir);
3411 }
3412
3413 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
3414 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3415 }
3416
3417 public HFileCorruptionChecker getHFilecorruptionChecker() {
3418 return hfcc;
3419 }
3420
3421 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3422 this.hfcc = hfcc;
3423 }
3424
3425 public void setRetCode(int code) {
3426 this.retcode = code;
3427 }
3428
3429 public int getRetCode() {
3430 return retcode;
3431 }
3432
3433 protected HBaseFsck printUsageAndExit() {
3434 StringWriter sw = new StringWriter(2048);
3435 PrintWriter out = new PrintWriter(sw);
3436 out.println("Usage: fsck [opts] {only tables}");
3437 out.println(" where [opts] are:");
3438 out.println(" -help Display help options (this)");
3439 out.println(" -details Display full report of all regions.");
3440 out.println(" -timelag <timeInSeconds> Process only regions that " +
3441 " have not experienced any metadata updates in the last " +
3442 " <timeInSeconds> seconds.");
3443 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
3444 " before checking if the fix worked if run with -fix");
3445 out.println(" -summary Print only summary of the tables and status.");
3446 out.println(" -metaonly Only check the state of the .META. table.");
3447 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
3448
3449 out.println("");
3450 out.println(" Metadata Repair options: (expert features, use with caution!)");
3451 out.println(" -fix Try to fix region assignments. This is for backwards compatiblity");
3452 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
3453 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
3454 out.println(" -noHdfsChecking Don't load/check region info from HDFS."
3455 + " Assumes META region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3456 out.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
3457 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
3458 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3459 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
3460 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
3461 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
3462 out.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
3463 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
3464 out.println(" -fixSplitParents Try to force offline split parents to be online.");
3465 out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
3466 out.println(" -fixReferenceFiles Try to offline lingering reference store files");
3467 out.println(" -fixEmptyMetaCells Try to fix .META. entries not referencing any region"
3468 + " (empty REGIONINFO_QUALIFIER rows)");
3469
3470 out.println("");
3471 out.println(" Datafile Repair options: (expert features, use with caution!)");
3472 out.println(" -checkCorruptHFiles Check all Hfiles by opening them to make sure they are valid");
3473 out.println(" -sidelineCorruptHfiles Quarantine corrupted HFiles. implies -checkCorruptHfiles");
3474
3475 out.println("");
3476 out.println(" Metadata Repair shortcuts");
3477 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
3478 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles");
3479 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3480
3481 out.flush();
3482 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3483
3484 setRetCode(-2);
3485 return this;
3486 }
3487
3488
3489
3490
3491
3492
3493
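/**
 * Main program: runs hbck as a Hadoop Tool against the configured HBase root directory.
 */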
3494 public static void main(String[] args) throws Exception {
3495
3496 Configuration conf = HBaseConfiguration.create();
3497 Path hbasedir = FSUtils.getRootDir(conf);
3498 URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3499 FSUtils.setFsDefault(conf, new Path(defaultFs));
3500
3501 int ret = ToolRunner.run(new HBaseFsck(conf), args);
3502 System.exit(ret);
3503 }
3504
3505 @Override
3506 public int run(String[] args) throws Exception {
3507 exec(executor, args);
3508 return getRetCode();
3509 }
3510
3511 public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
3512 ServiceException, InterruptedException {
3513 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3514
3515 boolean checkCorruptHFiles = false;
3516 boolean sidelineCorruptHFiles = false;
3517
3518
3519 for (int i = 0; i < args.length; i++) {
3520 String cmd = args[i];
3521 if (cmd.equals("-help") || cmd.equals("-h")) {
3522 return printUsageAndExit();
3523 } else if (cmd.equals("-details")) {
3524 setDisplayFullReport();
3525 } else if (cmd.equals("-timelag")) {
3526 if (i == args.length - 1) {
3527 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3528 return printUsageAndExit();
3529 }
3530 try {
3531 long timelag = Long.parseLong(args[i+1]);
3532 setTimeLag(timelag);
3533 } catch (NumberFormatException e) {
3534 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3535 return printUsageAndExit();
3536 }
3537 i++;
3538 } else if (cmd.equals("-sleepBeforeRerun")) {
3539 if (i == args.length - 1) {
3540 errors.reportError(ERROR_CODE.WRONG_USAGE,
3541 "HBaseFsck: -sleepBeforeRerun needs a value.");
3542 return printUsageAndExit();
3543 }
3544 try {
3545 sleepBeforeRerun = Long.parseLong(args[i+1]);
3546 } catch (NumberFormatException e) {
3547 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3548 return printUsageAndExit();
3549 }
3550 i++;
3551 } else if (cmd.equals("-sidelineDir")) {
3552 if (i == args.length - 1) {
3553 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3554 return printUsageAndExit();
3555 }
3556 i++;
3557 setSidelineDir(args[i]);
3558 } else if (cmd.equals("-fix")) {
3559 errors.reportError(ERROR_CODE.WRONG_USAGE,
3560 "This option is deprecated, please use -fixAssignments instead.");
3561 setFixAssignments(true);
3562 } else if (cmd.equals("-fixAssignments")) {
3563 setFixAssignments(true);
3564 } else if (cmd.equals("-fixMeta")) {
3565 setFixMeta(true);
3566 } else if (cmd.equals("-noHdfsChecking")) {
3567 setCheckHdfs(false);
3568 } else if (cmd.equals("-fixHdfsHoles")) {
3569 setFixHdfsHoles(true);
3570 } else if (cmd.equals("-fixHdfsOrphans")) {
3571 setFixHdfsOrphans(true);
3572 } else if (cmd.equals("-fixTableOrphans")) {
3573 setFixTableOrphans(true);
3574 } else if (cmd.equals("-fixHdfsOverlaps")) {
3575 setFixHdfsOverlaps(true);
3576 } else if (cmd.equals("-fixVersionFile")) {
3577 setFixVersionFile(true);
3578 } else if (cmd.equals("-sidelineBigOverlaps")) {
3579 setSidelineBigOverlaps(true);
3580 } else if (cmd.equals("-fixSplitParents")) {
3581 setFixSplitParents(true);
3582 } else if (cmd.equals("-ignorePreCheckPermission")) {
3583 setIgnorePreCheckPermission(true);
3584 } else if (cmd.equals("-checkCorruptHFiles")) {
3585 checkCorruptHFiles = true;
3586 } else if (cmd.equals("-sidelineCorruptHFiles")) {
3587 sidelineCorruptHFiles = true;
3588 } else if (cmd.equals("-fixReferenceFiles")) {
3589 setFixReferenceFiles(true);
3590 } else if (cmd.equals("-fixEmptyMetaCells")) {
3591 setFixEmptyMetaCells(true);
3592 } else if (cmd.equals("-repair")) {
3593
3594
3595 setFixHdfsHoles(true);
3596 setFixHdfsOrphans(true);
3597 setFixMeta(true);
3598 setFixAssignments(true);
3599 setFixHdfsOverlaps(true);
3600 setFixVersionFile(true);
3601 setSidelineBigOverlaps(true);
3602 setFixSplitParents(false);
3603 setCheckHdfs(true);
3604 setFixReferenceFiles(true);
3605 } else if (cmd.equals("-repairHoles")) {
3606
3607 setFixHdfsHoles(true);
3608 setFixHdfsOrphans(false);
3609 setFixMeta(true);
3610 setFixAssignments(true);
3611 setFixHdfsOverlaps(false);
3612 setSidelineBigOverlaps(false);
3613 setFixSplitParents(false);
3614 setCheckHdfs(true);
3615 } else if (cmd.equals("-maxOverlapsToSideline")) {
3616 if (i == args.length - 1) {
3617 errors.reportError(ERROR_CODE.WRONG_USAGE,
3618 "-maxOverlapsToSideline needs a numeric value argument.");
3619 return printUsageAndExit();
3620 }
3621 try {
3622 int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
3623 setMaxOverlapsToSideline(maxOverlapsToSideline);
3624 } catch (NumberFormatException e) {
3625 errors.reportError(ERROR_CODE.WRONG_USAGE,
3626 "-maxOverlapsToSideline needs a numeric value argument.");
3627 return printUsageAndExit();
3628 }
3629 i++;
3630 } else if (cmd.equals("-maxMerge")) {
3631 if (i == args.length - 1) {
3632 errors.reportError(ERROR_CODE.WRONG_USAGE,
3633 "-maxMerge needs a numeric value argument.");
3634 return printUsageAndExit();
3635 }
3636 try {
3637 int maxMerge = Integer.parseInt(args[i+1]);
3638 setMaxMerge(maxMerge);
3639 } catch (NumberFormatException e) {
3640 errors.reportError(ERROR_CODE.WRONG_USAGE,
3641 "-maxMerge needs a numeric value argument.");
3642 return printUsageAndExit();
3643 }
3644 i++;
3645 } else if (cmd.equals("-summary")) {
3646 setSummary();
3647 } else if (cmd.equals("-metaonly")) {
3648 setCheckMetaOnly();
3649 } else if (cmd.startsWith("-")) {
3650 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
3651 return printUsageAndExit();
3652 } else {
3653 includeTable(cmd);
3654 errors.print("Allow checking/fixes for table: " + cmd);
3655 }
3656 }
3657
3658
3659 try {
3660 preCheckPermission();
3661 } catch (AccessControlException ace) {
3662 Runtime.getRuntime().exit(-1);
3663 } catch (IOException ioe) {
3664 Runtime.getRuntime().exit(-1);
3665 }
3666
3667
3668 connect();
3669
3670
3671 if (checkCorruptHFiles || sidelineCorruptHFiles) {
3672 LOG.info("Checking all hfiles for corruption");
3673 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3674 setHFileCorruptionChecker(hfcc);
3675 Collection<String> tables = getIncludedTables();
3676 Collection<Path> tableDirs = new ArrayList<Path>();
3677 Path rootdir = FSUtils.getRootDir(getConf());
3678 if (tables.size() > 0) {
3679 for (String t : tables) {
3680 tableDirs.add(FSUtils.getTablePath(rootdir, t));
3681 }
3682 } else {
3683 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
3684 }
3685 hfcc.checkTables(tableDirs);
3686 hfcc.report(errors);
3687 }
3688
3689
3690 int code = onlineHbck();
3691 setRetCode(code);
3692
3693
3694
3695
3696 if (shouldRerun()) {
3697 try {
3698 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3699 Thread.sleep(sleepBeforeRerun);
3700 } catch (InterruptedException ie) {
3701 return this;
3702 }
3703
3704 setFixAssignments(false);
3705 setFixMeta(false);
3706 setFixHdfsHoles(false);
3707 setFixHdfsOverlaps(false);
3708 setFixVersionFile(false);
3709 setFixTableOrphans(false);
3710 errors.resetErrors();
3711 code = onlineHbck();
3712 setRetCode(code);
3713 }
3714 return this;
3715 }
3716
3717
3718
3719
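/**
 * Recursively lists (at DEBUG level) the contents of the given path, using this fsck
 * instance's error reporter.
 */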
3720 void debugLsr(Path p) throws IOException {
3721 debugLsr(getConf(), p, errors);
3722 }
3723
3724
3725
3726
3727 public static void debugLsr(Configuration conf,
3728 Path p) throws IOException {
3729 debugLsr(conf, p, new PrintingErrorReporter());
3730 }
3731
3732
3733
3734
3735 public static void debugLsr(Configuration conf,
3736 Path p, ErrorReporter errors) throws IOException {
3737 if (!LOG.isDebugEnabled() || p == null) {
3738 return;
3739 }
3740 FileSystem fs = p.getFileSystem(conf);
3741
3742 if (!fs.exists(p)) {
3743
3744 return;
3745 }
3746 errors.print(p.toString());
3747
3748 if (fs.isFile(p)) {
3749 return;
3750 }
3751
3752 if (fs.getFileStatus(p).isDir()) {
3753 FileStatus[] fss = fs.listStatus(p);
3754 for (FileStatus status : fss) {
3755 debugLsr(conf, status.getPath(), errors);
3756 }
3757 }
3758 }
3759 }