/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnectable;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.KeeperException;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.google.protobuf.ServiceException;
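/**
 * HBaseFsck (hbck) checks for and optionally repairs inconsistencies in an
 * HBase cluster. It compares the region state recorded in the hbase:meta
 * table, the region deployments reported by the region servers, and the
 * region directories found in HDFS, and reports any disagreement between
 * them. Table integrity checking then verifies that each table's regions
 * form a contiguous, non-overlapping chain of keys. Repairs are opt-in via
 * the various fix options and run in two phases: an offline HDFS integrity
 * repair followed by an online consistency repair.
 */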
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HBaseFsck extends Configured {
  public static final long DEFAULT_TIME_LAG = 60000;
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
  private static final int MAX_NUM_THREADS = 50;
  private static boolean rsSupportsOffline = true;
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  private static final int DEFAULT_MAX_MERGE = 5;
  private static final String TO_BE_LOADED = "to_be_loaded";

  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
  private ClusterStatus status;
  private HConnection connection;
  private HBaseAdmin admin;
  private HTable meta;
  protected ExecutorService executor;
  private long startMillis = System.currentTimeMillis();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;

  private static boolean details = false;
  private long timelag = DEFAULT_TIME_LAG;
  private boolean fixAssignments = false;
  private boolean fixMeta = false;
  private boolean checkHdfs = true;
  private boolean fixHdfsHoles = false;
  private boolean fixHdfsOverlaps = false;
  private boolean fixHdfsOrphans = false;
  private boolean fixTableOrphans = false;
  private boolean fixVersionFile = false;
  private boolean fixSplitParents = false;
  private boolean fixReferenceFiles = false;
  private boolean fixEmptyMetaCells = false;
  private boolean fixTableLocks = false;

  private Set<TableName> tablesIncluded = new HashSet<TableName>();
  private int maxMerge = DEFAULT_MAX_MERGE;
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
  private boolean sidelineBigOverlaps = false;
  private Path sidelineDir = null;

  private boolean rerun = false;
  private static boolean summary = false;
  private boolean checkMetaOnly = false;
  private boolean ignorePreCheckPermission = false;

  private final ErrorReporter errors;
  int fixes = 0;

  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
  private TreeSet<TableName> disabledTables = new TreeSet<TableName>();

  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();

  private SortedMap<TableName, TableInfo> tablesInfo =
      new ConcurrentSkipListMap<TableName, TableInfo>();

  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());

  private Map<TableName, Set<String>> orphanTableDirs =
      new HashMap<TableName, Set<String>>();

  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(conf);

    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
    executor = new ScheduledThreadPoolExecutor(numThreads,
        Threads.newDaemonThreadFactory("hbasefsck"));
  }

  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
  }

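  /**
   * Sets up the admin, hbase:meta table, cluster status, and connection
   * handles used by the online checks. Must be called before any of the
   * online check/repair methods.
   */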
  public void connect() throws IOException {
    admin = new HBaseAdmin(getConf());
    meta = new HTable(getConf(), TableName.META_TABLE_NAME);
    status = admin.getClusterStatus();
    connection = admin.getConnection();
  }

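  /**
   * Prints a summary of the cluster (live/dead servers, masters, load,
   * regions in transition) and then gathers region deployment information
   * from each live region server.
   */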
  private void loadDeployedRegions() throws IOException, InterruptedException {
    Collection<ServerName> regionServers = status.getServers();
    errors.print("Number of live region servers: " + regionServers.size());
    if (details) {
      for (ServerName rsinfo: regionServers) {
        errors.print(" " + rsinfo.getServerName());
      }
    }

    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
    errors.print("Number of dead region servers: " + deadRegionServers.size());
    if (details) {
      for (ServerName name: deadRegionServers) {
        errors.print(" " + name);
      }
    }

    errors.print("Master: " + status.getMaster());

    Collection<ServerName> backupMasters = status.getBackupMasters();
    errors.print("Number of backup masters: " + backupMasters.size());
    if (details) {
      for (ServerName name: backupMasters) {
        errors.print(" " + name);
      }
    }

    errors.print("Average load: " + status.getAverageLoad());
    errors.print("Number of requests: " + status.getRequestsCount());
    errors.print("Number of regions: " + status.getRegionsCount());

    Map<String, RegionState> rits = status.getRegionsInTransition();
    errors.print("Number of regions in transition: " + rits.size());
    if (details) {
      for (RegionState state: rits.values()) {
        errors.print(" " + state.toDescriptiveString());
      }
    }

    processRegionServers(regionServers);
  }

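  /**
   * Clears the accumulated hbck state so that a new check/repair iteration
   * starts from scratch.
   */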
  private void clearState() {
    fixes = 0;
    regionInfoMap.clear();
    emptyRegionInfoQualifiers.clear();
    disabledTables.clear();
    errors.clear();
    tablesInfo.clear();
    orphanHdfsDirs.clear();
  }

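  /**
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
   * the table integrity rules. HBase doesn't need to be online for this
   * operation to work.
   */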
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
      // Repeat until no more fixes are applied or the iteration limit is hit.
      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
      int curIter = 0;
      do {
        clearState();
        restoreHdfsIntegrity();
        curIter++;
      } while (fixes > 0 && curIter <= maxIterations);

      if (curIter > 2) {
        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
        } else {
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
        }
      }
    }
  }

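  /**
   * This repair method requires the cluster to be online since it contacts
   * region servers and the masters. It makes each region's state in HDFS, in
   * hbase:meta, and in its deployments consistent.
   *
   * @return if &gt; 0, the number of errors detected; if &lt; 0, a fatal
   *         error was encountered
   */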
  public int onlineConsistencyRepair() throws IOException, KeeperException,
      InterruptedException {
    clearState();

    loadDeployedRegions();

    if (!recordMetaRegion()) {
      errors.reportError("Fatal error: unable to get hbase:meta region location. Exiting...");
      return -2;
    }

    if (!checkMetaRegion()) {
      String errorMsg = "hbase:meta table is not consistent. ";
      if (shouldFixAssignments()) {
        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
      } else {
        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
      }
      errors.reportError(errorMsg + " Exiting...");
      return -2;
    }

    LOG.info("Loading regioninfos from the hbase:meta table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    reportEmptyMetaCells();

    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    if (shouldCheckHdfs()) {
      loadHdfsRegionDirs();
      loadHdfsRegionInfos();
    }

    loadDisabledTables();

    fixOrphanTables();

    checkAndFixConsistency();

    checkIntegrity();
    return errors.getErrorList().size();
  }

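  /**
   * Runs the full online check/repair sequence: offline HDFS integrity
   * repair, then online consistency repair (with the balancer temporarily
   * disabled), followed by lingering-reference-file repair and table lock
   * checking.
   *
   * @return 0 on success, non-zero on failure
   */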
  public int onlineHbck() throws IOException, KeeperException, InterruptedException,
      ServiceException {
    errors.print("Version: " + status.getHBaseVersion());
    offlineHdfsIntegrityRepair();

    boolean oldBalancer = admin.setBalancerRunning(false, true);
    try {
      onlineConsistencyRepair();
    } finally {
      admin.setBalancerRunning(oldBalancer, false);
    }

    offlineReferenceFileRepair();

    checkAndFixTableLocks();

    printTableSummary(tablesInfo);
    return errors.summarize();
  }

  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
    for (HbckInfo hi : orphanHdfsDirs) {
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
      adoptHdfsOrphan(hi);
    }
  }

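  /**
   * Orphaned regions are regions without a .regioninfo file in them. We
   * "adopt" these orphans by creating a new region and moving their data
   * into it. The region start and end keys are determined by scanning all
   * the hfiles in the orphan dir for their min and max keys. The resulting
   * region will likely violate table integrity, but that is dealt with
   * later by merging overlapping regions.
   */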
  @SuppressWarnings("deprecation")
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
          p + ". This dir could probably be deleted.");
      return;
    }

    TableName tableName = hi.getTableName();
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    HTableDescriptor template = tableInfo.getHTD();

    // Find the min and max keys from the hfiles in the region dir.
    Pair<byte[], byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
      // Skip special dirs (e.g. the split log dir) that are not column families.
      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;

      FileStatus[] hfiles = fs.listStatus(cf.getPath());
      for (FileStatus hfile : hfiles) {
        byte[] start, end;
        HFile.Reader hf = null;
        try {
          CacheConfig cacheConf = new CacheConfig(getConf());
          hf = HFile.createReader(fs, hfile.getPath(), cacheConf);
          hf.loadFileInfo();
          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
          start = startKv.getRow();
          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
          end = endKv.getRow();
        } catch (IOException ioe) {
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
          continue;
        } catch (NullPointerException ioe) {
          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
          continue;
        } finally {
          if (hf != null) {
            hf.close();
          }
        }

        // Expand the range to include the range of all hfiles.
        if (orphanRegionRange == null) {
          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
        } else {
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
            orphanRegionRange.setFirst(start);
          }
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
            orphanRegionRange.setSecond(end);
          }
        }
      }
    }
    if (orphanRegionRange == null) {
      LOG.warn("No data in dir " + p + ", sidelining data");
      fixes++;
      sidelineRegionDir(fs, hi);
      return;
    }
    LOG.info("Min max keys are: [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
        Bytes.toString(orphanRegionRange.getSecond()) + ")");

    // Create a new region on the hdfs filesystem.
    HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
        orphanRegionRange.getSecond());
    LOG.info("Creating new region : " + hri);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
    Path target = region.getRegionFileSystem().getRegionDir();

    // Rename all the data to the new region.
    mergeRegionDirs(target, hi);
    fixes++;
  }

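  /**
   * Scans HDFS region dirs for table integrity errors (orphans, holes,
   * overlaps). When the corresponding fix options are enabled, orphan
   * regions are adopted first, and the state is then reloaded to repair
   * holes and overlaps.
   *
   * @return the number of integrity errors remaining after the pass
   */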
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs();

    int errs = errors.getErrorList().size();

    tablesInfo = loadHdfsRegionInfos();
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() == errs) {
      LOG.info("No integrity errors. We are done with this phase. Glorious.");
      return 0;
    }

    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
      adoptHdfsOrphans(orphanHdfsDirs);
    }

    if (shouldFixHdfsHoles()) {
      clearState();
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos();
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
    }

    if (shouldFixHdfsOverlaps()) {
      clearState();
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos();
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
    }

    return errors.getErrorList().size();
  }

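  /**
   * Scans all the store file names to find any lingering reference files,
   * i.e. references that point at files which no longer exist. If the fix
   * option is enabled, any lingering reference file found is sidelined.
   * <p>
   * A lingering reference file prevents a region from opening, so it has to
   * be fixed before a cluster can start properly.
   */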
  private void offlineReferenceFileRepair() throws IOException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
    for (Path path: allFiles.values()) {
      boolean isReference = false;
      try {
        isReference = StoreFileInfo.isReference(path);
      } catch (Throwable t) {
        // Best effort: a file that cannot be inspected is simply not
        // treated as a reference file here.
      }
      if (!isReference) continue;

      Path referredToFile = StoreFileInfo.getReferredToFile(path);
      if (fs.exists(referredToFile)) continue;

      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
          "Found lingering reference file " + path);
      if (!shouldFixReferenceFiles()) continue;

      boolean success = false;
      String pathStr = path.toString();

      // Walk the path up five separators to recover the portion below the
      // HBase root dir, so the sideline copy keeps the same directory
      // structure relative to the sideline dir.
      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 5; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }
      if (index > 0) {
        Path rootDir = getSidelineDir();
        Path dst = new Path(rootDir, pathStr.substring(index + 1));
        fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
            + path + " to " + dst);
        setShouldRerun();

        success = fs.rename(path, dst);
      }
      if (!success) {
        LOG.error("Failed to sideline reference file " + path);
      }
    }
  }

  private void reportEmptyMetaCells() {
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
        emptyRegionInfoQualifiers.size());
    if (details) {
      for (Result r: emptyRegionInfoQualifiers) {
        errors.print(" " + r);
      }
    }
  }

  private void reportTablesInFlux() {
    AtomicInteger numSkipped = new AtomicInteger(0);
    HTableDescriptor[] allTables = getTables(numSkipped);
    errors.print("Number of Tables: " + allTables.length);
    if (details) {
      if (numSkipped.get() > 0) {
        errors.detail("Number of Tables in flux: " + numSkipped.get());
      }
      for (HTableDescriptor td : allTables) {
        errors.detail(" Table: " + td.getTableName() + "\t" +
            (td.isReadOnly() ? "ro" : "rw") + "\t" +
            (td.isMetaRegion() ? "META" : " ") + "\t" +
            " families: " + td.getFamilies().size());
      }
    }
  }

  public ErrorReporter getErrors() {
    return errors;
  }

  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    if (regionDir == null) {
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
      return;
    }

    if (hbi.hdfsEntry.hri != null) {
      // Already loaded.
      return;
    }

    FileSystem fs = FileSystem.get(getConf());
    HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
    LOG.debug("HRegionInfo read: " + hri.toString());
    hbi.hdfsEntry.hri = hri;
  }

  public static class RegionRepairException extends IOException {
    private static final long serialVersionUID = 1L;
    final IOException ioe;

    public RegionRepairException(String s, IOException ioe) {
      super(s);
      this.ioe = ioe;
    }
  }

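  /**
   * Populates tablesInfo from the .regioninfo files found on HDFS, reading
   * them in parallel on the executor. Tables whose .tableinfo file cannot
   * be read are recorded in orphanTableDirs for later repair.
   */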
  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
      throws IOException, InterruptedException {
    tablesInfo.clear();

    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
    List<Future<Void>> hbiFutures;

    for (HbckInfo hbi : hbckInfos) {
      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
      hbis.add(work);
    }

    hbiFutures = executor.invokeAll(hbis);

    for (int i = 0; i < hbiFutures.size(); i++) {
      WorkItemHdfsRegionInfo work = hbis.get(i);
      Future<Void> f = hbiFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Failed to read .regioninfo file for region " +
            work.hbi.getRegionNameAsString(), e.getCause());
      }
    }

    Path hbaseRoot = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseRoot.getFileSystem(getConf());

    for (HbckInfo hbi: hbckInfos) {
      if (hbi.getHdfsHRI() == null) {
        continue;
      }

      TableName tableName = hbi.getTableName();
      if (tableName == null) {
        LOG.warn("tableName was null for: " + hbi);
        continue;
      }

      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        modTInfo = new TableInfo(tableName);
        tablesInfo.put(tableName, modTInfo);
        try {
          HTableDescriptor htd =
              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
          modTInfo.htds.add(htd);
        } catch (IOException ioe) {
          if (!orphanTableDirs.containsKey(tableName)) {
            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
            Set<String> columns = new HashSet<String>();
            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
          }
        }
      }
      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }
    }

    loadTableInfosForTablesWithNoRegion();

    return tablesInfo;
  }

  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
    for (FileStatus subdir : subDirs) {
      String columnFamily = subdir.getPath().getName();
      columns.add(columnFamily);
    }
    return columns;
  }

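  /**
   * Fabricates a .tableinfo file with the correct table name, the given
   * column family list, and default properties for both the table and its
   * column descriptors.
   *
   * @return false if no column families were supplied to build from
   */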
  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (String columnFamily : columns) {
      htd.addFamily(new HColumnDescriptor(columnFamily));
    }
    fstd.createTableDescriptor(htd, true);
    return true;
  }

  public void fixEmptyMetaCells() throws IOException {
    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
      for (Result region : emptyRegionInfoQualifiers) {
        deleteMetaRegion(region.getRow());
        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
      }
      emptyRegionInfoQualifiers.clear();
    }
  }

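  /**
   * Fixes orphan tables by creating a .tableinfo file under each orphan
   * table dir: if a descriptor for the table is cached, it is written back
   * out; otherwise a default .tableinfo is fabricated from the column
   * families found on disk.
   */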
  public void fixOrphanTables() throws IOException {
    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {

      List<TableName> tmpList = new ArrayList<TableName>();
      tmpList.addAll(orphanTableDirs.keySet());
      HTableDescriptor[] htds = getHTableDescriptors(tmpList);
      Iterator<Entry<TableName, Set<String>>> iter =
          orphanTableDirs.entrySet().iterator();
      int j = 0;
      int numFailedCase = 0;
      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
      while (iter.hasNext()) {
        Entry<TableName, Set<String>> entry = iter.next();
        TableName tableName = entry.getKey();
        LOG.info("Trying to fix orphan table error: " + tableName);
        if (j < htds.length) {
          if (tableName.equals(htds[j].getTableName())) {
            HTableDescriptor htd = htds[j];
            LOG.info("fixing orphan table: " + tableName + " from cache");
            fstd.createTableDescriptor(htd, true);
            j++;
            iter.remove();
          }
        } else {
          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
            LOG.warn("Strongly recommend modifying the HTableDescriptor if necessary for: "
                + tableName);
            iter.remove();
          } else {
            LOG.error("Unable to create default .tableinfo for " + tableName
                + " due to missing column family information");
            numFailedCase++;
          }
        }
        fixes++;
      }

      if (orphanTableDirs.isEmpty()) {
        // All orphan table dirs were recovered; a rerun is needed to pick up
        // the fixes.
        setShouldRerun();
        LOG.warn("Strongly recommend re-running hbck manually after all orphan table dirs have been fixed");
      } else if (numFailedCase > 0) {
        LOG.error("Failed to fix " + numFailedCase
            + " OrphanTables with default .tableinfo files");
      }
    }
    orphanTableDirs.clear();
  }

  private HRegion createNewMeta() throws IOException {
    Path rootdir = FSUtils.getRootDir(getConf());
    Configuration c = getConf();
    HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
    MasterFileSystem.setInfoFamilyCachingForMeta(false);
    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c,
        HTableDescriptor.META_TABLEDESC);
    MasterFileSystem.setInfoFamilyCachingForMeta(true);
    return meta;
  }

  private ArrayList<Put> generatePuts(
      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
    ArrayList<Put> puts = new ArrayList<Put>();
    boolean hasProblems = false;
    for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
      TableName name = e.getKey();

      // Skip hbase:meta.
      if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
        continue;
      }

      TableInfo ti = e.getValue();
      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
          .entrySet()) {
        Collection<HbckInfo> his = spl.getValue();
        int sz = his.size();
        if (sz != 1) {
          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
              + " had " + sz + " regions instead of exactly 1.");
          hasProblems = true;
          continue;
        }

        HbckInfo hi = his.iterator().next();
        HRegionInfo hri = hi.getHdfsHRI();
        Put p = MetaEditor.makePutFromRegionInfo(hri);
        puts.add(p);
      }
    }
    return hasProblems ? null : puts;
  }

  private void suggestFixes(
      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      tInfo.checkRegionChain(handler);
    }
  }

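  /**
   * Rebuilds hbase:meta from the region information found in HDFS. The old
   * hbase:meta is sidelined first, and the rebuild is aborted if unresolved
   * table integrity errors remain.
   *
   * @param fix whether the method should attempt repairs before rebuilding
   * @return true if successful, false if the attempt failed
   */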
  public boolean rebuildMeta(boolean fix) throws IOException,
      InterruptedException {

    // Determine what's on HDFS.
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs();

    int errs = errors.getErrorList().size();
    tablesInfo = loadHdfsRegionInfos();
    checkHdfsIntegrity(false, false);

    // Make repairs if requested.
    if (errors.getErrorList().size() != errs) {
      // Repair mode: iterate until no more fixes are applied.
      while (true) {
        fixes = 0;
        suggestFixes(tablesInfo);
        errors.clear();
        loadHdfsRegionInfos();
        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());

        int errCount = errors.getErrorList().size();

        if (fixes == 0) {
          if (errCount > 0) {
            return false; // Unfixable integrity errors remain.
          } else {
            break; // No fixes and no problems; drop out and rebuild.
          }
        }
      }
    }

    // We can rebuild: move old hbase:meta out of the way and start over.
    LOG.info("HDFS regioninfos seem good. Sidelining old hbase:meta");
    Path backupDir = sidelineOldMeta();

    LOG.info("Creating new hbase:meta");
    HRegion meta = createNewMeta();

    // Populate meta.
    List<Put> puts = generatePuts(tablesInfo);
    if (puts == null) {
      LOG.fatal("Problem encountered when creating new hbase:meta entries. " +
          "You may need to restore the previously sidelined hbase:meta");
      return false;
    }
    meta.batchMutate(puts.toArray(new Put[0]));
    HRegion.closeHRegion(meta);
    LOG.info("Success! hbase:meta table rebuilt.");
    LOG.info("Old hbase:meta is moved into " + backupDir);
    return true;
  }

  private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
      boolean fixOverlaps) throws IOException {
    LOG.info("Checking HBase region split map from HDFS data...");
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler;
      if (fixHoles || fixOverlaps) {
        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
            fixHoles, fixOverlaps);
      } else {
        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      }
      if (!tInfo.checkRegionChain(handler)) {
        errors.report("Found inconsistency in table " + tInfo.getName());
      }
    }
    return tablesInfo;
  }

  private Path getSidelineDir() throws IOException {
    if (sidelineDir == null) {
      Path hbaseDir = FSUtils.getRootDir(getConf());
      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
          + startMillis);
    }
    return sidelineDir;
  }

  Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
    return sidelineRegionDir(fs, null, hi);
  }

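  /**
   * Sidelines a region dir (instead of deleting it).
   *
   * @param parentDir if specified, the region is sidelined under
   *          parentDir/&lt;table name&gt;/&lt;region name&gt; so that similar
   *          sidelined regions are grouped together; ignored if null
   */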
  Path sidelineRegionDir(FileSystem fs,
      String parentDir, HbckInfo hi) throws IOException {
    TableName tableName = hi.getTableName();
    Path regionDir = hi.getHdfsRegionDir();

    if (!fs.exists(regionDir)) {
      LOG.warn("No previous " + regionDir + " exists. Continuing.");
      return null;
    }

    Path rootDir = getSidelineDir();
    if (parentDir != null) {
      rootDir = new Path(rootDir, parentDir);
    }
    Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
    fs.mkdirs(sidelineRegionDir);
    boolean success = false;
    FileStatus[] cfs = fs.listStatus(regionDir);
    if (cfs == null) {
      LOG.info("Region dir is empty: " + regionDir);
    } else {
      for (FileStatus cf : cfs) {
        Path src = cf.getPath();
        Path dst = new Path(sidelineRegionDir, src.getName());
        if (fs.isFile(src)) {
          success = fs.rename(src, dst);
          if (!success) {
            String msg = "Unable to rename file " + src + " to " + dst;
            LOG.error(msg);
            throw new IOException(msg);
          }
          continue;
        }

        // Directory: move its contents file by file. Renaming a directory
        // onto an existing destination dir would nest the source inside it
        // rather than merge the contents.
        fs.mkdirs(dst);

        LOG.info("Sidelining files from " + src + " into containing region " + dst);

        FileStatus[] hfiles = fs.listStatus(src);
        if (hfiles != null && hfiles.length > 0) {
          for (FileStatus hfile : hfiles) {
            success = fs.rename(hfile.getPath(), dst);
            if (!success) {
              String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
              LOG.error(msg);
              throw new IOException(msg);
            }
          }
        }
        LOG.debug("Sideline directory contents:");
        debugLsr(sidelineRegionDir);
      }
    }

    LOG.info("Removing old region dir: " + regionDir);
    success = fs.delete(regionDir, true);
    if (!success) {
      String msg = "Unable to delete dir " + regionDir;
      LOG.error(msg);
      throw new IOException(msg);
    }
    return sidelineRegionDir;
  }

  void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
      Path backupHbaseDir) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
    if (fs.exists(tableDir)) {
      Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName);
      fs.mkdirs(backupTableDir.getParent());
      boolean success = fs.rename(tableDir, backupTableDir);
      if (!success) {
        throw new IOException("Failed to move " + tableName + " from "
            + tableDir + " to " + backupTableDir);
      }
    } else {
      LOG.info("No previous " + tableName + " exists. Continuing.");
    }
  }

  Path sidelineOldMeta() throws IOException {
    // Put the current hbase:meta aside.
    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    Path backupDir = getSidelineDir();
    fs.mkdirs(backupDir);
    try {
      sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
    } catch (IOException e) {
      LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore "
          + "try to rename hbase:meta in " + backupDir.getName() + " to "
          + hbaseDir.getName() + ".", e);
      throw e;
    }
    return backupDir;
  }

  private void loadDisabledTables()
      throws ZooKeeperConnectionException, IOException {
    HConnectionManager.execute(new HConnectable<Void>(getConf()) {
      @Override
      public Void connect(HConnection connection) throws IOException {
        ZooKeeperWatcher zkw = createZooKeeperWatcher();
        try {
          for (TableName tableName :
              ZKTableReadOnly.getDisabledOrDisablingTables(zkw)) {
            disabledTables.add(tableName);
          }
        } catch (KeeperException ke) {
          throw new IOException(ke);
        } finally {
          zkw.close();
        }
        return null;
      }
    });
  }

  private boolean isTableDisabled(HRegionInfo regionInfo) {
    return disabledTables.contains(regionInfo.getTable());
  }

  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
    Path rootDir = FSUtils.getRootDir(getConf());
    FileSystem fs = rootDir.getFileSystem(getConf());

    // List all tables from HDFS.
    List<FileStatus> tableDirs = Lists.newArrayList();

    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));

    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
    for (Path path : paths) {
      TableName tableName = FSUtils.getTableName(path);
      if ((!checkMetaOnly &&
          isTableIncluded(tableName)) ||
          tableName.equals(TableName.META_TABLE_NAME)) {
        tableDirs.add(fs.getFileStatus(path));
      }
    }

    // Verify that the version file exists.
    if (!foundVersionFile) {
      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
          "Version file does not exist in root dir " + rootDir);
      if (shouldFixVersionFile()) {
        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
            + " file.");
        setShouldRerun();
        FSUtils.setVersion(fs, rootDir, getConf().getInt(
            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
      }
    }
  }

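  /**
   * Records the location of the hbase:meta region, creating or updating its
   * HbckInfo entry in regionInfoMap.
   *
   * @return true if hbase:meta was located, false otherwise
   */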
  private boolean recordMetaRegion() throws IOException {
    HRegionLocation metaLocation = connection.locateRegion(
        TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW);

    if (metaLocation == null || metaLocation.getRegionInfo() == null ||
        metaLocation.getHostname() == null) {
      errors.reportError(ERROR_CODE.NULL_META_REGION,
          "META region or some of its attributes are null.");
      return false;
    }
    ServerName sn;
    try {
      sn = getMetaRegionServerName();
    } catch (KeeperException e) {
      throw new IOException(e);
    }
    MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
    HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
    if (hbckInfo == null) {
      regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
    } else {
      hbckInfo.metaEntry = m;
    }
    return true;
  }

  private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
    return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
      @Override
      public void abort(String why, Throwable e) {
        LOG.error(why, e);
        System.exit(1);
      }

      @Override
      public boolean isAborted() {
        return false;
      }
    });
  }

  private ServerName getMetaRegionServerName()
      throws IOException, KeeperException {
    ZooKeeperWatcher zkw = createZooKeeperWatcher();
    ServerName sn = null;
    try {
      sn = MetaRegionTracker.getMetaRegionLocation(zkw);
    } finally {
      zkw.close();
    }
    return sn;
  }

  void processRegionServers(Collection<ServerName> regionServerList)
      throws IOException, InterruptedException {

    List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
    List<Future<Void>> workFutures;

    // Contact each region server in parallel.
    for (ServerName rsinfo: regionServerList) {
      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
    }

    workFutures = executor.invokeAll(workItems);

    for (int i = 0; i < workFutures.size(); i++) {
      WorkItemRegion item = workItems.get(i);
      Future<Void> f = workFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
            e.getCause());
      }
    }
  }

  private void checkAndFixConsistency()
      throws IOException, KeeperException, InterruptedException {
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
      checkRegionConsistency(e.getKey(), e.getValue());
    }
  }

  private void preCheckPermission() throws IOException, AccessControlException {
    if (shouldIgnorePreCheckPermission()) {
      return;
    }

    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    UserProvider userProvider = UserProvider.instantiate(getConf());
    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
    FileStatus[] files = fs.listStatus(hbaseDir);
    for (FileStatus file : files) {
      try {
        FSUtils.checkAccess(ugi, file, FsAction.WRITE);
      } catch (AccessControlException ace) {
        LOG.warn("Got AccessControlException in preCheckPermission", ace);
        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
            + " does not have write perms to " + file.getPath()
            + ". Please rerun hbck as hdfs user " + file.getOwner());
        throw new AccessControlException(ace);
      }
    }
  }

  private void deleteMetaRegion(HbckInfo hi) throws IOException {
    deleteMetaRegion(hi.metaEntry.getRegionName());
  }

  private void deleteMetaRegion(byte[] metaKey) throws IOException {
    Delete d = new Delete(metaKey);
    meta.delete(d);
    meta.flushCommits();
    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META");
  }

  private void resetSplitParent(HbckInfo hi) throws IOException {
    RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
    Delete d = new Delete(hi.metaEntry.getRegionName());
    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
    mutations.add(d);

    HRegionInfo hri = new HRegionInfo(hi.metaEntry);
    hri.setOffline(false);
    hri.setSplit(false);
    Put p = MetaEditor.makePutFromRegionInfo(hri);
    mutations.add(p);

    meta.mutateRow(mutations);
    meta.flushCommits();
    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META");
  }

  private void offline(byte[] regionName) throws IOException {
    String regionString = Bytes.toStringBinary(regionName);
    if (!rsSupportsOffline) {
      LOG.warn("Using unassign region " + regionString
          + " instead of using offline method, you should"
          + " restart HMaster after these repairs");
      admin.unassign(regionName, true);
      return;
    }

    // First time we assume the master supports #offline.
    try {
      LOG.info("Offlining region " + regionString);
      admin.offline(regionName);
    } catch (IOException ioe) {
      String notFoundMsg = "java.lang.NoSuchMethodException: " +
          "org.apache.hadoop.hbase.master.HMaster.offline([B)";
      if (ioe.getMessage().contains(notFoundMsg)) {
        LOG.warn("Using unassign region " + regionString
            + " instead of using offline method, you should"
            + " restart HMaster after these repairs");
        rsSupportsOffline = false;
        admin.unassign(regionName, true);
        return;
      }
      throw ioe;
    }
  }

  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
    for (OnlineEntry rse : hi.deployedEntries) {
      LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
      try {
        HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
        offline(rse.hri.getRegionName());
      } catch (IOException ioe) {
        LOG.warn("Got exception when attempting to offline region "
            + Bytes.toString(rse.hri.getRegionName()), ioe);
      }
    }
  }

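  /**
   * Attempts to undeploy a region from a region server based on the
   * assignment information in hbase:meta. Any operation that modifies the
   * file system should first make sure the corresponding region is not
   * deployed, to prevent data races.
   */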
  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
    if (hi.metaEntry == null && hi.hdfsEntry == null) {
      undeployRegions(hi);
      return;
    }

    Get get = new Get(hi.getRegionName());
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
    Result r = meta.get(get);
    ServerName serverName = HRegionInfo.getServerName(r);
    if (serverName == null) {
      errors.reportError("Unable to close region "
          + hi.getRegionNameAsString() + " because meta does not "
          + "have handle to reach it.");
      return;
    }

    HRegionInfo hri = HRegionInfo.getHRegionInfo(r);
    if (hri == null) {
      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
          + " because hbase:meta had invalid or missing "
          + HConstants.CATALOG_FAMILY_STR + ":"
          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
          + " qualifier value.");
      return;
    }

    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, serverName, hri);
  }

  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
      KeeperException, InterruptedException {
    if (shouldFixAssignments()) {
      errors.print(msg);
      undeployRegions(hbi);
      setShouldRerun();
      HRegionInfo hri = hbi.getHdfsHRI();
      if (hri == null) {
        hri = hbi.metaEntry;
      }
      HBaseFsckRepair.fixUnassigned(admin, hri);
      HBaseFsckRepair.waitUntilAssigned(admin, hri);
    }
  }

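  /**
   * Checks a single region for consistency between hbase:meta, HDFS, and its
   * deployment, and applies the enabled repairs for the state found.
   */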
  private void checkRegionConsistency(final String key, final HbckInfo hbi)
      throws IOException, KeeperException, InterruptedException {
    String descriptiveName = hbi.toString();

    boolean inMeta = hbi.metaEntry != null;
    // If not checking HDFS, assume the region is on HDFS.
    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
    boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
    boolean isDeployed = !hbi.deployedOn.isEmpty();
    boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
    boolean deploymentMatchesMeta =
        hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
        hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
    boolean splitParent =
        (hbi.metaEntry == null) ? false : hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
    boolean recentlyModified = inHdfs &&
        hbi.getModTime() + timelag > System.currentTimeMillis();

    // First the healthy cases.
    if (hbi.containsOnlyHdfsEdits()) {
      return;
    }
    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
      return;
    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
          "table that is not deployed");
      return;
    } else if (recentlyModified) {
      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
      return;
    }
    // Inconsistent cases.
    else if (!inMeta && !inHdfs && !isDeployed) {
      // We shouldn't have a record of this region at all then!
      assert false : "Entry for region with no data";
    } else if (!inMeta && !inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
          + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
          "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      if (shouldFixAssignments()) {
        undeployRegions(hbi);
      }

    } else if (!inMeta && inHdfs && !isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
          + descriptiveName + " on HDFS, but not listed in hbase:meta " +
          "or deployed on any region server");
      if (shouldFixMeta()) {
        if (!hbi.isHdfsRegioninfoPresent()) {
          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
              + " in table integrity repair phase if -fixHdfsOrphans was" +
              " used.");
          return;
        }

        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
        HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());

        tryAssignmentRepair(hbi, "Trying to reassign region...");
      }

    } else if (!inMeta && inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      debugLsr(hbi.getHdfsRegionDir());
      if (shouldFixMeta()) {
        if (!hbi.isHdfsRegioninfoPresent()) {
          LOG.error("This should have been repaired in table integrity repair phase");
          return;
        }

        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
        HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());

        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
      }

    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
      // Check whether this is an actual error, or just transient state where
      // the parent has not been cleaned yet.
      if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
        // Check that the split daughters are there.
        HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
        HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
        if (infoA != null && infoB != null) {
          // We already have daughters, so no need to check the split parent.
          hbi.setSkipChecks(true);
          return;
        }
      }
      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
          + descriptiveName + " is a split parent in META, in HDFS, "
          + "and not deployed on any region server. This could be transient.");
      if (shouldFixSplitParents()) {
        setShouldRerun();
        resetSplitParent(hbi);
      }
    } else if (inMeta && !inHdfs && !isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
          + descriptiveName + " found in META, but not in HDFS "
          + "or deployed on any region server.");
      if (shouldFixMeta()) {
        deleteMetaRegion(hbi);
      }
    } else if (inMeta && !inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
          + " found in META, but not in HDFS, " +
          "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      if (shouldFixAssignments()) {
        errors.print("Trying to fix unassigned region...");
        closeRegion(hbi);
      }
      if (shouldFixMeta()) {
        deleteMetaRegion(hbi);
      }
    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
          + " not deployed on any region server.");
      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
          "Region " + descriptiveName + " should not be deployed according " +
          "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      if (shouldFixAssignments()) {
        errors.print("Trying to close the region " + descriptiveName);
        setShouldRerun();
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
      }
    } else if (inMeta && inHdfs && isMultiplyDeployed) {
      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
          + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
          + " but is multiply assigned to region servers " +
          Joiner.on(", ").join(hbi.deployedOn));
      if (shouldFixAssignments()) {
        errors.print("Trying to fix assignment error...");
        setShouldRerun();
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
      }
    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
          + descriptiveName + " listed in hbase:meta on region server " +
          hbi.metaEntry.regionServer + " but found on region server " +
          hbi.deployedOn.get(0));
      if (shouldFixAssignments()) {
        errors.print("Trying to fix assignment error...");
        setShouldRerun();
        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
      }
    } else {
      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
          " is in an unforeseen state:" +
          " inMeta=" + inMeta +
          " inHdfs=" + inHdfs +
          " isDeployed=" + isDeployed +
          " isMultiplyDeployed=" + isMultiplyDeployed +
          " deploymentMatchesMeta=" + deploymentMatchesMeta +
          " shouldBeDeployed=" + shouldBeDeployed);
    }
  }

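  /**
   * Checks table integrity. Goes over all the regions collected from
   * hbase:meta, groups them by table, and checks each table's region chain
   * for missing, repeated, or overlapping key ranges.
   */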
  SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
    tablesInfo = new TreeMap<TableName, TableInfo>();
    List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
    for (HbckInfo hbi : regionInfoMap.values()) {
      // Check only valid, working regions.
      if (hbi.metaEntry == null) {
        noHDFSRegionInfos.add(hbi);
        Path p = hbi.getHdfsRegionDir();
        if (p == null) {
          errors.report("No regioninfo in Meta or HDFS. " + hbi);
        }
        continue;
      }
      if (hbi.metaEntry.regionServer == null) {
        errors.detail("Skipping region because no region server: " + hbi);
        continue;
      }
      if (hbi.metaEntry.isOffline()) {
        errors.detail("Skipping region because it is offline: " + hbi);
        continue;
      }
      if (hbi.containsOnlyHdfsEdits()) {
        errors.detail("Skipping region because it only contains edits: " + hbi);
        continue;
      }

      // Skip regions that are not deployed.
      if (hbi.deployedOn.size() == 0) continue;

      TableName tableName = hbi.metaEntry.getTable();
      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        modTInfo = new TableInfo(tableName);
      }
      for (ServerName server : hbi.deployedOn) {
        modTInfo.addServer(server);
      }

      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }

      tablesInfo.put(tableName, modTInfo);
    }

    loadTableInfosForTablesWithNoRegion();

    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      if (!tInfo.checkRegionChain(handler)) {
        errors.report("Found inconsistency in table " + tInfo.getName());
      }
    }
    return tablesInfo;
  }

  private void loadTableInfosForTablesWithNoRegion() throws IOException {
    Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
    for (HTableDescriptor htd : allTables.values()) {
      if (checkMetaOnly && !htd.isMetaTable()) {
        continue;
      }

      TableName tableName = htd.getTableName();
      if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
        TableInfo tableInfo = new TableInfo(tableName);
        tableInfo.htds.add(htd);
        tablesInfo.put(htd.getTableName(), tableInfo);
      }
    }
  }

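  /**
   * Merges HDFS data by moving the files of the contained region into
   * targetRegionDir, then sidelining what remains of the contained region
   * dir.
   *
   * @return the number of file moves performed during the merge
   */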
  public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
    int fileMoves = 0;

    LOG.debug("Contained region dir after close and pause");
    debugLsr(contained.getHdfsRegionDir());

    FileSystem fs = targetRegionDir.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(contained.getHdfsRegionDir());

    if (dirs == null) {
      if (!fs.exists(contained.getHdfsRegionDir())) {
        LOG.warn("HDFS region dir " + contained.getHdfsRegionDir() + " already sidelined.");
      } else {
        sidelineRegionDir(fs, contained);
      }
      return fileMoves;
    }

    for (FileStatus cf : dirs) {
      Path src = cf.getPath();
      Path dst = new Path(targetRegionDir, src.getName());

      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
        continue;
      }

      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
        continue;
      }

      LOG.info("Moving files from " + src + " into containing region " + dst);

      for (FileStatus hfile : fs.listStatus(src)) {
        boolean success = fs.rename(hfile.getPath(), dst);
        if (success) {
          fileMoves++;
        }
      }
      LOG.debug("Sideline directory contents:");
      debugLsr(targetRegionDir);
    }

    sidelineRegionDir(fs, contained);
    LOG.info("Sidelined region dir " + contained.getHdfsRegionDir() + " into " +
        getSidelineDir());
    debugLsr(contained.getHdfsRegionDir());

    return fileMoves;
  }

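  /**
   * Maintains information about a single table: its regions, the servers it
   * is deployed on, and the structures used for region chain checking.
   */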
1949 public class TableInfo {
1950 TableName tableName;
1951 TreeSet <ServerName> deployedOn;
1952
1953
1954 final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
1955
1956
1957 final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
1958
1959
1960 final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
1961
1962
1963 final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
1964
1965
1966 final Multimap<byte[], HbckInfo> overlapGroups =
1967 TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
1968
1969 TableInfo(TableName name) {
1970 this.tableName = name;
1971 deployedOn = new TreeSet <ServerName>();
1972 }
1973
1974
1975
1976
1977 private HTableDescriptor getHTD() {
1978 if (htds.size() == 1) {
1979 return (HTableDescriptor)htds.toArray()[0];
1980 } else {
1981 LOG.error("None/Multiple table descriptors found for table '"
1982 + tableName + "' regions: " + htds);
1983 }
1984 return null;
1985 }
1986
1987 public void addRegionInfo(HbckInfo hir) {
1988 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
1989
1990 sc.add(hir);
1991 return;
1992 }
1993
1994
1995 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
1996 errors.reportError(
1997 ERROR_CODE.REGION_CYCLE,
1998 String.format("The endkey for this region comes before the "
1999 + "startkey, startkey=%s, endkey=%s",
2000 Bytes.toStringBinary(hir.getStartKey()),
2001 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2002 backwards.add(hir);
2003 return;
2004 }
2005
2006
2007 sc.add(hir);
2008 }
2009
2010 public void addServer(ServerName server) {
2011 this.deployedOn.add(server);
2012 }
2013
2014 public TableName getName() {
2015 return tableName;
2016 }
2017
2018 public int getNumRegions() {
2019 return sc.getStarts().size() + backwards.size();
2020 }
2021
    private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
      ErrorReporter errors;

      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
        this.errors = errors;
        setTableInfo(ti);
      }

      @Override
      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException {
        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
            "First region should start with an empty key. You need to "
            + "create a new region and regioninfo in HDFS to plug the hole.",
            getTableInfo(), hi);
      }

      @Override
      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
            "Last region should end with an empty key. You need to "
            + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
      }

      @Override
      public void handleDegenerateRegion(HbckInfo hi) throws IOException {
        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
            "Region has the same start and end key.", getTableInfo(), hi);
      }

      @Override
      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException {
        byte[] key = r1.getStartKey();
        // dup start key
        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
            "Multiple regions have the same startkey: "
            + Bytes.toStringBinary(key), getTableInfo(), r1);
        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
            "Multiple regions have the same startkey: "
            + Bytes.toStringBinary(key), getTableInfo(), r2);
      }

      @Override
      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException {
        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
            "There is an overlap in the region chain.",
            getTableInfo(), hi1, hi2);
      }

      @Override
      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException {
        errors.reportError(
            ERROR_CODE.HOLE_IN_REGION_CHAIN,
            "There is a hole in the region chain between "
            + Bytes.toStringBinary(holeStart) + " and "
            + Bytes.toStringBinary(holeStop)
            + ". You need to create a new .regioninfo and region "
            + "dir in hdfs to plug the hole.");
      }
    }
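    /**
     * This handler fixes integrity errors from hdfs information.  There are
     * basically three classes of integrity problems 1) holes, 2) overlaps, and
     * 3) invalid regions.
     *
     * This class overrides methods that fix holes and the overlap group case.
     * Individual cases of particular overlaps are handled by the general
     * overlap group merge repair case.
     *
     * If hbase is online, this forces regions offline before doing merge
     * operations.
     */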
    private class HDFSIntegrityFixer extends IntegrityFixSuggester {
      Configuration conf;

      boolean fixOverlaps = true;

      HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
          boolean fixHoles, boolean fixOverlaps) {
        super(ti, errors);
        this.conf = conf;
        this.fixOverlaps = fixOverlaps;
        // TODO properly use fixHoles
      }

      /**
       * This is a special case hole -- when the first region of a table is
       * missing from META, HBase doesn't acknowledge the existence of the
       * table.  Create a new region to plug the hole.
       */
      @Override
      public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
            "First region should start with an empty key. Creating a new " +
            "region and regioninfo in HDFS to plug the hole.",
            getTableInfo(), next);
        HTableDescriptor htd = getTableInfo().getHTD();
        // from special EMPTY_START_ROW to next region's startKey
        HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
            HConstants.EMPTY_START_ROW, next.getStartKey());

        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
        LOG.info("Table region start key was not empty. Created new empty region: "
            + newRegion + " " + region);
        fixes++;
      }

      @Override
      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
            "Last region should end with an empty key. Creating a new "
            + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
        HTableDescriptor htd = getTableInfo().getHTD();
        // from curEndKey to special EMPTY_START_ROW
        HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
            HConstants.EMPTY_START_ROW);

        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
        LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
            + " " + region);
        fixes++;
      }

      /**
       * There is a hole in the hdfs regions that violates the table integrity
       * rules.  Create a new empty region that patches the hole.
       */
      @Override
      public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey)
          throws IOException {
        errors.reportError(
            ERROR_CODE.HOLE_IN_REGION_CHAIN,
            "There is a hole in the region chain between "
            + Bytes.toStringBinary(holeStartKey) + " and "
            + Bytes.toStringBinary(holeStopKey)
            + ". Creating a new regioninfo and region "
            + "dir in hdfs to plug the hole.");
        HTableDescriptor htd = getTableInfo().getHTD();
        HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
        LOG.info("Plugged hole by creating new empty region: " + newRegion + " " + region);
        fixes++;
      }
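      /**
       * This takes a set of overlapping regions and merges them into a single
       * region.  This covers cases like degenerate regions, shared start key,
       * general overlaps, duplicate ranges, and partially overlapping regions.
       * Overlap groups that are too big to merge can instead be sidelined.
       */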
      @Override
      public void handleOverlapGroup(Collection<HbckInfo> overlap)
          throws IOException {
        Preconditions.checkNotNull(overlap);
        Preconditions.checkArgument(overlap.size() > 0);

        if (!this.fixOverlaps) {
          LOG.warn("Not attempting to repair overlaps.");
          return;
        }

        if (overlap.size() > maxMerge) {
          LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
              "regions which is greater than " + maxMerge + ", the max number of regions to merge");
          if (sidelineBigOverlaps) {
            // we only sideline big overlapped groups that exceed the max number of regions to merge
            sidelineBigOverlaps(overlap);
          }
          return;
        }

        mergeOverlaps(overlap);
      }

      void mergeOverlaps(Collection<HbckInfo> overlap)
          throws IOException {
        LOG.info("== Merging regions into one region: "
            + Joiner.on(",").join(overlap));
        // get the min / max range and close all concerned regions
        Pair<byte[], byte[]> range = null;
        for (HbckInfo hi : overlap) {
          if (range == null) {
            range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
          } else {
            if (RegionSplitCalculator.BYTES_COMPARATOR
                .compare(hi.getStartKey(), range.getFirst()) < 0) {
              range.setFirst(hi.getStartKey());
            }
            if (RegionSplitCalculator.BYTES_COMPARATOR
                .compare(hi.getEndKey(), range.getSecond()) > 0) {
              range.setSecond(hi.getEndKey());
            }
          }
          // need to close files so delete can happen.
          LOG.debug("Closing region before moving data around: " + hi);
          LOG.debug("Contained region dir before close");
          debugLsr(hi.getHdfsRegionDir());
          try {
            LOG.info("Closing region: " + hi);
            closeRegion(hi);
          } catch (IOException ioe) {
            LOG.warn("Was unable to close region " + hi
                + ". Just continuing... ", ioe);
          } catch (InterruptedException e) {
            LOG.warn("Was unable to close region " + hi
                + ". Just continuing... ", e);
          }

          try {
            LOG.info("Offlining region: " + hi);
            offline(hi.getRegionName());
          } catch (IOException ioe) {
            LOG.warn("Unable to offline region from master: " + hi
                + ". Just continuing... ", ioe);
          }
        }

        // create new empty container region.
        HTableDescriptor htd = getTableInfo().getHTD();
        // from start key to end key
        HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
            range.getSecond());
        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
        LOG.info("Created new empty container region: " +
            newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
        debugLsr(region.getRegionFileSystem().getRegionDir());

        // all target regions are closed, should be able to safely cleanup.
        boolean didFix = false;
        Path target = region.getRegionFileSystem().getRegionDir();
        for (HbckInfo contained : overlap) {
          LOG.info("Merging " + contained + " into " + target);
          int merges = mergeRegionDirs(target, contained);
          if (merges > 0) {
            didFix = true;
          }
        }
        if (didFix) {
          fixes++;
        }
      }
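      /**
       * Sideline some regions in a big overlap group so that the remaining
       * regions can be merged by hbck automatically.  The sidelined region
       * dirs are moved under the sideline dir and must be bulk loaded back
       * into the table later.
       */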
      void sidelineBigOverlaps(
          Collection<HbckInfo> bigOverlap) throws IOException {
        int overlapsToSideline = bigOverlap.size() - maxMerge;
        if (overlapsToSideline > maxOverlapsToSideline) {
          overlapsToSideline = maxOverlapsToSideline;
        }
        List<HbckInfo> regionsToSideline =
            RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
        FileSystem fs = FileSystem.get(conf);
        for (HbckInfo regionToSideline : regionsToSideline) {
          try {
            LOG.info("Closing region: " + regionToSideline);
            closeRegion(regionToSideline);
          } catch (IOException ioe) {
            LOG.warn("Was unable to close region " + regionToSideline
                + ". Just continuing... ", ioe);
          } catch (InterruptedException e) {
            LOG.warn("Was unable to close region " + regionToSideline
                + ". Just continuing... ", e);
          }

          try {
            LOG.info("Offlining region: " + regionToSideline);
            offline(regionToSideline.getRegionName());
          } catch (IOException ioe) {
            LOG.warn("Unable to offline region from master: " + regionToSideline
                + ". Just continuing... ", ioe);
          }

          LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
          Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
          if (sidelineRegionDir != null) {
            sidelinedRegions.put(sidelineRegionDir, regionToSideline);
            LOG.info("After sidelined big overlapped region: "
                + regionToSideline.getRegionNameAsString()
                + " to " + sidelineRegionDir.toString());
            fixes++;
          }
        }
      }
    }
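    /**
     * Check the region chain (from META) of this table.  We are looking for
     * holes, overlaps, and cycles.
     * @return false if there are errors
     * @throws IOException
     */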
    public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
      // When the table is disabled there is no need to check the region chain. If some
      // regions are accidentally deployed, the code below might report issues such as a
      // missing start or end region, or a hole in the chain, and may try to fix them,
      // which is unwanted.
      if (disabledTables.contains(this.tableName)) {
        return true;
      }
      int originalErrorsCount = errors.getErrorList().size();
      Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
      SortedSet<byte[]> splits = sc.getSplits();

      byte[] prevKey = null;
      byte[] problemKey = null;

      if (splits.size() == 0) {
        // no region for this table
        handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
      }

      for (byte[] key : splits) {
        Collection<HbckInfo> ranges = regions.get(key);
        if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
          for (HbckInfo rng : ranges) {
            handler.handleRegionStartKeyNotEmpty(rng);
          }
        }

        // check for degenerate ranges
        for (HbckInfo rng : ranges) {
          // special endkey case converts '' to null
          byte[] endKey = rng.getEndKey();
          endKey = (endKey.length == 0) ? null : endKey;
          if (Bytes.equals(rng.getStartKey(), endKey)) {
            handler.handleDegenerateRegion(rng);
          }
        }

        if (ranges.size() == 1) {
          // this split key is ok -- no overlap, not a hole.
          if (problemKey != null) {
            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
          }
          problemKey = null; // fell through, no more problem.
        } else if (ranges.size() > 1) {
          // set the new problem key group name, if already have problem key, just
          // keep using it.
          if (problemKey == null) {
            // only for overlap regions.
            LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
            problemKey = key;
          }
          overlapGroups.putAll(problemKey, ranges);

          // record errors
          ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
          // this is dumb and n^2 but this shouldn't happen often
          for (HbckInfo r1 : ranges) {
            subRange.remove(r1);
            for (HbckInfo r2 : subRange) {
              if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey()) == 0) {
                handler.handleDuplicateStartKeys(r1, r2);
              } else {
                // overlap
                handler.handleOverlapInRegionChain(r1, r2);
              }
            }
          }

        } else if (ranges.size() == 0) {
          if (problemKey != null) {
            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
          }
          problemKey = null; // fell through, no more problem.

          byte[] holeStopKey = sc.getSplits().higher(key);
          // if higher key is null we reached the top.
          if (holeStopKey != null) {
            // hole
            handler.handleHoleInRegionChain(key, holeStopKey);
          }
        }
        prevKey = key;
      }

      // When the last region of a table is proper and has an empty end key, 'prevKey'
      // will be null.
      if (prevKey != null) {
        handler.handleRegionEndKeyNotEmpty(prevKey);
      }

      for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
        handler.handleOverlapGroup(overlap);
      }

      if (details) {
        // do full region split map dump
        errors.print("---- Table '" + this.tableName
            + "': region split map");
        dump(splits, regions);
        errors.print("---- Table '" + this.tableName
            + "': overlap groups");
        dumpOverlapProblems(overlapGroups);
        errors.print("There are " + overlapGroups.keySet().size()
            + " overlap groups with " + overlapGroups.size()
            + " overlapping regions");
      }
      if (!sidelinedRegions.isEmpty()) {
        LOG.warn("Sidelined big overlapped regions, please bulk load them!");
        errors.print("---- Table '" + this.tableName
            + "': sidelined big overlapped regions");
        dumpSidelinedRegions(sidelinedRegions);
      }
      return errors.getErrorList().size() == originalErrorsCount;
    }
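    /**
     * This dumps data in a visually reasonable way for visual debugging
     *
     * @param splits
     * @param regions
     */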
    void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
      // we display one line per split key, with all regions that start there.
      StringBuilder sb = new StringBuilder();
      for (byte[] k : splits) {
        sb.setLength(0); // clear out existing buffer, if any.
        sb.append(Bytes.toStringBinary(k) + ":\t");
        for (HbckInfo r : regions.get(k)) {
          sb.append("[ " + r.toString() + ", "
              + Bytes.toStringBinary(r.getEndKey()) + "]\t");
        }
        errors.print(sb.toString());
      }
    }
  }

  public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
    // we display this way because the last end key should be displayed as
    // well.
    for (byte[] k : regions.keySet()) {
      errors.print(Bytes.toStringBinary(k) + ":");
      for (HbckInfo r : regions.get(k)) {
        errors.print("[ " + r.toString() + ", "
            + Bytes.toStringBinary(r.getEndKey()) + "]");
      }
      errors.print("----");
    }
  }

  public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
    for (Map.Entry<Path, HbckInfo> entry : regions.entrySet()) {
      TableName tableName = entry.getValue().getTableName();
      Path path = entry.getKey();
      errors.print("This sidelined region dir should be bulk loaded: "
          + path.toString());
      errors.print("Bulk load command looks like: "
          + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
          + path.toUri().getPath() + " " + tableName);
    }
  }

  public Multimap<byte[], HbckInfo> getOverlapGroups(
      TableName table) {
    TableInfo ti = tablesInfo.get(table);
    return ti.overlapGroups;
  }
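  /**
   * Return a list of user-space table names whose metadata have not been
   * modified in the last few milliseconds specified by timelag.
   * If any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
   * SPLITA_QUALIFIER, SPLITB_QUALIFIER columns were modified more recently
   * than timelag, then the table is a candidate to be returned.
   * @return tables that have not been modified recently
   */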
  HTableDescriptor[] getTables(AtomicInteger numSkipped) {
    List<TableName> tableNames = new ArrayList<TableName>();
    long now = System.currentTimeMillis();

    for (HbckInfo hbi : regionInfoMap.values()) {
      MetaEntry info = hbi.metaEntry;

      // if the start key is zero, then we have found the first region of a table.
      // pick only those tables that were not modified in the last few milliseconds.
      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
        if (info.modTime + timelag < now) {
          tableNames.add(info.getTable());
        } else {
          numSkipped.incrementAndGet(); // one more in-flux table
        }
      }
    }
    return getHTableDescriptors(tableNames);
  }

  HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
    HTableDescriptor[] htd = new HTableDescriptor[0];
    try {
      LOG.info("getHTableDescriptors == tableNames => " + tableNames);
      htd = new HBaseAdmin(getConf()).getTableDescriptorsByTableName(tableNames);
    } catch (IOException e) {
      LOG.debug("Exception getting table descriptors", e);
    }
    return htd;
  }
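  /**
   * Gets the entry in regionInfo corresponding to the given encoded
   * region name.  If the region has not been seen yet, a new entry is added
   * and returned.
   */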
  private synchronized HbckInfo getOrCreateInfo(String name) {
    HbckInfo hbi = regionInfoMap.get(name);
    if (hbi == null) {
      hbi = new HbckInfo(null);
      regionInfoMap.put(name, hbi);
    }
    return hbi;
  }

  private void checkAndFixTableLocks() throws IOException {
    TableLockChecker checker = new TableLockChecker(createZooKeeperWatcher(), errors);
    checker.checkTableLocks();

    if (this.fixTableLocks) {
      checker.fixExpiredTableLocks();
    }
  }
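  /**
   * Check values in regionInfo for hbase:meta.
   * Check if zero or more than one regions with hbase:meta are found.
   * If there are inconsistencies (i.e. zero or more than one regions
   * pretend to be holding the hbase:meta) try to fix that and report an error.
   * @throws IOException from HBaseFsckRepair functions
   * @throws KeeperException
   * @throws InterruptedException
   */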
  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
    List<HbckInfo> metaRegions = Lists.newArrayList();
    for (HbckInfo value : regionInfoMap.values()) {
      if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
        metaRegions.add(value);
      }
    }

    // There will always be one entry in regionInfoMap corresponding to hbase:meta.
    // Check the deployed servers. It should be exactly one server.
    HbckInfo metaHbckInfo = metaRegions.get(0);
    List<ServerName> servers = metaHbckInfo.deployedOn;
    if (servers.size() != 1) {
      if (servers.size() == 0) {
        errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta is not found on any region.");
        if (shouldFixAssignments()) {
          errors.print("Trying to fix a problem with hbase:meta..");
          setShouldRerun();
          // try to fix it (treat it as unassigned region)
          HBaseFsckRepair.fixUnassigned(admin, metaHbckInfo.metaEntry);
          HBaseFsckRepair.waitUntilAssigned(admin, metaHbckInfo.metaEntry);
        }
      } else if (servers.size() > 1) {
        errors.reportError(ERROR_CODE.MULTI_META_REGION,
            "hbase:meta is found on more than one region.");
        if (shouldFixAssignments()) {
          errors.print("Trying to fix a problem with hbase:meta..");
          setShouldRerun();
          // try to fix it (treat it as a dupe assignment)
          HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
        }
      }
      // rerun hbck with hopefully fixed META
      return false;
    }
    // no errors, so continue normally
    return true;
  }
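  /**
   * Scan hbase:meta, adding all regions found to the regionInfo map.
   * @throws IOException if an hbase:meta scan fails
   */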
  boolean loadMetaEntries() throws IOException {
    MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
      int countRecord = 1;

      // comparator to sort KeyValues with latest modtime
      final Comparator<Cell> comp = new Comparator<Cell>() {
        @Override
        public int compare(Cell k1, Cell k2) {
          // timestamps are non-negative longs, so comparing their difference is
          // safe; casting it to int is not, since truncation can flip the sign.
          long diff = k1.getTimestamp() - k2.getTimestamp();
          return (diff < 0) ? -1 : (diff > 0) ? 1 : 0;
        }
      };

      @Override
      public boolean processRow(Result result) throws IOException {
        try {

          // record the latest modification of this META record
          long ts = Collections.max(result.listCells(), comp).getTimestamp();
          Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(result);
          if (pair == null || pair.getFirst() == null) {
            emptyRegionInfoQualifiers.add(result);
            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
                "Empty REGIONINFO_QUALIFIER found in hbase:meta");
            return true;
          }
          ServerName sn = null;
          if (pair.getSecond() != null) {
            sn = pair.getSecond();
          }
          HRegionInfo hri = pair.getFirst();
          if (!(isTableIncluded(hri.getTable())
              || hri.isMetaRegion())) {
            return true;
          }
          PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
          MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
          HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
          if (previous == null) {
            regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
          } else if (previous.metaEntry == null) {
            previous.metaEntry = m;
          } else {
            throw new IOException("Two entries in hbase:meta are the same " + previous);
          }

          // show proof of progress to the user, once for every 100 records.
          if (countRecord % 100 == 0) {
            errors.progress();
          }
          countRecord++;
          return true;
        } catch (RuntimeException e) {
          LOG.error("Result=" + result);
          throw e;
        }
      }
    };
    if (!checkMetaOnly) {
      // Scan hbase:meta to pick up user regions
      MetaScanner.metaScan(getConf(), visitor);
    }

    errors.print("");
    return true;
  }
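  /**
   * Stores the regioninfo entries scanned from META
   */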
  static class MetaEntry extends HRegionInfo {
    ServerName regionServer;    // server hosting this region
    long modTime;               // timestamp of most recent modification metadata
    HRegionInfo splitA, splitB; // split daughters

    public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
      this(rinfo, regionServer, modTime, null, null);
    }

    public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
        HRegionInfo splitA, HRegionInfo splitB) {
      super(rinfo);
      this.regionServer = regionServer;
      this.modTime = modTime;
      this.splitA = splitA;
      this.splitB = splitB;
    }

    @Override
    public boolean equals(Object o) {
      boolean superEq = super.equals(o);
      if (!superEq) {
        return superEq;
      }
      // guard the cast: super.equals only guarantees an HRegionInfo.
      if (!(o instanceof MetaEntry)) {
        return false;
      }
      MetaEntry me = (MetaEntry) o;
      if (!regionServer.equals(me.regionServer)) {
        return false;
      }
      return (modTime == me.modTime);
    }

    @Override
    public int hashCode() {
      int hash = Arrays.hashCode(getRegionName());
      hash ^= getRegionId();
      hash ^= Arrays.hashCode(getStartKey());
      hash ^= Arrays.hashCode(getEndKey());
      hash ^= Boolean.valueOf(isOffline()).hashCode();
      hash ^= getTable().hashCode();
      if (regionServer != null) {
        hash ^= regionServer.hashCode();
      }
      hash ^= modTime;
      return hash;
    }
  }
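  /**
   * Stores the regioninfo entries from HDFS
   */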
  static class HdfsEntry {
    HRegionInfo hri;
    Path hdfsRegionDir = null;
    long hdfsRegionDirModTime = 0;
    boolean hdfsRegioninfoFilePresent = false;
    boolean hdfsOnlyEdits = false;
  }
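  /**
   * Stores the regioninfo retrieved from online region servers.
   */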
  static class OnlineEntry {
    HRegionInfo hri;
    ServerName hsa;

    @Override
    public String toString() {
      return hsa.toString() + ";" + hri.getRegionNameAsString();
    }
  }
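  /**
   * Maintain information about a particular region.  It gathers information
   * from three places -- HDFS, META, and region servers.
   */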
  public static class HbckInfo implements KeyRange {
    private MetaEntry metaEntry = null; // info in META
    private HdfsEntry hdfsEntry = null; // info in HDFS
    private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
    private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
    private boolean skipChecks = false; // whether to skip further checks to this region info.

    HbckInfo(MetaEntry metaEntry) {
      this.metaEntry = metaEntry;
    }

    public synchronized void addServer(HRegionInfo hri, ServerName server) {
      OnlineEntry rse = new OnlineEntry();
      rse.hri = hri;
      rse.hsa = server;
      this.deployedEntries.add(rse);
      this.deployedOn.add(server);
    }

    @Override
    public synchronized String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("{ meta => ");
      sb.append((metaEntry != null) ? metaEntry.getRegionNameAsString() : "null");
      sb.append(", hdfs => " + getHdfsRegionDir());
      sb.append(", deployed => " + Joiner.on(", ").join(deployedEntries));
      sb.append(" }");
      return sb.toString();
    }

    @Override
    public byte[] getStartKey() {
      if (this.metaEntry != null) {
        return this.metaEntry.getStartKey();
      } else if (this.hdfsEntry != null) {
        return this.hdfsEntry.hri.getStartKey();
      } else {
        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
        return null;
      }
    }

    @Override
    public byte[] getEndKey() {
      if (this.metaEntry != null) {
        return this.metaEntry.getEndKey();
      } else if (this.hdfsEntry != null) {
        return this.hdfsEntry.hri.getEndKey();
      } else {
        LOG.error("Entry " + this + " has no meta or hdfs region end key.");
        return null;
      }
    }

    public TableName getTableName() {
      if (this.metaEntry != null) {
        return this.metaEntry.getTable();
      } else if (this.hdfsEntry != null) {
        // we are only guaranteed to have a path and not an HRI for hdfsEntry,
        // so we get the name from the Path
        Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
        return FSUtils.getTableName(tableDir);
      } else {
        // return null if we have no info on the table.
        return null;
      }
    }

    public String getRegionNameAsString() {
      if (metaEntry != null) {
        return metaEntry.getRegionNameAsString();
      } else if (hdfsEntry != null) {
        if (hdfsEntry.hri != null) {
          return hdfsEntry.hri.getRegionNameAsString();
        }
      }
      return null;
    }

    public byte[] getRegionName() {
      if (metaEntry != null) {
        return metaEntry.getRegionName();
      } else if (hdfsEntry != null) {
        return hdfsEntry.hri.getRegionName();
      } else {
        return null;
      }
    }

    Path getHdfsRegionDir() {
      if (hdfsEntry == null) {
        return null;
      }
      return hdfsEntry.hdfsRegionDir;
    }

    boolean containsOnlyHdfsEdits() {
      if (hdfsEntry == null) {
        return false;
      }
      return hdfsEntry.hdfsOnlyEdits;
    }

    boolean isHdfsRegioninfoPresent() {
      if (hdfsEntry == null) {
        return false;
      }
      return hdfsEntry.hdfsRegioninfoFilePresent;
    }

    long getModTime() {
      if (hdfsEntry == null) {
        return 0;
      }
      return hdfsEntry.hdfsRegionDirModTime;
    }

    HRegionInfo getHdfsHRI() {
      if (hdfsEntry == null) {
        return null;
      }
      return hdfsEntry.hri;
    }

    public void setSkipChecks(boolean skipChecks) {
      this.skipChecks = skipChecks;
    }

    public boolean isSkipChecks() {
      return skipChecks;
    }
  }

  final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
    @Override
    public int compare(HbckInfo l, HbckInfo r) {
      if (l == r) {
        // same instance
        return 0;
      }

      int tableCompare = l.getTableName().compareTo(r.getTableName());
      if (tableCompare != 0) {
        return tableCompare;
      }

      int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
          l.getStartKey(), r.getStartKey());
      if (startComparison != 0) {
        return startComparison;
      }

      // Special case for absolute endkey, which is converted to null
      byte[] endKey = r.getEndKey();
      endKey = (endKey.length == 0) ? null : endKey;
      byte[] endKey2 = l.getEndKey();
      endKey2 = (endKey2.length == 0) ? null : endKey2;
      int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
          endKey2, endKey);

      if (endComparison != 0) {
        return endComparison;
      }

      // use regionId as tiebreaker.
      // Null is considered after all possible values so make it bigger.
      if (l.hdfsEntry == null && r.hdfsEntry == null) {
        return 0;
      }
      if (l.hdfsEntry == null && r.hdfsEntry != null) {
        return 1;
      }
      // l.hdfsEntry must not be null
      if (r.hdfsEntry == null) {
        return -1;
      }
      // compare the long regionIds directly; casting their difference to int
      // can overflow and flip the sign.
      long regionIdDiff = l.hdfsEntry.hri.getRegionId() - r.hdfsEntry.hri.getRegionId();
      return (regionIdDiff < 0) ? -1 : (regionIdDiff > 0) ? 1 : 0;
    }
  };
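  /**
   * Prints summary of all tables found on the system.
   */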
  private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
    StringBuilder sb = new StringBuilder();
    errors.print("Summary:");
    for (TableInfo tInfo : tablesInfo.values()) {
      if (errors.tableHasErrors(tInfo)) {
        errors.print("Table " + tInfo.getName() + " is inconsistent.");
      } else {
        errors.print("  " + tInfo.getName() + " is okay.");
      }
      errors.print("    Number of regions: " + tInfo.getNumRegions());
      sb.setLength(0);
      sb.append("    Deployed on: ");
      for (ServerName server : tInfo.deployedOn) {
        sb.append(" " + server.toString());
      }
      errors.print(sb.toString());
    }
  }

  static ErrorReporter getErrorReporter(
      final Configuration conf) throws ClassNotFoundException {
    Class<? extends ErrorReporter> reporter =
        conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
    return (ErrorReporter)ReflectionUtils.newInstance(reporter, conf);
  }

  public interface ErrorReporter {
    enum ERROR_CODE {
      UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
      NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
      NOT_DEPLOYED, MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
      FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
      HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
      ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
      WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK
    }
    void clear();
    void report(String message);
    void reportError(String message);
    void reportError(ERROR_CODE errorCode, String message);
    void reportError(ERROR_CODE errorCode, String message, TableInfo table);
    void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
    void reportError(
        ERROR_CODE errorCode,
        String message,
        TableInfo table,
        HbckInfo info1,
        HbckInfo info2
    );
    int summarize();
    void detail(String details);
    ArrayList<ERROR_CODE> getErrorList();
    void progress();
    void print(String message);
    void resetErrors();
    boolean tableHasErrors(TableInfo table);
  }

  static class PrintingErrorReporter implements ErrorReporter {
    public int errorCount = 0;
    private int showProgress;

    Set<TableInfo> errorTables = new HashSet<TableInfo>();

    // for use by unit tests to verify which errors were discovered
    private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();

    @Override
    public void clear() {
      errorTables.clear();
      errorList.clear();
      errorCount = 0;
    }

    @Override
    public synchronized void reportError(ERROR_CODE errorCode, String message) {
      if (errorCode == ERROR_CODE.WRONG_USAGE) {
        System.err.println(message);
        return;
      }

      errorList.add(errorCode);
      if (!summary) {
        System.out.println("ERROR: " + message);
      }
      errorCount++;
      showProgress = 0;
    }

    @Override
    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
      errorTables.add(table);
      reportError(errorCode, message);
    }

    @Override
    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
        HbckInfo info) {
      errorTables.add(table);
      String reference = "(region " + info.getRegionNameAsString() + ")";
      reportError(errorCode, reference + " " + message);
    }

    @Override
    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
        HbckInfo info1, HbckInfo info2) {
      errorTables.add(table);
      String reference = "(regions " + info1.getRegionNameAsString()
          + " and " + info2.getRegionNameAsString() + ")";
      reportError(errorCode, reference + " " + message);
    }

    @Override
    public synchronized void reportError(String message) {
      reportError(ERROR_CODE.UNKNOWN, message);
    }

    /**
     * Report error information, but do not increment the error count.  Intended for cases
     * where the actual error would have been reported previously.
     * @param message
     */
    @Override
    public synchronized void report(String message) {
      if (!summary) {
        System.out.println("ERROR: " + message);
      }
      showProgress = 0;
    }

    @Override
    public synchronized int summarize() {
      System.out.println(Integer.toString(errorCount) +
          " inconsistencies detected.");
      if (errorCount == 0) {
        System.out.println("Status: OK");
        return 0;
      } else {
        System.out.println("Status: INCONSISTENT");
        return -1;
      }
    }

    @Override
    public ArrayList<ERROR_CODE> getErrorList() {
      return errorList;
    }

    @Override
    public synchronized void print(String message) {
      if (!summary) {
        System.out.println(message);
      }
    }

    @Override
    public boolean tableHasErrors(TableInfo table) {
      return errorTables.contains(table);
    }

    @Override
    public void resetErrors() {
      errorCount = 0;
    }

    @Override
    public synchronized void detail(String message) {
      if (details) {
        System.out.println(message);
      }
      showProgress = 0;
    }

    @Override
    public synchronized void progress() {
      if (showProgress++ == 10) {
        if (!summary) {
          System.out.print(".");
        }
        showProgress = 0;
      }
    }
  }
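  /**
   * Contact a region server and get all information from it
   */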
  static class WorkItemRegion implements Callable<Void> {
    private HBaseFsck hbck;
    private ServerName rsinfo;
    private ErrorReporter errors;
    private HConnection connection;

    WorkItemRegion(HBaseFsck hbck, ServerName info,
        ErrorReporter errors, HConnection connection) {
      this.hbck = hbck;
      this.rsinfo = info;
      this.errors = errors;
      this.connection = connection;
    }

    @Override
    public synchronized Void call() throws IOException {
      errors.progress();
      try {
        BlockingInterface server = connection.getAdmin(rsinfo);

        // list all online regions from this region server
        List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
        regions = filterRegions(regions);

        if (details) {
          errors.detail("RegionServer: " + rsinfo.getServerName() +
              " number of regions: " + regions.size());
          for (HRegionInfo rinfo : regions) {
            errors.detail("  " + rinfo.getRegionNameAsString() +
                " id: " + rinfo.getRegionId() +
                " encoded_name: " + rinfo.getEncodedName() +
                " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
                " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
          }
        }

        // check to see if the existence of this region matches the region in META
        for (HRegionInfo r : regions) {
          HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
          hbi.addServer(r, rsinfo);
        }
      } catch (IOException e) { // unable to connect to the region server.
        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
            " Unable to fetch region information. " + e);
        throw e;
      }
      return null;
    }

    private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
      List<HRegionInfo> ret = Lists.newArrayList();
      for (HRegionInfo hri : regions) {
        if (hri.isMetaTable() || (!hbck.checkMetaOnly
            && hbck.isTableIncluded(hri.getTable()))) {
          ret.add(hri);
        }
      }
      return ret;
    }
  }
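  /**
   * Contact hdfs and get all information about the specified table directory into
   * the regioninfo list.
   */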
  static class WorkItemHdfsDir implements Callable<Void> {
    private HBaseFsck hbck;
    private FileStatus tableDir;
    private ErrorReporter errors;
    private FileSystem fs;

    WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
        FileStatus status) {
      this.hbck = hbck;
      this.fs = fs;
      this.tableDir = status;
      this.errors = errors;
    }

    @Override
    public synchronized Void call() throws IOException {
      try {
        // level 2: <HBASE.DIR>/<table>/*
        FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
        for (FileStatus regionDir : regionDirs) {
          String encodedName = regionDir.getPath().getName();
          // ignore directories that aren't hexadecimal
          if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
            continue;
          }

          LOG.debug("Loading region info from hdfs:" + regionDir.getPath());
          HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
          HdfsEntry he = new HdfsEntry();
          synchronized (hbi) {
            if (hbi.getHdfsRegionDir() != null) {
              errors.print("Directory " + encodedName + " duplicate??");
            }

            he.hdfsRegionDir = regionDir.getPath();
            he.hdfsRegionDirModTime = regionDir.getModificationTime();
            Path regioninfoFile = new Path(he.hdfsRegionDir, HRegionFileSystem.REGION_INFO_FILE);
            he.hdfsRegioninfoFilePresent = fs.exists(regioninfoFile);
            // we add to the orphan list when we attempt to read .regioninfo

            // Set a flag if this region contains only edits.
            // This is a special case if a region is left after split.
            he.hdfsOnlyEdits = true;
            FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
            Path ePath = HLogUtil.getRegionDirRecoveredEditsDir(regionDir.getPath());
            for (FileStatus subDir : subDirs) {
              String sdName = subDir.getPath().getName();
              if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
                he.hdfsOnlyEdits = false;
                break;
              }
            }
            hbi.hdfsEntry = he;
          }
        }
      } catch (IOException e) {
        // unable to read the table directory from hdfs.
        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
            + tableDir.getPath().getName()
            + " Unable to fetch region information. " + e);
        throw e;
      }
      return null;
    }
  }

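  /**
   * Loads the .regioninfo file for one region dir from hdfs.  Regions whose
   * .regioninfo cannot be read are reported and queued as orphans.
   */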
  static class WorkItemHdfsRegionInfo implements Callable<Void> {
    private HbckInfo hbi;
    private HBaseFsck hbck;
    private ErrorReporter errors;

    WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
      this.hbi = hbi;
      this.hbck = hbck;
      this.errors = errors;
    }

    @Override
    public synchronized Void call() throws IOException {
      // only load entries that haven't been loaded yet.
      if (hbi.getHdfsHRI() == null) {
        try {
          hbck.loadHdfsRegioninfo(hbi);
        } catch (IOException ioe) {
          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
              + hbi.getTableName() + " in hdfs dir "
              + hbi.getHdfsRegionDir()
              + "! It may be an invalid format or version file. Treating as "
              + "an orphaned regiondir.";
          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
          try {
            hbck.debugLsr(hbi.getHdfsRegionDir());
          } catch (IOException ioe2) {
            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
            throw ioe2;
          }
          hbck.orphanHdfsDirs.add(hbi);
          throw ioe;
        }
      }
      return null;
    }
  }

  /**
   * Display the full report from fsck.  This displays all live and dead
   * region servers, and all known regions.
   */
  public static void setDisplayFullReport() {
    details = true;
  }

  /**
   * Set summary mode.
   * Print only summary of the tables and status.
   */
  void setSummary() {
    summary = true;
  }

  /**
   * Set hbase:meta check mode.
   * Print only info about hbase:meta table deployment/state
   */
  void setCheckMetaOnly() {
    checkMetaOnly = true;
  }

  /**
   * Set table locks fix mode.
   * Delete table locks held for a long time
   */
  public void setFixTableLocks(boolean shouldFix) {
    fixTableLocks = shouldFix;
  }

  /**
   * Check if we should rerun fsck again. This checks if we've tried to
   * fix something and we should rerun fsck tool again.
   */
  void setShouldRerun() {
    rerun = true;
  }

  boolean shouldRerun() {
    return rerun;
  }

  /**
   * Fix inconsistencies found by fsck. This should try to fix errors (if any)
   * found by fsck utility.
   */
  public void setFixAssignments(boolean shouldFix) {
    fixAssignments = shouldFix;
  }

  boolean shouldFixAssignments() {
    return fixAssignments;
  }

  public void setFixMeta(boolean shouldFix) {
    fixMeta = shouldFix;
  }

  boolean shouldFixMeta() {
    return fixMeta;
  }

  public void setFixEmptyMetaCells(boolean shouldFix) {
    fixEmptyMetaCells = shouldFix;
  }

  boolean shouldFixEmptyMetaCells() {
    return fixEmptyMetaCells;
  }

  public void setCheckHdfs(boolean checking) {
    checkHdfs = checking;
  }

  boolean shouldCheckHdfs() {
    return checkHdfs;
  }

  public void setFixHdfsHoles(boolean shouldFix) {
    fixHdfsHoles = shouldFix;
  }

  boolean shouldFixHdfsHoles() {
    return fixHdfsHoles;
  }

  public void setFixTableOrphans(boolean shouldFix) {
    fixTableOrphans = shouldFix;
  }

  boolean shouldFixTableOrphans() {
    return fixTableOrphans;
  }

  public void setFixHdfsOverlaps(boolean shouldFix) {
    fixHdfsOverlaps = shouldFix;
  }

  boolean shouldFixHdfsOverlaps() {
    return fixHdfsOverlaps;
  }

  public void setFixHdfsOrphans(boolean shouldFix) {
    fixHdfsOrphans = shouldFix;
  }

  boolean shouldFixHdfsOrphans() {
    return fixHdfsOrphans;
  }

  public void setFixVersionFile(boolean shouldFix) {
    fixVersionFile = shouldFix;
  }

  public boolean shouldFixVersionFile() {
    return fixVersionFile;
  }

  public void setSidelineBigOverlaps(boolean sbo) {
    this.sidelineBigOverlaps = sbo;
  }

  public boolean shouldSidelineBigOverlaps() {
    return sidelineBigOverlaps;
  }

  public void setFixSplitParents(boolean shouldFix) {
    fixSplitParents = shouldFix;
  }

  boolean shouldFixSplitParents() {
    return fixSplitParents;
  }

  public void setFixReferenceFiles(boolean shouldFix) {
    fixReferenceFiles = shouldFix;
  }

  boolean shouldFixReferenceFiles() {
    return fixReferenceFiles;
  }

  public boolean shouldIgnorePreCheckPermission() {
    return ignorePreCheckPermission;
  }

  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
    this.ignorePreCheckPermission = ignorePreCheckPermission;
  }

  /**
   * @param mm maximum number of regions to merge into a single region.
   */
  public void setMaxMerge(int mm) {
    this.maxMerge = mm;
  }

  public int getMaxMerge() {
    return maxMerge;
  }

  public void setMaxOverlapsToSideline(int mo) {
    this.maxOverlapsToSideline = mo;
  }

  public int getMaxOverlapsToSideline() {
    return maxOverlapsToSideline;
  }

  /**
   * Only check/fix tables specified by the list.
   * Empty list means all tables are included.
   */
  boolean isTableIncluded(TableName table) {
    return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
  }

  public void includeTable(TableName table) {
    tablesIncluded.add(table);
  }

  Set<TableName> getIncludedTables() {
    return new HashSet<TableName>(tablesIncluded);
  }

  /**
   * We are interested in only those tables that have not changed their state in
   * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag
   * @param seconds - the time in seconds
   */
  public void setTimeLag(long seconds) {
    timelag = seconds * 1000; // convert to milliseconds
  }

  /**
   * @param sidelineDir - HDFS path to sideline data
   */
  public void setSidelineDir(String sidelineDir) {
    this.sidelineDir = new Path(sidelineDir);
  }

  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
      throws IOException {
    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
  }

  public HFileCorruptionChecker getHFilecorruptionChecker() {
    return hfcc;
  }

  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
    this.hfcc = hfcc;
  }

  public void setRetCode(int code) {
    this.retcode = code;
  }

  public int getRetCode() {
    return retcode;
  }

  protected HBaseFsck printUsageAndExit() {
    StringWriter sw = new StringWriter(2048);
    PrintWriter out = new PrintWriter(sw);
    out.println("Usage: fsck [opts] {only tables}");
    out.println(" where [opts] are:");
    out.println("   -help Display help options (this)");
    out.println("   -details Display full report of all regions.");
    out.println("   -timelag <timeInSeconds>  Process only regions that " +
        " have not experienced any metadata updates in the last " +
        " <timeInSeconds> seconds.");
    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
        " before checking if the fix worked if run with -fix");
    out.println("   -summary Print only summary of the tables and status.");
    out.println("   -metaonly Only check the state of the hbase:meta table.");
    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");

    out.println("");
    out.println("  Metadata Repair options: (expert features, use with caution!)");
    out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility");
    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
    out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
        + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
    out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
    out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE + " by default)");
    out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
    out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE + " by default)");
    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
    out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
        + " (empty REGIONINFO_QUALIFIER rows)");

    out.println("");
    out.println("  Datafile Repair options: (expert features, use with caution!)");
    out.println("   -checkCorruptHFiles     Check all HFiles by opening them to make sure they are valid");
    out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles. Implies -checkCorruptHFiles");

    out.println("");
    out.println("  Metadata Repair shortcuts");
    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks");
    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");

    out.println("");
    out.println("  Table lock options");
    out.println("   -fixTableLocks    Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");

    out.flush();
    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());

    setRetCode(-2);
    return this;
  }
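  /**
   * Main program
   *
   * @param args
   * @throws Exception
   */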
  public static void main(String[] args) throws Exception {
    // create a fsck object
    Configuration conf = HBaseConfiguration.create();
    Path hbasedir = FSUtils.getRootDir(conf);
    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
    FSUtils.setFsDefault(conf, new Path(defaultFs));

    int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
    System.exit(ret);
  }

  /**
   * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
   */
  static class HBaseFsckTool extends Configured implements Tool {
    HBaseFsckTool(Configuration conf) { super(conf); }
    @Override
    public int run(String[] args) throws Exception {
      HBaseFsck hbck = new HBaseFsck(getConf());
      hbck.exec(hbck.executor, args);
      return hbck.getRetCode();
    }
  }

  public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
      ServiceException, InterruptedException {
    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;

    boolean checkCorruptHFiles = false;
    boolean sidelineCorruptHFiles = false;

    // Process command-line args.
    for (int i = 0; i < args.length; i++) {
      String cmd = args[i];
      if (cmd.equals("-help") || cmd.equals("-h")) {
        return printUsageAndExit();
      } else if (cmd.equals("-details")) {
        setDisplayFullReport();
      } else if (cmd.equals("-timelag")) {
        if (i == args.length - 1) {
          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
          return printUsageAndExit();
        }
        try {
          long timelag = Long.parseLong(args[i+1]);
          setTimeLag(timelag);
        } catch (NumberFormatException e) {
          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
          return printUsageAndExit();
        }
        i++;
      } else if (cmd.equals("-sleepBeforeRerun")) {
        if (i == args.length - 1) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
              "HBaseFsck: -sleepBeforeRerun needs a value.");
          return printUsageAndExit();
        }
        try {
          sleepBeforeRerun = Long.parseLong(args[i+1]);
        } catch (NumberFormatException e) {
          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
          return printUsageAndExit();
        }
        i++;
      } else if (cmd.equals("-sidelineDir")) {
        if (i == args.length - 1) {
          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
          return printUsageAndExit();
        }
        i++;
        setSidelineDir(args[i]);
      } else if (cmd.equals("-fix")) {
        errors.reportError(ERROR_CODE.WRONG_USAGE,
            "This option is deprecated, please use -fixAssignments instead.");
        setFixAssignments(true);
      } else if (cmd.equals("-fixAssignments")) {
        setFixAssignments(true);
      } else if (cmd.equals("-fixMeta")) {
        setFixMeta(true);
      } else if (cmd.equals("-noHdfsChecking")) {
        setCheckHdfs(false);
      } else if (cmd.equals("-fixHdfsHoles")) {
        setFixHdfsHoles(true);
      } else if (cmd.equals("-fixHdfsOrphans")) {
        setFixHdfsOrphans(true);
      } else if (cmd.equals("-fixTableOrphans")) {
        setFixTableOrphans(true);
      } else if (cmd.equals("-fixHdfsOverlaps")) {
        setFixHdfsOverlaps(true);
      } else if (cmd.equals("-fixVersionFile")) {
        setFixVersionFile(true);
      } else if (cmd.equals("-sidelineBigOverlaps")) {
        setSidelineBigOverlaps(true);
      } else if (cmd.equals("-fixSplitParents")) {
        setFixSplitParents(true);
      } else if (cmd.equals("-ignorePreCheckPermission")) {
        setIgnorePreCheckPermission(true);
      } else if (cmd.equals("-checkCorruptHFiles")) {
        checkCorruptHFiles = true;
      } else if (cmd.equals("-sidelineCorruptHFiles")) {
        sidelineCorruptHFiles = true;
      } else if (cmd.equals("-fixReferenceFiles")) {
        setFixReferenceFiles(true);
      } else if (cmd.equals("-fixEmptyMetaCells")) {
        setFixEmptyMetaCells(true);
      } else if (cmd.equals("-repair")) {
        // this attempts to merge overlapping hdfs regions, needs testing
        // under load
        setFixHdfsHoles(true);
        setFixHdfsOrphans(true);
        setFixMeta(true);
        setFixAssignments(true);
        setFixHdfsOverlaps(true);
        setFixVersionFile(true);
        setSidelineBigOverlaps(true);
        setFixSplitParents(false);
        setCheckHdfs(true);
        setFixReferenceFiles(true);
        setFixTableLocks(true);
      } else if (cmd.equals("-repairHoles")) {
        // this will make all missing hdfs regions available but may lose data
        setFixHdfsHoles(true);
        setFixHdfsOrphans(false);
        setFixMeta(true);
        setFixAssignments(true);
        setFixHdfsOverlaps(false);
        setSidelineBigOverlaps(false);
        setFixSplitParents(false);
        setCheckHdfs(true);
      } else if (cmd.equals("-maxOverlapsToSideline")) {
        if (i == args.length - 1) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
              "-maxOverlapsToSideline needs a numeric value argument.");
          return printUsageAndExit();
        }
        try {
          int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
          setMaxOverlapsToSideline(maxOverlapsToSideline);
        } catch (NumberFormatException e) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
              "-maxOverlapsToSideline needs a numeric value argument.");
          return printUsageAndExit();
        }
        i++;
      } else if (cmd.equals("-maxMerge")) {
        if (i == args.length - 1) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
              "-maxMerge needs a numeric value argument.");
          return printUsageAndExit();
        }
        try {
          int maxMerge = Integer.parseInt(args[i+1]);
          setMaxMerge(maxMerge);
        } catch (NumberFormatException e) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
              "-maxMerge needs a numeric value argument.");
          return printUsageAndExit();
        }
        i++;
      } else if (cmd.equals("-summary")) {
        setSummary();
      } else if (cmd.equals("-metaonly")) {
        setCheckMetaOnly();
      } else if (cmd.equals("-fixTableLocks")) {
        setFixTableLocks(true);
      } else if (cmd.startsWith("-")) {
        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
        return printUsageAndExit();
      } else {
        includeTable(TableName.valueOf(cmd));
        errors.print("Allow checking/fixes for table: " + cmd);
      }
    }

    // pre-check current user has FS write permission or not
    try {
      preCheckPermission();
    } catch (AccessControlException ace) {
      Runtime.getRuntime().exit(-1);
    } catch (IOException ioe) {
      Runtime.getRuntime().exit(-1);
    }

    // do the real work of hbck
    connect();

    // if corrupt file mode is on, first fix them since they may be opened later
    if (checkCorruptHFiles || sidelineCorruptHFiles) {
      LOG.info("Checking all hfiles for corruption");
      HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
      setHFileCorruptionChecker(hfcc); // so we can get result
      Collection<TableName> tables = getIncludedTables();
      Collection<Path> tableDirs = new ArrayList<Path>();
      Path rootdir = FSUtils.getRootDir(getConf());
      if (tables.size() > 0) {
        for (TableName t : tables) {
          tableDirs.add(FSUtils.getTableDir(rootdir, t));
        }
      } else {
        tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
      }
      hfcc.checkTables(tableDirs);
      hfcc.report(errors);
    }

    // check and fix tables integrity, region consistency.
    int code = onlineHbck();
    setRetCode(code);
    // If we have changed the HBase state it is better to run hbck again
    // to see if we haven't left any open problems. If autoFix is enabled
    // check to see if this fixed the problem... otherwise just print out
    // the help!
    if (shouldRerun()) {
      try {
        LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
        Thread.sleep(sleepBeforeRerun);
      } catch (InterruptedException ie) {
        return this;
      }
      // Just report, don't fix, on the second pass.
      setFixAssignments(false);
      setFixMeta(false);
      setFixHdfsHoles(false);
      setFixHdfsOverlaps(false);
      setFixVersionFile(false);
      setFixTableOrphans(false);
      errors.resetErrors();
      code = onlineHbck();
      setRetCode(code);
    }
    return this;
  }
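  /**
   * ls -r for debugging purposes
   */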
  void debugLsr(Path p) throws IOException {
    debugLsr(getConf(), p, errors);
  }

  /**
   * ls -r for debugging purposes
   */
  public static void debugLsr(Configuration conf,
      Path p) throws IOException {
    debugLsr(conf, p, new PrintingErrorReporter());
  }

  /**
   * ls -r for debugging purposes
   */
  public static void debugLsr(Configuration conf,
      Path p, ErrorReporter errors) throws IOException {
    if (!LOG.isDebugEnabled() || p == null) {
      return;
    }
    FileSystem fs = p.getFileSystem(conf);

    if (!fs.exists(p)) {
      // nothing
      return;
    }
    errors.print(p.toString());

    if (fs.isFile(p)) {
      return;
    }

    if (fs.getFileStatus(p).isDir()) {
      FileStatus[] fss = fs.listStatus(p);
      for (FileStatus status : fss) {
        debugLsr(conf, status.getPath(), errors);
      }
    }
  }
}