package org.apache.hadoop.hbase.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnectable;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
import org.apache.hadoop.hbase.zookeeper.ZKTable;
import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.KeeperException;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.google.protobuf.ServiceException;
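
/**
 * HBaseFsck (hbck) checks the consistency and integrity of an HBase cluster by comparing
 * region state recorded in hbase:meta, region directories on HDFS, and region deployments
 * reported by the live region servers, and can optionally repair the inconsistencies it
 * finds. Repairs fall into two broad phases: offline HDFS integrity repair (orphan regions
 * and tables, holes, overlaps) and online consistency repair of hbase:meta and region
 * assignments.
 */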
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HBaseFsck extends Configured {
  public static final long DEFAULT_TIME_LAG = 60000;
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
  private static final int MAX_NUM_THREADS = 50;
  private static boolean rsSupportsOffline = true;
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  private static final int DEFAULT_MAX_MERGE = 5;
  private static final String TO_BE_LOADED = "to_be_loaded";
  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";

  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
  private ClusterStatus status;
  private HConnection connection;
  private HBaseAdmin admin;
  private HTable meta;

  protected ExecutorService executor;
  private long startMillis = System.currentTimeMillis();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;
  private Path HBCK_LOCK_PATH;
  private FSDataOutputStream hbckOutFd;

  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);

  private static boolean details = false;
  private long timelag = DEFAULT_TIME_LAG;
  private boolean fixAssignments = false;
  private boolean fixMeta = false;
  private boolean checkHdfs = true;
  private boolean fixHdfsHoles = false;
  private boolean fixHdfsOverlaps = false;
  private boolean fixHdfsOrphans = false;
  private boolean fixTableOrphans = false;
  private boolean fixVersionFile = false;
  private boolean fixSplitParents = false;
  private boolean fixReferenceFiles = false;
  private boolean fixEmptyMetaCells = false;
  private boolean fixTableLocks = false;
  private boolean fixTableZNodes = false;
  private boolean fixAny = false;

  private Set<TableName> tablesIncluded = new HashSet<TableName>();
  private int maxMerge = DEFAULT_MAX_MERGE;
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
  private boolean sidelineBigOverlaps = false;
  private Path sidelineDir = null;

  private boolean rerun = false;
  private static boolean summary = false;
  private boolean checkMetaOnly = false;
  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false;

  final private ErrorReporter errors;
  int fixes = 0;

  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
  private TreeSet<TableName> disabledTables = new TreeSet<TableName>();

  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();

  private SortedMap<TableName, TableInfo> tablesInfo =
      new ConcurrentSkipListMap<TableName, TableInfo>();

  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());

  private Map<TableName, Set<String>> orphanTableDirs =
      new HashMap<TableName, Set<String>>();

  private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();

  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    setConf(HBaseConfiguration.create(getConf()));
    getConf().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
    errors = getErrorReporter(conf);

    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
    executor = new ScheduledThreadPoolExecutor(numThreads,
        Threads.newDaemonThreadFactory("hbasefsck"));
  }

  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
  }

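  /**
   * Creates the hbck lock file under the HBase temp directory so only one hbck instance runs
   * at a time, and writes the local hostname into it. Returns null if another instance already
   * holds the lock (the lock file is already being created).
   */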
  private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
    long start = EnvironmentEdgeManager.currentTimeMillis();
    try {
      FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
      FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
          HConstants.DATA_FILE_UMASK_KEY);
      Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
      fs.mkdirs(tmpDir);
      HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
      final FSDataOutputStream out = FSUtils.create(fs, HBCK_LOCK_PATH, defaultPerms, false);
      out.writeBytes(InetAddress.getLocalHost().toString());
      out.flush();
      return out;
    } catch(RemoteException e) {
      if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
        return null;
      } else {
        throw e;
      }
    } finally {
      long duration = EnvironmentEdgeManager.currentTimeMillis() - start;
      if (duration > 30000) {
        // Only warn when acquiring the lock was slow; returning from this finally block would
        // leak the lock stream and swallow any rethrown exception.
        LOG.warn("Took " + duration + " milliseconds to obtain lock");
      }
    }
  }

  private void unlockHbck() {
    if (hbckLockCleanup.compareAndSet(true, false)) {
      IOUtils.closeStream(hbckOutFd);
      try {
        FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
      } catch (IOException ioe) {
        LOG.warn("Failed to delete " + HBCK_LOCK_PATH);
        LOG.debug(ioe);
      }
    }
  }

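  /**
   * Acquires the hbck lock, registers a shutdown hook that releases it, and opens the
   * connection, admin, and hbase:meta handles used by the rest of the checks.
   */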
  public void connect() throws IOException {
    hbckOutFd = checkAndMarkRunningHbck();
    if (hbckOutFd == null) {
      setRetCode(-1);
      LOG.error("Another instance of hbck is running, exiting this instance. [If you are sure" +
          " no other instance is running, delete the lock file " +
          HBCK_LOCK_PATH + " and rerun the tool]");
      throw new IOException("Duplicate hbck - Abort");
    }

    hbckLockCleanup.set(true);

    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        unlockHbck();
      }
    });
    LOG.debug("Launching hbck");

    connection = HConnectionManager.createConnection(getConf());
    admin = new HBaseAdmin(connection);
    meta = new HTable(TableName.META_TABLE_NAME, connection);
    status = admin.getClusterStatus();
  }

  private void loadDeployedRegions() throws IOException, InterruptedException {
    Collection<ServerName> regionServers = status.getServers();
    errors.print("Number of live region servers: " + regionServers.size());
    if (details) {
      for (ServerName rsinfo: regionServers) {
        errors.print(" " + rsinfo.getServerName());
      }
    }

    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
    errors.print("Number of dead region servers: " + deadRegionServers.size());
    if (details) {
      for (ServerName name: deadRegionServers) {
        errors.print(" " + name);
      }
    }

    errors.print("Master: " + status.getMaster());

    Collection<ServerName> backupMasters = status.getBackupMasters();
    errors.print("Number of backup masters: " + backupMasters.size());
    if (details) {
      for (ServerName name: backupMasters) {
        errors.print(" " + name);
      }
    }

    errors.print("Average load: " + status.getAverageLoad());
    errors.print("Number of requests: " + status.getRequestsCount());
    errors.print("Number of regions: " + status.getRegionsCount());

    Map<String, RegionState> rits = status.getRegionsInTransition();
    errors.print("Number of regions in transition: " + rits.size());
    if (details) {
      for (RegionState state: rits.values()) {
        errors.print(" " + state.toDescriptiveString());
      }
    }

    processRegionServers(regionServers);
  }

  private void clearState() {
    fixes = 0;
    regionInfoMap.clear();
    emptyRegionInfoQualifiers.clear();
    disabledTables.clear();
    errors.clear();
    tablesInfo.clear();
    orphanHdfsDirs.clear();
  }

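  /**
   * Iteratively repairs HDFS-level integrity problems (orphan regions, holes, overlaps, orphan
   * tables) while any of the corresponding fix options are enabled, up to a configurable
   * maximum number of iterations.
   */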
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
      int curIter = 0;
      do {
        clearState();
        restoreHdfsIntegrity();
        curIter++;
      } while (fixes > 0 && curIter <= maxIterations);

      if (curIter > 2) {
        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
        } else {
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
        }
      }
    }
  }

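  /**
   * Runs the online consistency checks and repairs against the live cluster: loads deployed
   * regions, hbase:meta entries and (optionally) HDFS region directories, then checks and
   * fixes per-region consistency and table integrity. Returns the number of errors found, or
   * a negative value if hbase:meta itself could not be checked.
   */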
  public int onlineConsistencyRepair() throws IOException, KeeperException,
      InterruptedException {
    clearState();

    loadDeployedRegions();

    recordMetaRegion();

    if (!checkMetaRegion()) {
      String errorMsg = "hbase:meta table is not consistent. ";
      if (shouldFixAssignments()) {
        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
      } else {
        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
      }
      errors.reportError(errorMsg + " Exiting...");
      return -2;
    }

    LOG.info("Loading regioninfos from the hbase:meta table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    reportEmptyMetaCells();

    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    if (shouldCheckHdfs()) {
      LOG.info("Loading region directories from HDFS");
      loadHdfsRegionDirs();
      LOG.info("Loading region information from HDFS");
      loadHdfsRegionInfos();
    }

    loadDisabledTables();

    fixOrphanTables();

    LOG.info("Checking and fixing region consistency");
    checkAndFixConsistency();

    checkIntegrity();
    return errors.getErrorList().size();
  }

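  /**
   * Runs a full check/repair pass: offline HDFS integrity repair, online consistency repair
   * (with the balancer temporarily disabled), region boundary checks, lingering reference file
   * repair, and table lock / table znode checks. Returns the summarized error count.
   */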
  public int onlineHbck() throws IOException, KeeperException, InterruptedException,
      ServiceException {
    errors.print("Version: " + status.getHBaseVersion());
    offlineHdfsIntegrityRepair();

    boolean oldBalancer = admin.setBalancerRunning(false, true);
    try {
      onlineConsistencyRepair();
    } finally {
      admin.setBalancerRunning(oldBalancer, false);
    }

    if (checkRegionBoundaries) {
      checkRegionBoundaries();
    }

    offlineReferenceFileRepair();

    checkAndFixTableLocks();

    checkAndFixOrphanedTableZNodes();

    unlockHbck();

    printTableSummary(tablesInfo);
    return errors.summarize();
  }

  public static byte[] keyOnly(byte[] b) {
    if (b == null)
      return b;
    int rowlength = Bytes.toShort(b, 0);
    byte[] result = new byte[rowlength];
    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
    return result;
  }

  private static class RegionBoundariesInformation {
    public byte [] regionName;
    public byte [] metaFirstKey;
    public byte [] metaLastKey;
    public byte [] storesFirstKey;
    public byte [] storesLastKey;
    @Override
    public String toString () {
      return "regionName=" + Bytes.toStringBinary(regionName) +
          "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
          "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
          "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
          "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
    }
  }

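  /**
   * Verifies for every region that the first and last keys of its store files fall inside the
   * [startKey, endKey) range recorded for the region in hbase:meta, reporting a
   * BOUNDARIES_ERROR for any region whose store and META boundaries disagree.
   */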
  public void checkRegionBoundaries() {
    try {
      ByteArrayComparator comparator = new ByteArrayComparator();
      List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), false);
      final RegionBoundariesInformation currentRegionBoundariesInformation =
          new RegionBoundariesInformation();
      Path hbaseRoot = FSUtils.getRootDir(getConf());
      for (HRegionInfo regionInfo : regions) {
        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();

        Path path = new Path(tableDir, regionInfo.getEncodedName());
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] files = fs.listStatus(path);

        byte[] storeFirstKey = null;
        byte[] storeLastKey = null;
        for (FileStatus file : files) {
          String fileName = file.getPath().toString();
          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
            FileStatus[] storeFiles = fs.listStatus(file.getPath());

            for (FileStatus storeFile : storeFiles) {
              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
                  getConf()), getConf());
              if ((reader.getFirstKey() != null)
                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
                      reader.getFirstKey()) > 0))) {
                storeFirstKey = reader.getFirstKey();
              }
              if ((reader.getLastKey() != null)
                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
                      reader.getLastKey())) < 0)) {
                storeLastKey = reader.getLastKey();
              }
              reader.close();
            }
          }
        }
        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
          currentRegionBoundariesInformation.metaFirstKey = null;
        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
          currentRegionBoundariesInformation.metaLastKey = null;

        boolean valid = true;

        if ((currentRegionBoundariesInformation.storesFirstKey != null)
            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
                  currentRegionBoundariesInformation.metaFirstKey) >= 0;
        }

        if ((currentRegionBoundariesInformation.storesLastKey != null)
            && (currentRegionBoundariesInformation.metaLastKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
                  currentRegionBoundariesInformation.metaLastKey) < 0;
        }
        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with region boundaries",
              tablesInfo.get(regionInfo.getTable()));
          LOG.warn("Region's boundaries not aligned between stores and META for:");
          LOG.warn(currentRegionBoundariesInformation);
        }
      }
    } catch (IOException e) {
      LOG.error(e);
    }
  }

  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
    for (HbckInfo hi : orphanHdfsDirs) {
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
      adoptHdfsOrphan(hi);
    }
  }

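  /**
   * Adopts a single orphan HDFS region directory: scans its HFiles to determine the key range
   * the data covers, creates a fresh region with that range from the table descriptor, and
   * merges the orphan's files into it. Directories with no data are sidelined instead.
   */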
  @SuppressWarnings("deprecation")
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
          p + ". This dir could probably be deleted.");
      return;
    }

    TableName tableName = hi.getTableName();
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    HTableDescriptor template = tableInfo.getHTD();

    Pair<byte[],byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;

      FileStatus[] hfiles = fs.listStatus(cf.getPath());
      for (FileStatus hfile : hfiles) {
        byte[] start, end;
        HFile.Reader hf = null;
        try {
          CacheConfig cacheConf = new CacheConfig(getConf());
          hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
          hf.loadFileInfo();
          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
          start = startKv.getRow();
          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
          end = endKv.getRow();
        } catch (IOException ioe) {
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
          continue;
        } catch (NullPointerException ioe) {
          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
          continue;
        } finally {
          if (hf != null) {
            hf.close();
          }
        }

        if (orphanRegionRange == null) {
          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
        } else {
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
            orphanRegionRange.setFirst(start);
          }
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
            orphanRegionRange.setSecond(end);
          }
        }
      }
    }
    if (orphanRegionRange == null) {
      LOG.warn("No data in dir " + p + ", sidelining data");
      fixes++;
      sidelineRegionDir(fs, hi);
      return;
    }
    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
        Bytes.toString(orphanRegionRange.getSecond()) + ")");

    HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
        orphanRegionRange.getSecond());
    LOG.info("Creating new region : " + hri);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
    Path target = region.getRegionFileSystem().getRegionDir();

    mergeRegionDirs(target, hi);
    fixes++;
  }

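  /**
   * Rebuilds the in-memory HDFS state and repairs region holes, overlaps and orphans on HDFS,
   * reloading region directories between passes. Returns the number of outstanding errors.
   */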
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs();

    int errs = errors.getErrorList().size();
    tablesInfo = loadHdfsRegionInfos();
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() == errs) {
      LOG.info("No integrity errors. We are done with this phase. Glorious.");
      return 0;
    }

    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
      adoptHdfsOrphans(orphanHdfsDirs);
    }

    if (shouldFixHdfsHoles()) {
      clearState();
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos();
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
    }

    if (shouldFixHdfsOverlaps()) {
      clearState();
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos();
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
    }

    return errors.getErrorList().size();
  }

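  /**
   * Scans every store file under the HBase root directory and sidelines reference files whose
   * referred-to file no longer exists, so that regions with lingering references can open.
   */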
  private void offlineReferenceFileRepair() throws IOException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    LOG.info("Computing mapping of all store files");
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot, errors);
    errors.print("");
    LOG.info("Validating mapping using HDFS state");
    for (Path path: allFiles.values()) {
      boolean isReference = false;
      try {
        isReference = StoreFileInfo.isReference(path);
      } catch (Throwable t) {
        // Ignore: not every file under a family dir is a valid store file.
      }
      if (!isReference) continue;

      Path referredToFile = StoreFileInfo.getReferredToFile(path);
      if (fs.exists(referredToFile)) continue;

      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
          "Found lingering reference file " + path);
      if (!shouldFixReferenceFiles()) continue;

      boolean success = false;
      String pathStr = path.toString();

      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 5; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }
      if (index > 0) {
        Path rootDir = getSidelineDir();
        Path dst = new Path(rootDir, pathStr.substring(index + 1));
        fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
            + path + " to " + dst);
        setShouldRerun();

        success = fs.rename(path, dst);
      }
      if (!success) {
        LOG.error("Failed to sideline reference file " + path);
      }
    }
  }

  private void reportEmptyMetaCells() {
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
        emptyRegionInfoQualifiers.size());
    if (details) {
      for (Result r: emptyRegionInfoQualifiers) {
        errors.print(" " + r);
      }
    }
  }

  private void reportTablesInFlux() {
    AtomicInteger numSkipped = new AtomicInteger(0);
    HTableDescriptor[] allTables = getTables(numSkipped);
    errors.print("Number of Tables: " + allTables.length);
    if (details) {
      if (numSkipped.get() > 0) {
        errors.detail("Number of Tables in flux: " + numSkipped.get());
      }
      for (HTableDescriptor td : allTables) {
        errors.detail(" Table: " + td.getTableName() + "\t" +
            (td.isReadOnly() ? "ro" : "rw") + "\t" +
            (td.isMetaRegion() ? "META" : " ") + "\t" +
            " families: " + td.getFamilies().size());
      }
    }
  }

  public ErrorReporter getErrors() {
    return errors;
  }

  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    if (regionDir == null) {
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
      return;
    }

    if (hbi.hdfsEntry.hri != null) {
      return;
    }

    FileSystem fs = FileSystem.get(getConf());
    HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
    LOG.debug("HRegionInfo read: " + hri.toString());
    hbi.hdfsEntry.hri = hri;
  }

  public static class RegionRepairException extends IOException {
    private static final long serialVersionUID = 1L;
    final IOException ioe;
    public RegionRepairException(String s, IOException ioe) {
      super(s);
      this.ioe = ioe;
    }
  }

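  /**
   * Reads the .regioninfo file for every region found on HDFS in parallel, populates the
   * per-table TableInfo map, and records tables whose .tableinfo file is missing as orphan
   * table directories.
   */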
  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
      throws IOException, InterruptedException {
    tablesInfo.clear();
    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
    List<Future<Void>> hbiFutures;

    for (HbckInfo hbi : hbckInfos) {
      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
      hbis.add(work);
    }

    hbiFutures = executor.invokeAll(hbis);

    for (int i = 0; i < hbiFutures.size(); i++) {
      WorkItemHdfsRegionInfo work = hbis.get(i);
      Future<Void> f = hbiFutures.get(i);
      try {
        f.get();
      } catch(ExecutionException e) {
        LOG.warn("Failed to read .regioninfo file for region " +
            work.hbi.getRegionNameAsString(), e.getCause());
      }
    }

    Path hbaseRoot = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseRoot.getFileSystem(getConf());
    for (HbckInfo hbi: hbckInfos) {
      if (hbi.getHdfsHRI() == null) {
        continue;
      }

      TableName tableName = hbi.getTableName();
      if (tableName == null) {
        LOG.warn("tableName was null for: " + hbi);
        continue;
      }

      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        modTInfo = new TableInfo(tableName);
        tablesInfo.put(tableName, modTInfo);
        try {
          HTableDescriptor htd =
              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
          modTInfo.htds.add(htd);
        } catch (IOException ioe) {
          if (!orphanTableDirs.containsKey(tableName)) {
            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
            Set<String> columns = new HashSet<String>();
            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
          }
        }
      }
      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }
    }

    loadTableInfosForTablesWithNoRegion();
    errors.print("");

    return tablesInfo;
  }

  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
    for (FileStatus subdir : subDirs) {
      String columnfamily = subdir.getPath().getName();
      columns.add(columnfamily);
    }
    return columns;
  }

  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (String columnfamily : columns) {
      htd.addFamily(new HColumnDescriptor(columnfamily));
    }
    fstd.createTableDescriptor(htd, true);
    return true;
  }

  public void fixEmptyMetaCells() throws IOException {
    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
      for (Result region : emptyRegionInfoQualifiers) {
        deleteMetaRegion(region.getRow());
        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
      }
      emptyRegionInfoQualifiers.clear();
    }
  }

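  /**
   * Recreates missing .tableinfo files for orphan table directories, either from the cached
   * table descriptors or, failing that, by fabricating a default descriptor from the column
   * families found on disk.
   */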
  public void fixOrphanTables() throws IOException {
    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {

      List<TableName> tmpList = new ArrayList<TableName>();
      tmpList.addAll(orphanTableDirs.keySet());
      HTableDescriptor[] htds = getHTableDescriptors(tmpList);
      Iterator<Entry<TableName, Set<String>>> iter =
          orphanTableDirs.entrySet().iterator();
      int j = 0;
      int numFailedCase = 0;
      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
      while (iter.hasNext()) {
        Entry<TableName, Set<String>> entry = iter.next();
        TableName tableName = entry.getKey();
        LOG.info("Trying to fix orphan table error: " + tableName);
        if (j < htds.length) {
          if (tableName.equals(htds[j].getTableName())) {
            HTableDescriptor htd = htds[j];
            LOG.info("fixing orphan table: " + tableName + " from cache");
            fstd.createTableDescriptor(htd, true);
            j++;
            iter.remove();
          }
        } else {
          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
            LOG.warn("Strongly recommend modifying the HTableDescriptor if necessary for: "
                + tableName);
            iter.remove();
          } else {
            LOG.error("Unable to create default .tableinfo for " + tableName
                + " while missing column family information");
            numFailedCase++;
          }
        }
        fixes++;
      }

      if (orphanTableDirs.isEmpty()) {
        setShouldRerun();
        LOG.warn("Strongly recommend re-running hbck manually once all orphan table dirs have been fixed");
      } else if (numFailedCase > 0) {
        LOG.error("Failed to fix " + numFailedCase
            + " OrphanTables with default .tableinfo files");
      }
    }
    orphanTableDirs.clear();
  }

  private HRegion createNewMeta() throws IOException {
    Path rootdir = FSUtils.getRootDir(getConf());
    Configuration c = getConf();
    HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
    HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor);
    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
    return meta;
  }

  private ArrayList<Put> generatePuts(
      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
    ArrayList<Put> puts = new ArrayList<Put>();
    boolean hasProblems = false;
    for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
      TableName name = e.getKey();

      if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
        continue;
      }

      TableInfo ti = e.getValue();
      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
          .entrySet()) {
        Collection<HbckInfo> his = spl.getValue();
        int sz = his.size();
        if (sz != 1) {
          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
              + " had " + sz + " regions instead of exactly 1.");
          hasProblems = true;
          continue;
        }

        HbckInfo hi = his.iterator().next();
        HRegionInfo hri = hi.getHdfsHRI();
        Put p = MetaEditor.makePutFromRegionInfo(hri);
        puts.add(p);
      }
    }
    return hasProblems ? null : puts;
  }

  private void suggestFixes(
      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
    logParallelMerge();
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      tInfo.checkRegionChain(handler);
    }
  }

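  /**
   * Rebuilds hbase:meta from the region information on HDFS: repairs HDFS integrity until no
   * further fixes apply, sidelines the existing hbase:meta, creates a fresh meta region, and
   * repopulates it with one row per region. Returns false if integrity problems remain that
   * prevent a rebuild.
   */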
  public boolean rebuildMeta(boolean fix) throws IOException,
      InterruptedException {

    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs();

    int errs = errors.getErrorList().size();
    tablesInfo = loadHdfsRegionInfos();
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() != errs) {
      while (true) {
        fixes = 0;
        suggestFixes(tablesInfo);
        errors.clear();
        loadHdfsRegionInfos();
        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());

        int errCount = errors.getErrorList().size();

        if (fixes == 0) {
          if (errCount > 0) {
            return false;
          } else {
            break;
          }
        }
      }
    }

    LOG.info("HDFS regioninfos seem good. Sidelining old hbase:meta");
    Path backupDir = sidelineOldMeta();

    LOG.info("Creating new hbase:meta");
    HRegion meta = createNewMeta();

    List<Put> puts = generatePuts(tablesInfo);
    if (puts == null) {
      LOG.fatal("Problem encountered when creating new hbase:meta entries. " +
          "You may need to restore the previously sidelined hbase:meta");
      return false;
    }
    meta.batchMutate(puts.toArray(new Put[puts.size()]));
    HRegion.closeHRegion(meta);
    LOG.info("Success! hbase:meta table rebuilt.");
    LOG.info("Old hbase:meta is moved into " + backupDir);
    return true;
  }

  private void logParallelMerge() {
    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
      LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
          " false to run serially.");
    } else {
      LOG.info("Handling overlap merges serially. set hbasefsck.overlap.merge.parallel to" +
          " true to run in parallel.");
    }
  }

  private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
      boolean fixOverlaps) throws IOException {
    LOG.info("Checking HBase region split map from HDFS data...");
    logParallelMerge();
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler;
      if (fixHoles || fixOverlaps) {
        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
            fixHoles, fixOverlaps);
      } else {
        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      }
      if (!tInfo.checkRegionChain(handler)) {
        errors.report("Found inconsistency in table " + tInfo.getName());
      }
    }
    return tablesInfo;
  }

  private Path getSidelineDir() throws IOException {
    if (sidelineDir == null) {
      Path hbaseDir = FSUtils.getRootDir(getConf());
      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
          + startMillis);
    }
    return sidelineDir;
  }

  Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
    return sidelineRegionDir(fs, null, hi);
  }

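  /**
   * Moves a region directory out of the table's area and under the sideline directory, file by
   * file, then deletes the now-empty original region directory. Returns the sidelined region
   * directory, or null if the region directory no longer exists.
   */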
  Path sidelineRegionDir(FileSystem fs,
      String parentDir, HbckInfo hi) throws IOException {
    TableName tableName = hi.getTableName();
    Path regionDir = hi.getHdfsRegionDir();

    if (!fs.exists(regionDir)) {
      LOG.warn("No previous " + regionDir + " exists. Continuing.");
      return null;
    }

    Path rootDir = getSidelineDir();
    if (parentDir != null) {
      rootDir = new Path(rootDir, parentDir);
    }
    Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
    fs.mkdirs(sidelineRegionDir);
    boolean success = false;
    FileStatus[] cfs = fs.listStatus(regionDir);
    if (cfs == null) {
      LOG.info("Region dir is empty: " + regionDir);
    } else {
      for (FileStatus cf : cfs) {
        Path src = cf.getPath();
        Path dst = new Path(sidelineRegionDir, src.getName());
        if (fs.isFile(src)) {
          success = fs.rename(src, dst);
          if (!success) {
            String msg = "Unable to rename file " + src + " to " + dst;
            LOG.error(msg);
            throw new IOException(msg);
          }
          continue;
        }

        fs.mkdirs(dst);

        LOG.info("Sidelining files from " + src + " into containing region " + dst);

        FileStatus[] hfiles = fs.listStatus(src);
        if (hfiles != null && hfiles.length > 0) {
          for (FileStatus hfile : hfiles) {
            success = fs.rename(hfile.getPath(), dst);
            if (!success) {
              String msg = "Unable to rename file " + src + " to " + dst;
              LOG.error(msg);
              throw new IOException(msg);
            }
          }
        }
        LOG.debug("Sideline directory contents:");
        debugLsr(sidelineRegionDir);
      }
    }

    LOG.info("Removing old region dir: " + regionDir);
    success = fs.delete(regionDir, true);
    if (!success) {
      String msg = "Unable to delete dir " + regionDir;
      LOG.error(msg);
      throw new IOException(msg);
    }
    return sidelineRegionDir;
  }

  void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
      Path backupHbaseDir) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
    if (fs.exists(tableDir)) {
      Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName);
      fs.mkdirs(backupTableDir.getParent());
      boolean success = fs.rename(tableDir, backupTableDir);
      if (!success) {
        throw new IOException("Failed to move " + tableName + " from "
            + tableDir + " to " + backupTableDir);
      }
    } else {
      LOG.info("No previous " + tableName + " exists. Continuing.");
    }
  }

  Path sidelineOldMeta() throws IOException {
    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    Path backupDir = getSidelineDir();
    fs.mkdirs(backupDir);

    try {
      sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
    } catch (IOException e) {
      LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore "
          + "try to rename hbase:meta in " + backupDir.getName() + " to "
          + hbaseDir.getName() + ".", e);
      throw e;
    }
    return backupDir;
  }

  private void loadDisabledTables()
      throws ZooKeeperConnectionException, IOException {
    HConnectionManager.execute(new HConnectable<Void>(getConf()) {
      @Override
      public Void connect(HConnection connection) throws IOException {
        ZooKeeperWatcher zkw = createZooKeeperWatcher();
        try {
          for (TableName tableName :
              ZKTableReadOnly.getDisabledOrDisablingTables(zkw)) {
            disabledTables.add(tableName);
          }
        } catch (KeeperException ke) {
          throw new IOException(ke);
        } finally {
          zkw.close();
        }
        return null;
      }
    });
  }

  private boolean isTableDisabled(HRegionInfo regionInfo) {
    return disabledTables.contains(regionInfo.getTable());
  }

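  /**
   * Scans the HBase root directory on HDFS for the table directories to check (honoring the
   * included-tables filter and checkMetaOnly) and reports, and optionally recreates, a missing
   * cluster version file.
   */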
  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
    Path rootDir = FSUtils.getRootDir(getConf());
    FileSystem fs = rootDir.getFileSystem(getConf());

    List<FileStatus> tableDirs = Lists.newArrayList();

    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));

    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
    for (Path path : paths) {
      TableName tableName = FSUtils.getTableName(path);
      if ((!checkMetaOnly &&
          isTableIncluded(tableName)) ||
          tableName.equals(TableName.META_TABLE_NAME)) {
        tableDirs.add(fs.getFileStatus(path));
      }
    }

    if (!foundVersionFile) {
      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
          "Version file does not exist in root dir " + rootDir);
      if (shouldFixVersionFile()) {
        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
            + " file.");
        setShouldRerun();
        FSUtils.setVersion(fs, rootDir, getConf().getInt(
            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
      }
    }

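  /**
   * Records the hbase:meta region's location (its region info plus the server name read from
   * ZooKeeper) in the region info map so it is checked like any other region.
   */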
  private boolean recordMetaRegion() throws IOException {
    HRegionLocation metaLocation = connection.locateRegion(
        TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW);

    if (metaLocation == null || metaLocation.getRegionInfo() == null ||
        metaLocation.getHostname() == null) {
      errors.reportError(ERROR_CODE.NULL_META_REGION,
          "META region or some of its attributes are null.");
      return false;
    }
    ServerName sn;
    try {
      sn = getMetaRegionServerName();
    } catch (KeeperException e) {
      throw new IOException(e);
    }
    MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
    HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
    if (hbckInfo == null) {
      regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
    } else {
      hbckInfo.metaEntry = m;
    }
    return true;
  }

  private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
    return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
      @Override
      public void abort(String why, Throwable e) {
        LOG.error(why, e);
        System.exit(1);
      }

      @Override
      public boolean isAborted() {
        return false;
      }

    });
  }

  private ServerName getMetaRegionServerName()
      throws IOException, KeeperException {
    ZooKeeperWatcher zkw = createZooKeeperWatcher();
    ServerName sn = null;
    try {
      sn = MetaRegionTracker.getMetaRegionLocation(zkw);
    } finally {
      zkw.close();
    }
    return sn;
  }

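  /**
   * Contacts each live region server in parallel and records the regions it currently hosts.
   */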
  void processRegionServers(Collection<ServerName> regionServerList)
      throws IOException, InterruptedException {

    List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
    List<Future<Void>> workFutures;

    for (ServerName rsinfo: regionServerList) {
      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
    }

    workFutures = executor.invokeAll(workItems);

    for (int i = 0; i < workFutures.size(); i++) {
      WorkItemRegion item = workItems.get(i);
      Future<Void> f = workFutures.get(i);
      try {
        f.get();
      } catch(ExecutionException e) {
        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
            e.getCause());
      }
    }
  }

  private void checkAndFixConsistency()
      throws IOException, KeeperException, InterruptedException {
    List<CheckRegionConsistencyWorkItem> workItems =
        new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
      workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
    }
    checkRegionConsistencyConcurrently(workItems);
  }

  private void checkRegionConsistencyConcurrently(
      final List<CheckRegionConsistencyWorkItem> workItems)
      throws IOException, KeeperException, InterruptedException {
    if (workItems.isEmpty()) {
      return;
    }

    List<Future<Void>> workFutures = executor.invokeAll(workItems);
    for (Future<Void> f: workFutures) {
      try {
        f.get();
      } catch(ExecutionException e1) {
        LOG.warn("Could not check region consistency", e1.getCause());
        if (e1.getCause() instanceof IOException) {
          throw (IOException)e1.getCause();
        } else if (e1.getCause() instanceof KeeperException) {
          throw (KeeperException)e1.getCause();
        } else if (e1.getCause() instanceof InterruptedException) {
          throw (InterruptedException)e1.getCause();
        } else {
          throw new IOException(e1.getCause());
        }
      }
    }
  }

  class CheckRegionConsistencyWorkItem implements Callable<Void> {
    private final String key;
    private final HbckInfo hbi;

    CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
      this.key = key;
      this.hbi = hbi;
    }

    @Override
    public synchronized Void call() throws Exception {
      checkRegionConsistency(key, hbi);
      return null;
    }
  }

  private void preCheckPermission() throws IOException, AccessDeniedException {
    if (shouldIgnorePreCheckPermission()) {
      return;
    }

    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    UserProvider userProvider = UserProvider.instantiate(getConf());
    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
    FileStatus[] files = fs.listStatus(hbaseDir);
    for (FileStatus file : files) {
      try {
        FSUtils.checkAccess(ugi, file, FsAction.WRITE);
      } catch (AccessDeniedException ace) {
        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
            + " does not have write perms to " + file.getPath()
            + ". Please rerun hbck as hdfs user " + file.getOwner());
        throw ace;
      }
    }
  }

  private void deleteMetaRegion(HbckInfo hi) throws IOException {
    deleteMetaRegion(hi.metaEntry.getRegionName());
  }

  private void deleteMetaRegion(byte[] metaKey) throws IOException {
    Delete d = new Delete(metaKey);
    meta.delete(d);
    meta.flushCommits();
    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META");
  }

  private void resetSplitParent(HbckInfo hi) throws IOException {
    RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
    Delete d = new Delete(hi.metaEntry.getRegionName());
    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
    mutations.add(d);

    HRegionInfo hri = new HRegionInfo(hi.metaEntry);
    hri.setOffline(false);
    hri.setSplit(false);
    Put p = MetaEditor.makePutFromRegionInfo(hri);
    mutations.add(p);

    meta.mutateRow(mutations);
    meta.flushCommits();
    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META");
  }

  private void offline(byte[] regionName) throws IOException {
    String regionString = Bytes.toStringBinary(regionName);
    if (!rsSupportsOffline) {
      LOG.warn("Using unassign region " + regionString
          + " instead of using offline method, you should"
          + " restart HMaster after these repairs");
      admin.unassign(regionName, true);
      return;
    }

    try {
      LOG.info("Offlining region " + regionString);
      admin.offline(regionName);
    } catch (IOException ioe) {
      String notFoundMsg = "java.lang.NoSuchMethodException: " +
          "org.apache.hadoop.hbase.master.HMaster.offline([B)";
      if (ioe.getMessage().contains(notFoundMsg)) {
        LOG.warn("Using unassign region " + regionString
            + " instead of using offline method, you should"
            + " restart HMaster after these repairs");
        rsSupportsOffline = false;
        admin.unassign(regionName, true);
        return;
      }
      throw ioe;
    }
  }

  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
    for (OnlineEntry rse : hi.deployedEntries) {
      LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
      try {
        HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
        offline(rse.hri.getRegionName());
      } catch (IOException ioe) {
        LOG.warn("Got exception when attempting to offline region "
            + Bytes.toString(rse.hri.getRegionName()), ioe);
      }
    }
  }

  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
    if (hi.metaEntry == null && hi.hdfsEntry == null) {
      undeployRegions(hi);
      return;
    }

    Get get = new Get(hi.getRegionName());
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
    Result r = meta.get(get);
    ServerName serverName = HRegionInfo.getServerName(r);
    if (serverName == null) {
      errors.reportError("Unable to close region "
          + hi.getRegionNameAsString() + " because meta does not "
          + "have a handle to reach it.");
      return;
    }

    HRegionInfo hri = HRegionInfo.getHRegionInfo(r);
    if (hri == null) {
      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
          + " because hbase:meta had invalid or missing "
          + HConstants.CATALOG_FAMILY_STR + ":"
          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
          + " qualifier value.");
      return;
    }

    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, serverName, hri);
  }

  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
      KeeperException, InterruptedException {
    if (shouldFixAssignments()) {
      errors.print(msg);
      undeployRegions(hbi);
      setShouldRerun();
      HRegionInfo hri = hbi.getHdfsHRI();
      if (hri == null) {
        hri = hbi.metaEntry;
      }
      HBaseFsckRepair.fixUnassigned(admin, hri);
      HBaseFsckRepair.waitUntilAssigned(admin, hri);
    }
  }

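  /**
   * Checks a single region against hbase:meta, HDFS, and its reported deployments, and applies
   * the enabled fixes: patching or deleting hbase:meta rows, removing stale HDFS data left by
   * failed splits, resetting lingering split parents, and reassigning or undeploying regions
   * as needed.
   */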
1917 private void checkRegionConsistency(final String key, final HbckInfo hbi)
1918 throws IOException, KeeperException, InterruptedException {
1919 String descriptiveName = hbi.toString();
1920
1921 boolean inMeta = hbi.metaEntry != null;
1922
1923 boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
1924 boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
1925 boolean isDeployed = !hbi.deployedOn.isEmpty();
1926 boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
1927 boolean deploymentMatchesMeta =
1928 hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
1929 hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
1930 boolean splitParent =
1931 (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
1932 boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
1933 boolean recentlyModified = inHdfs &&
1934 hbi.getModTime() + timelag > System.currentTimeMillis();
1935
1936
1937 if (hbi.containsOnlyHdfsEdits()) {
1938 return;
1939 }
1940 if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
1941 return;
1942 } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
1943 LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
1944 "tabled that is not deployed");
1945 return;
1946 } else if (recentlyModified) {
1947 LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
1948 return;
1949 }
1950
1951 else if (!inMeta && !inHdfs && !isDeployed) {
1952
1953 assert false : "Entry for region with no data";
1954 } else if (!inMeta && !inHdfs && isDeployed) {
1955 errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
1956 + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
1957 "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1958 if (shouldFixAssignments()) {
1959 undeployRegions(hbi);
1960 }
1961
1962 } else if (!inMeta && inHdfs && !isDeployed) {
1963 if (hbi.isMerged()) {
1964
1965
1966 hbi.setSkipChecks(true);
1967 LOG.info("Region " + descriptiveName
1968 + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
1969 return;
1970 }
1971 errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
1972 + descriptiveName + " on HDFS, but not listed in hbase:meta " +
1973 "or deployed on any region server");
1974
1975 if (shouldFixMeta()) {
1976 if (!hbi.isHdfsRegioninfoPresent()) {
1977 LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
1978 + " in table integrity repair phase if -fixHdfsOrphans was" +
1979 " used.");
1980 return;
1981 }
1982
1983 HRegionInfo hri = hbi.getHdfsHRI();
1984 TableInfo tableInfo = tablesInfo.get(hri.getTable());
1985 for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
1986 if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
1987 && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
1988 hri.getEndKey()) >= 0)
1989 && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
1990 if(region.isSplit() || region.isOffline()) continue;
1991 Path regionDir = hbi.getHdfsRegionDir();
1992 FileSystem fs = regionDir.getFileSystem(getConf());
1993 List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
1994 for (Path familyDir : familyDirs) {
1995 List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
1996 for (Path referenceFilePath : referenceFilePaths) {
1997 Path parentRegionDir =
1998 StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
1999 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
                  LOG.warn(hri + " start and stop keys are within the range of " + region
                      + ". The region may not have been cleaned up from HDFS when the split of region "
                      + region + " failed. Hence deleting it from HDFS.");
2003 HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2004 regionDir.getParent(), hri);
2005 return;
2006 }
2007 }
2008 }
2009 }
2010 }
2011
2012 LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2013 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
2014
2015 tryAssignmentRepair(hbi, "Trying to reassign region...");
2016 }
2017
2018 } else if (!inMeta && inHdfs && isDeployed) {
2019 errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2020 + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2021 debugLsr(hbi.getHdfsRegionDir());
2022 if (shouldFixMeta()) {
2023 if (!hbi.isHdfsRegioninfoPresent()) {
2024 LOG.error("This should have been repaired in table integrity repair phase");
2025 return;
2026 }
2027
        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2029 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
2030
2031 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2032 }
2033
2034
2035 } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2036
2037
2038 if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2039
2040 HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2041 HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2042 if (infoA != null && infoB != null) {
2043
2044 hbi.setSkipChecks(true);
2045 return;
2046 }
2047 }
2048 errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2049 + descriptiveName + " is a split parent in META, in HDFS, "
2050 + "and not deployed on any region server. This could be transient.");
2051 if (shouldFixSplitParents()) {
2052 setShouldRerun();
2053 resetSplitParent(hbi);
2054 }
2055 } else if (inMeta && !inHdfs && !isDeployed) {
2056 errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2057 + descriptiveName + " found in META, but not in HDFS "
2058 + "or deployed on any region server.");
2059 if (shouldFixMeta()) {
2060 deleteMetaRegion(hbi);
2061 }
2062 } else if (inMeta && !inHdfs && isDeployed) {
2063 errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2064 + " found in META, but not in HDFS, " +
2065 "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2066
2067
2068
2069 if (shouldFixAssignments()) {
2070 errors.print("Trying to fix unassigned region...");
2071 undeployRegions(hbi);
2072 }
2073 if (shouldFixMeta()) {
2074
2075 deleteMetaRegion(hbi);
2076 }
2077 } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2078 errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2079 + " not deployed on any region server.");
2080 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2081 } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2082 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2083 "Region " + descriptiveName + " should not be deployed according " +
2084 "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2085 if (shouldFixAssignments()) {
2086 errors.print("Trying to close the region " + descriptiveName);
2087 setShouldRerun();
2088 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2089 }
2090 } else if (inMeta && inHdfs && isMultiplyDeployed) {
2091 errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2092 + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2093 + " but is multiply assigned to region servers " +
2094 Joiner.on(", ").join(hbi.deployedOn));
2095
2096 if (shouldFixAssignments()) {
2097 errors.print("Trying to fix assignment error...");
2098 setShouldRerun();
2099 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2100 }
2101 } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2102 errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2103 + descriptiveName + " listed in hbase:meta on region server " +
2104 hbi.metaEntry.regionServer + " but found on region server " +
2105 hbi.deployedOn.get(0));
2106
2107 if (shouldFixAssignments()) {
2108 errors.print("Trying to fix assignment error...");
2109 setShouldRerun();
2110 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2111 HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2112 }
2113 } else {
2114 errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2115 " is in an unforeseen state:" +
2116 " inMeta=" + inMeta +
2117 " inHdfs=" + inHdfs +
2118 " isDeployed=" + isDeployed +
2119 " isMultiplyDeployed=" + isMultiplyDeployed +
2120 " deploymentMatchesMeta=" + deploymentMatchesMeta +
2121 " shouldBeDeployed=" + shouldBeDeployed);
2122 }
2123 }
2124
2125
2126
2127
2128
2129
2130
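  /**
   * Groups the regions found in hbase:meta by table, skipping regions that are
   * offline, lack an assigned region server, contain only recovered edits or
   * are not deployed anywhere, and then checks every table's region chain for
   * holes, overlaps and other inconsistencies.
   *
   * @return the collected per-table information, keyed by table name
   */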
2131 SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2132 tablesInfo = new TreeMap<TableName,TableInfo> ();
2133 List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
2134 LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2135 for (HbckInfo hbi : regionInfoMap.values()) {
2136
2137 if (hbi.metaEntry == null) {
2138
2139 noHDFSRegionInfos.add(hbi);
2140 Path p = hbi.getHdfsRegionDir();
2141 if (p == null) {
          errors.report("No regioninfo in hbase:meta or HDFS. " + hbi);
2143 }
2144
2145
2146 continue;
2147 }
2148 if (hbi.metaEntry.regionServer == null) {
2149 errors.detail("Skipping region because no region server: " + hbi);
2150 continue;
2151 }
2152 if (hbi.metaEntry.isOffline()) {
2153 errors.detail("Skipping region because it is offline: " + hbi);
2154 continue;
2155 }
2156 if (hbi.containsOnlyHdfsEdits()) {
        errors.detail("Skipping region because it only contains edits: " + hbi);
2158 continue;
2159 }
2160
2161
2162
2163
2164
2165
2166 if (hbi.deployedOn.size() == 0) continue;
2167
2168
2169 TableName tableName = hbi.metaEntry.getTable();
2170 TableInfo modTInfo = tablesInfo.get(tableName);
2171 if (modTInfo == null) {
2172 modTInfo = new TableInfo(tableName);
2173 }
2174 for (ServerName server : hbi.deployedOn) {
2175 modTInfo.addServer(server);
2176 }
2177
2178 if (!hbi.isSkipChecks()) {
2179 modTInfo.addRegionInfo(hbi);
2180 }
2181
2182 tablesInfo.put(tableName, modTInfo);
2183 }
2184
2185 loadTableInfosForTablesWithNoRegion();
2186
2187 logParallelMerge();
2188 for (TableInfo tInfo : tablesInfo.values()) {
2189 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2190 if (!tInfo.checkRegionChain(handler)) {
2191 errors.report("Found inconsistency in table " + tInfo.getName());
2192 }
2193 }
2194 return tablesInfo;
2195 }
2196
2197
2198
2199
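  /**
   * Loads the table descriptors from the filesystem and creates placeholder
   * TableInfo entries for included tables that have no regions recorded in
   * hbase:meta, so that table-level checks still cover them.
   */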
2200 private void loadTableInfosForTablesWithNoRegion() throws IOException {
2201 Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2202 for (HTableDescriptor htd : allTables.values()) {
2203 if (checkMetaOnly && !htd.isMetaTable()) {
2204 continue;
2205 }
2206
2207 TableName tableName = htd.getTableName();
2208 if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2209 TableInfo tableInfo = new TableInfo(tableName);
2210 tableInfo.htds.add(htd);
2211 tablesInfo.put(htd.getTableName(), tableInfo);
2212 }
2213 }
2214 }
2215
2216
2217
2218
2219
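  /**
   * Moves the store files of the contained (overlapping) region into the target
   * region directory, skipping the .regioninfo file and the old WAL directory,
   * and then sidelines the emptied region directory.
   *
   * @return the number of files moved into the target region
   */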
2220 public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2221 int fileMoves = 0;
2222 String thread = Thread.currentThread().getName();
2223 LOG.debug("[" + thread + "] Contained region dir after close and pause");
2224 debugLsr(contained.getHdfsRegionDir());
2225
2226
2227 FileSystem fs = targetRegionDir.getFileSystem(getConf());
2228 FileStatus[] dirs = null;
2229 try {
2230 dirs = fs.listStatus(contained.getHdfsRegionDir());
2231 } catch (FileNotFoundException fnfe) {
2232
2233
2234 if (!fs.exists(contained.getHdfsRegionDir())) {
2235 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2236 + " is missing. Assuming already sidelined or moved.");
2237 } else {
2238 sidelineRegionDir(fs, contained);
2239 }
2240 return fileMoves;
2241 }
2242
2243 if (dirs == null) {
2244 if (!fs.exists(contained.getHdfsRegionDir())) {
2245 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2246 + " already sidelined.");
2247 } else {
2248 sidelineRegionDir(fs, contained);
2249 }
2250 return fileMoves;
2251 }
2252
2253 for (FileStatus cf : dirs) {
2254 Path src = cf.getPath();
2255 Path dst = new Path(targetRegionDir, src.getName());
2256
2257 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2258
2259 continue;
2260 }
2261
2262 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2263
2264 continue;
2265 }
2266
2267 LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2268
2269
2270
2271
2272 for (FileStatus hfile : fs.listStatus(src)) {
2273 boolean success = fs.rename(hfile.getPath(), dst);
2274 if (success) {
2275 fileMoves++;
2276 }
2277 }
2278 LOG.debug("[" + thread + "] Sideline directory contents:");
2279 debugLsr(targetRegionDir);
2280 }
2281
2282
2283 sidelineRegionDir(fs, contained);
2284 LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2285 getSidelineDir());
2286 debugLsr(contained.getHdfsRegionDir());
2287
2288 return fileMoves;
2289 }
2290
2291
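  /**
   * Callable that passes one group of overlapping regions to the supplied
   * TableIntegrityErrorHandler, allowing overlap groups to be repaired in
   * parallel.
   */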
2292 static class WorkItemOverlapMerge implements Callable<Void> {
2293 private TableIntegrityErrorHandler handler;
2294 Collection<HbckInfo> overlapgroup;
2295
2296 WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2297 this.handler = handler;
2298 this.overlapgroup = overlapgroup;
2299 }
2300
2301 @Override
2302 public Void call() throws Exception {
2303 handler.handleOverlapGroup(overlapgroup);
2304 return null;
2305 }
2306 };
2307
2308
2309
2310
2311
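  /**
   * Holds the state accumulated for one table while it is being checked: the
   * servers it is deployed on, the region split calculator, detected overlap
   * groups, sidelined regions and the table descriptors found for it.
   */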
2312 public class TableInfo {
2313 TableName tableName;
2314 TreeSet <ServerName> deployedOn;
2315
2316
2317 final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2318
2319
2320 final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2321
2322
2323 final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2324
2325
2326 final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2327
2328
2329 final Multimap<byte[], HbckInfo> overlapGroups =
2330 TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2331
2332
2333 private ImmutableList<HRegionInfo> regionsFromMeta = null;
2334
2335 TableInfo(TableName name) {
2336 this.tableName = name;
2337 deployedOn = new TreeSet <ServerName>();
2338 }
2339
2340
2341
2342
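    /**
     * Returns the single table descriptor collected for this table, or null
     * (after logging an error) when none or several descriptors were found.
     */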
2343 private HTableDescriptor getHTD() {
2344 if (htds.size() == 1) {
2345 return (HTableDescriptor)htds.toArray()[0];
2346 } else {
2347 LOG.error("None/Multiple table descriptors found for table '"
2348 + tableName + "' regions: " + htds);
2349 }
2350 return null;
2351 }
2352
2353 public void addRegionInfo(HbckInfo hir) {
2354 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2355
2356 sc.add(hir);
2357 return;
2358 }
2359
2360
2361 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2362 errors.reportError(
2363 ERROR_CODE.REGION_CYCLE,
2364 String.format("The endkey for this region comes before the "
2365 + "startkey, startkey=%s, endkey=%s",
2366 Bytes.toStringBinary(hir.getStartKey()),
2367 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2368 backwards.add(hir);
2369 return;
2370 }
2371
2372
2373 sc.add(hir);
2374 }
2375
2376 public void addServer(ServerName server) {
2377 this.deployedOn.add(server);
2378 }
2379
2380 public TableName getName() {
2381 return tableName;
2382 }
2383
2384 public int getNumRegions() {
2385 return sc.getStarts().size() + backwards.size();
2386 }
2387
2388 public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2389
2390 if (regionsFromMeta == null) {
2391 List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2392 for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2393 if (tableName.equals(h.getTableName())) {
2394 if (h.metaEntry != null) {
2395 regions.add((HRegionInfo) h.metaEntry);
2396 }
2397 }
2398 }
2399 regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2400 }
2401
2402 return regionsFromMeta;
2403 }
2404
2405 private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2406 ErrorReporter errors;
2407
2408 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2409 this.errors = errors;
2410 setTableInfo(ti);
2411 }
2412
2413 @Override
2414 public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2415 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
          "First region should start with an empty key. You need to "
              + "create a new region and regioninfo in HDFS to plug the hole.",
2418 getTableInfo(), hi);
2419 }
2420
2421 @Override
2422 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2423 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2424 "Last region should end with an empty key. You need to "
2425 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2426 }
2427
2428 @Override
2429 public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2430 errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2431 "Region has the same start and end key.", getTableInfo(), hi);
2432 }
2433
2434 @Override
2435 public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2436 byte[] key = r1.getStartKey();
2437
2438 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2439 "Multiple regions have the same startkey: "
2440 + Bytes.toStringBinary(key), getTableInfo(), r1);
2441 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2442 "Multiple regions have the same startkey: "
2443 + Bytes.toStringBinary(key), getTableInfo(), r2);
2444 }
2445
2446 @Override
2447 public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2448 errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2449 "There is an overlap in the region chain.",
2450 getTableInfo(), hi1, hi2);
2451 }
2452
2453 @Override
2454 public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2455 errors.reportError(
2456 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2457 "There is a hole in the region chain between "
2458 + Bytes.toStringBinary(holeStart) + " and "
2459 + Bytes.toStringBinary(holeStop)
2460 + ". You need to create a new .regioninfo and region "
2461 + "dir in hdfs to plug the hole.");
2462 }
2463 };
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
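    /**
     * Error handler that not only reports integrity problems, like
     * IntegrityFixSuggester, but also repairs them in HDFS: it creates new
     * region directories to plug holes and merges or sidelines overlapping
     * regions.
     */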
2477 private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2478 Configuration conf;
2479
2480 boolean fixOverlaps = true;
2481
2482 HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2483 boolean fixHoles, boolean fixOverlaps) {
2484 super(ti, errors);
2485 this.conf = conf;
2486 this.fixOverlaps = fixOverlaps;
2487
2488 }
2489
2490
2491
2492
2493
2494
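      /**
       * Repairs a region chain whose first region does not start at the empty
       * key by creating a new empty region from the empty start key up to the
       * first region's start key.
       */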
2495 @Override
2496 public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2497 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2498 "First region should start with an empty key. Creating a new " +
2499 "region and regioninfo in HDFS to plug the hole.",
2500 getTableInfo(), next);
2501 HTableDescriptor htd = getTableInfo().getHTD();
2502
2503 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2504 HConstants.EMPTY_START_ROW, next.getStartKey());
2505
2506
2507 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2508 LOG.info("Table region start key was not empty. Created new empty region: "
2509 + newRegion + " " +region);
2510 fixes++;
2511 }
2512
2513 @Override
2514 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2515 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2516 "Last region should end with an empty key. Creating a new "
2517 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2518 HTableDescriptor htd = getTableInfo().getHTD();
2519
2520 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2521 HConstants.EMPTY_START_ROW);
2522
2523 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2524 LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
2525 + " " + region);
2526 fixes++;
2527 }
2528
2529
2530
2531
2532
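      /**
       * Plugs a hole in the region chain by creating a new empty region, with a
       * fresh region directory in HDFS, spanning exactly the missing key range.
       */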
2533 @Override
2534 public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2535 errors.reportError(
2536 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2537 "There is a hole in the region chain between "
2538 + Bytes.toStringBinary(holeStartKey) + " and "
2539 + Bytes.toStringBinary(holeStopKey)
2540 + ". Creating a new regioninfo and region "
2541 + "dir in hdfs to plug the hole.");
2542 HTableDescriptor htd = getTableInfo().getHTD();
2543 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2544 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2545 LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2546 fixes++;
2547 }
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
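      /**
       * Repairs one group of overlapping regions by merging them into a single
       * new region, unless the group exceeds maxMerge, in which case the
       * biggest overlaps may be sidelined instead of merged.
       */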
2560 @Override
2561 public void handleOverlapGroup(Collection<HbckInfo> overlap)
2562 throws IOException {
2563 Preconditions.checkNotNull(overlap);
2564 Preconditions.checkArgument(overlap.size() >0);
2565
2566 if (!this.fixOverlaps) {
2567 LOG.warn("Not attempting to repair overlaps.");
2568 return;
2569 }
2570
2571 if (overlap.size() > maxMerge) {
          LOG.warn("Overlap group has " + overlap.size() + " overlapping regions, which is more "
              + "than " + maxMerge + ", the maximum number of regions to merge");
2574 if (sidelineBigOverlaps) {
2575
2576 sidelineBigOverlaps(overlap);
2577 }
2578 return;
2579 }
2580
2581 mergeOverlaps(overlap);
2582 }
2583
2584 void mergeOverlaps(Collection<HbckInfo> overlap)
2585 throws IOException {
2586 String thread = Thread.currentThread().getName();
2587 LOG.info("== [" + thread + "] Merging regions into one region: "
2588 + Joiner.on(",").join(overlap));
2589
2590 Pair<byte[], byte[]> range = null;
2591 for (HbckInfo hi : overlap) {
2592 if (range == null) {
2593 range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2594 } else {
2595 if (RegionSplitCalculator.BYTES_COMPARATOR
2596 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2597 range.setFirst(hi.getStartKey());
2598 }
2599 if (RegionSplitCalculator.BYTES_COMPARATOR
2600 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2601 range.setSecond(hi.getEndKey());
2602 }
2603 }
2604
2605 LOG.debug("[" + thread + "] Closing region before moving data around: " + hi);
2606 LOG.debug("[" + thread + "] Contained region dir before close");
2607 debugLsr(hi.getHdfsRegionDir());
2608 try {
2609 LOG.info("[" + thread + "] Closing region: " + hi);
2610 closeRegion(hi);
2611 } catch (IOException ioe) {
2612 LOG.warn("[" + thread + "] Was unable to close region " + hi
2613 + ". Just continuing... ", ioe);
2614 } catch (InterruptedException e) {
2615 LOG.warn("[" + thread + "] Was unable to close region " + hi
2616 + ". Just continuing... ", e);
2617 }
2618
2619 try {
2620 LOG.info("[" + thread + "] Offlining region: " + hi);
2621 offline(hi.getRegionName());
2622 } catch (IOException ioe) {
2623 LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2624 + ". Just continuing... ", ioe);
2625 }
2626 }
2627
2628
2629 HTableDescriptor htd = getTableInfo().getHTD();
2630
2631 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2632 range.getSecond());
2633 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2634 LOG.info("[" + thread + "] Created new empty container region: " +
2635 newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2636 debugLsr(region.getRegionFileSystem().getRegionDir());
2637
2638
2639 boolean didFix= false;
2640 Path target = region.getRegionFileSystem().getRegionDir();
2641 for (HbckInfo contained : overlap) {
2642 LOG.info("[" + thread + "] Merging " + contained + " into " + target );
2643 int merges = mergeRegionDirs(target, contained);
2644 if (merges > 0) {
2645 didFix = true;
2646 }
2647 }
2648 if (didFix) {
2649 fixes++;
2650 }
2651 }
2652
2653
2654
2655
2656
2657
2658
2659
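    /**
     * Sidelines the largest regions of an oversized overlap group, closing and
     * offlining each one first, so that the remaining regions stay mergeable.
     * The number of regions sidelined is capped by maxOverlapsToSideline.
     */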
2660 void sidelineBigOverlaps(
2661 Collection<HbckInfo> bigOverlap) throws IOException {
2662 int overlapsToSideline = bigOverlap.size() - maxMerge;
2663 if (overlapsToSideline > maxOverlapsToSideline) {
2664 overlapsToSideline = maxOverlapsToSideline;
2665 }
2666 List<HbckInfo> regionsToSideline =
2667 RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2668 FileSystem fs = FileSystem.get(conf);
2669 for (HbckInfo regionToSideline: regionsToSideline) {
2670 try {
2671 LOG.info("Closing region: " + regionToSideline);
2672 closeRegion(regionToSideline);
2673 } catch (IOException ioe) {
2674 LOG.warn("Was unable to close region " + regionToSideline
2675 + ". Just continuing... ", ioe);
2676 } catch (InterruptedException e) {
2677 LOG.warn("Was unable to close region " + regionToSideline
2678 + ". Just continuing... ", e);
2679 }
2680
2681 try {
2682 LOG.info("Offlining region: " + regionToSideline);
2683 offline(regionToSideline.getRegionName());
2684 } catch (IOException ioe) {
2685 LOG.warn("Unable to offline region from master: " + regionToSideline
2686 + ". Just continuing... ", ioe);
2687 }
2688
2689 LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2690 Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2691 if (sidelineRegionDir != null) {
2692 sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2693 LOG.info("After sidelined big overlapped region: "
2694 + regionToSideline.getRegionNameAsString()
2695 + " to " + sidelineRegionDir.toString());
2696 fixes++;
2697 }
2698 }
2699 }
2700 }
2701
2702
2703
2704
2705
2706
2707
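    /**
     * Checks the region chain of this table: a first region whose start key is
     * not empty, degenerate regions, duplicate start keys, overlaps, holes and
     * a last region whose end key is not empty are all reported to the handler,
     * and overlap groups are repaired serially or in parallel.
     *
     * @return true if no new errors were reported for this table
     */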
2708 public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2709
2710
2711
2712 if (disabledTables.contains(this.tableName)) {
2713 return true;
2714 }
2715 int originalErrorsCount = errors.getErrorList().size();
2716 Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2717 SortedSet<byte[]> splits = sc.getSplits();
2718
2719 byte[] prevKey = null;
2720 byte[] problemKey = null;
2721
2722 if (splits.size() == 0) {
2723
2724 handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
2725 }
2726
2727 for (byte[] key : splits) {
2728 Collection<HbckInfo> ranges = regions.get(key);
2729 if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2730 for (HbckInfo rng : ranges) {
2731 handler.handleRegionStartKeyNotEmpty(rng);
2732 }
2733 }
2734
2735
2736 for (HbckInfo rng : ranges) {
2737
2738 byte[] endKey = rng.getEndKey();
2739 endKey = (endKey.length == 0) ? null : endKey;
2740 if (Bytes.equals(rng.getStartKey(),endKey)) {
2741 handler.handleDegenerateRegion(rng);
2742 }
2743 }
2744
2745 if (ranges.size() == 1) {
2746
2747 if (problemKey != null) {
2748 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2749 }
2750 problemKey = null;
2751 } else if (ranges.size() > 1) {
2752
2753
2754 if (problemKey == null) {
2755
2756 LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2757 problemKey = key;
2758 }
2759 overlapGroups.putAll(problemKey, ranges);
2760
2761
2762 ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2763
2764 for (HbckInfo r1 : ranges) {
2765 subRange.remove(r1);
2766 for (HbckInfo r2 : subRange) {
2767 if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
2768 handler.handleDuplicateStartKeys(r1,r2);
2769 } else {
2770
2771 handler.handleOverlapInRegionChain(r1, r2);
2772 }
2773 }
2774 }
2775
2776 } else if (ranges.size() == 0) {
2777 if (problemKey != null) {
2778 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2779 }
2780 problemKey = null;
2781
2782 byte[] holeStopKey = sc.getSplits().higher(key);
2783
2784 if (holeStopKey != null) {
2785
2786 handler.handleHoleInRegionChain(key, holeStopKey);
2787 }
2788 }
2789 prevKey = key;
2790 }
2791
2792
2793
2794 if (prevKey != null) {
2795 handler.handleRegionEndKeyNotEmpty(prevKey);
2796 }
2797
2798
2799 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
2800 boolean ok = handleOverlapsParallel(handler, prevKey);
2801 if (!ok) {
2802 return false;
2803 }
2804 } else {
2805 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2806 handler.handleOverlapGroup(overlap);
2807 }
2808 }
2809
2810 if (details) {
2811
2812 errors.print("---- Table '" + this.tableName
2813 + "': region split map");
2814 dump(splits, regions);
2815 errors.print("---- Table '" + this.tableName
2816 + "': overlap groups");
2817 dumpOverlapProblems(overlapGroups);
2818 errors.print("There are " + overlapGroups.keySet().size()
2819 + " overlap groups with " + overlapGroups.size()
2820 + " overlapping regions");
2821 }
2822 if (!sidelinedRegions.isEmpty()) {
2823 LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2824 errors.print("---- Table '" + this.tableName
2825 + "': sidelined big overlapped regions");
2826 dumpSidelinedRegions(sidelinedRegions);
2827 }
2828 return errors.getErrorList().size() == originalErrorsCount;
2829 }
2830
2831 private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
2832 throws IOException {
2833
2834
2835 List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
2836 List<Future<Void>> rets;
2837 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2838
2839 merges.add(new WorkItemOverlapMerge(overlap, handler));
2840 }
2841 try {
2842 rets = executor.invokeAll(merges);
2843 } catch (InterruptedException e) {
2844 LOG.error("Overlap merges were interrupted", e);
2845 return false;
2846 }
2847 for(int i=0; i<merges.size(); i++) {
2848 WorkItemOverlapMerge work = merges.get(i);
2849 Future<Void> f = rets.get(i);
2850 try {
2851 f.get();
2852 } catch(ExecutionException e) {
          LOG.warn("Failed to merge overlap group " + work, e.getCause());
2854 } catch (InterruptedException e) {
2855 LOG.error("Waiting for overlap merges was interrupted", e);
2856 return false;
2857 }
2858 }
2859 return true;
2860 }
2861
2862
2863
2864
2865
2866
2867
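    /**
     * Dumps the region split map for debugging: each split key followed by the
     * regions recorded at that key and their end keys.
     */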
2868 void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2869
2870 StringBuilder sb = new StringBuilder();
2871 for (byte[] k : splits) {
2872 sb.setLength(0);
2873 sb.append(Bytes.toStringBinary(k) + ":\t");
2874 for (HbckInfo r : regions.get(k)) {
2875 sb.append("[ "+ r.toString() + ", "
2876 + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2877 }
2878 errors.print(sb.toString());
2879 }
2880 }
2881 }
2882
2883 public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2884
2885
2886 for (byte[] k : regions.keySet()) {
2887 errors.print(Bytes.toStringBinary(k) + ":");
2888 for (HbckInfo r : regions.get(k)) {
2889 errors.print("[ " + r.toString() + ", "
2890 + Bytes.toStringBinary(r.getEndKey()) + "]");
2891 }
2892 errors.print("----");
2893 }
2894 }
2895
2896 public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2897 for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
2898 TableName tableName = entry.getValue().getTableName();
2899 Path path = entry.getKey();
2900 errors.print("This sidelined region dir should be bulk loaded: "
2901 + path.toString());
2902 errors.print("Bulk load command looks like: "
2903 + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
2904 + path.toUri().getPath() + " "+ tableName);
2905 }
2906 }
2907
2908 public Multimap<byte[], HbckInfo> getOverlapGroups(
2909 TableName table) {
2910 TableInfo ti = tablesInfo.get(table);
2911 return ti.overlapGroups;
2912 }
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
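  /**
   * Returns the descriptors of the user tables whose first region in hbase:meta
   * has not been modified within the configured time lag; tables with more
   * recent updates are counted in numSkipped instead.
   */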
2923 HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2924 List<TableName> tableNames = new ArrayList<TableName>();
2925 long now = System.currentTimeMillis();
2926
2927 for (HbckInfo hbi : regionInfoMap.values()) {
2928 MetaEntry info = hbi.metaEntry;
2929
2930
2931
2932 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2933 if (info.modTime + timelag < now) {
2934 tableNames.add(info.getTable());
2935 } else {
2936 numSkipped.incrementAndGet();
2937 }
2938 }
2939 }
2940 return getHTableDescriptors(tableNames);
2941 }
2942
  HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
    HTableDescriptor[] htd = new HTableDescriptor[0];
    HBaseAdmin admin = null;
    try {
      LOG.info("getHTableDescriptors == tableNames => " + tableNames);
      admin = new HBaseAdmin(getConf());
      htd = admin.getTableDescriptorsByTableName(tableNames);
    } catch (IOException e) {
      LOG.debug("Exception getting table descriptors", e);
    } finally {
      // Close the admin so its underlying connection does not leak.
      if (admin != null) {
        try {
          admin.close();
        } catch (IOException ioe) {
          LOG.debug("Exception closing HBaseAdmin", ioe);
        }
      }
    }
    return htd;
  }
2953
2954
2955
2956
2957
2958
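  /**
   * Returns the HbckInfo registered for the given encoded region name, creating
   * and registering an empty one if none exists yet.
   */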
2959 private synchronized HbckInfo getOrCreateInfo(String name) {
2960 HbckInfo hbi = regionInfoMap.get(name);
2961 if (hbi == null) {
2962 hbi = new HbckInfo(null);
2963 regionInfoMap.put(name, hbi);
2964 }
2965 return hbi;
2966 }
2967
2968 private void checkAndFixTableLocks() throws IOException {
2969 ZooKeeperWatcher zkw = createZooKeeperWatcher();
2970
2971 try {
      // Reuse the watcher created above so it is the one reliably closed in the finally block.
      TableLockChecker checker = new TableLockChecker(zkw, errors);
2973 checker.checkTableLocks();
2974
2975 if (this.fixTableLocks) {
2976 checker.fixExpiredTableLocks();
2977 }
2978 } finally {
2979 zkw.close();
2980 }
2981 }
2982
2983
2984
2985
2986
2987
2988
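  /**
   * Finds tables whose ZooKeeper znode is stuck in the ENABLING state but which
   * are not present in hbase:meta, reports them as orphaned and, when
   * -fixOrphanedTableZnodes is enabled, moves their znodes to the DISABLED
   * state.
   */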
2989 private void checkAndFixOrphanedTableZNodes()
2990 throws IOException, KeeperException, InterruptedException {
2991 ZooKeeperWatcher zkw = createZooKeeperWatcher();
2992 try {
2993 ZKTable zkTable = new ZKTable(zkw);
2994 Set<TableName> enablingTables = zkTable.getEnablingTables(zkw);
2995 String msg;
2996 TableInfo tableInfo;
2997
2998 for (TableName tableName : enablingTables) {
2999
3000 tableInfo = tablesInfo.get(tableName);
3001 if (tableInfo != null) {
3002
3003 continue;
3004 }
3005
3006 msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
3007 LOG.warn(msg);
3008 orphanedTableZNodes.add(tableName);
3009 errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
3010 }
3011
3012 if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
3013 for (TableName tableName : orphanedTableZNodes) {
3014
3015
3016
3017
3018 zkTable.setDisabledTable(tableName);
3019 }
3020 }
3021 } finally {
3022 zkw.close();
3023 }
3024 }
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
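  /**
   * Checks that hbase:meta is deployed on exactly one region server. When it is
   * unassigned or multiply assigned and -fixAssignments is enabled, an
   * assignment repair is attempted.
   *
   * @return true if hbase:meta is consistently assigned
   */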
3035 boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3036 List<HbckInfo> metaRegions = Lists.newArrayList();
3037 for (HbckInfo value : regionInfoMap.values()) {
3038 if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3039 metaRegions.add(value);
3040 }
3041 }
3042
3043
3044
3045 List<ServerName> servers = new ArrayList<ServerName>();
3046 HbckInfo metaHbckInfo = null;
3047 if (!metaRegions.isEmpty()) {
3048 metaHbckInfo = metaRegions.get(0);
3049 servers = metaHbckInfo.deployedOn;
3050 }
3051 if (servers.size() != 1) {
3052 if (servers.size() == 0) {
        errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta is not deployed on any region server.");
3054 if (shouldFixAssignments()) {
3055 errors.print("Trying to fix a problem with hbase:meta..");
3056 setShouldRerun();
3057
3058 HBaseFsckRepair.fixUnassigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
3059 HBaseFsckRepair.waitUntilAssigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
3060 }
3061 } else if (servers.size() > 1) {
        errors.reportError(ERROR_CODE.MULTI_META_REGION,
            "hbase:meta is deployed on more than one region server.");
3064 if (shouldFixAssignments()) {
3065 if (metaHbckInfo == null) {
3066 errors.print(
3067 "Unable to fix problem with hbase:meta due to hbase:meta region info missing");
3068 return false;
3069 }
3070 errors.print("Trying to fix a problem with hbase:meta..");
3071 setShouldRerun();
3072
3073 HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
3074 }
3075 }
3076
3077 return false;
3078 }
3079
3080 return true;
3081 }
3082
3083
3084
3085
3086
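  /**
   * Scans hbase:meta, recording a MetaEntry for every included region,
   * reporting rows whose REGIONINFO_QUALIFIER is empty and remembering regions
   * that are referenced as merge parents.
   *
   * @return true if hbase:meta was read successfully
   */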
3087 boolean loadMetaEntries() throws IOException {
3088 MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
3089 int countRecord = 1;
3090
3091
3092 final Comparator<Cell> comp = new Comparator<Cell>() {
3093 @Override
3094 public int compare(Cell k1, Cell k2) {
          long ts1 = k1.getTimestamp();
          long ts2 = k2.getTimestamp();
          // Compare explicitly; casting the long difference to int can overflow.
          return (ts1 < ts2) ? -1 : ((ts1 > ts2) ? 1 : 0);
3096 }
3097 };
3098
3099 @Override
3100 public boolean processRow(Result result) throws IOException {
3101 try {
3102
3103
3104 long ts = Collections.max(result.listCells(), comp).getTimestamp();
3105 Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(result);
3106 if (pair == null || pair.getFirst() == null) {
3107 emptyRegionInfoQualifiers.add(result);
3108 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3109 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3110 return true;
3111 }
3112 ServerName sn = null;
3113 if (pair.getSecond() != null) {
3114 sn = pair.getSecond();
3115 }
3116 HRegionInfo hri = pair.getFirst();
3117 if (!(isTableIncluded(hri.getTable())
3118 || hri.isMetaRegion())) {
3119 return true;
3120 }
3121 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
3122 MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3123 HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3124 if (previous == null) {
3125 regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3126 } else if (previous.metaEntry == null) {
3127 previous.metaEntry = m;
3128 } else {
            throw new IOException("Two entries in hbase:meta are the same: " + previous);
3130 }
3131
3132 PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
3133 for (HRegionInfo mergeRegion : new HRegionInfo[] {
3134 mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3135 if (mergeRegion != null) {
3136
3137 HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3138 hbInfo.setMerged(true);
3139 }
3140 }
3141
3142
3143 if (countRecord % 100 == 0) {
3144 errors.progress();
3145 }
3146 countRecord++;
3147 return true;
3148 } catch (RuntimeException e) {
3149 LOG.error("Result=" + result);
3150 throw e;
3151 }
3152 }
3153 };
3154 if (!checkMetaOnly) {
3155
3156 MetaScanner.metaScan(getConf(), visitor);
3157 }
3158
3159 errors.print("");
3160 return true;
3161 }
3162
3163
3164
3165
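  /**
   * Stores the regioninfo entry read from hbase:meta, together with the
   * assigned region server, the modification time of the row and the daughter
   * regions recorded for a split.
   */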
3166 static class MetaEntry extends HRegionInfo {
3167 ServerName regionServer;
3168 long modTime;
3169 HRegionInfo splitA, splitB;
3170
3171 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3172 this(rinfo, regionServer, modTime, null, null);
3173 }
3174
3175 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3176 HRegionInfo splitA, HRegionInfo splitB) {
3177 super(rinfo);
3178 this.regionServer = regionServer;
3179 this.modTime = modTime;
3180 this.splitA = splitA;
3181 this.splitB = splitB;
3182 }
3183
3184 @Override
3185 public boolean equals(Object o) {
3186 boolean superEq = super.equals(o);
3187 if (!superEq) {
3188 return superEq;
3189 }
3190
3191 MetaEntry me = (MetaEntry) o;
3192 if (!regionServer.equals(me.regionServer)) {
3193 return false;
3194 }
3195 return (modTime == me.modTime);
3196 }
3197
3198 @Override
3199 public int hashCode() {
3200 int hash = Arrays.hashCode(getRegionName());
3201 hash ^= getRegionId();
3202 hash ^= Arrays.hashCode(getStartKey());
3203 hash ^= Arrays.hashCode(getEndKey());
3204 hash ^= Boolean.valueOf(isOffline()).hashCode();
3205 hash ^= getTable().hashCode();
3206 if (regionServer != null) {
3207 hash ^= regionServer.hashCode();
3208 }
3209 hash ^= modTime;
3210 return hash;
3211 }
3212 }
3213
3214
3215
3216
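  /**
   * Stores the state of a region as found in HDFS: its directory, the
   * directory's modification time, whether a .regioninfo file is present and
   * whether the directory holds only recovered edits.
   */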
3217 static class HdfsEntry {
3218 HRegionInfo hri;
3219 Path hdfsRegionDir = null;
3220 long hdfsRegionDirModTime = 0;
3221 boolean hdfsRegioninfoFilePresent = false;
3222 boolean hdfsOnlyEdits = false;
3223 }
3224
3225
3226
3227
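  /**
   * A single deployment of a region as reported by a region server.
   */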
3228 static class OnlineEntry {
3229 HRegionInfo hri;
3230 ServerName hsa;
3231
3232 @Override
3233 public String toString() {
3234 return hsa.toString() + ";" + hri.getRegionNameAsString();
3235 }
3236 }
3237
3238
3239
3240
3241
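  /**
   * Aggregates everything hbck knows about one region: its hbase:meta entry,
   * its state in HDFS and the servers it is deployed on. This is the unit on
   * which consistency checks are run.
   */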
3242 public static class HbckInfo implements KeyRange {
3243 private MetaEntry metaEntry = null;
3244 private HdfsEntry hdfsEntry = null;
3245 private List<OnlineEntry> deployedEntries = Lists.newArrayList();
3246 private List<ServerName> deployedOn = Lists.newArrayList();
3247 private boolean skipChecks = false;
3248 private boolean isMerged = false;
3249
3250 HbckInfo(MetaEntry metaEntry) {
3251 this.metaEntry = metaEntry;
3252 }
3253
3254 public synchronized void addServer(HRegionInfo hri, ServerName server) {
3255 OnlineEntry rse = new OnlineEntry() ;
3256 rse.hri = hri;
3257 rse.hsa = server;
3258 this.deployedEntries.add(rse);
3259 this.deployedOn.add(server);
3260 }
3261
3262 @Override
3263 public synchronized String toString() {
3264 StringBuilder sb = new StringBuilder();
3265 sb.append("{ meta => ");
3266 sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3267 sb.append( ", hdfs => " + getHdfsRegionDir());
3268 sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3269 sb.append(" }");
3270 return sb.toString();
3271 }
3272
3273 @Override
3274 public byte[] getStartKey() {
3275 if (this.metaEntry != null) {
3276 return this.metaEntry.getStartKey();
3277 } else if (this.hdfsEntry != null) {
3278 return this.hdfsEntry.hri.getStartKey();
3279 } else {
3280 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3281 return null;
3282 }
3283 }
3284
3285 @Override
3286 public byte[] getEndKey() {
3287 if (this.metaEntry != null) {
3288 return this.metaEntry.getEndKey();
3289 } else if (this.hdfsEntry != null) {
3290 return this.hdfsEntry.hri.getEndKey();
3291 } else {
        LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3293 return null;
3294 }
3295 }
3296
3297 public TableName getTableName() {
3298 if (this.metaEntry != null) {
3299 return this.metaEntry.getTable();
3300 } else if (this.hdfsEntry != null) {
3301
3302
3303 Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3304 return FSUtils.getTableName(tableDir);
3305 } else {
3306
3307
3308 return null;
3309 }
3310 }
3311
3312 public String getRegionNameAsString() {
3313 if (metaEntry != null) {
3314 return metaEntry.getRegionNameAsString();
3315 } else if (hdfsEntry != null) {
3316 if (hdfsEntry.hri != null) {
3317 return hdfsEntry.hri.getRegionNameAsString();
3318 }
3319 }
3320 return null;
3321 }
3322
3323 public byte[] getRegionName() {
3324 if (metaEntry != null) {
3325 return metaEntry.getRegionName();
3326 } else if (hdfsEntry != null) {
3327 return hdfsEntry.hri.getRegionName();
3328 } else {
3329 return null;
3330 }
3331 }
3332
3333 Path getHdfsRegionDir() {
3334 if (hdfsEntry == null) {
3335 return null;
3336 }
3337 return hdfsEntry.hdfsRegionDir;
3338 }
3339
3340 boolean containsOnlyHdfsEdits() {
3341 if (hdfsEntry == null) {
3342 return false;
3343 }
3344 return hdfsEntry.hdfsOnlyEdits;
3345 }
3346
3347 boolean isHdfsRegioninfoPresent() {
3348 if (hdfsEntry == null) {
3349 return false;
3350 }
3351 return hdfsEntry.hdfsRegioninfoFilePresent;
3352 }
3353
3354 long getModTime() {
3355 if (hdfsEntry == null) {
3356 return 0;
3357 }
3358 return hdfsEntry.hdfsRegionDirModTime;
3359 }
3360
3361 HRegionInfo getHdfsHRI() {
3362 if (hdfsEntry == null) {
3363 return null;
3364 }
3365 return hdfsEntry.hri;
3366 }
3367
3368 public void setSkipChecks(boolean skipChecks) {
3369 this.skipChecks = skipChecks;
3370 }
3371
3372 public boolean isSkipChecks() {
3373 return skipChecks;
3374 }
3375
3376 public void setMerged(boolean isMerged) {
3377 this.isMerged = isMerged;
3378 }
3379
3380 public boolean isMerged() {
3381 return this.isMerged;
3382 }
3383 }
3384
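  /**
   * Orders HbckInfo entries by table name, then start key, then end key; ties
   * are broken by the HDFS region id, and entries without HDFS information
   * sort after those that have it.
   */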
3385 final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3386 @Override
3387 public int compare(HbckInfo l, HbckInfo r) {
3388 if (l == r) {
3389
3390 return 0;
3391 }
3392
3393 int tableCompare = l.getTableName().compareTo(r.getTableName());
3394 if (tableCompare != 0) {
3395 return tableCompare;
3396 }
3397
3398 int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3399 l.getStartKey(), r.getStartKey());
3400 if (startComparison != 0) {
3401 return startComparison;
3402 }
3403
3404
3405 byte[] endKey = r.getEndKey();
3406 endKey = (endKey.length == 0) ? null : endKey;
3407 byte[] endKey2 = l.getEndKey();
3408 endKey2 = (endKey2.length == 0) ? null : endKey2;
3409 int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3410 endKey2, endKey);
3411
3412 if (endComparison != 0) {
3413 return endComparison;
3414 }
3415
3416
3417
3418 if (l.hdfsEntry == null && r.hdfsEntry == null) {
3419 return 0;
3420 }
3421 if (l.hdfsEntry == null && r.hdfsEntry != null) {
3422 return 1;
3423 }
3424
3425 if (r.hdfsEntry == null) {
3426 return -1;
3427 }
3428
      long regionIdL = l.hdfsEntry.hri.getRegionId();
      long regionIdR = r.hdfsEntry.hri.getRegionId();
      // Compare explicitly; casting the long difference to int can overflow.
      return (regionIdL < regionIdR) ? -1 : ((regionIdL > regionIdR) ? 1 : 0);
3430 }
3431 };
3432
3433
3434
3435
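  /**
   * Prints a per-table summary: whether the table is consistent, its number of
   * regions and the servers it is deployed on.
   */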
3436 private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3437 StringBuilder sb = new StringBuilder();
3438 errors.print("Summary:");
3439 for (TableInfo tInfo : tablesInfo.values()) {
3440 if (errors.tableHasErrors(tInfo)) {
3441 errors.print("Table " + tInfo.getName() + " is inconsistent.");
3442 } else {
3443 errors.print(" " + tInfo.getName() + " is okay.");
3444 }
3445 errors.print(" Number of regions: " + tInfo.getNumRegions());
3446 sb.setLength(0);
3447 sb.append(" Deployed on: ");
3448 for (ServerName server : tInfo.deployedOn) {
3449 sb.append(" " + server.toString());
3450 }
3451 errors.print(sb.toString());
3452 }
3453 }
3454
3455 static ErrorReporter getErrorReporter(
3456 final Configuration conf) throws ClassNotFoundException {
    Class<? extends ErrorReporter> reporter =
        conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3458 return ReflectionUtils.newInstance(reporter, conf);
3459 }
3460
3461 public interface ErrorReporter {
3462 enum ERROR_CODE {
3463 UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3464 NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
3465 MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3466 FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3467 HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3468 ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3469 WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR
3470 }
3471 void clear();
3472 void report(String message);
3473 void reportError(String message);
3474 void reportError(ERROR_CODE errorCode, String message);
3475 void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3476 void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3477 void reportError(
3478 ERROR_CODE errorCode,
3479 String message,
3480 TableInfo table,
3481 HbckInfo info1,
3482 HbckInfo info2
3483 );
3484 int summarize();
3485 void detail(String details);
3486 ArrayList<ERROR_CODE> getErrorList();
3487 void progress();
3488 void print(String message);
3489 void resetErrors();
3490 boolean tableHasErrors(TableInfo table);
3491 }
3492
3493 static class PrintingErrorReporter implements ErrorReporter {
3494 public int errorCount = 0;
3495 private int showProgress;
3496
3497 private static final int progressThreshold = 100;
3498
3499 Set<TableInfo> errorTables = new HashSet<TableInfo>();
3500
3501
3502 private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3503
3504 @Override
3505 public void clear() {
3506 errorTables.clear();
3507 errorList.clear();
3508 errorCount = 0;
3509 }
3510
3511 @Override
3512 public synchronized void reportError(ERROR_CODE errorCode, String message) {
3513 if (errorCode == ERROR_CODE.WRONG_USAGE) {
3514 System.err.println(message);
3515 return;
3516 }
3517
3518 errorList.add(errorCode);
3519 if (!summary) {
3520 System.out.println("ERROR: " + message);
3521 }
3522 errorCount++;
3523 showProgress = 0;
3524 }
3525
3526 @Override
3527 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3528 errorTables.add(table);
3529 reportError(errorCode, message);
3530 }
3531
3532 @Override
3533 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3534 HbckInfo info) {
3535 errorTables.add(table);
3536 String reference = "(region " + info.getRegionNameAsString() + ")";
3537 reportError(errorCode, reference + " " + message);
3538 }
3539
3540 @Override
3541 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3542 HbckInfo info1, HbckInfo info2) {
3543 errorTables.add(table);
3544 String reference = "(regions " + info1.getRegionNameAsString()
3545 + " and " + info2.getRegionNameAsString() + ")";
3546 reportError(errorCode, reference + " " + message);
3547 }
3548
3549 @Override
3550 public synchronized void reportError(String message) {
3551 reportError(ERROR_CODE.UNKNOWN, message);
3552 }
3553
3554
3555
3556
3557
3558
3559 @Override
3560 public synchronized void report(String message) {
3561 if (! summary) {
3562 System.out.println("ERROR: " + message);
3563 }
3564 showProgress = 0;
3565 }
3566
3567 @Override
3568 public synchronized int summarize() {
3569 System.out.println(Integer.toString(errorCount) +
3570 " inconsistencies detected.");
3571 if (errorCount == 0) {
3572 System.out.println("Status: OK");
3573 return 0;
3574 } else {
3575 System.out.println("Status: INCONSISTENT");
3576 return -1;
3577 }
3578 }
3579
3580 @Override
3581 public ArrayList<ERROR_CODE> getErrorList() {
3582 return errorList;
3583 }
3584
3585 @Override
3586 public synchronized void print(String message) {
3587 if (!summary) {
3588 System.out.println(message);
3589 }
3590 }
3591
3592 @Override
3593 public boolean tableHasErrors(TableInfo table) {
3594 return errorTables.contains(table);
3595 }
3596
3597 @Override
3598 public void resetErrors() {
3599 errorCount = 0;
3600 }
3601
3602 @Override
3603 public synchronized void detail(String message) {
3604 if (details) {
3605 System.out.println(message);
3606 }
3607 showProgress = 0;
3608 }
3609
3610 @Override
3611 public synchronized void progress() {
3612 if (showProgress++ == progressThreshold) {
3613 if (!summary) {
3614 System.out.print(".");
3615 }
3616 showProgress = 0;
3617 }
3618 }
3619 }
3620
3621
3622
3623
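  /**
   * Callable that contacts one region server, retrieves the regions it is
   * serving (filtered to the included tables) and records each deployment in
   * the hbck region map.
   */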
3624 static class WorkItemRegion implements Callable<Void> {
3625 private HBaseFsck hbck;
3626 private ServerName rsinfo;
3627 private ErrorReporter errors;
3628 private HConnection connection;
3629
3630 WorkItemRegion(HBaseFsck hbck, ServerName info,
3631 ErrorReporter errors, HConnection connection) {
3632 this.hbck = hbck;
3633 this.rsinfo = info;
3634 this.errors = errors;
3635 this.connection = connection;
3636 }
3637
3638 @Override
3639 public synchronized Void call() throws IOException {
3640 errors.progress();
3641 try {
3642 BlockingInterface server = connection.getAdmin(rsinfo);
3643
3644
3645 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3646 regions = filterRegions(regions);
3647
3648 if (details) {
3649 errors.detail("RegionServer: " + rsinfo.getServerName() +
3650 " number of regions: " + regions.size());
3651 for (HRegionInfo rinfo: regions) {
3652 errors.detail(" " + rinfo.getRegionNameAsString() +
3653 " id: " + rinfo.getRegionId() +
3654 " encoded_name: " + rinfo.getEncodedName() +
3655 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3656 " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3657 }
3658 }
3659
3660
3661 for (HRegionInfo r:regions) {
3662 HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3663 hbi.addServer(r, rsinfo);
3664 }
3665 } catch (IOException e) {
3666 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3667 " Unable to fetch region information. " + e);
3668 throw e;
3669 }
3670 return null;
3671 }
3672
3673 private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3674 List<HRegionInfo> ret = Lists.newArrayList();
3675 for (HRegionInfo hri : regions) {
3676 if (hri.isMetaTable() || (!hbck.checkMetaOnly
3677 && hbck.isTableIncluded(hri.getTable()))) {
3678 ret.add(hri);
3679 }
3680 }
3681 return ret;
3682 }
3683 }
3684
3685
3686
3687
3688
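  /**
   * Callable that scans one table directory in HDFS and records the on-disk
   * state of every region directory found underneath it.
   */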
3689 static class WorkItemHdfsDir implements Callable<Void> {
3690 private HBaseFsck hbck;
3691 private FileStatus tableDir;
3692 private ErrorReporter errors;
3693 private FileSystem fs;
3694
3695 WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3696 FileStatus status) {
3697 this.hbck = hbck;
3698 this.fs = fs;
3699 this.tableDir = status;
3700 this.errors = errors;
3701 }
3702
3703 @Override
3704 public synchronized Void call() throws IOException {
3705 try {
        // The body of this scan was elided; what follows is a minimal sketch, assuming the
        // conventional hbck behaviour: record the HDFS state of every region dir under this table.
        errors.progress();
        FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
        for (FileStatus regionDir : regionDirs) {
          String encodedName = regionDir.getPath().getName();
          // Region directories are named with the hex-encoded region name; skip everything else.
          if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
            continue;
          }

          LOG.debug("Loading region info from hdfs: " + regionDir.getPath());
          HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
          HdfsEntry he = new HdfsEntry();
          he.hdfsRegionDir = regionDir.getPath();
          he.hdfsRegionDirModTime = regionDir.getModificationTime();
          he.hdfsRegioninfoFilePresent =
              fs.exists(new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE));

          // Heuristic (recovered-edits dir name assumed to be "recovered.edits"): the region
          // dir contains only edits when no other visible entries are present.
          he.hdfsOnlyEdits = true;
          for (FileStatus sub : fs.listStatus(regionDir.getPath())) {
            String name = sub.getPath().getName();
            if (!name.startsWith(".") && !name.equals("recovered.edits")) {
              he.hdfsOnlyEdits = false;
              break;
            }
          }

          synchronized (hbi) {
            hbi.hdfsEntry = he;
          }
        }
      } catch (IOException e) {
        // Propagate after logging so the caller can account for the failed table dir.
        LOG.error("Unable to scan the HDFS table dir " + tableDir.getPath(), e);
        throw e;
      }
      return null;
    }
  }
3762 static class WorkItemHdfsRegionInfo implements Callable<Void> {
3763 private HbckInfo hbi;
3764 private HBaseFsck hbck;
3765 private ErrorReporter errors;
3766
3767 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3768 this.hbi = hbi;
3769 this.hbck = hbck;
3770 this.errors = errors;
3771 }
3772
3773 @Override
3774 public synchronized Void call() throws IOException {
3775
3776 if (hbi.getHdfsHRI() == null) {
3777 try {
3778 errors.progress();
3779 hbck.loadHdfsRegioninfo(hbi);
3780 } catch (IOException ioe) {
3781 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3782 + hbi.getTableName() + " in hdfs dir "
3783 + hbi.getHdfsRegionDir()
              + "! The .regioninfo file may be missing or have an invalid format or version. "
              + "Treating it as an orphaned regiondir.";
3786 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3787 try {
3788 hbck.debugLsr(hbi.getHdfsRegionDir());
3789 } catch (IOException ioe2) {
3790 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3791 throw ioe2;
3792 }
3793 hbck.orphanHdfsDirs.add(hbi);
3794 throw ioe;
3795 }
3796 }
3797 return null;
3798 }
3799 };
3800
3801
3802
3803
3804
3805 public static void setDisplayFullReport() {
3806 details = true;
3807 }
3808
3809
3810
3811
3812
3813 void setSummary() {
3814 summary = true;
3815 }
3816
3817
3818
3819
3820
3821 void setCheckMetaOnly() {
3822 checkMetaOnly = true;
3823 }
3824
3825
3826
3827
3828 void setRegionBoundariesCheck() {
3829 checkRegionBoundaries = true;
3830 }
3831
3832
3833
3834
3835
3836 public void setFixTableLocks(boolean shouldFix) {
3837 fixTableLocks = shouldFix;
3838 fixAny |= shouldFix;
3839 }
3840
3841
3842
3843
3844
3845 public void setFixTableZNodes(boolean shouldFix) {
3846 fixTableZNodes = shouldFix;
3847 fixAny |= shouldFix;
3848 }
3849
3850
3851
3852
3853
3854
3855
3856 void setShouldRerun() {
3857 rerun = true;
3858 }
3859
3860 boolean shouldRerun() {
3861 return rerun;
3862 }
3863
3864
3865
3866
3867
3868 public void setFixAssignments(boolean shouldFix) {
3869 fixAssignments = shouldFix;
3870 fixAny |= shouldFix;
3871 }
3872
3873 boolean shouldFixAssignments() {
3874 return fixAssignments;
3875 }
3876
3877 public void setFixMeta(boolean shouldFix) {
3878 fixMeta = shouldFix;
3879 fixAny |= shouldFix;
3880 }
3881
3882 boolean shouldFixMeta() {
3883 return fixMeta;
3884 }
3885
3886 public void setFixEmptyMetaCells(boolean shouldFix) {
3887 fixEmptyMetaCells = shouldFix;
3888 fixAny |= shouldFix;
3889 }
3890
3891 boolean shouldFixEmptyMetaCells() {
3892 return fixEmptyMetaCells;
3893 }
3894
3895 public void setCheckHdfs(boolean checking) {
3896 checkHdfs = checking;
3897 }
3898
3899 boolean shouldCheckHdfs() {
3900 return checkHdfs;
3901 }
3902
3903 public void setFixHdfsHoles(boolean shouldFix) {
3904 fixHdfsHoles = shouldFix;
3905 fixAny |= shouldFix;
3906 }
3907
3908 boolean shouldFixHdfsHoles() {
3909 return fixHdfsHoles;
3910 }
3911
3912 public void setFixTableOrphans(boolean shouldFix) {
3913 fixTableOrphans = shouldFix;
3914 fixAny |= shouldFix;
3915 }
3916
3917 boolean shouldFixTableOrphans() {
3918 return fixTableOrphans;
3919 }
3920
3921 public void setFixHdfsOverlaps(boolean shouldFix) {
3922 fixHdfsOverlaps = shouldFix;
3923 fixAny |= shouldFix;
3924 }
3925
3926 boolean shouldFixHdfsOverlaps() {
3927 return fixHdfsOverlaps;
3928 }
3929
3930 public void setFixHdfsOrphans(boolean shouldFix) {
3931 fixHdfsOrphans = shouldFix;
3932 fixAny |= shouldFix;
3933 }
3934
3935 boolean shouldFixHdfsOrphans() {
3936 return fixHdfsOrphans;
3937 }
3938
3939 public void setFixVersionFile(boolean shouldFix) {
3940 fixVersionFile = shouldFix;
3941 fixAny |= shouldFix;
3942 }
3943
3944 public boolean shouldFixVersionFile() {
3945 return fixVersionFile;
3946 }
3947
3948 public void setSidelineBigOverlaps(boolean sbo) {
3949 this.sidelineBigOverlaps = sbo;
3950 }
3951
3952 public boolean shouldSidelineBigOverlaps() {
3953 return sidelineBigOverlaps;
3954 }
3955
3956 public void setFixSplitParents(boolean shouldFix) {
3957 fixSplitParents = shouldFix;
3958 fixAny |= shouldFix;
3959 }
3960
3961 boolean shouldFixSplitParents() {
3962 return fixSplitParents;
3963 }
3964
3965 public void setFixReferenceFiles(boolean shouldFix) {
3966 fixReferenceFiles = shouldFix;
3967 fixAny |= shouldFix;
3968 }
3969
3970 boolean shouldFixReferenceFiles() {
3971 return fixReferenceFiles;
3972 }
3973
3974 public boolean shouldIgnorePreCheckPermission() {
3975 return !fixAny || ignorePreCheckPermission;
3976 }
3977
3978 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3979 this.ignorePreCheckPermission = ignorePreCheckPermission;
3980 }
3981
3982
3983
3984
3985 public void setMaxMerge(int mm) {
3986 this.maxMerge = mm;
3987 }
3988
3989 public int getMaxMerge() {
3990 return maxMerge;
3991 }
3992
3993 public void setMaxOverlapsToSideline(int mo) {
3994 this.maxOverlapsToSideline = mo;
3995 }
3996
3997 public int getMaxOverlapsToSideline() {
3998 return maxOverlapsToSideline;
3999 }
4000
4001
4002
4003
4004
4005 boolean isTableIncluded(TableName table) {
4006 return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
4007 }
4008
4009 public void includeTable(TableName table) {
4010 tablesIncluded.add(table);
4011 }
4012
4013 Set<TableName> getIncludedTables() {
4014 return new HashSet<TableName>(tablesIncluded);
4015 }
4016
4017
4018
4019
4020
4021
4022 public void setTimeLag(long seconds) {
4023 timelag = seconds * 1000;
4024 }
4025
4026
4027
4028
4029
4030 public void setSidelineDir(String sidelineDir) {
4031 this.sidelineDir = new Path(sidelineDir);
4032 }
4033
4034 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4035 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4036 }
4037
4038 public HFileCorruptionChecker getHFilecorruptionChecker() {
4039 return hfcc;
4040 }
4041
4042 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4043 this.hfcc = hfcc;
4044 }
4045
4046 public void setRetCode(int code) {
4047 this.retcode = code;
4048 }
4049
4050 public int getRetCode() {
4051 return retcode;
4052 }
4053
4054 protected HBaseFsck printUsageAndExit() {
4055 StringWriter sw = new StringWriter(2048);
4056 PrintWriter out = new PrintWriter(sw);
4057 out.println("Usage: fsck [opts] {only tables}");
4058 out.println(" where [opts] are:");
4059 out.println(" -help Display help options (this)");
4060 out.println(" -details Display full report of all regions.");
4061 out.println(" -timelag <timeInSeconds> Process only regions that " +
4062 "have not experienced any metadata updates in the last " +
4063 "<timeInSeconds> seconds.");
4064 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4065 " before re-checking whether the fixes worked, when run with -fix");
4066 out.println(" -summary Print only a summary of the tables and status.");
4067 out.println(" -metaonly Only check the state of the hbase:meta table.");
4068 out.println(" -sidelineDir <hdfs://> HDFS path to back up existing meta.");
4069 out.println(" -boundaries Verify that region boundaries are the same between META and store files.");
4070
4071 out.println("");
4072 out.println(" Metadata Repair options: (expert features, use with caution!)");
4073 out.println(" -fix Try to fix region assignments. This is for backward compatibility");
4074 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
4075 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
4076 out.println(" -noHdfsChecking Don't load/check region info from HDFS."
4077 + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4078 out.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
4079 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
4080 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4081 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
4082 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
4083 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4084 out.println(" -sidelineBigOverlaps When fixing region overlaps, allow sidelining big overlaps");
4085 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4086 out.println(" -fixSplitParents Try to force offline split parents to be online.");
4087 out.println(" -ignorePreCheckPermission Ignore filesystem permission pre-check");
4088 out.println(" -fixReferenceFiles Try to offline lingering reference store files");
4089 out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
4090 + " (empty REGIONINFO_QUALIFIER rows)");
4091
4092 out.println("");
4093 out.println(" Datafile Repair options: (expert features, use with caution!)");
4094 out.println(" -checkCorruptHFiles Check all HFiles by opening them to make sure they are valid");
4095 out.println(" -sidelineCorruptHFiles Quarantine corrupted HFiles. Implies -checkCorruptHFiles");
4096
4097 out.println("");
4098 out.println(" Metadata Repair shortcuts");
4099 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4100 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
4101 "-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
4102 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4103
4104 out.println("");
4105 out.println(" Table lock options");
4106 out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4107
4108 out.println("");
4109 out.println(" Table Znode options");
4110 out.println(" -fixOrphanedTableZnodes Set table state in ZNode to disabled if the table does not exist");
4111
4112 out.flush();
4113 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4114
4115 setRetCode(-2);
4116 return this;
4117 }
4118
4119
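/**
 * Main program: runs hbck as a Hadoop Tool and exits with its return code.
 */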
4125 public static void main(String[] args) throws Exception {
4126
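// Make the filesystem hosting the HBase root directory the default filesystem.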
4127 Configuration conf = HBaseConfiguration.create();
4128 Path hbasedir = FSUtils.getRootDir(conf);
4129 URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4130 FSUtils.setFsDefault(conf, new Path(defaultFs));
4131 int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4132 System.exit(ret);
4133 }
4134
4135
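/**
 * Tool wrapper so that ToolRunner picks up -Dkey=value configuration settings
 * from the command line before hbck runs.
 */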
4138 static class HBaseFsckTool extends Configured implements Tool {
4139 HBaseFsckTool(Configuration conf) { super(conf); }
4140 @Override
4141 public int run(String[] args) throws Exception {
4142 HBaseFsck hbck = new HBaseFsck(getConf());
4143 hbck.exec(hbck.executor, args);
4144 return hbck.getRetCode();
4145 }
4146 };
4147
4148
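/**
 * Parses the command-line arguments, runs the online consistency check
 * (applying any requested fixes), and re-runs the check afterwards when fixes
 * may have changed cluster state.
 */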
4149 public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4150 ServiceException, InterruptedException {
4151 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4152
4153 boolean checkCorruptHFiles = false;
4154 boolean sidelineCorruptHFiles = false;
4155
4156
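// Process command-line arguments.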
4157 for (int i = 0; i < args.length; i++) {
4158 String cmd = args[i];
4159 if (cmd.equals("-help") || cmd.equals("-h")) {
4160 return printUsageAndExit();
4161 } else if (cmd.equals("-details")) {
4162 setDisplayFullReport();
4163 } else if (cmd.equals("-timelag")) {
4164 if (i == args.length - 1) {
4165 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4166 return printUsageAndExit();
4167 }
4168 try {
4169 long timelag = Long.parseLong(args[i+1]);
4170 setTimeLag(timelag);
4171 } catch (NumberFormatException e) {
4172 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4173 return printUsageAndExit();
4174 }
4175 i++;
4176 } else if (cmd.equals("-sleepBeforeRerun")) {
4177 if (i == args.length - 1) {
4178 errors.reportError(ERROR_CODE.WRONG_USAGE,
4179 "HBaseFsck: -sleepBeforeRerun needs a value.");
4180 return printUsageAndExit();
4181 }
4182 try {
4183 sleepBeforeRerun = Long.parseLong(args[i+1]);
4184 } catch (NumberFormatException e) {
4185 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4186 return printUsageAndExit();
4187 }
4188 i++;
4189 } else if (cmd.equals("-sidelineDir")) {
4190 if (i == args.length - 1) {
4191 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4192 return printUsageAndExit();
4193 }
4194 i++;
4195 setSidelineDir(args[i]);
4196 } else if (cmd.equals("-fix")) {
4197 errors.reportError(ERROR_CODE.WRONG_USAGE,
4198 "This option is deprecated, please use -fixAssignments instead.");
4199 setFixAssignments(true);
4200 } else if (cmd.equals("-fixAssignments")) {
4201 setFixAssignments(true);
4202 } else if (cmd.equals("-fixMeta")) {
4203 setFixMeta(true);
4204 } else if (cmd.equals("-noHdfsChecking")) {
4205 setCheckHdfs(false);
4206 } else if (cmd.equals("-fixHdfsHoles")) {
4207 setFixHdfsHoles(true);
4208 } else if (cmd.equals("-fixHdfsOrphans")) {
4209 setFixHdfsOrphans(true);
4210 } else if (cmd.equals("-fixTableOrphans")) {
4211 setFixTableOrphans(true);
4212 } else if (cmd.equals("-fixHdfsOverlaps")) {
4213 setFixHdfsOverlaps(true);
4214 } else if (cmd.equals("-fixVersionFile")) {
4215 setFixVersionFile(true);
4216 } else if (cmd.equals("-sidelineBigOverlaps")) {
4217 setSidelineBigOverlaps(true);
4218 } else if (cmd.equals("-fixSplitParents")) {
4219 setFixSplitParents(true);
4220 } else if (cmd.equals("-ignorePreCheckPermission")) {
4221 setIgnorePreCheckPermission(true);
4222 } else if (cmd.equals("-checkCorruptHFiles")) {
4223 checkCorruptHFiles = true;
4224 } else if (cmd.equals("-sidelineCorruptHFiles")) {
4225 sidelineCorruptHFiles = true;
4226 } else if (cmd.equals("-fixReferenceFiles")) {
4227 setFixReferenceFiles(true);
4228 } else if (cmd.equals("-fixEmptyMetaCells")) {
4229 setFixEmptyMetaCells(true);
4230 } else if (cmd.equals("-repair")) {
4231
4232
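// Shortcut for the full set of fixes listed under -repair in the usage text;
// split-parent fixing is deliberately left off.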
4233 setFixHdfsHoles(true);
4234 setFixHdfsOrphans(true);
4235 setFixMeta(true);
4236 setFixAssignments(true);
4237 setFixHdfsOverlaps(true);
4238 setFixVersionFile(true);
4239 setSidelineBigOverlaps(true);
4240 setFixSplitParents(false);
4241 setCheckHdfs(true);
4242 setFixReferenceFiles(true);
4243 setFixTableLocks(true);
4244 setFixTableZNodes(true);
4245 } else if (cmd.equals("-repairHoles")) {
4246
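// Shortcut for -fixAssignments -fixMeta -fixHdfsHoles; HDFS orphan and
// overlap fixing stay off.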
4247 setFixHdfsHoles(true);
4248 setFixHdfsOrphans(false);
4249 setFixMeta(true);
4250 setFixAssignments(true);
4251 setFixHdfsOverlaps(false);
4252 setSidelineBigOverlaps(false);
4253 setFixSplitParents(false);
4254 setCheckHdfs(true);
4255 } else if (cmd.equals("-maxOverlapsToSideline")) {
4256 if (i == args.length - 1) {
4257 errors.reportError(ERROR_CODE.WRONG_USAGE,
4258 "-maxOverlapsToSideline needs a numeric value argument.");
4259 return printUsageAndExit();
4260 }
4261 try {
4262 int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4263 setMaxOverlapsToSideline(maxOverlapsToSideline);
4264 } catch (NumberFormatException e) {
4265 errors.reportError(ERROR_CODE.WRONG_USAGE,
4266 "-maxOverlapsToSideline needs a numeric value argument.");
4267 return printUsageAndExit();
4268 }
4269 i++;
4270 } else if (cmd.equals("-maxMerge")) {
4271 if (i == args.length - 1) {
4272 errors.reportError(ERROR_CODE.WRONG_USAGE,
4273 "-maxMerge needs a numeric value argument.");
4274 return printUsageAndExit();
4275 }
4276 try {
4277 int maxMerge = Integer.parseInt(args[i+1]);
4278 setMaxMerge(maxMerge);
4279 } catch (NumberFormatException e) {
4280 errors.reportError(ERROR_CODE.WRONG_USAGE,
4281 "-maxMerge needs a numeric value argument.");
4282 return printUsageAndExit();
4283 }
4284 i++;
4285 } else if (cmd.equals("-summary")) {
4286 setSummary();
4287 } else if (cmd.equals("-metaonly")) {
4288 setCheckMetaOnly();
4289 } else if (cmd.equals("-boundaries")) {
4290 setRegionBoundariesCheck();
4291 } else if (cmd.equals("-fixTableLocks")) {
4292 setFixTableLocks(true);
4293 } else if (cmd.equals("-fixOrphanedTableZnodes")) {
4294 setFixTableZNodes(true);
4295 } else if (cmd.startsWith("-")) {
4296 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4297 return printUsageAndExit();
4298 } else {
4299 includeTable(TableName.valueOf(cmd));
4300 errors.print("Allow checking/fixes for table: " + cmd);
4301 }
4302 }
4303
4304 errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4305
4306
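// Pre-check that the current user has filesystem write permission before
// attempting any fixes (can be skipped with -ignorePreCheckPermission).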
4307 try {
4308 preCheckPermission();
4309 } catch (AccessDeniedException ace) {
4310 Runtime.getRuntime().exit(-1);
4311 } catch (IOException ioe) {
4312 Runtime.getRuntime().exit(-1);
4313 }
4314
4315
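// Connect to the cluster and do the real work of hbck.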
4316 connect();
4317
4318 try {
4319
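// If requested, check (and optionally sideline) corrupt HFiles before the
// metadata checks run.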
4320 if (checkCorruptHFiles || sidelineCorruptHFiles) {
4321 LOG.info("Checking all hfiles for corruption");
4322 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4323 setHFileCorruptionChecker(hfcc);
4324 Collection<TableName> tables = getIncludedTables();
4325 Collection<Path> tableDirs = new ArrayList<Path>();
4326 Path rootdir = FSUtils.getRootDir(getConf());
4327 if (tables.size() > 0) {
4328 for (TableName t : tables) {
4329 tableDirs.add(FSUtils.getTableDir(rootdir, t));
4330 }
4331 } else {
4332 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4333 }
4334 hfcc.checkTables(tableDirs);
4335 hfcc.report(errors);
4336 }
4337
4338
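// Run the main online consistency check and any remaining fixes.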
4339 int code = onlineHbck();
4340 setRetCode(code);
4341
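// When fixes were applied, run hbck once more to verify that the problems are
// gone; the second pass is report-only (fix options are cleared below).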
4345 if (shouldRerun()) {
4346 try {
4347 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4348 Thread.sleep(sleepBeforeRerun);
4349 } catch (InterruptedException ie) {
LOG.warn("Interrupted while sleeping before the post-fix re-check", ie);
4350 return this;
4351 }
4352
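// Disable the main fix options so the second pass just reports.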
4353 setFixAssignments(false);
4354 setFixMeta(false);
4355 setFixHdfsHoles(false);
4356 setFixHdfsOverlaps(false);
4357 setFixVersionFile(false);
4358 setFixTableOrphans(false);
4359 errors.resetErrors();
4360 code = onlineHbck();
4361 setRetCode(code);
4362 }
4363 } finally {
4364 IOUtils.cleanup(null, connection, meta, admin);
4365 }
4366 return this;
4367 }
4368
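/**
 * Recursive "ls -r" of a path for debugging, reported through this instance's
 * error reporter.
 */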
4372 void debugLsr(Path p) throws IOException {
4373 debugLsr(getConf(), p, errors);
4374 }
4375
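/**
 * Recursive "ls -r" of a path for debugging, using a standalone
 * PrintingErrorReporter.
 */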
4379 public static void debugLsr(Configuration conf,
4380 Path p) throws IOException {
4381 debugLsr(conf, p, new PrintingErrorReporter());
4382 }
4383
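/**
 * Recursively prints a path and everything under it via the given error
 * reporter; a no-op unless DEBUG logging is enabled.
 */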
4387 public static void debugLsr(Configuration conf,
4388 Path p, ErrorReporter errors) throws IOException {
4389 if (!LOG.isDebugEnabled() || p == null) {
4390 return;
4391 }
4392 FileSystem fs = p.getFileSystem(conf);
4393
4394 if (!fs.exists(p)) {
4395
4396 return;
4397 }
4398 errors.print(p.toString());
4399
4400 if (fs.isFile(p)) {
4401 return;
4402 }
4403
4404 if (fs.getFileStatus(p).isDir()) {
4405 FileStatus[] fss = fs.listStatus(p);
4406 for (FileStatus status : fss) {
4407 debugLsr(conf, status.getPath(), errors);
4408 }
4409 }
4410 }
4411 }