1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.util;
19
20 import java.io.Closeable;
21 import java.io.FileNotFoundException;
22 import java.io.IOException;
23 import java.io.InterruptedIOException;
24 import java.io.PrintWriter;
25 import java.io.StringWriter;
26 import java.net.InetAddress;
27 import java.net.URI;
28 import java.util.ArrayList;
29 import java.util.Arrays;
30 import java.util.Collection;
31 import java.util.Collections;
32 import java.util.Comparator;
33 import java.util.HashMap;
34 import java.util.HashSet;
35 import java.util.Iterator;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.Map.Entry;
39 import java.util.Set;
40 import java.util.SortedMap;
41 import java.util.SortedSet;
42 import java.util.TreeMap;
43 import java.util.TreeSet;
44 import java.util.concurrent.Callable;
45 import java.util.concurrent.ConcurrentSkipListMap;
46 import java.util.concurrent.ExecutionException;
47 import java.util.concurrent.ExecutorService;
48 import java.util.concurrent.Future;
49 import java.util.concurrent.ScheduledThreadPoolExecutor;
50 import java.util.concurrent.atomic.AtomicBoolean;
51 import java.util.concurrent.atomic.AtomicInteger;
52
53 import org.apache.commons.lang.StringUtils;
54 import org.apache.commons.logging.Log;
55 import org.apache.commons.logging.LogFactory;
58 import org.apache.hadoop.conf.Configuration;
59 import org.apache.hadoop.conf.Configured;
60 import org.apache.hadoop.fs.FSDataOutputStream;
61 import org.apache.hadoop.fs.FileStatus;
62 import org.apache.hadoop.fs.FileSystem;
63 import org.apache.hadoop.fs.Path;
64 import org.apache.hadoop.fs.permission.FsAction;
65 import org.apache.hadoop.fs.permission.FsPermission;
66 import org.apache.hadoop.hbase.Abortable;
67 import org.apache.hadoop.hbase.Cell;
68 import org.apache.hadoop.hbase.ClusterStatus;
69 import org.apache.hadoop.hbase.CoordinatedStateException;
70 import org.apache.hadoop.hbase.HBaseConfiguration;
71 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
72 import org.apache.hadoop.hbase.HColumnDescriptor;
73 import org.apache.hadoop.hbase.HConstants;
74 import org.apache.hadoop.hbase.HRegionInfo;
75 import org.apache.hadoop.hbase.HRegionLocation;
76 import org.apache.hadoop.hbase.HTableDescriptor;
77 import org.apache.hadoop.hbase.KeyValue;
78 import org.apache.hadoop.hbase.MasterNotRunningException;
79 import org.apache.hadoop.hbase.ServerName;
80 import org.apache.hadoop.hbase.TableName;
81 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
82 import org.apache.hadoop.hbase.MetaTableAccessor;
83 import org.apache.hadoop.hbase.classification.InterfaceAudience;
84 import org.apache.hadoop.hbase.classification.InterfaceStability;
85 import org.apache.hadoop.hbase.client.Admin;
86 import org.apache.hadoop.hbase.client.ClusterConnection;
87 import org.apache.hadoop.hbase.client.ConnectionFactory;
88 import org.apache.hadoop.hbase.client.Delete;
89 import org.apache.hadoop.hbase.client.Get;
90 import org.apache.hadoop.hbase.client.HBaseAdmin;
91 import org.apache.hadoop.hbase.client.HConnectable;
92 import org.apache.hadoop.hbase.client.HConnection;
93 import org.apache.hadoop.hbase.client.HConnectionManager;
94 import org.apache.hadoop.hbase.client.MetaScanner;
95 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
96 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
97 import org.apache.hadoop.hbase.client.Put;
98 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
99 import org.apache.hadoop.hbase.client.Result;
100 import org.apache.hadoop.hbase.client.RowMutations;
101 import org.apache.hadoop.hbase.client.Table;
102 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
103 import org.apache.hadoop.hbase.io.hfile.HFile;
104 import org.apache.hadoop.hbase.master.MasterFileSystem;
105 import org.apache.hadoop.hbase.master.RegionState;
106 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
107 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
108 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
109 import org.apache.hadoop.hbase.regionserver.HRegion;
110 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
111 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
112 import org.apache.hadoop.hbase.security.AccessDeniedException;
113 import org.apache.hadoop.hbase.security.UserProvider;
114 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
115 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
116 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
117 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
118 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
119 import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
120 import org.apache.hadoop.hbase.wal.WALSplitter;
121 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
122 import org.apache.hadoop.hbase.zookeeper.ZKTableStateClientSideReader;
123 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
124 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
125 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
126 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
127 import org.apache.hadoop.io.IOUtils;
128 import org.apache.hadoop.ipc.RemoteException;
129 import org.apache.hadoop.security.UserGroupInformation;
130 import org.apache.hadoop.util.ReflectionUtils;
131 import org.apache.hadoop.util.Tool;
132 import org.apache.hadoop.util.ToolRunner;
133 import org.apache.zookeeper.KeeperException;
134
135 import com.google.common.annotations.VisibleForTesting;
136 import com.google.common.base.Joiner;
137 import com.google.common.base.Preconditions;
138 import com.google.common.collect.Lists;
139 import com.google.common.collect.Multimap;
140 import com.google.common.collect.TreeMultimap;
141 import com.google.protobuf.ServiceException;
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
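/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a running HBase cluster. For each region it
 * verifies that the state recorded in hbase:meta, the data on HDFS, and the
 * deployment on region servers agree, and for each table it verifies that the
 * regions form a complete, non-overlapping chain of keys. By default hbck only
 * reports problems; repairs are applied only when the corresponding fix
 * options are enabled.
 */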
188 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
189 @InterfaceStability.Evolving
190 public class HBaseFsck extends Configured implements Closeable {
191 public static final long DEFAULT_TIME_LAG = 60000;
192 public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
193 private static final int MAX_NUM_THREADS = 50;
194 private static boolean rsSupportsOffline = true;
195 private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
196 private static final int DEFAULT_MAX_MERGE = 5;
197 private static final String TO_BE_LOADED = "to_be_loaded";
198 private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
199
200
201
202
203
204 private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
205 private ClusterStatus status;
206 private ClusterConnection connection;
207 private Admin admin;
208 private Table meta;
209
210 protected ExecutorService executor;
211 private long startMillis = System.currentTimeMillis();
212 private HFileCorruptionChecker hfcc;
213 private int retcode = 0;
214 private Path HBCK_LOCK_PATH;
215 private FSDataOutputStream hbckOutFd;
216
217
218
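/** Set once the hbck lock file has been created so the shutdown hook knows to clean it up. */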
219 private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
220
221
222
223
224 private static boolean details = false;
225 private long timelag = DEFAULT_TIME_LAG;
226 private boolean fixAssignments = false;
227 private boolean fixMeta = false;
228 private boolean checkHdfs = true;
229 private boolean fixHdfsHoles = false;
230 private boolean fixHdfsOverlaps = false;
231 private boolean fixHdfsOrphans = false;
232 private boolean fixTableOrphans = false;
233 private boolean fixVersionFile = false;
234 private boolean fixSplitParents = false;
235 private boolean fixReferenceFiles = false;
236 private boolean fixEmptyMetaCells = false;
237 private boolean fixTableLocks = false;
238 private boolean fixTableZNodes = false;
239 private boolean fixAny = false;
240
241
242
243 private Set<TableName> tablesIncluded = new HashSet<TableName>();
244 private int maxMerge = DEFAULT_MAX_MERGE;
245 private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
246 private boolean sidelineBigOverlaps = false;
247 private Path sidelineDir = null;
248
249 private boolean rerun = false;
250 private static boolean summary = false;
251 private boolean checkMetaOnly = false;
252 private boolean checkRegionBoundaries = false;
253 private boolean ignorePreCheckPermission = false;
254
255
256
257
258 final private ErrorReporter errors;
259 int fixes = 0;
260
261
262
263
264
265
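/**
 * State of every region found so far, keyed by encoded region name. Each HbckInfo
 * aggregates what is known about the region from hbase:meta, HDFS and the region
 * servers, and drives consistency checking and repair.
 */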
266 private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
267 private TreeSet<TableName> disabledTables =
268 new TreeSet<TableName>();
269
270 private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
271
272
273
274
275
276
277
278
279
280
281
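/**
 * Per-table state used for the table integrity checks (holes, overlaps, bad region
 * chains), keyed and sorted by table name.
 */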
282 private SortedMap<TableName, TableInfo> tablesInfo =
283 new ConcurrentSkipListMap<TableName, TableInfo>();
284
285
286
287
288 private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
289
290 private Map<TableName, Set<String>> orphanTableDirs =
291 new HashMap<TableName, Set<String>>();
292
293
294
295
296 private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();
297
298
299
300
301
302
303
304
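/**
 * Constructor.
 *
 * @param conf Configuration object
 * @throws MasterNotRunningException if the master is not running
 * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
 */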
305 public HBaseFsck(Configuration conf) throws MasterNotRunningException,
306 ZooKeeperConnectionException, IOException, ClassNotFoundException {
307 super(conf);
308
309 setConf(HBaseConfiguration.create(getConf()));
310
311 getConf().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
312 errors = getErrorReporter(conf);
313
314 int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
315 executor = new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
316 }
317
318
319
320
321
322
323
324
325
326
327
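/**
 * Constructor.
 *
 * @param conf Configuration object
 * @param exec executor used to run the parallel check/repair work items
 * @throws MasterNotRunningException if the master is not running
 * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
 */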
328 public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
329 ZooKeeperConnectionException, IOException, ClassNotFoundException {
330 super(conf);
331 errors = getErrorReporter(getConf());
332 this.executor = exec;
333 }
334
335
336
337
338
339
340
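/**
 * This method maintains an exclusive lock for this hbck instance by creating a lock
 * file under the HBase temp directory.
 *
 * @return the output stream of the newly created lock file, or null if the lock
 *         could not be obtained (another instance appears to be running)
 */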
341 private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
342 long start = EnvironmentEdgeManager.currentTime();
343 try {
344 FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
345 FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
346 HConstants.DATA_FILE_UMASK_KEY);
347 Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
348 fs.mkdirs(tmpDir);
349 HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
350 final FSDataOutputStream out = FSUtils.create(fs, HBCK_LOCK_PATH, defaultPerms, false);
351 out.writeBytes(InetAddress.getLocalHost().toString());
352 out.flush();
353 return out;
354 } catch(RemoteException e) {
355 if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
356 return null;
357 } else {
358 throw e;
359 }
360 } finally {
361 long duration = EnvironmentEdgeManager.currentTime() - start;
362 if (duration > 30000) {
363 LOG.warn("Took " + duration + " milliseconds to obtain lock");
364
365 return null;
366 }
367 }
368 }
369
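/** Delete the hbck lock file, but only if this instance created it. */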
370 private void unlockHbck() {
371 if(hbckLockCleanup.compareAndSet(true, false)){
372 IOUtils.closeStream(hbckOutFd);
373 try{
374 FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
375 } catch(IOException ioe) {
376 LOG.warn("Failed to delete " + HBCK_LOCK_PATH);
377 LOG.debug(ioe);
378 }
379 }
380 }
381
382
383
384
385
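/**
 * To repair region consistency, one must call connect() in order to repair online
 * state. Acquires the hbck lock, registers a shutdown hook that releases it, and
 * opens the cluster connection, the admin interface and the hbase:meta table.
 */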
386 public void connect() throws IOException {
387
388
389 hbckOutFd = checkAndMarkRunningHbck();
390 if (hbckOutFd == null) {
391 setRetCode(-1);
392 LOG.error("Another instance of hbck is running, exiting this instance.[If you are sure" +
393 " no other instance is running, delete the lock file " +
394 HBCK_LOCK_PATH + " and rerun the tool]");
395 throw new IOException("Duplicate hbck - Abort");
396 }
397
398
399 hbckLockCleanup.set(true);
400
401
402
403
404 Runtime.getRuntime().addShutdownHook(new Thread() {
405 @Override
406 public void run() {
407 IOUtils.closeStream(HBaseFsck.this);
408 unlockHbck();
409 }
410 });
411 LOG.debug("Launching hbck");
412
413 connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
414 admin = connection.getAdmin();
415 meta = connection.getTable(TableName.META_TABLE_NAME);
416 status = admin.getClusterStatus();
417 }
418
419
420
421
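/**
 * Print a summary of the cluster status and collect the regions deployed on each
 * live region server.
 */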
422 private void loadDeployedRegions() throws IOException, InterruptedException {
423
424 Collection<ServerName> regionServers = status.getServers();
425 errors.print("Number of live region servers: " + regionServers.size());
426 if (details) {
427 for (ServerName rsinfo: regionServers) {
428 errors.print(" " + rsinfo.getServerName());
429 }
430 }
431
432
433 Collection<ServerName> deadRegionServers = status.getDeadServerNames();
434 errors.print("Number of dead region servers: " + deadRegionServers.size());
435 if (details) {
436 for (ServerName name: deadRegionServers) {
437 errors.print(" " + name);
438 }
439 }
440
441
442 errors.print("Master: " + status.getMaster());
443
444
445 Collection<ServerName> backupMasters = status.getBackupMasters();
446 errors.print("Number of backup masters: " + backupMasters.size());
447 if (details) {
448 for (ServerName name: backupMasters) {
449 errors.print(" " + name);
450 }
451 }
452
453 errors.print("Average load: " + status.getAverageLoad());
454 errors.print("Number of requests: " + status.getRequestsCount());
455 errors.print("Number of regions: " + status.getRegionsCount());
456
457 Map<String, RegionState> rits = status.getRegionsInTransition();
458 errors.print("Number of regions in transition: " + rits.size());
459 if (details) {
460 for (RegionState state: rits.values()) {
461 errors.print(" " + state.toDescriptiveString());
462 }
463 }
464
465
466 processRegionServers(regionServers);
467 }
468
469
470
471
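/** Clear the in-memory state accumulated by a previous pass so a new check starts fresh. */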
472 private void clearState() {
473
474 fixes = 0;
475 regionInfoMap.clear();
476 emptyRegionInfoQualifiers.clear();
477 disabledTables.clear();
478 errors.clear();
479 tablesInfo.clear();
480 orphanHdfsDirs.clear();
481 }
482
483
484
485
486
487
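/**
 * This repair method analyzes hbase data in hdfs and repairs it to satisfy the table
 * integrity rules. HBase doesn't need to be online for this phase to work. It is a
 * no-op unless HDFS checking and at least one of the HDFS fix options are enabled,
 * and it iterates until no more fixes are applied or the configured maximum number
 * of iterations is reached.
 */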
488 public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
489
490 if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
491 || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
492 LOG.info("Loading regioninfos HDFS");
493
494 int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
495 int curIter = 0;
496 do {
497 clearState();
498
499 restoreHdfsIntegrity();
500 curIter++;
501 } while (fixes > 0 && curIter <= maxIterations);
502
503
504
505 if (curIter > 2) {
506 if (curIter == maxIterations) {
507 LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
508 + "Tables integrity may not be fully repaired!");
509 } else {
510 LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
511 }
512 }
513 }
514 }
515
516
517
518
519
520
521
522
523
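/**
 * This repair phase requires the cluster to be online, since it contacts the master
 * and the region servers. It loads region state from hbase:meta, the region servers
 * and (optionally) HDFS, then checks and fixes region consistency and table
 * integrity.
 *
 * @return a negative value on fatal hbase:meta problems, otherwise the number of
 *         inconsistencies found
 */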
524 public int onlineConsistencyRepair() throws IOException, KeeperException,
525 InterruptedException {
526 clearState();
527
528
529 loadDeployedRegions();
530
531 recordMetaRegion();
532
533 if (!checkMetaRegion()) {
534 String errorMsg = "hbase:meta table is not consistent. ";
535 if (shouldFixAssignments()) {
536 errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
537 } else {
538 errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
539 }
540 errors.reportError(errorMsg + " Exiting...");
541 return -2;
542 }
543
544 LOG.info("Loading regionsinfo from the hbase:meta table");
545 boolean success = loadMetaEntries();
546 if (!success) return -1;
547
548
549 reportEmptyMetaCells();
550
551
552 if (shouldFixEmptyMetaCells()) {
553 fixEmptyMetaCells();
554 }
555
556
557 if (!checkMetaOnly) {
558 reportTablesInFlux();
559 }
560
561
562 if (shouldCheckHdfs()) {
563 loadHdfsRegionDirs();
564 loadHdfsRegionInfos();
565 }
566
567
568 loadDisabledTables();
569
570
571 fixOrphanTables();
572
573
574 checkAndFixConsistency();
575
576
577 checkIntegrity();
578 return errors.getErrorList().size();
579 }
580
581
582
583
584
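/**
 * Run the full online check/repair sequence: offline HDFS integrity repair, online
 * consistency repair (with the balancer temporarily disabled), optional region
 * boundary checks, lingering reference file repair, table lock and table znode
 * checks, and finally the summary report.
 *
 * @return the summary code from the error reporter
 */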
585 public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException {
586
587 errors.print("Version: " + status.getHBaseVersion());
588 offlineHdfsIntegrityRepair();
589
590
591 boolean oldBalancer = admin.setBalancerRunning(false, true);
592 try {
593 onlineConsistencyRepair();
594 }
595 finally {
596 admin.setBalancerRunning(oldBalancer, false);
597 }
598
599 if (checkRegionBoundaries) {
600 checkRegionBoundaries();
601 }
602
603 offlineReferenceFileRepair();
604
605 checkAndFixTableLocks();
606
607
608 checkAndFixOrphanedTableZNodes();
609
610
611 unlockHbck();
612
613
614 printTableSummary(tablesInfo);
615 return errors.summarize();
616 }
617
618 public static byte[] keyOnly (byte[] b) {
619 if (b == null)
620 return b;
621 int rowlength = Bytes.toShort(b, 0);
622 byte[] result = new byte[rowlength];
623 System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
624 return result;
625 }
626
627 @Override
628 public void close() throws IOException {
629 IOUtils.cleanup(null, admin, meta, connection);
630 }
631
632 private static class RegionBoundariesInformation {
633 public byte [] regionName;
634 public byte [] metaFirstKey;
635 public byte [] metaLastKey;
636 public byte [] storesFirstKey;
637 public byte [] storesLastKey;
638 @Override
639 public String toString () {
640 return "regionName=" + Bytes.toStringBinary(regionName) +
641 "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
642 "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
643 "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
644 "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
645 }
646 }
647
648 public void checkRegionBoundaries() {
649 try {
650 ByteArrayComparator comparator = new ByteArrayComparator();
651 List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), connection, false);
652 final RegionBoundariesInformation currentRegionBoundariesInformation =
653 new RegionBoundariesInformation();
654 Path hbaseRoot = FSUtils.getRootDir(getConf());
655 for (HRegionInfo regionInfo : regions) {
656 Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
657 currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
658
659
660 Path path = new Path(tableDir, regionInfo.getEncodedName());
661 FileSystem fs = path.getFileSystem(getConf());
662 FileStatus[] files = fs.listStatus(path);
663
664 byte[] storeFirstKey = null;
665 byte[] storeLastKey = null;
666 for (FileStatus file : files) {
667 String fileName = file.getPath().toString();
668 fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
669 if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
670 FileStatus[] storeFiles = fs.listStatus(file.getPath());
671
672 for (FileStatus storeFile : storeFiles) {
673 HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
674 getConf()), getConf());
675 if ((reader.getFirstKey() != null)
676 && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
677 reader.getFirstKey()) > 0))) {
678 storeFirstKey = reader.getFirstKey();
679 }
680 if ((reader.getLastKey() != null)
681 && ((storeLastKey == null) || (comparator.compare(storeLastKey,
682 reader.getLastKey())) < 0)) {
683 storeLastKey = reader.getLastKey();
684 }
685 reader.close();
686 }
687 }
688 }
689 currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
690 currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
691 currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
692 currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
693 if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
694 currentRegionBoundariesInformation.metaFirstKey = null;
695 if (currentRegionBoundariesInformation.metaLastKey.length == 0)
696 currentRegionBoundariesInformation.metaLastKey = null;
697
698
699
700
701
702
703 boolean valid = true;
704
705 if ((currentRegionBoundariesInformation.storesFirstKey != null)
706 && (currentRegionBoundariesInformation.metaFirstKey != null)) {
707 valid = valid
708 && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
709 currentRegionBoundariesInformation.metaFirstKey) >= 0;
710 }
711
712 if ((currentRegionBoundariesInformation.storesLastKey != null)
713 && (currentRegionBoundariesInformation.metaLastKey != null)) {
714 valid = valid
715 && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
716 currentRegionBoundariesInformation.metaLastKey) < 0;
717 }
718 if (!valid) {
719 errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
720 tablesInfo.get(regionInfo.getTable()));
721 LOG.warn("Region's boundaries not alligned between stores and META for:");
722 LOG.warn(currentRegionBoundariesInformation);
723 }
724 }
725 } catch (IOException e) {
726 LOG.error(e);
727 }
728 }
729
730
731
732
733 private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
734 for (HbckInfo hi : orphanHdfsDirs) {
735 LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
736 adoptHdfsOrphan(hi);
737 }
738 }
739
740
741
742
743
744
745
746
747
748
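/**
 * Orphaned regions are regions without a .regioninfo file in them. This method
 * "adopts" such a directory: it derives the region's key range from the hfiles in
 * its column family dirs, creates a fresh region covering that range, and merges
 * the orphan's data into it. Directories that contain no data are sidelined
 * instead.
 */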
749 @SuppressWarnings("deprecation")
750 private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
751 Path p = hi.getHdfsRegionDir();
752 FileSystem fs = p.getFileSystem(getConf());
753 FileStatus[] dirs = fs.listStatus(p);
754 if (dirs == null) {
755 LOG.warn("Attempt to adopt ophan hdfs region skipped becuase no files present in " +
756 p + ". This dir could probably be deleted.");
757 return ;
758 }
759
760 TableName tableName = hi.getTableName();
761 TableInfo tableInfo = tablesInfo.get(tableName);
762 Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
763 HTableDescriptor template = tableInfo.getHTD();
764
765
766 Pair<byte[],byte[]> orphanRegionRange = null;
767 for (FileStatus cf : dirs) {
768 String cfName= cf.getPath().getName();
769
770 if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
771
772 FileStatus[] hfiles = fs.listStatus(cf.getPath());
773 for (FileStatus hfile : hfiles) {
774 byte[] start, end;
775 HFile.Reader hf = null;
776 try {
777 CacheConfig cacheConf = new CacheConfig(getConf());
778 hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
779 hf.loadFileInfo();
780 KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
781 start = startKv.getRow();
782 KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
783 end = endKv.getRow();
784 } catch (IOException ioe) {
785 LOG.warn("Problem reading orphan file " + hfile + ", skipping");
786 continue;
787 } catch (NullPointerException ioe) {
788 LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
789 continue;
790 } finally {
791 if (hf != null) {
792 hf.close();
793 }
794 }
795
796
797 if (orphanRegionRange == null) {
798
799 orphanRegionRange = new Pair<byte[], byte[]>(start, end);
800 } else {
801
802
803
804 if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
805 orphanRegionRange.setFirst(start);
806 }
807 if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
808 orphanRegionRange.setSecond(end);
809 }
810 }
811 }
812 }
813 if (orphanRegionRange == null) {
814 LOG.warn("No data in dir " + p + ", sidelining data");
815 fixes++;
816 sidelineRegionDir(fs, hi);
817 return;
818 }
819 LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
820 Bytes.toString(orphanRegionRange.getSecond()) + ")");
821
822
823 HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond());
824 LOG.info("Creating new region : " + hri);
825 HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
826 Path target = region.getRegionFileSystem().getRegionDir();
827
828
829 mergeRegionDirs(target, hi);
830 fixes++;
831 }
832
833
834
835
836
837
838
839
840
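/**
 * Scan the region directories on HDFS and repair table integrity problems
 * (orphan regions, holes, overlaps) according to the enabled fix options.
 *
 * @return the number of errors remaining after the repairs
 */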
841 private int restoreHdfsIntegrity() throws IOException, InterruptedException {
842
843 LOG.info("Loading HBase regioninfo from HDFS...");
844 loadHdfsRegionDirs();
845
846 int errs = errors.getErrorList().size();
847
848 tablesInfo = loadHdfsRegionInfos();
849 checkHdfsIntegrity(false, false);
850
851 if (errors.getErrorList().size() == errs) {
852 LOG.info("No integrity errors. We are done with this phase. Glorious.");
853 return 0;
854 }
855
856 if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
857 adoptHdfsOrphans(orphanHdfsDirs);
858
859 }
860
861
862 if (shouldFixHdfsHoles()) {
863 clearState();
864 loadHdfsRegionDirs();
865 tablesInfo = loadHdfsRegionInfos();
866 tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
867 }
868
869
870 if (shouldFixHdfsOverlaps()) {
871
872 clearState();
873 loadHdfsRegionDirs();
874 tablesInfo = loadHdfsRegionInfos();
875 tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
876 }
877
878 return errors.getErrorList().size();
879 }
880
881
882
883
884
885
886
887
888
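/**
 * Scan all the store file names to find any lingering reference files, i.e.
 * references that point to files which no longer exist. When the fix option is
 * enabled, such references are sidelined under the hbck sideline directory while
 * preserving their original layout.
 */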
889 private void offlineReferenceFileRepair() throws IOException {
890 Configuration conf = getConf();
891 Path hbaseRoot = FSUtils.getRootDir(conf);
892 FileSystem fs = hbaseRoot.getFileSystem(conf);
893 Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
894 for (Path path: allFiles.values()) {
895 boolean isReference = false;
896 try {
897 isReference = StoreFileInfo.isReference(path);
898 } catch (Throwable t) {
899
900
901
902
903 }
904 if (!isReference) continue;
905
906 Path referredToFile = StoreFileInfo.getReferredToFile(path);
907 if (fs.exists(referredToFile)) continue;
908
909
910 errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
911 "Found lingering reference file " + path);
912 if (!shouldFixReferenceFiles()) continue;
913
914
915 boolean success = false;
916 String pathStr = path.toString();
917
918
919
920
921
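// Walk five path separators back from the reference file name so that the part
// kept below the sideline dir preserves the original namespace/table/region/family
// layout of the file.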
922 int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
923 for (int i = 0; index > 0 && i < 5; i++) {
924 index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
925 }
926 if (index > 0) {
927 Path rootDir = getSidelineDir();
928 Path dst = new Path(rootDir, pathStr.substring(index + 1));
929 fs.mkdirs(dst.getParent());
930 LOG.info("Trying to sildeline reference file "
931 + path + " to " + dst);
932 setShouldRerun();
933
934 success = fs.rename(path, dst);
935 }
936 if (!success) {
937 LOG.error("Failed to sideline reference file " + path);
938 }
939 }
940 }
941
942
943
944
945 private void reportEmptyMetaCells() {
946 errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
947 emptyRegionInfoQualifiers.size());
948 if (details) {
949 for (Result r: emptyRegionInfoQualifiers) {
950 errors.print(" " + r);
951 }
952 }
953 }
954
955
956
957
958 private void reportTablesInFlux() {
959 AtomicInteger numSkipped = new AtomicInteger(0);
960 HTableDescriptor[] allTables = getTables(numSkipped);
961 errors.print("Number of Tables: " + allTables.length);
962 if (details) {
963 if (numSkipped.get() > 0) {
964 errors.detail("Number of Tables in flux: " + numSkipped.get());
965 }
966 for (HTableDescriptor td : allTables) {
967 errors.detail(" Table: " + td.getTableName() + "\t" +
968 (td.isReadOnly() ? "ro" : "rw") + "\t" +
969 (td.isMetaRegion() ? "META" : " ") + "\t" +
970 " families: " + td.getFamilies().size());
971 }
972 }
973 }
974
975 public ErrorReporter getErrors() {
976 return errors;
977 }
978
979
980
981
982
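/**
 * Read the .regioninfo file from the region's HDFS directory and cache the resulting
 * HRegionInfo on the given HbckInfo entry.
 */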
983 private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
984 Path regionDir = hbi.getHdfsRegionDir();
985 if (regionDir == null) {
986 LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
987 return;
988 }
989
990 if (hbi.hdfsEntry.hri != null) {
991
992 return;
993 }
994
995 FileSystem fs = FileSystem.get(getConf());
996 HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
997 LOG.debug("HRegionInfo read: " + hri.toString());
998 hbi.hdfsEntry.hri = hri;
999 }
1000
1001
1002
1003
1004
1005 public static class RegionRepairException extends IOException {
1006 private static final long serialVersionUID = 1L;
1007 final IOException ioe;
1008 public RegionRepairException(String s, IOException ioe) {
1009 super(s);
1010 this.ioe = ioe;
1011 }
1012 }
1013
1014
1015
1016
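/**
 * Populate hbi.hdfsEntry for every known region by reading the .regioninfo files in
 * parallel, then group the regions into per-table TableInfo structures, recording
 * any table directory whose .tableinfo file cannot be read as an orphan table.
 */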
1017 private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1018 throws IOException, InterruptedException {
1019 tablesInfo.clear();
1020
1021 Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1022
1023
1024 List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
1025 List<Future<Void>> hbiFutures;
1026
1027 for (HbckInfo hbi : hbckInfos) {
1028 WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1029 hbis.add(work);
1030 }
1031
1032
1033 hbiFutures = executor.invokeAll(hbis);
1034
1035 for(int i=0; i<hbiFutures.size(); i++) {
1036 WorkItemHdfsRegionInfo work = hbis.get(i);
1037 Future<Void> f = hbiFutures.get(i);
1038 try {
1039 f.get();
1040 } catch(ExecutionException e) {
1041 LOG.warn("Failed to read .regioninfo file for region " +
1042 work.hbi.getRegionNameAsString(), e.getCause());
1043 }
1044 }
1045
1046 Path hbaseRoot = FSUtils.getRootDir(getConf());
1047 FileSystem fs = hbaseRoot.getFileSystem(getConf());
1048
1049 for (HbckInfo hbi: hbckInfos) {
1050
1051 if (hbi.getHdfsHRI() == null) {
1052
1053 continue;
1054 }
1055
1056
1057
1058 TableName tableName = hbi.getTableName();
1059 if (tableName == null) {
1060
1061 LOG.warn("tableName was null for: " + hbi);
1062 continue;
1063 }
1064
1065 TableInfo modTInfo = tablesInfo.get(tableName);
1066 if (modTInfo == null) {
1067
1068 modTInfo = new TableInfo(tableName);
1069 tablesInfo.put(tableName, modTInfo);
1070 try {
1071 HTableDescriptor htd =
1072 FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1073 modTInfo.htds.add(htd);
1074 } catch (IOException ioe) {
1075 if (!orphanTableDirs.containsKey(tableName)) {
1076 LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1077
1078 errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1079 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1080 Set<String> columns = new HashSet<String>();
1081 orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1082 }
1083 }
1084 }
1085 if (!hbi.isSkipChecks()) {
1086 modTInfo.addRegionInfo(hbi);
1087 }
1088 }
1089
1090 loadTableInfosForTablesWithNoRegion();
1091
1092 return tablesInfo;
1093 }
1094
1095
1096
1097
1098
1099
1100
1101
1102 private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1103 Path regionDir = hbi.getHdfsRegionDir();
1104 FileSystem fs = regionDir.getFileSystem(getConf());
1105 FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1106 for (FileStatus subdir : subDirs) {
1107 String columnfamily = subdir.getPath().getName();
1108 columns.add(columnfamily);
1109 }
1110 return columns;
1111 }
1112
1113
1114
1115
1116
1117
1118
1119
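/**
 * Fabricate a .tableinfo file for the given table from the supplied set of column
 * family names.
 *
 * @return false if no column families are known, true once the descriptor is written
 */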
1120 private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1121 Set<String> columns) throws IOException {
if (columns == null || columns.isEmpty()) return false;
HTableDescriptor htd = new HTableDescriptor(tableName);
for (String columnfamily : columns) {
htd.addFamily(new HColumnDescriptor(columnfamily));
1126 }
1127 fstd.createTableDescriptor(htd, true);
1128 return true;
1129 }
1130
1131
1132
1133
1134
1135 public void fixEmptyMetaCells() throws IOException {
1136 if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1137 LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1138 for (Result region : emptyRegionInfoQualifiers) {
1139 deleteMetaRegion(region.getRow());
1140 errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1141 }
1142 emptyRegionInfoQualifiers.clear();
1143 }
1144 }
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
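/**
 * Fix tables whose directory is missing its .tableinfo file: restore the descriptor
 * from a cached HTableDescriptor when one is available, otherwise fabricate a
 * default descriptor from the column families found on HDFS.
 */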
1155 public void fixOrphanTables() throws IOException {
1156 if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1157
1158 List<TableName> tmpList = new ArrayList<TableName>();
1159 tmpList.addAll(orphanTableDirs.keySet());
1160 HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1161 Iterator<Entry<TableName, Set<String>>> iter =
1162 orphanTableDirs.entrySet().iterator();
1163 int j = 0;
1164 int numFailedCase = 0;
1165 FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1166 while (iter.hasNext()) {
1167 Entry<TableName, Set<String>> entry =
1168 iter.next();
1169 TableName tableName = entry.getKey();
1170 LOG.info("Trying to fix orphan table error: " + tableName);
1171 if (j < htds.length) {
1172 if (tableName.equals(htds[j].getTableName())) {
1173 HTableDescriptor htd = htds[j];
1174 LOG.info("fixing orphan table: " + tableName + " from cache");
1175 fstd.createTableDescriptor(htd, true);
1176 j++;
1177 iter.remove();
1178 }
1179 } else {
1180 if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1181 LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1182 LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
1183 iter.remove();
1184 } else {
1185 LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1186 numFailedCase++;
1187 }
1188 }
1189 fixes++;
1190 }
1191
1192 if (orphanTableDirs.isEmpty()) {
1193
1194
1195 setShouldRerun();
1196 LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1197 } else if (numFailedCase > 0) {
1198 LOG.error("Failed to fix " + numFailedCase
1199 + " OrphanTables with default .tableinfo files");
1200 }
1201
1202 }
1203
1204 orphanTableDirs.clear();
1205
1206 }
1207
1208
1209
1210
1211
1212
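/**
 * Create a fresh, empty hbase:meta region on HDFS, with info family caching disabled
 * while it is being written.
 */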
1213 private HRegion createNewMeta() throws IOException {
1214 Path rootdir = FSUtils.getRootDir(getConf());
1215 Configuration c = getConf();
1216 HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1217 HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1218 MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1219 HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor);
1220 MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1221 return meta;
1222 }
1223
1224
1225
1226
1227
1228
1229
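/**
 * Generate a Put of the .regioninfo for every region known to be good.
 *
 * @return the list of puts, or null if some table still has integrity problems
 *         (a start key covered by more or fewer than exactly one region)
 */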
1230 private ArrayList<Put> generatePuts(
1231 SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1232 ArrayList<Put> puts = new ArrayList<Put>();
1233 boolean hasProblems = false;
1234 for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1235 TableName name = e.getKey();
1236
1237
1238 if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1239 continue;
1240 }
1241
1242 TableInfo ti = e.getValue();
1243 for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1244 .entrySet()) {
1245 Collection<HbckInfo> his = spl.getValue();
1246 int sz = his.size();
1247 if (sz != 1) {
1248
1249 LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1250 + " had " + sz + " regions instead of exactly 1." );
1251 hasProblems = true;
1252 continue;
1253 }
1254
1255
1256 HbckInfo hi = his.iterator().next();
1257 HRegionInfo hri = hi.getHdfsHRI();
1258 Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1259 puts.add(p);
1260 }
1261 }
1262 return hasProblems ? null : puts;
1263 }
1264
1265
1266
1267
1268 private void suggestFixes(
1269 SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1270 for (TableInfo tInfo : tablesInfo.values()) {
1271 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1272 tInfo.checkRegionChain(handler);
1273 }
1274 }
1275
1276
1277
1278
1279
1280
1281
1282
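/**
 * Rebuild hbase:meta from the region information found on HDFS: repair table
 * integrity first, sideline the existing hbase:meta, create a new one, and
 * repopulate it from the .regioninfo files.
 *
 * @return true on success, false if the HDFS state could not be made consistent
 */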
1283 public boolean rebuildMeta(boolean fix) throws IOException,
1284 InterruptedException {
1285
1286
1287
1288
1289
1290 LOG.info("Loading HBase regioninfo from HDFS...");
1291 loadHdfsRegionDirs();
1292
1293 int errs = errors.getErrorList().size();
1294 tablesInfo = loadHdfsRegionInfos();
1295 checkHdfsIntegrity(false, false);
1296
1297
1298 if (errors.getErrorList().size() != errs) {
1299
1300 while(true) {
1301 fixes = 0;
1302 suggestFixes(tablesInfo);
1303 errors.clear();
1304 loadHdfsRegionInfos();
1305 checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1306
1307 int errCount = errors.getErrorList().size();
1308
1309 if (fixes == 0) {
1310 if (errCount > 0) {
1311 return false;
1312 } else {
1313 break;
1314 }
1315 }
1316 }
1317 }
1318
1319
1320 LOG.info("HDFS regioninfo's seems good. Sidelining old hbase:meta");
1321 Path backupDir = sidelineOldMeta();
1322
1323 LOG.info("Creating new hbase:meta");
1324 HRegion meta = createNewMeta();
1325
1326
1327 List<Put> puts = generatePuts(tablesInfo);
1328 if (puts == null) {
1329 LOG.fatal("Problem encountered when creating new hbase:meta entries. " +
1330 "You may need to restore the previously sidelined hbase:meta");
1331 return false;
1332 }
1333 meta.batchMutate(puts.toArray(new Put[puts.size()]));
1334 HRegion.closeHRegion(meta);
1335 LOG.info("Success! hbase:meta table rebuilt.");
1336 LOG.info("Old hbase:meta is moved into " + backupDir);
1337 return true;
1338 }
1339
1340 private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1341 boolean fixOverlaps) throws IOException {
1342 LOG.info("Checking HBase region split map from HDFS data...");
1343 for (TableInfo tInfo : tablesInfo.values()) {
1344 TableIntegrityErrorHandler handler;
1345 if (fixHoles || fixOverlaps) {
1346 handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1347 fixHoles, fixOverlaps);
1348 } else {
1349 handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1350 }
1351 if (!tInfo.checkRegionChain(handler)) {
1352
1353 errors.report("Found inconsistency in table " + tInfo.getName());
1354 }
1355 }
1356 return tablesInfo;
1357 }
1358
1359 private Path getSidelineDir() throws IOException {
1360 if (sidelineDir == null) {
1361 Path hbaseDir = FSUtils.getRootDir(getConf());
1362 Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1363 sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1364 + startMillis);
1365 }
1366 return sidelineDir;
1367 }
1368
1369
1370
1371
1372 Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1373 return sidelineRegionDir(fs, null, hi);
1374 }
1375
1376
1377
1378
1379
1380
1381
1382
1383
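/**
 * Sideline a region directory by moving its contents under the hbck sideline
 * directory (optionally nested under parentDir) and then deleting the original
 * region directory.
 */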
1384 Path sidelineRegionDir(FileSystem fs,
1385 String parentDir, HbckInfo hi) throws IOException {
1386 TableName tableName = hi.getTableName();
1387 Path regionDir = hi.getHdfsRegionDir();
1388
1389 if (!fs.exists(regionDir)) {
1390 LOG.warn("No previous " + regionDir + " exists. Continuing.");
1391 return null;
1392 }
1393
1394 Path rootDir = getSidelineDir();
1395 if (parentDir != null) {
1396 rootDir = new Path(rootDir, parentDir);
1397 }
1398 Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName);
1399 Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1400 fs.mkdirs(sidelineRegionDir);
1401 boolean success = false;
1402 FileStatus[] cfs = fs.listStatus(regionDir);
1403 if (cfs == null) {
1404 LOG.info("Region dir is empty: " + regionDir);
1405 } else {
1406 for (FileStatus cf : cfs) {
1407 Path src = cf.getPath();
1408 Path dst = new Path(sidelineRegionDir, src.getName());
1409 if (fs.isFile(src)) {
1410
1411 success = fs.rename(src, dst);
1412 if (!success) {
1413 String msg = "Unable to rename file " + src + " to " + dst;
1414 LOG.error(msg);
1415 throw new IOException(msg);
1416 }
1417 continue;
1418 }
1419
1420
1421 fs.mkdirs(dst);
1422
1423 LOG.info("Sidelining files from " + src + " into containing region " + dst);
1424
1425
1426
1427
1428 FileStatus[] hfiles = fs.listStatus(src);
1429 if (hfiles != null && hfiles.length > 0) {
1430 for (FileStatus hfile : hfiles) {
1431 success = fs.rename(hfile.getPath(), dst);
1432 if (!success) {
1433 String msg = "Unable to rename file " + src + " to " + dst;
1434 LOG.error(msg);
1435 throw new IOException(msg);
1436 }
1437 }
1438 }
1439 LOG.debug("Sideline directory contents:");
1440 debugLsr(sidelineRegionDir);
1441 }
1442 }
1443
1444 LOG.info("Removing old region dir: " + regionDir);
1445 success = fs.delete(regionDir, true);
1446 if (!success) {
1447 String msg = "Unable to delete dir " + regionDir;
1448 LOG.error(msg);
1449 throw new IOException(msg);
1450 }
1451 return sidelineRegionDir;
1452 }
1453
1454
1455
1456
1457 void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1458 Path backupHbaseDir) throws IOException {
1459 Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1460 if (fs.exists(tableDir)) {
1461 Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName);
1462 fs.mkdirs(backupTableDir.getParent());
1463 boolean success = fs.rename(tableDir, backupTableDir);
1464 if (!success) {
1465 throw new IOException("Failed to move " + tableName + " from "
1466 + tableDir + " to " + backupTableDir);
1467 }
1468 } else {
1469 LOG.info("No previous " + tableName + " exists. Continuing.");
1470 }
1471 }
1472
1473
1474
1475
1476 Path sidelineOldMeta() throws IOException {
1477
1478 Path hbaseDir = FSUtils.getRootDir(getConf());
1479 FileSystem fs = hbaseDir.getFileSystem(getConf());
1480 Path backupDir = getSidelineDir();
1481 fs.mkdirs(backupDir);
1482
1483 try {
1484 sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1485 } catch (IOException e) {
1486 LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore "
1487 + "try to rename hbase:meta in " + backupDir.getName() + " to "
1488 + hbaseDir.getName() + ".", e);
1489 throw e;
1490 }
1491 return backupDir;
1492 }
1493
1494
1495
1496
1497
1498
1499 private void loadDisabledTables()
1500 throws ZooKeeperConnectionException, IOException {
1501 HConnectionManager.execute(new HConnectable<Void>(getConf()) {
1502 @Override
1503 public Void connect(HConnection connection) throws IOException {
1504 ZooKeeperWatcher zkw = createZooKeeperWatcher();
1505 try {
1506 for (TableName tableName :
1507 ZKTableStateClientSideReader.getDisabledOrDisablingTables(zkw)) {
1508 disabledTables.add(tableName);
1509 }
1510 } catch (KeeperException ke) {
1511 throw new IOException(ke);
1512 } catch (InterruptedException e) {
1513 throw new InterruptedIOException();
1514 } finally {
1515 zkw.close();
1516 }
1517 return null;
1518 }
1519 });
1520 }
1521
1522
1523
1524
1525 private boolean isTableDisabled(HRegionInfo regionInfo) {
1526 return disabledTables.contains(regionInfo.getTable());
1527 }
1528
1529
1530
1531
1532
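/**
 * Scan HDFS for the table directories relevant to this run (or just hbase:meta when
 * only meta is being checked) and report, and optionally recreate, a missing
 * hbase.version file.
 */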
1533 public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1534 Path rootDir = FSUtils.getRootDir(getConf());
1535 FileSystem fs = rootDir.getFileSystem(getConf());
1536
1537
1538 List<FileStatus> tableDirs = Lists.newArrayList();
1539
1540 boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1541
1542 List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1543 for (Path path : paths) {
1544 TableName tableName = FSUtils.getTableName(path);
1545 if ((!checkMetaOnly &&
1546 isTableIncluded(tableName)) ||
1547 tableName.equals(TableName.META_TABLE_NAME)) {
1548 tableDirs.add(fs.getFileStatus(path));
1549 }
1550 }
1551
1552
1553 if (!foundVersionFile) {
1554 errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1555 "Version file does not exist in root dir " + rootDir);
1556 if (shouldFixVersionFile()) {
1557 LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1558 + " file.");
1559 setShouldRerun();
1560 FSUtils.setVersion(fs, rootDir, getConf().getInt(
1561 HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1562 HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1563 HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1564 }
1565 }
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
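/**
 * Record the hbase:meta region's location, as reported by the cluster connection and
 * ZooKeeper, into the region info map.
 *
 * @return false if the meta location or its attributes are null
 */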
1591 private boolean recordMetaRegion() throws IOException {
1592 HRegionLocation metaLocation = connection.locateRegion(
1593 TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW);
1594
1595
1596 if (metaLocation == null || metaLocation.getRegionInfo() == null ||
1597 metaLocation.getHostname() == null) {
1598 errors.reportError(ERROR_CODE.NULL_META_REGION,
1599 "META region or some of its attributes are null.");
1600 return false;
1601 }
1602 ServerName sn;
1603 try {
1604 sn = getMetaRegionServerName();
1605 } catch (KeeperException e) {
1606 throw new IOException(e);
1607 }
1608 MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
1609 HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1610 if (hbckInfo == null) {
1611 regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1612 } else {
1613 hbckInfo.metaEntry = m;
1614 }
1615 return true;
1616 }
1617
1618 private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1619 return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1620 @Override
1621 public void abort(String why, Throwable e) {
1622 LOG.error(why, e);
1623 System.exit(1);
1624 }
1625
1626 @Override
1627 public boolean isAborted() {
1628 return false;
1629 }
1630
1631 });
1632 }
1633
1634 private ServerName getMetaRegionServerName()
1635 throws IOException, KeeperException {
1636 ZooKeeperWatcher zkw = createZooKeeperWatcher();
1637 ServerName sn = null;
1638 try {
1639 sn = new MetaTableLocator().getMetaRegionLocation(zkw);
1640 } finally {
1641 zkw.close();
1642 }
1643 return sn;
1644 }
1645
1646
1647
1648
1649
1650
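/**
 * Contacts each regionserver and fetches metadata about regions.
 *
 * @param regionServerList the list of region servers to connect to
 * @throws IOException if a remote or network exception occurs
 */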
1651 void processRegionServers(Collection<ServerName> regionServerList)
1652 throws IOException, InterruptedException {
1653
1654 List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1655 List<Future<Void>> workFutures;
1656
1657
1658 for (ServerName rsinfo: regionServerList) {
1659 workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1660 }
1661
1662 workFutures = executor.invokeAll(workItems);
1663
1664 for(int i=0; i<workFutures.size(); i++) {
1665 WorkItemRegion item = workItems.get(i);
1666 Future<Void> f = workFutures.get(i);
1667 try {
1668 f.get();
1669 } catch(ExecutionException e) {
1670 LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1671 e.getCause());
1672 }
1673 }
1674 }
1675
1676
1677
1678
1679 private void checkAndFixConsistency()
1680 throws IOException, KeeperException, InterruptedException {
1681 for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1682 checkRegionConsistency(e.getKey(), e.getValue());
1683 }
1684 }
1685
1686 private void preCheckPermission() throws IOException, AccessDeniedException {
1687 if (shouldIgnorePreCheckPermission()) {
1688 return;
1689 }
1690
1691 Path hbaseDir = FSUtils.getRootDir(getConf());
1692 FileSystem fs = hbaseDir.getFileSystem(getConf());
1693 UserProvider userProvider = UserProvider.instantiate(getConf());
1694 UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1695 FileStatus[] files = fs.listStatus(hbaseDir);
1696 for (FileStatus file : files) {
1697 try {
1698 FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1699 } catch (AccessDeniedException ace) {
1700 LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
1701 errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1702 + " does not have write perms to " + file.getPath()
1703 + ". Please rerun hbck as hdfs user " + file.getOwner());
1704 throw ace;
1705 }
1706 }
1707 }
1708
1709
1710
1711
1712 private void deleteMetaRegion(HbckInfo hi) throws IOException {
1713 deleteMetaRegion(hi.metaEntry.getRegionName());
1714 }
1715
1716
1717
1718
1719 private void deleteMetaRegion(byte[] metaKey) throws IOException {
1720 Delete d = new Delete(metaKey);
1721 meta.delete(d);
1722 LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1723 }
1724
1725
1726
1727
1728 private void resetSplitParent(HbckInfo hi) throws IOException {
1729 RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
1730 Delete d = new Delete(hi.metaEntry.getRegionName());
1731 d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1732 d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1733 mutations.add(d);
1734
1735 HRegionInfo hri = new HRegionInfo(hi.metaEntry);
1736 hri.setOffline(false);
1737 hri.setSplit(false);
1738 Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1739 mutations.add(p);
1740
1741 meta.mutateRow(mutations);
1742 LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
1743 }
1744
1745
1746
1747
1748
1749
1750
1751
1752
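/**
 * Permanently offline a region using the master's offline method, falling back to
 * unassign when the master does not support offline (in which case the HMaster
 * should be restarted after the repairs).
 */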
1753 private void offline(byte[] regionName) throws IOException {
1754 String regionString = Bytes.toStringBinary(regionName);
1755 if (!rsSupportsOffline) {
1756 LOG.warn("Using unassign region " + regionString
1757 + " instead of using offline method, you should"
1758 + " restart HMaster after these repairs");
1759 admin.unassign(regionName, true);
1760 return;
1761 }
1762
1763
1764 try {
1765 LOG.info("Offlining region " + regionString);
1766 admin.offline(regionName);
1767 } catch (IOException ioe) {
1768 String notFoundMsg = "java.lang.NoSuchMethodException: " +
1769 "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1770 if (ioe.getMessage().contains(notFoundMsg)) {
1771 LOG.warn("Using unassign region " + regionString
1772 + " instead of using offline method, you should"
1773 + " restart HMaster after these repairs");
1774 rsSupportsOffline = false;
1775 admin.unassign(regionName, true);
1776 return;
1777 }
1778 throw ioe;
1779 }
1780 }
1781
1782 private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
1783 for (OnlineEntry rse : hi.deployedEntries) {
1784 LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
1785 try {
1786 HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
1787 offline(rse.hri.getRegionName());
1788 } catch (IOException ioe) {
1789 LOG.warn("Got exception when attempting to offline region "
1790 + Bytes.toString(rse.hri.getRegionName()), ioe);
1791 }
1792 }
1793 }
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
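/**
 * Attempts to undeploy a region: if the region has no hbase:meta or HDFS entry, every
 * known deployment is closed; otherwise the hosting server is looked up in hbase:meta
 * and the region is closed there.
 */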
1807 private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
1808 if (hi.metaEntry == null && hi.hdfsEntry == null) {
1809 undeployRegions(hi);
1810 return;
1811 }
1812
1813
1814 Get get = new Get(hi.getRegionName());
1815 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1816 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1817 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1818 Result r = meta.get(get);
1819 ServerName serverName = HRegionInfo.getServerName(r);
1820 if (serverName == null) {
1821 errors.reportError("Unable to close region "
1822 + hi.getRegionNameAsString() + " because meta does not "
1823 + "have handle to reach it.");
1824 return;
1825 }
1826
1827 HRegionInfo hri = HRegionInfo.getHRegionInfo(r);
1828 if (hri == null) {
1829 LOG.warn("Unable to close region " + hi.getRegionNameAsString()
1830 + " because hbase:meta had invalid or missing "
1831 + HConstants.CATALOG_FAMILY_STR + ":"
1832 + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
1833 + " qualifier value.");
1834 return;
1835 }
1836
1837
1838 HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
1839 }
1840
1841 private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
1842 KeeperException, InterruptedException {
1843
1844 if (shouldFixAssignments()) {
1845 errors.print(msg);
1846 undeployRegions(hbi);
1847 setShouldRerun();
1848 HRegionInfo hri = hbi.getHdfsHRI();
1849 if (hri == null) {
1850 hri = hbi.metaEntry;
1851 }
1852 HBaseFsckRepair.fixUnassigned(admin, hri);
1853 HBaseFsckRepair.waitUntilAssigned(admin, hri);
1854 }
1855 }
1856
1857
1858
1859
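/**
 * Check a single region for consistency between hbase:meta, HDFS and its deployment
 * on region servers, and apply whichever of the enabled fixes matches the detected
 * case.
 */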
1860 private void checkRegionConsistency(final String key, final HbckInfo hbi)
1861 throws IOException, KeeperException, InterruptedException {
1862 String descriptiveName = hbi.toString();
1863
1864 boolean inMeta = hbi.metaEntry != null;
1865
1866 boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
1867 boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
1868 boolean isDeployed = !hbi.deployedOn.isEmpty();
1869 boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
1870 boolean deploymentMatchesMeta =
1871 hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
1872 hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
1873 boolean splitParent =
1874 (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
1875 boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
1876 boolean recentlyModified = inHdfs &&
1877 hbi.getModTime() + timelag > System.currentTimeMillis();
1878
1879
1880 if (hbi.containsOnlyHdfsEdits()) {
1881 return;
1882 }
1883 if (hbi.isSkipChecks()) return;
1884 if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
1885 return;
1886 } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
1887 LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
1888 "tabled that is not deployed");
1889 return;
1890 } else if (recentlyModified) {
1891 LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
1892 return;
1893 }
1894
1895 else if (!inMeta && !inHdfs && !isDeployed) {
1896
1897 assert false : "Entry for region with no data";
1898 } else if (!inMeta && !inHdfs && isDeployed) {
1899 errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
1900 + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
1901 "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1902 if (shouldFixAssignments()) {
1903 undeployRegions(hbi);
1904 }
1905
1906 } else if (!inMeta && inHdfs && !isDeployed) {
1907 if (hbi.isMerged()) {
1908
1909
1910 hbi.setSkipChecks(true);
1911 LOG.info("Region " + descriptiveName
1912 + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
1913 return;
1914 }
1915 errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
1916 + descriptiveName + " on HDFS, but not listed in hbase:meta " +
1917 "or deployed on any region server");
1918
1919 if (shouldFixMeta()) {
1920 if (!hbi.isHdfsRegioninfoPresent()) {
1921 LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
1922 + " in table integrity repair phase if -fixHdfsOrphans was" +
1923 " used.");
1924 return;
1925 }
1926
1927 HRegionInfo hri = hbi.getHdfsHRI();
1928 TableInfo tableInfo = tablesInfo.get(hri.getTable());
1929 if (tableInfo.regionsFromMeta.isEmpty()) {
1930 for (HbckInfo h : regionInfoMap.values()) {
1931 if (hri.getTable().equals(h.getTableName())) {
1932 if (h.metaEntry != null) tableInfo.regionsFromMeta
1933 .add((HRegionInfo) h.metaEntry);
1934 }
1935 }
1936 Collections.sort(tableInfo.regionsFromMeta);
1937 }
1938 for (HRegionInfo region : tableInfo.regionsFromMeta) {
1939 if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
1940 && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
1941 hri.getEndKey()) >= 0)
1942 && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
1943 if(region.isSplit() || region.isOffline()) continue;
1944 Path regionDir = hbi.getHdfsRegionDir();
1945 FileSystem fs = regionDir.getFileSystem(getConf());
1946 List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
1947 for (Path familyDir : familyDirs) {
1948 List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
1949 for (Path referenceFilePath : referenceFilePaths) {
1950 Path parentRegionDir =
1951 StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
1952 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
1953 LOG.warn(hri + " start and stop keys are in the range of " + region
1954 + ". The region might not be cleaned up from hdfs when region " + region
1955 + " split failed. Hence deleting from hdfs.");
1956 HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
1957 regionDir.getParent(), hri);
1958 return;
1959 }
1960 }
1961 }
1962 }
1963 }
1964
1965 LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
1966 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1967
1968 tryAssignmentRepair(hbi, "Trying to reassign region...");
1969 }
1970
1971 } else if (!inMeta && inHdfs && isDeployed) {
1972 errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
1973 + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1974 debugLsr(hbi.getHdfsRegionDir());
1975 if (shouldFixMeta()) {
1976 if (!hbi.isHdfsRegioninfoPresent()) {
1977 LOG.error("This should have been repaired in table integrity repair phase");
1978 return;
1979 }
1980
1981 LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
1982 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1983
1984 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1985 }
1986
1987
1988 } else if (inMeta && inHdfs && !isDeployed && splitParent) {
1989
1990
1991 if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
1992
1993 HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
1994 HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
1995 if (infoA != null && infoB != null) {
1996
1997 hbi.setSkipChecks(true);
1998 return;
1999 }
2000 }
2001 errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2002 + descriptiveName + " is a split parent in META, in HDFS, "
2003 + "and not deployed on any region server. This could be transient.");
2004 if (shouldFixSplitParents()) {
2005 setShouldRerun();
2006 resetSplitParent(hbi);
2007 }
2008 } else if (inMeta && !inHdfs && !isDeployed) {
2009 errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2010 + descriptiveName + " found in META, but not in HDFS "
2011 + "or deployed on any region server.");
2012 if (shouldFixMeta()) {
2013 deleteMetaRegion(hbi);
2014 }
2015 } else if (inMeta && !inHdfs && isDeployed) {
2016 errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2017 + " found in META, but not in HDFS, " +
2018 "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2019
2020
2021
2022 if (shouldFixAssignments()) {
2023 errors.print("Trying to fix unassigned region...");
2024 undeployRegions(hbi);
2025 }
2026 if (shouldFixMeta()) {
2027
2028 deleteMetaRegion(hbi);
2029 }
2030 } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2031 errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2032 + " not deployed on any region server.");
2033 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2034 } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2035 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2036 "Region " + descriptiveName + " should not be deployed according " +
2037 "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2038 if (shouldFixAssignments()) {
2039 errors.print("Trying to close the region " + descriptiveName);
2040 setShouldRerun();
2041 HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2042 }
2043 } else if (inMeta && inHdfs && isMultiplyDeployed) {
2044 errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2045 + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2046 + " but is multiply assigned to region servers " +
2047 Joiner.on(", ").join(hbi.deployedOn));
2048
2049 if (shouldFixAssignments()) {
2050 errors.print("Trying to fix assignment error...");
2051 setShouldRerun();
2052 HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2053 }
2054 } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2055 errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2056 + descriptiveName + " listed in hbase:meta on region server " +
2057 hbi.metaEntry.regionServer + " but found on region server " +
2058 hbi.deployedOn.get(0));
2059
2060 if (shouldFixAssignments()) {
2061 errors.print("Trying to fix assignment error...");
2062 setShouldRerun();
2063 HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2064 HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2065 }
2066 } else {
2067 errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2068 " is in an unforeseen state:" +
2069 " inMeta=" + inMeta +
2070 " inHdfs=" + inHdfs +
2071 " isDeployed=" + isDeployed +
2072 " isMultiplyDeployed=" + isMultiplyDeployed +
2073 " deploymentMatchesMeta=" + deploymentMatchesMeta +
2074 " shouldBeDeployed=" + shouldBeDeployed);
2075 }
2076 }
2077
2078
2079
2080
2081
2082
2083
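/**
 * Checks table integrity: goes over all regions collected in regionInfoMap, groups them
 * by table, and checks each table's region chain for holes, overlaps and other problems.
 */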
2084 SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2085 tablesInfo = new TreeMap<TableName, TableInfo>();
2086 LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2087 for (HbckInfo hbi : regionInfoMap.values()) {
2088
2089 if (hbi.metaEntry == null) {
2090
2091 Path p = hbi.getHdfsRegionDir();
2092 if (p == null) {
2093 errors.report("No regioninfo in Meta or HDFS. " + hbi);
2094 }
2095
2096
2097 continue;
2098 }
2099 if (hbi.metaEntry.regionServer == null) {
2100 errors.detail("Skipping region because no region server: " + hbi);
2101 continue;
2102 }
2103 if (hbi.metaEntry.isOffline()) {
2104 errors.detail("Skipping region because it is offline: " + hbi);
2105 continue;
2106 }
2107 if (hbi.containsOnlyHdfsEdits()) {
2108 errors.detail("Skipping region because it only contains edits: " + hbi);
2109 continue;
2110 }
2111
2112
2113
2114
2115
2116
2117 if (hbi.deployedOn.size() == 0) continue;
2118
2119
2120 TableName tableName = hbi.metaEntry.getTable();
2121 TableInfo modTInfo = tablesInfo.get(tableName);
2122 if (modTInfo == null) {
2123 modTInfo = new TableInfo(tableName);
2124 }
2125 for (ServerName server : hbi.deployedOn) {
2126 modTInfo.addServer(server);
2127 }
2128
2129 if (!hbi.isSkipChecks()) {
2130 modTInfo.addRegionInfo(hbi);
2131 }
2132
2133 tablesInfo.put(tableName, modTInfo);
2134 }
2135
2136 loadTableInfosForTablesWithNoRegion();
2137
2138 for (TableInfo tInfo : tablesInfo.values()) {
2139 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2140 if (!tInfo.checkRegionChain(handler)) {
2141 errors.report("Found inconsistency in table " + tInfo.getName());
2142 }
2143 }
2144 return tablesInfo;
2145 }
2146
2147
2148
2149
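/**
 * Loads table descriptors for tables that have a descriptor on disk but for which no
 * regions were collected, so they still appear in tablesInfo and the integrity report.
 */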
2150 private void loadTableInfosForTablesWithNoRegion() throws IOException {
2151 Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2152 for (HTableDescriptor htd : allTables.values()) {
2153 if (checkMetaOnly && !htd.isMetaTable()) {
2154 continue;
2155 }
2156
2157 TableName tableName = htd.getTableName();
2158 if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2159 TableInfo tableInfo = new TableInfo(tableName);
2160 tableInfo.htds.add(htd);
2161 tablesInfo.put(htd.getTableName(), tableInfo);
2162 }
2163 }
2164 }
2165
2166
2167
2168
2169
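/**
 * Moves the store files of the contained region into the target region directory and then
 * sidelines what is left of the contained region dir. Returns the number of files moved.
 */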
2170 public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2171 int fileMoves = 0;
2172 String thread = Thread.currentThread().getName();
2173 LOG.debug("[" + thread + "] Contained region dir after close and pause");
2174 debugLsr(contained.getHdfsRegionDir());
2175
2176
2177 FileSystem fs = targetRegionDir.getFileSystem(getConf());
2178 FileStatus[] dirs = null;
2179 try {
2180 dirs = fs.listStatus(contained.getHdfsRegionDir());
2181 } catch (FileNotFoundException fnfe) {
2182
2183
2184 if (!fs.exists(contained.getHdfsRegionDir())) {
2185 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2186 + " is missing. Assuming already sidelined or moved.");
2187 } else {
2188 sidelineRegionDir(fs, contained);
2189 }
2190 return fileMoves;
2191 }
2192
2193 if (dirs == null) {
2194 if (!fs.exists(contained.getHdfsRegionDir())) {
2195 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2196 + " already sidelined.");
2197 } else {
2198 sidelineRegionDir(fs, contained);
2199 }
2200 return fileMoves;
2201 }
2202
2203 for (FileStatus cf : dirs) {
2204 Path src = cf.getPath();
2205 Path dst = new Path(targetRegionDir, src.getName());
2206
2207 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2208
2209 continue;
2210 }
2211
2212 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2213
2214 continue;
2215 }
2216
2217 LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2218
2219
2220
2221
2222 for (FileStatus hfile : fs.listStatus(src)) {
2223 boolean success = fs.rename(hfile.getPath(), dst);
2224 if (success) {
2225 fileMoves++;
2226 }
2227 }
2228 LOG.debug("[" + thread + "] Sideline directory contents:");
2229 debugLsr(targetRegionDir);
2230 }
2231
2232
2233 sidelineRegionDir(fs, contained);
2234 LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2235 getSidelineDir());
2236 debugLsr(contained.getHdfsRegionDir());
2237
2238 return fileMoves;
2239 }
2240
2241
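/** Work item that hands one group of overlapping regions to the integrity error handler. */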
2242 static class WorkItemOverlapMerge implements Callable<Void> {
2243 private TableIntegrityErrorHandler handler;
2244 Collection<HbckInfo> overlapgroup;
2245
2246 WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2247 this.handler = handler;
2248 this.overlapgroup = overlapgroup;
2249 }
2250
2251 @Override
2252 public Void call() throws Exception {
2253 handler.handleOverlapGroup(overlapgroup);
2254 return null;
2255 }
2256 };
2257
2258
2259
2260
2261
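/**
 * Maintains information about one table: the regions collected for it, the servers it is
 * deployed on, its table descriptors, and any overlap or ordering problems found.
 */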
2262 public class TableInfo {
2263 TableName tableName;
2264 TreeSet<ServerName> deployedOn;
2265
2266
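// Regions whose start key sorts after their end key ("backwards" regions).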
2267 final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2268
2269
2270 final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2271
2272
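// Key-space coverage calculator used to detect holes and overlaps.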
2273 final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2274
2275
2276 final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2277
2278
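// Key ranges covered by more than one region (overlap groups).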
2279 final Multimap<byte[], HbckInfo> overlapGroups =
2280 TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2281
2282
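// Region info entries for this table as read from hbase:meta.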
2283 final List<HRegionInfo> regionsFromMeta = new ArrayList<HRegionInfo>();
2284
2285 TableInfo(TableName name) {
2286 this.tableName = name;
2287 deployedOn = new TreeSet<ServerName>();
2288 }
2289
2290
2291
2292
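/** @return the table descriptor if exactly one was found, otherwise null after logging an error. */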
2293 private HTableDescriptor getHTD() {
2294 if (htds.size() == 1) {
2295 return (HTableDescriptor)htds.toArray()[0];
2296 } else {
2297 LOG.error("None/Multiple table descriptors found for table '"
2298 + tableName + "' regions: " + htds);
2299 }
2300 return null;
2301 }
2302
2303 public void addRegionInfo(HbckInfo hir) {
2304 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2305
2306 sc.add(hir);
2307 return;
2308 }
2309
2310
2311 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2312 errors.reportError(
2313 ERROR_CODE.REGION_CYCLE,
2314 String.format("The endkey for this region comes before the "
2315 + "startkey, startkey=%s, endkey=%s",
2316 Bytes.toStringBinary(hir.getStartKey()),
2317 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2318 backwards.add(hir);
2319 return;
2320 }
2321
2322
2323 sc.add(hir);
2324 }
2325
2326 public void addServer(ServerName server) {
2327 this.deployedOn.add(server);
2328 }
2329
2330 public TableName getName() {
2331 return tableName;
2332 }
2333
2334 public int getNumRegions() {
2335 return sc.getStarts().size() + backwards.size();
2336 }
2337
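/** Handler that only reports table integrity errors without attempting any repair. */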
2338 private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2339 ErrorReporter errors;
2340
2341 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2342 this.errors = errors;
2343 setTableInfo(ti);
2344 }
2345
2346 @Override
2347 public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2348 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2349 "First region should start with an empty key. You need to "
2350 + "create a new region and regioninfo in HDFS to plug the hole.",
2351 getTableInfo(), hi);
2352 }
2353
2354 @Override
2355 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2356 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2357 "Last region should end with an empty key. You need to "
2358 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2359 }
2360
2361 @Override
2362 public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2363 errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2364 "Region has the same start and end key.", getTableInfo(), hi);
2365 }
2366
2367 @Override
2368 public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2369 byte[] key = r1.getStartKey();
2370
2371 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2372 "Multiple regions have the same startkey: "
2373 + Bytes.toStringBinary(key), getTableInfo(), r1);
2374 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2375 "Multiple regions have the same startkey: "
2376 + Bytes.toStringBinary(key), getTableInfo(), r2);
2377 }
2378
2379 @Override
2380 public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2381 errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2382 "There is an overlap in the region chain.",
2383 getTableInfo(), hi1, hi2);
2384 }
2385
2386 @Override
2387 public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2388 errors.reportError(
2389 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2390 "There is a hole in the region chain between "
2391 + Bytes.toStringBinary(holeStart) + " and "
2392 + Bytes.toStringBinary(holeStop)
2393 + ". You need to create a new .regioninfo and region "
2394 + "dir in hdfs to plug the hole.");
2395 }
2396 };
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
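/**
 * Handler that repairs table integrity errors in HDFS: it plugs holes by creating new empty
 * regions and, when enabled, merges or sidelines overlapping regions.
 */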
2410 private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2411 Configuration conf;
2412
2413 boolean fixOverlaps = true;
2414
2415 HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2416 boolean fixHoles, boolean fixOverlaps) {
2417 super(ti, errors);
2418 this.conf = conf;
2419 this.fixOverlaps = fixOverlaps;
2420
2421 }
2422
2423
2424
2425
2426
2427
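/**
 * The first region of the table does not start with an empty key. Create a new empty region
 * covering the missing leading range to plug the hole.
 */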
2428 @Override
2429 public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2430 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2431 "First region should start with an empty key. Creating a new " +
2432 "region and regioninfo in HDFS to plug the hole.",
2433 getTableInfo(), next);
2434 HTableDescriptor htd = getTableInfo().getHTD();
2435
2436 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2437 HConstants.EMPTY_START_ROW, next.getStartKey());
2438
2439
2440 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2441 LOG.info("Table region start key was not empty. Created new empty region: "
2442 + newRegion + " " + region);
2443 fixes++;
2444 }
2445
2446 @Override
2447 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2448 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2449 "Last region should end with an empty key. Creating a new "
2450 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2451 HTableDescriptor htd = getTableInfo().getHTD();
2452
2453 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2454 HConstants.EMPTY_START_ROW);
2455
2456 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2457 LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
2458 + " " + region);
2459 fixes++;
2460 }
2461
2462
2463
2464
2465
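/** Plug a hole in the region chain by creating a new empty region covering the missing range. */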
2466 @Override
2467 public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2468 errors.reportError(
2469 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2470 "There is a hole in the region chain between "
2471 + Bytes.toStringBinary(holeStartKey) + " and "
2472 + Bytes.toStringBinary(holeStopKey)
2473 + ". Creating a new regioninfo and region "
2474 + "dir in hdfs to plug the hole.");
2475 HTableDescriptor htd = getTableInfo().getHTD();
2476 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2477 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2478 LOG.info("Plugged hole by creating new empty region: " + newRegion + " " + region);
2479 fixes++;
2480 }
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
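/**
 * Handle a group of overlapping regions: merge them into one container region unless the
 * group is larger than maxMerge, in which case the biggest ranges may be sidelined first.
 */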
2493 @Override
2494 public void handleOverlapGroup(Collection<HbckInfo> overlap)
2495 throws IOException {
2496 Preconditions.checkNotNull(overlap);
2497 Preconditions.checkArgument(overlap.size() > 0);
2498
2499 if (!this.fixOverlaps) {
2500 LOG.warn("Not attempting to repair overlaps.");
2501 return;
2502 }
2503
2504 if (overlap.size() > maxMerge) {
2505 LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2506 "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2507 if (sidelineBigOverlaps) {
2508
2509 sidelineBigOverlaps(overlap);
2510 }
2511 return;
2512 }
2513
2514 mergeOverlaps(overlap);
2515 }
2516
2517 void mergeOverlaps(Collection<HbckInfo> overlap)
2518 throws IOException {
2519 String thread = Thread.currentThread().getName();
2520 LOG.info("== [" + thread + "] Merging regions into one region: "
2521 + Joiner.on(",").join(overlap));
2522
2523 Pair<byte[], byte[]> range = null;
2524 for (HbckInfo hi : overlap) {
2525 if (range == null) {
2526 range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2527 } else {
2528 if (RegionSplitCalculator.BYTES_COMPARATOR
2529 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2530 range.setFirst(hi.getStartKey());
2531 }
2532 if (RegionSplitCalculator.BYTES_COMPARATOR
2533 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2534 range.setSecond(hi.getEndKey());
2535 }
2536 }
2537
2538 LOG.debug("[" + thread + "] Closing region before moving data around: " + hi);
2539 LOG.debug("[" + thread + "] Contained region dir before close");
2540 debugLsr(hi.getHdfsRegionDir());
2541 try {
2542 LOG.info("[" + thread + "] Closing region: " + hi);
2543 closeRegion(hi);
2544 } catch (IOException ioe) {
2545 LOG.warn("[" + thread + "] Was unable to close region " + hi
2546 + ". Just continuing... ", ioe);
2547 } catch (InterruptedException e) {
2548 LOG.warn("[" + thread + "] Was unable to close region " + hi
2549 + ". Just continuing... ", e);
2550 }
2551
2552 try {
2553 LOG.info("[" + thread + "] Offlining region: " + hi);
2554 offline(hi.getRegionName());
2555 } catch (IOException ioe) {
2556 LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2557 + ". Just continuing... ", ioe);
2558 }
2559 }
2560
2561
2562 HTableDescriptor htd = getTableInfo().getHTD();
2563
2564 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2565 range.getSecond());
2566 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2567 LOG.info("[" + thread + "] Created new empty container region: " +
2568 newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2569 debugLsr(region.getRegionFileSystem().getRegionDir());
2570
2571
2572 boolean didFix = false;
2573 Path target = region.getRegionFileSystem().getRegionDir();
2574 for (HbckInfo contained : overlap) {
2575 LOG.info("[" + thread + "] Merging " + contained + " into " + target);
2576 int merges = mergeRegionDirs(target, contained);
2577 if (merges > 0) {
2578 didFix = true;
2579 }
2580 }
2581 if (didFix) {
2582 fixes++;
2583 }
2584 }
2585
2586
2587
2588
2589
2590
2591
2592
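/**
 * Sideline the largest ranges of a big overlap group so that at most maxMerge regions are
 * left to merge. Sidelined regions have to be bulk loaded back in afterwards.
 */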
2593 void sidelineBigOverlaps(
2594 Collection<HbckInfo> bigOverlap) throws IOException {
2595 int overlapsToSideline = bigOverlap.size() - maxMerge;
2596 if (overlapsToSideline > maxOverlapsToSideline) {
2597 overlapsToSideline = maxOverlapsToSideline;
2598 }
2599 List<HbckInfo> regionsToSideline =
2600 RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2601 FileSystem fs = FileSystem.get(conf);
2602 for (HbckInfo regionToSideline: regionsToSideline) {
2603 try {
2604 LOG.info("Closing region: " + regionToSideline);
2605 closeRegion(regionToSideline);
2606 } catch (IOException ioe) {
2607 LOG.warn("Was unable to close region " + regionToSideline
2608 + ". Just continuing... ", ioe);
2609 } catch (InterruptedException e) {
2610 LOG.warn("Was unable to close region " + regionToSideline
2611 + ". Just continuing... ", e);
2612 }
2613
2614 try {
2615 LOG.info("Offlining region: " + regionToSideline);
2616 offline(regionToSideline.getRegionName());
2617 } catch (IOException ioe) {
2618 LOG.warn("Unable to offline region from master: " + regionToSideline
2619 + ". Just continuing... ", ioe);
2620 }
2621
2622 LOG.info("Before sidelining big overlapped region: " + regionToSideline.toString());
2623 Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2624 if (sidelineRegionDir != null) {
2625 sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2626 LOG.info("After sidelining big overlapped region: "
2627 + regionToSideline.getRegionNameAsString()
2628 + " to " + sidelineRegionDir.toString());
2629 fixes++;
2630 }
2631 }
2632 }
2633 }
2634
2635
2636
2637
2638
2639
2640
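/**
 * Checks the region chain of this table for holes, overlaps, duplicate start keys and
 * degenerate regions. Returns true if no new errors were reported for the table.
 */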
2641 public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2642
2643
2644
2645 if (disabledTables.contains(this.tableName)) {
2646 return true;
2647 }
2648 int originalErrorsCount = errors.getErrorList().size();
2649 Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2650 SortedSet<byte[]> splits = sc.getSplits();
2651
2652 byte[] prevKey = null;
2653 byte[] problemKey = null;
2654
2655 if (splits.size() == 0) {
2656
2657 handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
2658 }
2659
2660 for (byte[] key : splits) {
2661 Collection<HbckInfo> ranges = regions.get(key);
2662 if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2663 for (HbckInfo rng : ranges) {
2664 handler.handleRegionStartKeyNotEmpty(rng);
2665 }
2666 }
2667
2668
2669 for (HbckInfo rng : ranges) {
2670
2671 byte[] endKey = rng.getEndKey();
2672 endKey = (endKey.length == 0) ? null : endKey;
2673 if (Bytes.equals(rng.getStartKey(), endKey)) {
2674 handler.handleDegenerateRegion(rng);
2675 }
2676 }
2677
2678 if (ranges.size() == 1) {
2679
2680 if (problemKey != null) {
2681 LOG.warn("Reached end of problem group: " + Bytes.toStringBinary(key));
2682 }
2683 problemKey = null;
2684 } else if (ranges.size() > 1) {
2685
2686
2687 if (problemKey == null) {
2688
2689 LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2690 problemKey = key;
2691 }
2692 overlapGroups.putAll(problemKey, ranges);
2693
2694
2695 ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2696
2697 for (HbckInfo r1 : ranges) {
2698 subRange.remove(r1);
2699 for (HbckInfo r2 : subRange) {
2700 if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey()) == 0) {
2701 handler.handleDuplicateStartKeys(r1, r2);
2702 } else {
2703
2704 handler.handleOverlapInRegionChain(r1, r2);
2705 }
2706 }
2707 }
2708
2709 } else if (ranges.size() == 0) {
2710 if (problemKey != null) {
2711 LOG.warn("Reached end of problem group: " + Bytes.toStringBinary(key));
2712 }
2713 problemKey = null;
2714
2715 byte[] holeStopKey = sc.getSplits().higher(key);
2716
2717 if (holeStopKey != null) {
2718
2719 handler.handleHoleInRegionChain(key, holeStopKey);
2720 }
2721 }
2722 prevKey = key;
2723 }
2724
2725
2726
2727 if (prevKey != null) {
2728 handler.handleRegionEndKeyNotEmpty(prevKey);
2729 }
2730
2731
2732 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
2733 LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to" +
2734 " false to run serially.");
2735 boolean ok = handleOverlapsParallel(handler, prevKey);
2736 if (!ok) {
2737 return false;
2738 }
2739 } else {
2740 LOG.info("Handling overlap merges serially. Set hbasefsck.overlap.merge.parallel to" +
2741 " true to run in parallel.");
2742 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2743 handler.handleOverlapGroup(overlap);
2744 }
2745 }
2746
2747 if (details) {
2748
2749 errors.print("---- Table '" + this.tableName
2750 + "': region split map");
2751 dump(splits, regions);
2752 errors.print("---- Table '" + this.tableName
2753 + "': overlap groups");
2754 dumpOverlapProblems(overlapGroups);
2755 errors.print("There are " + overlapGroups.keySet().size()
2756 + " overlap groups with " + overlapGroups.size()
2757 + " overlapping regions");
2758 }
2759 if (!sidelinedRegions.isEmpty()) {
2760 LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2761 errors.print("---- Table '" + this.tableName
2762 + "': sidelined big overlapped regions");
2763 dumpSidelinedRegions(sidelinedRegions);
2764 }
2765 return errors.getErrorList().size() == originalErrorsCount;
2766 }
2767
2768 private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
2769 throws IOException {
2770
2771
2772 List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
2773 List<Future<Void>> rets;
2774 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2775
2776 merges.add(new WorkItemOverlapMerge(overlap, handler));
2777 }
2778 try {
2779 rets = executor.invokeAll(merges);
2780 } catch (InterruptedException e) {
2781 LOG.error("Overlap merges were interrupted", e);
2782 return false;
2783 }
2784 for (int i = 0; i < merges.size(); i++) {
2785 WorkItemOverlapMerge work = merges.get(i);
2786 Future<Void> f = rets.get(i);
2787 try {
2788 f.get();
2789 } catch(ExecutionException e) {
2790 LOG.warn("Failed to merge overlap group " + work, e.getCause());
2791 } catch (InterruptedException e) {
2792 LOG.error("Waiting for overlap merges was interrupted", e);
2793 return false;
2794 }
2795 }
2796 return true;
2797 }
2798
2799
2800
2801
2802
2803
2804
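/** Dumps each split key together with the regions covering it, for -details output. */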
2805 void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2806
2807 StringBuilder sb = new StringBuilder();
2808 for (byte[] k : splits) {
2809 sb.setLength(0);
2810 sb.append(Bytes.toStringBinary(k) + ":\t");
2811 for (HbckInfo r : regions.get(k)) {
2812 sb.append("[ " + r.toString() + ", "
2813 + Bytes.toStringBinary(r.getEndKey()) + "]\t");
2814 }
2815 errors.print(sb.toString());
2816 }
2817 }
2818 }
2819
2820 public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2821
2822
2823 for (byte[] k : regions.keySet()) {
2824 errors.print(Bytes.toStringBinary(k) + ":");
2825 for (HbckInfo r : regions.get(k)) {
2826 errors.print("[ " + r.toString() + ", "
2827 + Bytes.toStringBinary(r.getEndKey()) + "]");
2828 }
2829 errors.print("----");
2830 }
2831 }
2832
2833 public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2834 for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
2835 TableName tableName = entry.getValue().getTableName();
2836 Path path = entry.getKey();
2837 errors.print("This sidelined region dir should be bulk loaded: "
2838 + path.toString());
2839 errors.print("Bulk load command looks like: "
2840 + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
2841 + path.toUri().getPath() + " "+ tableName);
2842 }
2843 }
2844
2845 public Multimap<byte[], HbckInfo> getOverlapGroups(
2846 TableName table) {
2847 TableInfo ti = tablesInfo.get(table);
2848 return ti.overlapGroups;
2849 }
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
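/**
 * Returns the descriptors of user-space tables whose first region has not been modified
 * within the configured timelag; more recently modified tables are counted in numSkipped.
 */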
2860 HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2861 List<TableName> tableNames = new ArrayList<TableName>();
2862 long now = System.currentTimeMillis();
2863
2864 for (HbckInfo hbi : regionInfoMap.values()) {
2865 MetaEntry info = hbi.metaEntry;
2866
2867
2868
2869 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2870 if (info.modTime + timelag < now) {
2871 tableNames.add(info.getTable());
2872 } else {
2873 numSkipped.incrementAndGet();
2874 }
2875 }
2876 }
2877 return getHTableDescriptors(tableNames);
2878 }
2879
2880 HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
2881 HTableDescriptor[] htd = new HTableDescriptor[0];
2882 Admin admin = null;
2883 try {
2884 LOG.info("getHTableDescriptors: tableNames=" + tableNames);
2885 admin = new HBaseAdmin(getConf());
2886 htd = admin.getTableDescriptorsByTableName(tableNames);
2887 } catch (IOException e) {
2888 LOG.debug("Exception getting table descriptors", e);
2889 } finally {
2890 if (admin != null) {
2891 try {
2892 admin.close();
2893 } catch (IOException e) {
2894 LOG.debug("Exception closing HBaseAdmin", e);
2895 }
2896 }
2897 }
2898 return htd;
2899 }
2900
2901
2902
2903
2904
2905
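/** Returns the HbckInfo for the given encoded region name, creating an empty one if needed. */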
2906 private synchronized HbckInfo getOrCreateInfo(String name) {
2907 HbckInfo hbi = regionInfoMap.get(name);
2908 if (hbi == null) {
2909 hbi = new HbckInfo(null);
2910 regionInfoMap.put(name, hbi);
2911 }
2912 return hbi;
2913 }
2914
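/** Checks for expired table locks in ZooKeeper and, if -fixTableLocks is set, releases them. */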
2915 private void checkAndFixTableLocks() throws IOException {
2916 ZooKeeperWatcher zkw = createZooKeeperWatcher();
2917
2918 try {
2919 TableLockChecker checker = new TableLockChecker(zkw, errors);
2920 checker.checkTableLocks();
2921
2922 if (this.fixTableLocks) {
2923 checker.fixExpiredTableLocks();
2924 }
2925 } finally {
2926 zkw.close();
2927 }
2928 }
2929
2930
2931
2932
2933
2934
2935
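/**
 * Checks for table znodes left in the ENABLING state with no matching table in hbase:meta;
 * if -fixOrphanedTableZnodes is set, such znodes are marked DISABLED.
 */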
2936 private void checkAndFixOrphanedTableZNodes()
2937 throws IOException, KeeperException, InterruptedException {
2938 ZooKeeperWatcher zkw = createZooKeeperWatcher();
2939
2940 try {
2941 Set<TableName> enablingTables = ZKTableStateClientSideReader.getEnablingTables(zkw);
2942 String msg;
2943 TableInfo tableInfo;
2944
2945 for (TableName tableName : enablingTables) {
2946
2947 tableInfo = tablesInfo.get(tableName);
2948 if (tableInfo != null) {
2949
2950 continue;
2951 }
2952
2953 msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
2954 LOG.warn(msg);
2955 orphanedTableZNodes.add(tableName);
2956 errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
2957 }
2958
2959 if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
2960 ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw);
2961
2962 for (TableName tableName : orphanedTableZNodes) {
2963 try {
2964
2965
2966
2967
2968 zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED);
2969 } catch (CoordinatedStateException e) {
2970
2971 LOG.error(
2972 "Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName,
2973 e);
2974 }
2975 }
2976 }
2977 } finally {
2978 zkw.close();
2979 }
2980 }
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
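/**
 * Checks that hbase:meta is deployed on exactly one region server; if it is unassigned or
 * multiply assigned and -fixAssignments is set, tries to repair the assignment. Returns true
 * only when hbase:meta is assigned to a single server.
 */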
2991 boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
2992 List<HbckInfo> metaRegions = Lists.newArrayList();
2993 for (HbckInfo value : regionInfoMap.values()) {
2994 if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
2995 metaRegions.add(value);
2996 }
2997 }
2998
2999
3000
3001 List<ServerName> servers = new ArrayList<ServerName>();
3002 HbckInfo metaHbckInfo = null;
3003 if (!metaRegions.isEmpty()) {
3004 metaHbckInfo = metaRegions.get(0);
3005 servers = metaHbckInfo.deployedOn;
3006 }
3007 if (servers.size() != 1) {
3008 if (servers.size() == 0) {
3009 errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta is not found on any region server.");
3010 if (shouldFixAssignments()) {
3011 errors.print("Trying to fix a problem with hbase:meta...");
3012 setShouldRerun();
3013
3014 HBaseFsckRepair.fixUnassigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
3015 HBaseFsckRepair.waitUntilAssigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
3016 }
3017 } else if (servers.size() > 1) {
3018 errors.reportError(ERROR_CODE.MULTI_META_REGION,
3019 "hbase:meta is found on more than one region server.");
3020 if (shouldFixAssignments()) {
3021 if (metaHbckInfo == null) {
3022 errors.print(
3023 "Unable to fix problem with hbase:meta due to hbase:meta region info missing");
3024 return false;
3025 }
3026 errors.print("Trying to fix a problem with hbase:meta...");
3027 setShouldRerun();
3028
3029 HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3030 }
3031 }
3032
3033 return false;
3034 }
3035
3036 return true;
3037 }
3038
3039
3040
3041
3042
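/**
 * Scans hbase:meta and adds every region found to regionInfoMap, recording split daughters
 * and marking regions that were recently merged.
 */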
3043 boolean loadMetaEntries() throws IOException {
3044 MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
3045 int countRecord = 1;
3046
3047
3048 final Comparator<Cell> comp = new Comparator<Cell>() {
3049 @Override
3050 public int compare(Cell k1, Cell k2) {
3051 return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3052 }
3053 };
3054
3055 @Override
3056 public boolean processRow(Result result) throws IOException {
3057 try {
3058
3059
3060 long ts = Collections.max(result.listCells(), comp).getTimestamp();
3061 Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(result);
3062 if (pair == null || pair.getFirst() == null) {
3063 emptyRegionInfoQualifiers.add(result);
3064 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3065 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3066 return true;
3067 }
3068 ServerName sn = null;
3069 if (pair.getSecond() != null) {
3070 sn = pair.getSecond();
3071 }
3072 HRegionInfo hri = pair.getFirst();
3073 if (!(isTableIncluded(hri.getTable())
3074 || hri.isMetaRegion())) {
3075 return true;
3076 }
3077 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
3078 MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3079 HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3080 if (previous == null) {
3081 regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3082 } else if (previous.metaEntry == null) {
3083 previous.metaEntry = m;
3084 } else {
3085 throw new IOException("Two entries in hbase:meta are the same " + previous);
3086 }
3087
3088 PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
3089 for (HRegionInfo mergeRegion : new HRegionInfo[] {
3090 mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3091 if (mergeRegion != null) {
3092
3093 HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3094 hbInfo.setMerged(true);
3095 }
3096 }
3097
3098
3099 if (countRecord % 100 == 0) {
3100 errors.progress();
3101 }
3102 countRecord++;
3103 return true;
3104 } catch (RuntimeException e) {
3105 LOG.error("Result=" + result);
3106 throw e;
3107 }
3108 }
3109 };
3110 if (!checkMetaOnly) {
3111
3112 MetaScanner.metaScan(connection, visitor);
3113 }
3114
3115 errors.print("");
3116 return true;
3117 }
3118
3119
3120
3121
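/**
 * A region entry as read from hbase:meta, together with the server it is assigned to and the
 * modification time of its row.
 */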
3122 static class MetaEntry extends HRegionInfo {
3123 ServerName regionServer;
3124 long modTime;
3125 HRegionInfo splitA, splitB;
3126
3127 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3128 this(rinfo, regionServer, modTime, null, null);
3129 }
3130
3131 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3132 HRegionInfo splitA, HRegionInfo splitB) {
3133 super(rinfo);
3134 this.regionServer = regionServer;
3135 this.modTime = modTime;
3136 this.splitA = splitA;
3137 this.splitB = splitB;
3138 }
3139
3140 @Override
3141 public boolean equals(Object o) {
3142 boolean superEq = super.equals(o);
3143 if (!superEq) {
3144 return superEq;
3145 }
3146 if (!(o instanceof MetaEntry)) return false;
3147 MetaEntry me = (MetaEntry) o;
3148 if (!regionServer.equals(me.regionServer)) {
3149 return false;
3150 }
3151 return (modTime == me.modTime);
3152 }
3153
3154 @Override
3155 public int hashCode() {
3156 int hash = Arrays.hashCode(getRegionName());
3157 hash ^= getRegionId();
3158 hash ^= Arrays.hashCode(getStartKey());
3159 hash ^= Arrays.hashCode(getEndKey());
3160 hash ^= Boolean.valueOf(isOffline()).hashCode();
3161 hash ^= getTable().hashCode();
3162 if (regionServer != null) {
3163 hash ^= regionServer.hashCode();
3164 }
3165 hash ^= modTime;
3166 return hash;
3167 }
3168 }
3169
3170
3171
3172
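/** Region information gathered from the region's directory in HDFS. */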
3173 static class HdfsEntry {
3174 HRegionInfo hri;
3175 Path hdfsRegionDir = null;
3176 long hdfsRegionDirModTime = 0;
3177 boolean hdfsRegioninfoFilePresent = false;
3178 boolean hdfsOnlyEdits = false;
3179 }
3180
3181
3182
3183
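/** Region information reported by an online region server. */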
3184 static class OnlineEntry {
3185 HRegionInfo hri;
3186 ServerName hsa;
3187
3188 @Override
3189 public String toString() {
3190 return hsa.toString() + ";" + hri.getRegionNameAsString();
3191 }
3192 }
3193
3194
3195
3196
3197
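/**
 * Maintains all the information collected for one region: its hbase:meta entry, its HDFS
 * entry, and the region servers it is deployed on.
 */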
3198 public static class HbckInfo implements KeyRange {
3199 private MetaEntry metaEntry = null;
3200 private HdfsEntry hdfsEntry = null;
3201 private List<OnlineEntry> deployedEntries = Lists.newArrayList();
3202 private List<ServerName> deployedOn = Lists.newArrayList();
3203 private boolean skipChecks = false;
3204 private boolean isMerged = false;
3205
3206 HbckInfo(MetaEntry metaEntry) {
3207 this.metaEntry = metaEntry;
3208 }
3209
3210 public synchronized void addServer(HRegionInfo hri, ServerName server) {
3211 OnlineEntry rse = new OnlineEntry();
3212 rse.hri = hri;
3213 rse.hsa = server;
3214 this.deployedEntries.add(rse);
3215 this.deployedOn.add(server);
3216 }
3217
3218 @Override
3219 public synchronized String toString() {
3220 StringBuilder sb = new StringBuilder();
3221 sb.append("{ meta => ");
3222 sb.append((metaEntry != null) ? metaEntry.getRegionNameAsString() : "null");
3223 sb.append(", hdfs => " + getHdfsRegionDir());
3224 sb.append(", deployed => " + Joiner.on(", ").join(deployedEntries));
3225 sb.append(" }");
3226 return sb.toString();
3227 }
3228
3229 @Override
3230 public byte[] getStartKey() {
3231 if (this.metaEntry != null) {
3232 return this.metaEntry.getStartKey();
3233 } else if (this.hdfsEntry != null) {
3234 return this.hdfsEntry.hri.getStartKey();
3235 } else {
3236 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3237 return null;
3238 }
3239 }
3240
3241 @Override
3242 public byte[] getEndKey() {
3243 if (this.metaEntry != null) {
3244 return this.metaEntry.getEndKey();
3245 } else if (this.hdfsEntry != null) {
3246 return this.hdfsEntry.hri.getEndKey();
3247 } else {
3248 LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3249 return null;
3250 }
3251 }
3252
3253 public TableName getTableName() {
3254 if (this.metaEntry != null) {
3255 return this.metaEntry.getTable();
3256 } else if (this.hdfsEntry != null) {
3257
3258
3259 Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3260 return FSUtils.getTableName(tableDir);
3261 } else {
3262
3263
3264 return null;
3265 }
3266 }
3267
3268 public String getRegionNameAsString() {
3269 if (metaEntry != null) {
3270 return metaEntry.getRegionNameAsString();
3271 } else if (hdfsEntry != null) {
3272 if (hdfsEntry.hri != null) {
3273 return hdfsEntry.hri.getRegionNameAsString();
3274 }
3275 }
3276 return null;
3277 }
3278
3279 public byte[] getRegionName() {
3280 if (metaEntry != null) {
3281 return metaEntry.getRegionName();
3282 } else if (hdfsEntry != null) {
3283 return hdfsEntry.hri.getRegionName();
3284 } else {
3285 return null;
3286 }
3287 }
3288
3289 Path getHdfsRegionDir() {
3290 if (hdfsEntry == null) {
3291 return null;
3292 }
3293 return hdfsEntry.hdfsRegionDir;
3294 }
3295
3296 boolean containsOnlyHdfsEdits() {
3297 if (hdfsEntry == null) {
3298 return false;
3299 }
3300 return hdfsEntry.hdfsOnlyEdits;
3301 }
3302
3303 boolean isHdfsRegioninfoPresent() {
3304 if (hdfsEntry == null) {
3305 return false;
3306 }
3307 return hdfsEntry.hdfsRegioninfoFilePresent;
3308 }
3309
3310 long getModTime() {
3311 if (hdfsEntry == null) {
3312 return 0;
3313 }
3314 return hdfsEntry.hdfsRegionDirModTime;
3315 }
3316
3317 HRegionInfo getHdfsHRI() {
3318 if (hdfsEntry == null) {
3319 return null;
3320 }
3321 return hdfsEntry.hri;
3322 }
3323
3324 public void setSkipChecks(boolean skipChecks) {
3325 this.skipChecks = skipChecks;
3326 }
3327
3328 public boolean isSkipChecks() {
3329 return skipChecks;
3330 }
3331
3332 public void setMerged(boolean isMerged) {
3333 this.isMerged = isMerged;
3334 }
3335
3336 public boolean isMerged() {
3337 return this.isMerged;
3338 }
3339 }
3340
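/** Orders HbckInfos by table name, then start key, then end key, breaking ties by HDFS region id. */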
3341 final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3342 @Override
3343 public int compare(HbckInfo l, HbckInfo r) {
3344 if (l == r) {
3345
3346 return 0;
3347 }
3348
3349 int tableCompare = l.getTableName().compareTo(r.getTableName());
3350 if (tableCompare != 0) {
3351 return tableCompare;
3352 }
3353
3354 int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3355 l.getStartKey(), r.getStartKey());
3356 if (startComparison != 0) {
3357 return startComparison;
3358 }
3359
3360
3361 byte[] endKey = r.getEndKey();
3362 endKey = (endKey.length == 0) ? null : endKey;
3363 byte[] endKey2 = l.getEndKey();
3364 endKey2 = (endKey2.length == 0) ? null : endKey2;
3365 int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3366 endKey2, endKey);
3367
3368 if (endComparison != 0) {
3369 return endComparison;
3370 }
3371
3372
3373
3374 if (l.hdfsEntry == null && r.hdfsEntry == null) {
3375 return 0;
3376 }
3377 if (l.hdfsEntry == null && r.hdfsEntry != null) {
3378 return 1;
3379 }
3380
3381 if (r.hdfsEntry == null) {
3382 return -1;
3383 }
3384
3385 return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
3386 }
3387 };
3388
3389
3390
3391
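/** Prints a per-table summary: consistency status, region count and deployment servers. */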
3392 private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3393 StringBuilder sb = new StringBuilder();
3394 errors.print("Summary:");
3395 for (TableInfo tInfo : tablesInfo.values()) {
3396 if (errors.tableHasErrors(tInfo)) {
3397 errors.print("Table " + tInfo.getName() + " is inconsistent.");
3398 } else {
3399 errors.print(" " + tInfo.getName() + " is okay.");
3400 }
3401 errors.print(" Number of regions: " + tInfo.getNumRegions());
3402 sb.setLength(0);
3403 sb.append(" Deployed on: ");
3404 for (ServerName server : tInfo.deployedOn) {
3405 sb.append(" " + server.toString());
3406 }
3407 errors.print(sb.toString());
3408 }
3409 }
3410
3411 static ErrorReporter getErrorReporter(
3412 final Configuration conf) throws ClassNotFoundException {
3413 Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3414 return ReflectionUtils.newInstance(reporter, conf);
3415 }
3416
3417 public interface ErrorReporter {
3418 enum ERROR_CODE {
3419 UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3420 NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
3421 MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3422 FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3423 HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3424 ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3425 WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR
3426 }
3427 void clear();
3428 void report(String message);
3429 void reportError(String message);
3430 void reportError(ERROR_CODE errorCode, String message);
3431 void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3432 void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3433 void reportError(
3434 ERROR_CODE errorCode,
3435 String message,
3436 TableInfo table,
3437 HbckInfo info1,
3438 HbckInfo info2
3439 );
3440 int summarize();
3441 void detail(String details);
3442 ArrayList<ERROR_CODE> getErrorList();
3443 void progress();
3444 void print(String message);
3445 void resetErrors();
3446 boolean tableHasErrors(TableInfo table);
3447 }
3448
3449 static class PrintingErrorReporter implements ErrorReporter {
3450 public int errorCount = 0;
3451 private int showProgress;
3452
3453 Set<TableInfo> errorTables = new HashSet<TableInfo>();
3454
3455
3456 private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3457
3458 @Override
3459 public void clear() {
3460 errorTables.clear();
3461 errorList.clear();
3462 errorCount = 0;
3463 }
3464
3465 @Override
3466 public synchronized void reportError(ERROR_CODE errorCode, String message) {
3467 if (errorCode == ERROR_CODE.WRONG_USAGE) {
3468 System.err.println(message);
3469 return;
3470 }
3471
3472 errorList.add(errorCode);
3473 if (!summary) {
3474 System.out.println("ERROR: " + message);
3475 }
3476 errorCount++;
3477 showProgress = 0;
3478 }
3479
3480 @Override
3481 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3482 errorTables.add(table);
3483 reportError(errorCode, message);
3484 }
3485
3486 @Override
3487 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3488 HbckInfo info) {
3489 errorTables.add(table);
3490 String reference = "(region " + info.getRegionNameAsString() + ")";
3491 reportError(errorCode, reference + " " + message);
3492 }
3493
3494 @Override
3495 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3496 HbckInfo info1, HbckInfo info2) {
3497 errorTables.add(table);
3498 String reference = "(regions " + info1.getRegionNameAsString()
3499 + " and " + info2.getRegionNameAsString() + ")";
3500 reportError(errorCode, reference + " " + message);
3501 }
3502
3503 @Override
3504 public synchronized void reportError(String message) {
3505 reportError(ERROR_CODE.UNKNOWN, message);
3506 }
3507
3508
3509
3510
3511
3512
3513 @Override
3514 public synchronized void report(String message) {
3515 if (! summary) {
3516 System.out.println("ERROR: " + message);
3517 }
3518 showProgress = 0;
3519 }
3520
3521 @Override
3522 public synchronized int summarize() {
3523 System.out.println(Integer.toString(errorCount) +
3524 " inconsistencies detected.");
3525 if (errorCount == 0) {
3526 System.out.println("Status: OK");
3527 return 0;
3528 } else {
3529 System.out.println("Status: INCONSISTENT");
3530 return -1;
3531 }
3532 }
3533
3534 @Override
3535 public ArrayList<ERROR_CODE> getErrorList() {
3536 return errorList;
3537 }
3538
3539 @Override
3540 public synchronized void print(String message) {
3541 if (!summary) {
3542 System.out.println(message);
3543 }
3544 }
3545
3546 @Override
3547 public boolean tableHasErrors(TableInfo table) {
3548 return errorTables.contains(table);
3549 }
3550
3551 @Override
3552 public void resetErrors() {
3553 errorCount = 0;
3554 }
3555
3556 @Override
3557 public synchronized void detail(String message) {
3558 if (details) {
3559 System.out.println(message);
3560 }
3561 showProgress = 0;
3562 }
3563
3564 @Override
3565 public synchronized void progress() {
3566 if (showProgress++ == 10) {
3567 if (!summary) {
3568 System.out.print(".");
3569 }
3570 showProgress = 0;
3571 }
3572 }
3573 }
3574
3575
3576
3577
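/** Work item that contacts one region server and records the regions it is serving. */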
3578 static class WorkItemRegion implements Callable<Void> {
3579 private HBaseFsck hbck;
3580 private ServerName rsinfo;
3581 private ErrorReporter errors;
3582 private HConnection connection;
3583
3584 WorkItemRegion(HBaseFsck hbck, ServerName info,
3585 ErrorReporter errors, HConnection connection) {
3586 this.hbck = hbck;
3587 this.rsinfo = info;
3588 this.errors = errors;
3589 this.connection = connection;
3590 }
3591
3592 @Override
3593 public synchronized Void call() throws IOException {
3594 errors.progress();
3595 try {
3596 BlockingInterface server = connection.getAdmin(rsinfo);
3597
3598
3599 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3600 regions = filterRegions(regions);
3601
3602 if (details) {
3603 errors.detail("RegionServer: " + rsinfo.getServerName() +
3604 " number of regions: " + regions.size());
3605 for (HRegionInfo rinfo: regions) {
3606 errors.detail(" " + rinfo.getRegionNameAsString() +
3607 " id: " + rinfo.getRegionId() +
3608 " encoded_name: " + rinfo.getEncodedName() +
3609 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3610 " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3611 }
3612 }
3613
3614
3615 for (HRegionInfo r:regions) {
3616 HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3617 if (!RegionReplicaUtil.isDefaultReplica(r)) hbi.setSkipChecks(true);
3618 hbi.addServer(r, rsinfo);
3619 }
3620 } catch (IOException e) {
3621 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3622 " Unable to fetch region information. " + e);
3623 throw e;
3624 }
3625 return null;
3626 }
3627
3628 private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3629 List<HRegionInfo> ret = Lists.newArrayList();
3630 for (HRegionInfo hri : regions) {
3631 if (hri.isMetaTable() || (!hbck.checkMetaOnly
3632 && hbck.isTableIncluded(hri.getTable()))) {
3633 ret.add(hri);
3634 }
3635 }
3636 return ret;
3637 }
3638 }
3639
3640
3641
3642
3643
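/** Work item that examines the region directories under a single table directory in HDFS. */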
3644 static class WorkItemHdfsDir implements Callable<Void> {
3645 private HBaseFsck hbck;
3646 private FileStatus tableDir;
3647 private ErrorReporter errors;
3648 private FileSystem fs;
3649
3650 WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3651 FileStatus status) {
3652 this.hbck = hbck;
3653 this.fs = fs;
3654 this.tableDir = status;
3655 this.errors = errors;
3656 }
3657
3658 @Override
3659 public synchronized Void call() throws IOException {
3660 try {
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
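/**
 * Work item that loads the .regioninfo file of one region from HDFS, reporting the region as
 * an orphan if the file cannot be read.
 */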
3715 static class WorkItemHdfsRegionInfo implements Callable<Void> {
3716 private HbckInfo hbi;
3717 private HBaseFsck hbck;
3718 private ErrorReporter errors;
3719
3720 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3721 this.hbi = hbi;
3722 this.hbck = hbck;
3723 this.errors = errors;
3724 }
3725
3726 @Override
3727 public synchronized Void call() throws IOException {
3728
3729 if (hbi.getHdfsHRI() == null) {
3730 try {
3731 hbck.loadHdfsRegioninfo(hbi);
3732 } catch (IOException ioe) {
3733 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3734 + hbi.getTableName() + " in hdfs dir "
3735 + hbi.getHdfsRegionDir()
3736 + "! It may be an invalid format or version file. Treating as "
3737 + "an orphaned regiondir.";
3738 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3739 try {
3740 hbck.debugLsr(hbi.getHdfsRegionDir());
3741 } catch (IOException ioe2) {
3742 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3743 throw ioe2;
3744 }
3745 hbck.orphanHdfsDirs.add(hbi);
3746 throw ioe;
3747 }
3748 }
3749 return null;
3750 }
3751 };
3752
3753
3754
3755
3756
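/** Display the full report from fsck, including per-region details. */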
3757 public static void setDisplayFullReport() {
3758 details = true;
3759 }
3760
3761
3762
3763
3764
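/** Print only a summary of the tables and their status, suppressing detailed output. */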
3765 void setSummary() {
3766 summary = true;
3767 }
3768
3769
3770
3771
3772
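/** Check only the state of the hbase:meta table; skip user-space tables. */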
3773 void setCheckMetaOnly() {
3774 checkMetaOnly = true;
3775 }
3776
3777
3778
3779
3780 void setRegionBoundariesCheck() {
3781 checkRegionBoundaries = true;
3782 }
3783
3784
3785
3786
3787
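/** Set table-lock fix mode: delete table locks that have been held longer than the expiration time. */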
3788 public void setFixTableLocks(boolean shouldFix) {
3789 fixTableLocks = shouldFix;
3790 fixAny |= shouldFix;
3791 }
3792
3793
3794
3795
3796
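/** Set orphaned-table-znode fix mode: mark ENABLING znodes with no table in hbase:meta as DISABLED. */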
3797 public void setFixTableZNodes(boolean shouldFix) {
3798 fixTableZNodes = shouldFix;
3799 fixAny |= shouldFix;
3800 }
3801
3802
3803
3804
3805
3806
3807
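/** Mark that a fix was attempted, so the tool knows it should be rerun to verify the result. */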
3808 void setShouldRerun() {
3809 rerun = true;
3810 }
3811
3812 boolean shouldRerun() {
3813 return rerun;
3814 }
3815
3816
3817
3818
3819
3820 public void setFixAssignments(boolean shouldFix) {
3821 fixAssignments = shouldFix;
3822 fixAny |= shouldFix;
3823 }
3824
3825 boolean shouldFixAssignments() {
3826 return fixAssignments;
3827 }
3828
3829 public void setFixMeta(boolean shouldFix) {
3830 fixMeta = shouldFix;
3831 fixAny |= shouldFix;
3832 }
3833
3834 boolean shouldFixMeta() {
3835 return fixMeta;
3836 }
3837
3838 public void setFixEmptyMetaCells(boolean shouldFix) {
3839 fixEmptyMetaCells = shouldFix;
3840 fixAny |= shouldFix;
3841 }
3842
3843 boolean shouldFixEmptyMetaCells() {
3844 return fixEmptyMetaCells;
3845 }
3846
3847 public void setCheckHdfs(boolean checking) {
3848 checkHdfs = checking;
3849 }
3850
3851 boolean shouldCheckHdfs() {
3852 return checkHdfs;
3853 }
3854
3855 public void setFixHdfsHoles(boolean shouldFix) {
3856 fixHdfsHoles = shouldFix;
3857 fixAny |= shouldFix;
3858 }
3859
3860 boolean shouldFixHdfsHoles() {
3861 return fixHdfsHoles;
3862 }
3863
3864 public void setFixTableOrphans(boolean shouldFix) {
3865 fixTableOrphans = shouldFix;
3866 fixAny |= shouldFix;
3867 }
3868
3869 boolean shouldFixTableOrphans() {
3870 return fixTableOrphans;
3871 }
3872
3873 public void setFixHdfsOverlaps(boolean shouldFix) {
3874 fixHdfsOverlaps = shouldFix;
3875 fixAny |= shouldFix;
3876 }
3877
3878 boolean shouldFixHdfsOverlaps() {
3879 return fixHdfsOverlaps;
3880 }
3881
3882 public void setFixHdfsOrphans(boolean shouldFix) {
3883 fixHdfsOrphans = shouldFix;
3884 fixAny |= shouldFix;
3885 }
3886
3887 boolean shouldFixHdfsOrphans() {
3888 return fixHdfsOrphans;
3889 }
3890
3891 public void setFixVersionFile(boolean shouldFix) {
3892 fixVersionFile = shouldFix;
3893 fixAny |= shouldFix;
3894 }
3895
3896 public boolean shouldFixVersionFile() {
3897 return fixVersionFile;
3898 }
3899
3900 public void setSidelineBigOverlaps(boolean sbo) {
3901 this.sidelineBigOverlaps = sbo;
3902 }
3903
3904 public boolean shouldSidelineBigOverlaps() {
3905 return sidelineBigOverlaps;
3906 }
3907
3908 public void setFixSplitParents(boolean shouldFix) {
3909 fixSplitParents = shouldFix;
3910 fixAny |= shouldFix;
3911 }
3912
3913 boolean shouldFixSplitParents() {
3914 return fixSplitParents;
3915 }
3916
3917 public void setFixReferenceFiles(boolean shouldFix) {
3918 fixReferenceFiles = shouldFix;
3919 fixAny |= shouldFix;
3920 }
3921
3922 boolean shouldFixReferenceFiles() {
3923 return fixReferenceFiles;
3924 }
3925
3926 public boolean shouldIgnorePreCheckPermission() {
3927 return !fixAny || ignorePreCheckPermission;
3928 }
3929
3930 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3931 this.ignorePreCheckPermission = ignorePreCheckPermission;
3932 }
3933
3934
3935
3936
3937 public void setMaxMerge(int mm) {
3938 this.maxMerge = mm;
3939 }
3940
3941 public int getMaxMerge() {
3942 return maxMerge;
3943 }
3944
3945 public void setMaxOverlapsToSideline(int mo) {
3946 this.maxOverlapsToSideline = mo;
3947 }
3948
3949 public int getMaxOverlapsToSideline() {
3950 return maxOverlapsToSideline;
3951 }
3952
3953
3954
3955
3956
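/** Returns true if the table should be checked, i.e. no table filter is set or the table was included. */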
3957 boolean isTableIncluded(TableName table) {
3958 return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
3959 }
3960
3961 public void includeTable(TableName table) {
3962 tablesIncluded.add(table);
3963 }
3964
3965 Set<TableName> getIncludedTables() {
3966 return new HashSet<TableName>(tablesIncluded);
3967 }
3968
3969
3970
3971
3972
3973
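/** Restrict checks to regions whose metadata has not been modified within the given number of seconds. */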
3974 public void setTimeLag(long seconds) {
3975 timelag = seconds * 1000;
3976 }
3977
3978
3979
3980
3981
3982 public void setSidelineDir(String sidelineDir) {
3983 this.sidelineDir = new Path(sidelineDir);
3984 }
3985
3986 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
3987 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3988 }
3989
3990 public HFileCorruptionChecker getHFilecorruptionChecker() {
3991 return hfcc;
3992 }
3993
3994 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3995 this.hfcc = hfcc;
3996 }
3997
3998 public void setRetCode(int code) {
3999 this.retcode = code;
4000 }
4001
4002 public int getRetCode() {
4003 return retcode;
4004 }
4005
4006 protected HBaseFsck printUsageAndExit() {
4007 StringWriter sw = new StringWriter(2048);
4008 PrintWriter out = new PrintWriter(sw);
4009 out.println("Usage: fsck [opts] {only tables}");
4010 out.println(" where [opts] are:");
4011 out.println(" -help Display help options (this)");
4012 out.println(" -details Display full report of all regions.");
4013 out.println(" -timelag <timeInSeconds> Process only regions that " +
4014 "have not experienced any metadata updates in the last " +
4015 "<timeInSeconds> seconds.");
4016 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4017 " before checking if the fix worked if run with -fix");
4018 out.println(" -summary Print only summary of the tables and status.");
4019 out.println(" -metaonly Only check the state of the hbase:meta table.");
4020 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4021 out.println(" -boundaries Verify that region boundaries are the same between META and store files.");
4022
4023 out.println("");
4024 out.println(" Metadata Repair options: (expert features, use with caution!)");
4025     out.println("   -fix Try to fix region assignments. This is for backwards compatibility");
4026 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
4027 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
4028 out.println(" -noHdfsChecking Don't load/check region info from HDFS."
4029 + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4030 out.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
4031 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
4032 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4033 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
4034 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
4035 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4036     out.println("   -sidelineBigOverlaps When fixing region overlaps, allow sidelining big overlaps");
4037 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4038 out.println(" -fixSplitParents Try to force offline split parents to be online.");
4039     out.println("   -ignorePreCheckPermission Ignore filesystem permission pre-check");
4040 out.println(" -fixReferenceFiles Try to offline lingering reference store files");
4041 out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
4042 + " (empty REGIONINFO_QUALIFIER rows)");
4043
4044 out.println("");
4045 out.println(" Datafile Repair options: (expert features, use with caution!)");
4046     out.println("   -checkCorruptHFiles Check all HFiles by opening them to make sure they are valid");
4047     out.println("   -sidelineCorruptHFiles Quarantine corrupted HFiles. Implies -checkCorruptHFiles");
4048
4049 out.println("");
4050 out.println(" Metadata Repair shortcuts");
4051 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4052 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
4053 "-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
4054 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4055
4056 out.println("");
4057 out.println(" Table lock options");
4058 out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4059
4060 out.println("");
4061 out.println(" Table Znode options");
4062     out.println("   -fixOrphanedTableZnodes Set table state in ZNode to disabled if table does not exist");
4063
4064 out.flush();
4065 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4066
4067 setRetCode(-2);
4068 return this;
4069 }
4070
4071
4072
4073
4074
4075
4076
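  /**
   * Main program: sets the default filesystem to the one backing the HBase root directory and
   * then runs {@link HBaseFsckTool} through Hadoop's ToolRunner, exiting with its return code.
   */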
4077 public static void main(String[] args) throws Exception {
4078
4079 Configuration conf = HBaseConfiguration.create();
4080 Path hbasedir = FSUtils.getRootDir(conf);
4081 URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4082 FSUtils.setFsDefault(conf, new Path(defaultFs));
4083 int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4084 System.exit(ret);
4085 }
4086
4087
4088
4089
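  /**
   * Hadoop {@link Tool} wrapper around HBaseFsck, so that ToolRunner can parse generic Hadoop
   * options before {@link #run(String[])} hands the remaining arguments to
   * {@link HBaseFsck#exec(ExecutorService, String[])}.
   *
   * A minimal sketch of programmatic use (illustrative only, not part of the original source):
   * <pre>
   *   Configuration conf = HBaseConfiguration.create();
   *   int exitCode = ToolRunner.run(new HBaseFsckTool(conf), new String[] { "-details" });
   * </pre>
   */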
4090 static class HBaseFsckTool extends Configured implements Tool {
4091 HBaseFsckTool(Configuration conf) { super(conf); }
4092 @Override
4093 public int run(String[] args) throws Exception {
4094 HBaseFsck hbck = new HBaseFsck(getConf());
4095 hbck.exec(hbck.executor, args);
4096 hbck.close();
4097 return hbck.getRetCode();
4098 }
4099 };
4100
4101
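  /**
   * Parses the hbck command-line arguments, runs the requested checks and repairs, and, if any
   * fix was applied, sleeps and re-runs a report-only pass so the effect of the fixes can be
   * verified. For example (illustrative only), passing {"-fixAssignments", "MyTable"} limits
   * checking to MyTable and enables assignment repair.
   */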
4102 public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4103 ServiceException, InterruptedException {
4104 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4105
4106 boolean checkCorruptHFiles = false;
4107 boolean sidelineCorruptHFiles = false;
4108
4109
4110 for (int i = 0; i < args.length; i++) {
4111 String cmd = args[i];
4112 if (cmd.equals("-help") || cmd.equals("-h")) {
4113 return printUsageAndExit();
4114 } else if (cmd.equals("-details")) {
4115 setDisplayFullReport();
4116 } else if (cmd.equals("-timelag")) {
4117 if (i == args.length - 1) {
4118 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4119 return printUsageAndExit();
4120 }
4121 try {
4122 long timelag = Long.parseLong(args[i+1]);
4123 setTimeLag(timelag);
4124 } catch (NumberFormatException e) {
4125 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4126 return printUsageAndExit();
4127 }
4128 i++;
4129 } else if (cmd.equals("-sleepBeforeRerun")) {
4130 if (i == args.length - 1) {
4131 errors.reportError(ERROR_CODE.WRONG_USAGE,
4132 "HBaseFsck: -sleepBeforeRerun needs a value.");
4133 return printUsageAndExit();
4134 }
4135 try {
4136 sleepBeforeRerun = Long.parseLong(args[i+1]);
4137 } catch (NumberFormatException e) {
4138 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4139 return printUsageAndExit();
4140 }
4141 i++;
4142 } else if (cmd.equals("-sidelineDir")) {
4143 if (i == args.length - 1) {
4144 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4145 return printUsageAndExit();
4146 }
4147 i++;
4148 setSidelineDir(args[i]);
4149 } else if (cmd.equals("-fix")) {
4150 errors.reportError(ERROR_CODE.WRONG_USAGE,
4151 "This option is deprecated, please use -fixAssignments instead.");
4152 setFixAssignments(true);
4153 } else if (cmd.equals("-fixAssignments")) {
4154 setFixAssignments(true);
4155 } else if (cmd.equals("-fixMeta")) {
4156 setFixMeta(true);
4157 } else if (cmd.equals("-noHdfsChecking")) {
4158 setCheckHdfs(false);
4159 } else if (cmd.equals("-fixHdfsHoles")) {
4160 setFixHdfsHoles(true);
4161 } else if (cmd.equals("-fixHdfsOrphans")) {
4162 setFixHdfsOrphans(true);
4163 } else if (cmd.equals("-fixTableOrphans")) {
4164 setFixTableOrphans(true);
4165 } else if (cmd.equals("-fixHdfsOverlaps")) {
4166 setFixHdfsOverlaps(true);
4167 } else if (cmd.equals("-fixVersionFile")) {
4168 setFixVersionFile(true);
4169 } else if (cmd.equals("-sidelineBigOverlaps")) {
4170 setSidelineBigOverlaps(true);
4171 } else if (cmd.equals("-fixSplitParents")) {
4172 setFixSplitParents(true);
4173 } else if (cmd.equals("-ignorePreCheckPermission")) {
4174 setIgnorePreCheckPermission(true);
4175 } else if (cmd.equals("-checkCorruptHFiles")) {
4176 checkCorruptHFiles = true;
4177 } else if (cmd.equals("-sidelineCorruptHFiles")) {
4178 sidelineCorruptHFiles = true;
4179 } else if (cmd.equals("-fixReferenceFiles")) {
4180 setFixReferenceFiles(true);
4181 } else if (cmd.equals("-fixEmptyMetaCells")) {
4182 setFixEmptyMetaCells(true);
4183 } else if (cmd.equals("-repair")) {
4184
4185
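        // -repair is a shortcut that turns on the full set of repair options below
        // (note that split-parent fixing is deliberately left off).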
4186 setFixHdfsHoles(true);
4187 setFixHdfsOrphans(true);
4188 setFixMeta(true);
4189 setFixAssignments(true);
4190 setFixHdfsOverlaps(true);
4191 setFixVersionFile(true);
4192 setSidelineBigOverlaps(true);
4193 setFixSplitParents(false);
4194 setCheckHdfs(true);
4195 setFixReferenceFiles(true);
4196 setFixTableLocks(true);
4197 setFixTableZNodes(true);
4198 } else if (cmd.equals("-repairHoles")) {
4199
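        // -repairHoles is a narrower shortcut: fix holes and assignments, but leave
        // HDFS orphans and overlaps alone.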
4200 setFixHdfsHoles(true);
4201 setFixHdfsOrphans(false);
4202 setFixMeta(true);
4203 setFixAssignments(true);
4204 setFixHdfsOverlaps(false);
4205 setSidelineBigOverlaps(false);
4206 setFixSplitParents(false);
4207 setCheckHdfs(true);
4208 } else if (cmd.equals("-maxOverlapsToSideline")) {
4209 if (i == args.length - 1) {
4210 errors.reportError(ERROR_CODE.WRONG_USAGE,
4211 "-maxOverlapsToSideline needs a numeric value argument.");
4212 return printUsageAndExit();
4213 }
4214 try {
4215 int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4216 setMaxOverlapsToSideline(maxOverlapsToSideline);
4217 } catch (NumberFormatException e) {
4218 errors.reportError(ERROR_CODE.WRONG_USAGE,
4219 "-maxOverlapsToSideline needs a numeric value argument.");
4220 return printUsageAndExit();
4221 }
4222 i++;
4223 } else if (cmd.equals("-maxMerge")) {
4224 if (i == args.length - 1) {
4225 errors.reportError(ERROR_CODE.WRONG_USAGE,
4226 "-maxMerge needs a numeric value argument.");
4227 return printUsageAndExit();
4228 }
4229 try {
4230 int maxMerge = Integer.parseInt(args[i+1]);
4231 setMaxMerge(maxMerge);
4232 } catch (NumberFormatException e) {
4233 errors.reportError(ERROR_CODE.WRONG_USAGE,
4234 "-maxMerge needs a numeric value argument.");
4235 return printUsageAndExit();
4236 }
4237 i++;
4238 } else if (cmd.equals("-summary")) {
4239 setSummary();
4240 } else if (cmd.equals("-metaonly")) {
4241 setCheckMetaOnly();
4242 } else if (cmd.equals("-boundaries")) {
4243 setRegionBoundariesCheck();
4244 } else if (cmd.equals("-fixTableLocks")) {
4245 setFixTableLocks(true);
4246 } else if (cmd.equals("-fixOrphanedTableZnodes")) {
4247 setFixTableZNodes(true);
4248 } else if (cmd.startsWith("-")) {
4249         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4250 return printUsageAndExit();
4251 } else {
4252 includeTable(TableName.valueOf(cmd));
4253 errors.print("Allow checking/fixes for table: " + cmd);
4254 }
4255 }
4256
4257 errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4258
4259
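    // Verify up front that the current user has the filesystem permissions needed for any
    // requested repairs; bail out immediately if not.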
4260 try {
4261 preCheckPermission();
4262 } catch (AccessDeniedException ace) {
4263 Runtime.getRuntime().exit(-1);
4264 } catch (IOException ioe) {
4265 Runtime.getRuntime().exit(-1);
4266 }
4267
4268
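    // Establish the cluster connection used by the checks below.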
4269 connect();
4270
4271 try {
4272
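      // If HFile checking or quarantining was requested, run it first across the selected
      // tables (or all tables when none were named).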
4273 if (checkCorruptHFiles || sidelineCorruptHFiles) {
4274 LOG.info("Checking all hfiles for corruption");
4275 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4276 setHFileCorruptionChecker(hfcc);
4277 Collection<TableName> tables = getIncludedTables();
4278 Collection<Path> tableDirs = new ArrayList<Path>();
4279 Path rootdir = FSUtils.getRootDir(getConf());
4280 if (tables.size() > 0) {
4281 for (TableName t : tables) {
4282 tableDirs.add(FSUtils.getTableDir(rootdir, t));
4283 }
4284 } else {
4285 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4286 }
4287 hfcc.checkTables(tableDirs);
4288 hfcc.report(errors);
4289 }
4290
4291
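      // Run the online consistency checks (and any enabled fixes) and record the result.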
4292 int code = onlineHbck();
4293 setRetCode(code);
4294
4295
4296
4297
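      // If fixes were applied, sleep for the configured interval and then re-check so the
      // final report reflects the post-fix state.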
4298 if (shouldRerun()) {
4299 try {
4300 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4301 Thread.sleep(sleepBeforeRerun);
4302 } catch (InterruptedException ie) {
4303 LOG.warn("Interrupted while sleeping");
4304 return this;
4305 }
4306
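        // The second pass is report-only: turn the repair flags off before re-checking.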
4307 setFixAssignments(false);
4308 setFixMeta(false);
4309 setFixHdfsHoles(false);
4310 setFixHdfsOverlaps(false);
4311 setFixVersionFile(false);
4312 setFixTableOrphans(false);
4313 errors.resetErrors();
4314 code = onlineHbck();
4315 setRetCode(code);
4316 }
4317 } finally {
4318 IOUtils.cleanup(null, this);
4319 }
4320 return this;
4321 }
4322
4323
4324
4325
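  /**
   * Recursively lists the given path (a debug-level "ls -r") through this instance's error
   * reporter.
   */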
4326 void debugLsr(Path p) throws IOException {
4327 debugLsr(getConf(), p, errors);
4328 }
4329
4330
4331
4332
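  /**
   * Static variant of the recursive listing that prints through a fresh PrintingErrorReporter.
   */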
4333 public static void debugLsr(Configuration conf,
4334 Path p) throws IOException {
4335 debugLsr(conf, p, new PrintingErrorReporter());
4336 }
4337
4338
4339
4340
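  /**
   * Recursive listing helper; a no-op unless debug logging is enabled and the path exists.
   */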
4341 public static void debugLsr(Configuration conf,
4342 Path p, ErrorReporter errors) throws IOException {
4343 if (!LOG.isDebugEnabled() || p == null) {
4344 return;
4345 }
4346 FileSystem fs = p.getFileSystem(conf);
4347
4348 if (!fs.exists(p)) {
4349
4350 return;
4351 }
4352 errors.print(p.toString());
4353
4354 if (fs.isFile(p)) {
4355 return;
4356 }
4357
4358 if (fs.getFileStatus(p).isDirectory()) {
4359 FileStatus[] fss= fs.listStatus(p);
4360 for (FileStatus status : fss) {
4361 debugLsr(conf, status.getPath(), errors);
4362 }
4363 }
4364 }
4365 }