/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnectable;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
import org.apache.hadoop.hbase.zookeeper.ZKTable;
import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.KeeperException;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.google.protobuf.ServiceException;
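/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a corrupted HBase cluster.
 *
 * Region consistency checks verify that hbase:meta, region deployment on live
 * region servers and the state of data in HDFS (.regioninfo files) all agree.
 * Table integrity checks verify that all possible row keys resolve to exactly
 * one region of a table, i.e. there are no individual degenerate or backwards
 * regions, and no holes or overlaps between regions.
 *
 * The general repair strategy works in two phases: first the tool repairs
 * table integrity on HDFS (see offlineHdfsIntegrityRepair), then it repairs
 * region consistency against hbase:meta and the deployed region servers (see
 * onlineConsistencyRepair). Repairs are only attempted when the corresponding
 * "fix" options are enabled; by default hbck only reports inconsistencies.
 */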
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HBaseFsck extends Configured {
  public static final long DEFAULT_TIME_LAG = 60000;
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
  private static final int MAX_NUM_THREADS = 50;
  private static boolean rsSupportsOffline = true;
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  private static final int DEFAULT_MAX_MERGE = 5;
  private static final String TO_BE_LOADED = "to_be_loaded";
  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";

  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
  private ClusterStatus status;
  private HConnection connection;
  private HBaseAdmin admin;
  private HTable meta;

  protected ExecutorService executor;
  private long startMillis = EnvironmentEdgeManager.currentTimeMillis();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;
  private Path HBCK_LOCK_PATH;
  private FSDataOutputStream hbckOutFd;

  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);

  private static boolean details = false;
  private long timelag = DEFAULT_TIME_LAG;
  private boolean fixAssignments = false;
  private boolean fixMeta = false;
  private boolean checkHdfs = true;
  private boolean fixHdfsHoles = false;
  private boolean fixHdfsOverlaps = false;
  private boolean fixHdfsOrphans = false;
  private boolean fixTableOrphans = false;
  private boolean fixVersionFile = false;
  private boolean fixSplitParents = false;
  private boolean fixReferenceFiles = false;
  private boolean fixEmptyMetaCells = false;
  private boolean fixTableLocks = false;
  private boolean fixTableZNodes = false;
  private boolean fixAny = false;

  private Set<TableName> tablesIncluded = new HashSet<TableName>();
  private int maxMerge = DEFAULT_MAX_MERGE;
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
  private boolean sidelineBigOverlaps = false;
  private Path sidelineDir = null;

  private boolean rerun = false;
  private static boolean summary = false;
  private boolean checkMetaOnly = false;
  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false;

  final private ErrorReporter errors;
  int fixes = 0;

  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
  private TreeSet<TableName> disabledTables =
      new TreeSet<TableName>();

  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();

  private SortedMap<TableName, TableInfo> tablesInfo =
      new ConcurrentSkipListMap<TableName, TableInfo>();

  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());

  private Map<TableName, Set<String>> orphanTableDirs =
      new HashMap<TableName, Set<String>>();

  private Map<TableName, Set<String>> skippedRegions = new HashMap<TableName, Set<String>>();

  private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();

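  /**
   * Constructor.
   *
   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */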
  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);

    setConf(HBaseConfiguration.create(getConf()));

    getConf().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
    errors = getErrorReporter(conf);

    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
    executor = new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
  }

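  /**
   * Constructor.
   *
   * @param conf Configuration object
   * @param exec executor service used to run work items in parallel
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */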
  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
  }

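  /**
   * This method maintains a lock using a file. If the creation fails we return null.
   *
   * @return FSDataOutputStream object corresponding to the newly opened lock file,
   *         or null if another hbck instance already holds the lock
   * @throws IOException if an IO failure occurs
   */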
  private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
    long start = EnvironmentEdgeManager.currentTimeMillis();
    try {
      FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
      FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
          HConstants.DATA_FILE_UMASK_KEY);
      Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
      fs.mkdirs(tmpDir);
      HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
      final FSDataOutputStream out = FSUtils.create(fs, HBCK_LOCK_PATH, defaultPerms, false);
      out.writeBytes(InetAddress.getLocalHost().toString());
      out.flush();
      return out;
    } catch(RemoteException e) {
      if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
        return null;
      } else {
        throw e;
      }
    } finally {
      long duration = EnvironmentEdgeManager.currentTimeMillis() - start;
      if (duration > 30000) {
        // Do not return from this finally block: doing so would discard the lock
        // file stream (or a pending exception) and make the caller wrongly
        // conclude that another hbck instance holds the lock.
        LOG.warn("Took " + duration + " milliseconds to obtain lock");
      }
    }
  }

  private void unlockHbck() {
    if (hbckLockCleanup.compareAndSet(true, false)) {
      IOUtils.closeStream(hbckOutFd);
      try {
        FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
      } catch (IOException ioe) {
        LOG.warn("Failed to delete " + HBCK_LOCK_PATH);
        LOG.debug(ioe);
      }
    }
  }

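  /**
   * To repair region consistency, one must call connect() in order to repair
   * online state. Sets up the HBase connection, admin, hbase:meta table and
   * cluster status, and installs the hbck lock with a shutdown hook to
   * release it.
   */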
  public void connect() throws IOException {

    // Check if another instance of hbck is running
    hbckOutFd = checkAndMarkRunningHbck();
    if (hbckOutFd == null) {
      setRetCode(-1);
      LOG.error("Another instance of hbck is running, exiting this instance.[If you are sure" +
          " no other instance is running, delete the lock file " +
          HBCK_LOCK_PATH + " and rerun the tool]");
      throw new IOException("Duplicate hbck - Abort");
    }

    // Make sure to cleanup the lock
    hbckLockCleanup.set(true);

    // Add a shutdown hook so that if the tool is killed (for example with
    // ctrl-c) the lock file is still removed and available for further runs
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        unlockHbck();
      }
    });
    LOG.debug("Launching hbck");

    connection = HConnectionManager.createConnection(getConf());
    admin = new HBaseAdmin(connection);
    meta = new HTable(TableName.META_TABLE_NAME, connection);
    status = admin.getClusterStatus();
  }

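  /**
   * Get deployed regions according to the region servers.
   */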
  private void loadDeployedRegions() throws IOException, InterruptedException {
    // From the master, get a list of all known live region servers
    Collection<ServerName> regionServers = status.getServers();
    errors.print("Number of live region servers: " + regionServers.size());
    if (details) {
      for (ServerName rsinfo: regionServers) {
        errors.print(" " + rsinfo.getServerName());
      }
    }

    // From the master, get a list of all dead region servers
    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
    errors.print("Number of dead region servers: " + deadRegionServers.size());
    if (details) {
      for (ServerName name: deadRegionServers) {
        errors.print(" " + name);
      }
    }

    // Print the current master name and state
    errors.print("Master: " + status.getMaster());

    // Print the list of all backup masters
    Collection<ServerName> backupMasters = status.getBackupMasters();
    errors.print("Number of backup masters: " + backupMasters.size());
    if (details) {
      for (ServerName name: backupMasters) {
        errors.print(" " + name);
      }
    }

    errors.print("Average load: " + status.getAverageLoad());
    errors.print("Number of requests: " + status.getRequestsCount());
    errors.print("Number of regions: " + status.getRegionsCount());

    Map<String, RegionState> rits = status.getRegionsInTransition();
    errors.print("Number of regions in transition: " + rits.size());
    if (details) {
      for (RegionState state: rits.values()) {
        errors.print(" " + state.toDescriptiveString());
      }
    }

    // Determine what's deployed
    processRegionServers(regionServers);
  }

  private void clearState() {
    // Make sure regionInfo is empty before starting
    fixes = 0;
    regionInfoMap.clear();
    emptyRegionInfoQualifiers.clear();
    disabledTables.clear();
    errors.clear();
    tablesInfo.clear();
    orphanHdfsDirs.clear();
    skippedRegions.clear();
  }

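  /**
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
   * the table integrity rules. HBase doesn't need to be online for this
   * operation to work.
   */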
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
    // Initial pass to fix orphans.
    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
      // Iterate until no more fixes are made or the iteration limit is reached.
      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
      int curIter = 0;
      do {
        clearState(); // clears hbck state and resets the fix counter
        // repair what's on HDFS
        restoreHdfsIntegrity();
        curIter++;
      } while (fixes > 0 && curIter <= maxIterations);

      // Repairs are normally complete within the first couple of iterations;
      // warn if the iteration limit was hit before the tables converged.
      if (curIter > 2) {
        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Tables integrity may not be fully repaired!");
        } else {
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
        }
      }
    }
  }

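  /**
   * This repair method requires the cluster to be online since it contacts
   * region servers and the master. It makes each region's state in HDFS, in
   * hbase:meta, and in its deployments consistent.
   *
   * @return if greater than 0, the number of errors detected; if less than 0, a
   *         fatal error was encountered and checking could not continue
   */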
  public int onlineConsistencyRepair() throws IOException, KeeperException,
      InterruptedException {
    clearState();

    // get regions according to what is online on each RegionServer
    loadDeployedRegions();
    // check whether hbase:meta is deployed and online
    recordMetaRegion();
    // Check if hbase:meta is found only once and in the right place
    if (!checkMetaRegion()) {
      String errorMsg = "hbase:meta table is not consistent. ";
      if (shouldFixAssignments()) {
        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
      } else {
        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
      }
      errors.reportError(errorMsg + " Exiting...");
      return -2;
    }

    LOG.info("Loading regionsinfo from the hbase:meta table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    // Empty cells in hbase:meta?
    reportEmptyMetaCells();

    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    // get a list of all tables that have not changed recently.
    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    // load regiondirs and regioninfos from HDFS
    if (shouldCheckHdfs()) {
      LOG.info("Loading region directories from HDFS");
      loadHdfsRegionDirs();
      LOG.info("Loading region information from HDFS");
      loadHdfsRegionInfos();
    }

    // Get disabled tables from ZooKeeper
    loadDisabledTables();

    // fix the orphan tables
    fixOrphanTables();

    LOG.info("Checking and fixing region consistency");

    // Check and fix consistency
    checkAndFixConsistency();

    // Check integrity (does not fix)
    checkIntegrity();
    return errors.getErrorList().size();
  }

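  /**
   * Contacts the master and prints out cluster-wide information, then runs the
   * offline HDFS integrity repair followed by the online consistency repair.
   *
   * @return 0 on success, non-zero on failure
   */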
  public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException {
    // print hbase server version
    errors.print("Version: " + status.getHBaseVersion());
    offlineHdfsIntegrityRepair();

    // turn the balancer off while repairing, restoring its previous state afterwards
    boolean oldBalancer = admin.setBalancerRunning(false, true);
    try {
      onlineConsistencyRepair();
    }
    finally {
      admin.setBalancerRunning(oldBalancer, false);
    }

    if (checkRegionBoundaries) {
      checkRegionBoundaries();
    }

    offlineReferenceFileRepair();

    checkAndFixTableLocks();

    // Check (and fix if requested) orphaned table ZNodes
    checkAndFixOrphanedTableZNodes();

    // Remove the hbck lock
    unlockHbck();

    // Print table summary
    printTableSummary(tablesInfo);
    return errors.summarize();
  }

  public static byte[] keyOnly(byte[] b) {
    if (b == null)
      return b;
    int rowlength = Bytes.toShort(b, 0);
    byte[] result = new byte[rowlength];
    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
    return result;
  }

  private static class RegionBoundariesInformation {
    public byte [] regionName;
    public byte [] metaFirstKey;
    public byte [] metaLastKey;
    public byte [] storesFirstKey;
    public byte [] storesLastKey;
    @Override
    public String toString () {
      return "regionName=" + Bytes.toStringBinary(regionName) +
          "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
          "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
          "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
          "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
    }
  }

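  /**
   * Checks that the first and last keys found in each region's store files lie
   * within the start and end keys recorded for that region in hbase:meta.
   */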
  public void checkRegionBoundaries() {
    try {
      ByteArrayComparator comparator = new ByteArrayComparator();
      List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), false);
      final RegionBoundariesInformation currentRegionBoundariesInformation =
          new RegionBoundariesInformation();
      Path hbaseRoot = FSUtils.getRootDir(getConf());
      for (HRegionInfo regionInfo : regions) {
        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
        // For each region, get the start and stop key from META and compare them
        // to the same information from the stores.
        Path path = new Path(tableDir, regionInfo.getEncodedName());
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] files = fs.listStatus(path);
        // For all the column families in this region...
        byte[] storeFirstKey = null;
        byte[] storeLastKey = null;
        for (FileStatus file : files) {
          String fileName = file.getPath().toString();
          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
            FileStatus[] storeFiles = fs.listStatus(file.getPath());
            // For all the store files in this column family...
            for (FileStatus storeFile : storeFiles) {
              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
                  getConf()), getConf());
              if ((reader.getFirstKey() != null)
                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
                      reader.getFirstKey()) > 0))) {
                storeFirstKey = reader.getFirstKey();
              }
              if ((reader.getLastKey() != null)
                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
                      reader.getLastKey())) < 0)) {
                storeLastKey = reader.getLastKey();
              }
              reader.close();
            }
          }
        }
        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
          currentRegionBoundariesInformation.metaFirstKey = null;
        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
          currentRegionBoundariesInformation.metaLastKey = null;

        // For a region to be correct, we need the META start key to be smaller or
        // equal to the smallest start key from all the stores, and the region end
        // key to be bigger than the last key from all the current stores. The first
        // region's start key and the last region's end key are empty; some regions
        // can be empty and not have any store.

        boolean valid = true;
        // Checking start key.
        if ((currentRegionBoundariesInformation.storesFirstKey != null)
            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
                  currentRegionBoundariesInformation.metaFirstKey) >= 0;
        }
        // Checking stop key.
        if ((currentRegionBoundariesInformation.storesLastKey != null)
            && (currentRegionBoundariesInformation.metaLastKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
                  currentRegionBoundariesInformation.metaLastKey) < 0;
        }
        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
              tablesInfo.get(regionInfo.getTable()));
          LOG.warn("Region's boundaries not aligned between stores and META for:");
          LOG.warn(currentRegionBoundariesInformation);
        }
      }
    } catch (IOException e) {
      LOG.error(e);
    }
  }

  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
    for (HbckInfo hi : orphanHdfsDirs) {
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
      adoptHdfsOrphan(hi);
    }
  }

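  /**
   * Orphaned regions are regions without a .regioninfo file in them. We "adopt"
   * these orphans by creating a new region, and moving the column families,
   * recovered edits and other data into the new region dir. The region start
   * and end keys are determined by looking at all of the hfiles inside the
   * column families to identify the min and max keys. The resulting region
   * will likely violate table integrity but will be dealt with by merging
   * overlapping regions.
   */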
  @SuppressWarnings("deprecation")
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
          p + ". This dir could probably be deleted.");
      return;
    }

    TableName tableName = hi.getTableName();
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    HTableDescriptor template = tableInfo.getHTD();

    // find min and max key values
    Pair<byte[],byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
      // skip special dirs such as split log dirs
      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;

      FileStatus[] hfiles = fs.listStatus(cf.getPath());
      for (FileStatus hfile : hfiles) {
        byte[] start, end;
        HFile.Reader hf = null;
        try {
          CacheConfig cacheConf = new CacheConfig(getConf());
          hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
          hf.loadFileInfo();
          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
          start = startKv.getRow();
          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
          end = endKv.getRow();
        } catch (IOException ioe) {
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
          continue;
        } catch (NullPointerException ioe) {
          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
          continue;
        } finally {
          if (hf != null) {
            hf.close();
          }
        }

        // expand the range to include the range of all hfiles
        if (orphanRegionRange == null) {
          // first range
          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
        } else {
          // expand the range only if the hfile is wider
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
            orphanRegionRange.setFirst(start);
          }
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
            orphanRegionRange.setSecond(end);
          }
        }
      }
    }
    if (orphanRegionRange == null) {
      LOG.warn("No data in dir " + p + ", sidelining data");
      fixes++;
      sidelineRegionDir(fs, hi);
      return;
    }
    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
        Bytes.toString(orphanRegionRange.getSecond()) + ")");

    // create new region on hdfs. move data into place.
    HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
        orphanRegionRange.getSecond());
    LOG.info("Creating new region : " + hri);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
    Path target = region.getRegionFileSystem().getRegionDir();

    // rename all the data to new region
    mergeRegionDirs(target, hi);
    fixes++;
  }

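  /**
   * This method determines if there are table integrity errors in HDFS. If
   * there are errors and the appropriate "fix" options are enabled, the method
   * will first correct orphan regions making them into legit regions, and then
   * reload to merge potentially overlapping regions.
   *
   * @return number of table integrity errors found
   */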
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
    // Determine what's on HDFS
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs(); // populating regioninfo table.

    int errs = errors.getErrorList().size();
    // First time just get suggestions.
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() == errs) {
      LOG.info("No integrity errors. We are done with this phase. Glorious.");
      return 0;
    }

    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
      adoptHdfsOrphans(orphanHdfsDirs);
    }

    // Make sure there are no holes now.
    if (shouldFixHdfsHoles()) {
      clearState(); // this also resets the number of fixes
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
    }

    // Now we fix overlaps
    if (shouldFixHdfsOverlaps()) {
      // second pass we fix overlaps.
      clearState(); // this also resets the number of fixes
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
    }

    return errors.getErrorList().size();
  }

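  /**
   * Scan all the store file names to find any lingering reference files,
   * which refer to a file that is no longer present. If any are found and the
   * fix option is enabled, the reference file is sidelined so the region can
   * be opened again.
   */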
  private void offlineReferenceFileRepair() throws IOException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    LOG.info("Computing mapping of all store files");
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot, errors);
    errors.print("");
    LOG.info("Validating mapping using HDFS state");
    for (Path path: allFiles.values()) {
      boolean isReference = false;
      try {
        isReference = StoreFileInfo.isReference(path);
      } catch (Throwable t) {
        // Ignore. Some files may not be store files at all.
        // StoreFileInfo#isReference already logs a warning in that case.
      }
      if (!isReference) continue;

      Path referredToFile = StoreFileInfo.getReferredToFile(path);
      if (fs.exists(referredToFile)) continue;  // good, expected

      // Found a lingering reference file
      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
          "Found lingering reference file " + path);
      if (!shouldFixReferenceFiles()) continue;

      // Now, trying to fix it since it is requested
      boolean success = false;
      String pathStr = path.toString();

      // Walk up five path separators so the sidelined copy keeps the trailing
      // data/namespace/table/region/family/file components and mirrors the
      // original directory layout under the sideline dir.
      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 5; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }
      if (index > 0) {
        Path rootDir = getSidelineDir();
        Path dst = new Path(rootDir, pathStr.substring(index + 1));
        fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
            + path + " to " + dst);
        setShouldRerun();

        success = fs.rename(path, dst);
      }
      if (!success) {
        LOG.error("Failed to sideline reference file " + path);
      }
    }
  }

  /**
   * Report the empty REGIONINFO_QUALIFIER rows found in hbase:meta.
   */
  private void reportEmptyMetaCells() {
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
        emptyRegionInfoQualifiers.size());
    if (details) {
      for (Result r: emptyRegionInfoQualifiers) {
        errors.print(" " + r);
      }
    }
  }

  /**
   * Report the tables found and how many of them are in flux (skipped).
   */
  private void reportTablesInFlux() {
    AtomicInteger numSkipped = new AtomicInteger(0);
    HTableDescriptor[] allTables = getTables(numSkipped);
    errors.print("Number of Tables: " + allTables.length);
    if (details) {
      if (numSkipped.get() > 0) {
        errors.detail("Number of Tables in flux: " + numSkipped.get());
      }
      for (HTableDescriptor td : allTables) {
        errors.detail(" Table: " + td.getTableName() + "\t" +
            (td.isReadOnly() ? "ro" : "rw") + "\t" +
            (td.isMetaRegion() ? "META" : " ") + "\t" +
            " families: " + td.getFamilies().size());
      }
    }
  }

  public ErrorReporter getErrors() {
    return errors;
  }

  /**
   * Read the .regioninfo file from the file system. If there is no
   * .regioninfo, nothing is recorded for this region.
   */
  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    if (regionDir == null) {
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
      return;
    }

    if (hbi.hdfsEntry.hri != null) {
      // already loaded data
      return;
    }

    FileSystem fs = FileSystem.get(getConf());
    HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
    LOG.debug("HRegionInfo read: " + hri.toString());
    hbi.hdfsEntry.hri = hri;
  }

  /**
   * Exception thrown when a region repair step fails, wrapping the underlying
   * IOException.
   */
  public static class RegionRepairException extends IOException {
    private static final long serialVersionUID = 1L;
    final IOException ioe;
    public RegionRepairException(String s, IOException ioe) {
      super(s);
      this.ioe = ioe;
    }
  }

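  /**
   * Populate hbi's from regionInfos loaded from the file system.
   */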
  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
      throws IOException, InterruptedException {
    tablesInfo.clear();

    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    // Parallelized read of .regioninfo files.
    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
    List<Future<Void>> hbiFutures;

    for (HbckInfo hbi : hbckInfos) {
      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
      hbis.add(work);
    }

    // Submit and wait for completion
    hbiFutures = executor.invokeAll(hbis);

    for(int i=0; i<hbiFutures.size(); i++) {
      WorkItemHdfsRegionInfo work = hbis.get(i);
      Future<Void> f = hbiFutures.get(i);
      try {
        f.get();
      } catch(ExecutionException e) {
        LOG.warn("Failed to read .regioninfo file for region " +
            work.hbi.getRegionNameAsString(), e.getCause());
      }
    }

    Path hbaseRoot = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseRoot.getFileSystem(getConf());
    // serialized table info gathering.
    for (HbckInfo hbi: hbckInfos) {

      if (hbi.getHdfsHRI() == null) {
        // was an orphan
        continue;
      }

      // get table name from hdfs, populate various HBaseFsck tables.
      TableName tableName = hbi.getTableName();
      if (tableName == null) {
        // unable to determine the table name for this region
        LOG.warn("tableName was null for: " + hbi);
        continue;
      }

      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        // only executed once per table.
        modTInfo = new TableInfo(tableName);
        tablesInfo.put(tableName, modTInfo);
        try {
          HTableDescriptor htd =
              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
          modTInfo.htds.add(htd);
        } catch (IOException ioe) {
          if (!orphanTableDirs.containsKey(tableName)) {
            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
            // should only report once for each table
            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
            Set<String> columns = new HashSet<String>();
            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
          }
        }
      }
      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }
    }

    loadTableInfosForTablesWithNoRegion();
    errors.print("");

    return tablesInfo;
  }

  /**
   * To get the column family list according to the column family dirs.
   *
   * @param columns set to which the column family names are added
   * @param hbi the HbckInfo whose region dir is scanned
   * @return the same set, populated with the column families found
   */
  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
    for (FileStatus subdir : subDirs) {
      String columnfamily = subdir.getPath().getName();
      columns.add(columnfamily);
    }
    return columns;
  }

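  /**
   * Fabricates a default .tableinfo file from the table name and the set of
   * column families found on HDFS. All other descriptor properties are left
   * at their defaults.
   *
   * @return false if no column family information is available, true otherwise
   */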
  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (String columnFamily : columns) {
      htd.addFamily(new HColumnDescriptor(columnFamily));
    }
    fstd.createTableDescriptor(htd, true);
    return true;
  }

  /**
   * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta.
   */
  public void fixEmptyMetaCells() throws IOException {
    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
      for (Result region : emptyRegionInfoQualifiers) {
        deleteMetaRegion(region.getRow());
        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
      }
      emptyRegionInfoQualifiers.clear();
    }
  }

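  /**
   * To fix an orphan table a .tableinfo file is created under its table dir:
   * if the table descriptor can still be obtained from the cluster it is
   * recovered as-is, otherwise a default .tableinfo is fabricated from the
   * column families found on HDFS.
   */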
  public void fixOrphanTables() throws IOException {
    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {

      List<TableName> tmpList = new ArrayList<TableName>();
      tmpList.addAll(orphanTableDirs.keySet());
      HTableDescriptor[] htds = getHTableDescriptors(tmpList);
      Iterator<Entry<TableName, Set<String>>> iter =
          orphanTableDirs.entrySet().iterator();
      int j = 0;
      int numFailedCase = 0;
      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
      while (iter.hasNext()) {
        Entry<TableName, Set<String>> entry =
            iter.next();
        TableName tableName = entry.getKey();
        LOG.info("Trying to fix orphan table error: " + tableName);
        if (j < htds.length) {
          if (tableName.equals(htds[j].getTableName())) {
            HTableDescriptor htd = htds[j];
            LOG.info("fixing orphan table: " + tableName + " from cache");
            fstd.createTableDescriptor(htd, true);
            j++;
            iter.remove();
          }
        } else {
          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
            LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
            iter.remove();
          } else {
            LOG.error("Unable to create default .tableinfo for " + tableName
                + " while missing column family information");
            numFailedCase++;
          }
        }
        fixes++;
      }

      if (orphanTableDirs.isEmpty()) {
        // all orphan tables were recovered; re-run hbck to pick up the fixes
        setShouldRerun();
        LOG.warn("Strongly recommend to re-run hbck manually after all orphanTableDirs being fixed");
      } else if (numFailedCase > 0) {
        LOG.error("Failed to fix " + numFailedCase
            + " OrphanTables with default .tableinfo files");
      }

    }
    // cleanup the list
    orphanTableDirs.clear();

  }

  /**
   * Creates a new, empty hbase:meta region on HDFS so its content can be
   * rebuilt. Info-family caching is disabled while the region is created.
   */
  private HRegion createNewMeta() throws IOException {
    Path rootdir = FSUtils.getRootDir(getConf());
    Configuration c = getConf();
    HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
    HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor);
    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
    return meta;
  }

  /**
   * Generate a set of puts to add to a new meta. This expects the tables to be
   * clean with no overlaps or holes. If there are any problems it returns null.
   *
   * @return an array list of puts to do in bulk, null if tables have problems
   */
  private ArrayList<Put> generatePuts(
      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
    ArrayList<Put> puts = new ArrayList<Put>();
    boolean hasProblems = false;
    for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
      TableName name = e.getKey();

      // skip "hbase:meta"
      if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
        continue;
      }

      TableInfo ti = e.getValue();
      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
          .entrySet()) {
        Collection<HbckInfo> his = spl.getValue();
        int sz = his.size();
        if (sz != 1) {
          // problem
          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
              + " had " + sz + " regions instead of exactly 1." );
          hasProblems = true;
          continue;
        }

        // add the row directly to meta.
        HbckInfo hi = his.iterator().next();
        HRegionInfo hri = hi.getHdfsHRI();
        Put p = MetaEditor.makePutFromRegionInfo(hri);
        puts.add(p);
      }
    }
    return hasProblems ? null : puts;
  }

  /**
   * Suggest fixes for each table.
   */
  private void suggestFixes(
      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
    logParallelMerge();
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      tInfo.checkRegionChain(handler);
    }
  }

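  /**
   * Rebuilds the hbase:meta table from the regions found on HDFS. The old
   * hbase:meta is sidelined first; HBase should be offline while this runs.
   *
   * @return true if hbase:meta was rebuilt, false if integrity problems remain
   */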
  public boolean rebuildMeta(boolean fix) throws IOException,
      InterruptedException {

    // Determine what's on HDFS
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs();

    int errs = errors.getErrorList().size();
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
    checkHdfsIntegrity(false, false);

    // make sure ok.
    if (errors.getErrorList().size() != errs) {
      // While in error state, iterate until no more fixes possible
      while (true) {
        fixes = 0;
        suggestFixes(tablesInfo);
        errors.clear();
        loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());

        int errCount = errors.getErrorList().size();

        if (fixes == 0) {
          if (errCount > 0) {
            return false; // failed to fix problems.
          } else {
            break; // no fixes and no problems left; continue with the rebuild
          }
        }
      }
    }

    // we can rebuild, move old hbase:meta out of the way and start
    LOG.info("HDFS regioninfos seem good. Sidelining old hbase:meta");
    Path backupDir = sidelineOldMeta();

    LOG.info("Creating new hbase:meta");
    HRegion meta = createNewMeta();

    // populate meta
    List<Put> puts = generatePuts(tablesInfo);
    if (puts == null) {
      LOG.fatal("Problem encountered when creating new hbase:meta entries. " +
          "You may need to restore the previously sidelined hbase:meta");
      return false;
    }
    meta.batchMutate(puts.toArray(new Put[puts.size()]));
    HRegion.closeHRegion(meta);
    LOG.info("Success! hbase:meta table rebuilt.");
    LOG.info("Old hbase:meta is moved into " + backupDir);
    return true;
  }

  /**
   * Log a message about whether overlap merges are handled in parallel or serially.
   */
  private void logParallelMerge() {
    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
      LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
          " false to run serially.");
    } else {
      LOG.info("Handling overlap merges serially. set hbasefsck.overlap.merge.parallel to" +
          " true to run in parallel.");
    }
  }

  private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
      boolean fixOverlaps) throws IOException {
    LOG.info("Checking HBase region split map from HDFS data...");
    logParallelMerge();
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler;
      if (fixHoles || fixOverlaps) {
        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
            fixHoles, fixOverlaps);
      } else {
        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      }
      if (!tInfo.checkRegionChain(handler)) {
        // should dump info as well.
        errors.report("Found inconsistency in table " + tInfo.getName());
      }
    }
    return tablesInfo;
  }

  private Path getSidelineDir() throws IOException {
    if (sidelineDir == null) {
      Path hbaseDir = FSUtils.getRootDir(getConf());
      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
          + startMillis);
    }
    return sidelineDir;
  }

  /**
   * Sideline a region dir (instead of deleting it).
   */
  Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
    return sidelineRegionDir(fs, null, hi);
  }

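  /**
   * Sideline a region dir (instead of deleting it), preserving its column
   * family sub-directories.
   *
   * @param parentDir if specified, the region is sidelined under
   *     sidelineDir/parentDir/tableName/regionName so that similar regions can
   *     be grouped together (for example, to bulk load them back later)
   * @return the sideline directory the region was moved to, or null if the
   *     region dir did not exist
   */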
  Path sidelineRegionDir(FileSystem fs,
      String parentDir, HbckInfo hi) throws IOException {
    TableName tableName = hi.getTableName();
    Path regionDir = hi.getHdfsRegionDir();

    if (!fs.exists(regionDir)) {
      LOG.warn("No previous " + regionDir + " exists. Continuing.");
      return null;
    }

    Path rootDir = getSidelineDir();
    if (parentDir != null) {
      rootDir = new Path(rootDir, parentDir);
    }
    Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
    fs.mkdirs(sidelineRegionDir);
    boolean success = false;
    FileStatus[] cfs = fs.listStatus(regionDir);
    if (cfs == null) {
      LOG.info("Region dir is empty: " + regionDir);
    } else {
      for (FileStatus cf : cfs) {
        Path src = cf.getPath();
        Path dst = new Path(sidelineRegionDir, src.getName());
        if (fs.isFile(src)) {
          // simple file
          success = fs.rename(src, dst);
          if (!success) {
            String msg = "Unable to rename file " + src + " to " + dst;
            LOG.error(msg);
            throw new IOException(msg);
          }
          continue;
        }

        // is a directory.
        fs.mkdirs(dst);

        LOG.info("Sidelining files from " + src + " into containing region " + dst);
        // FileSystem.rename is inconsistent with directories -- if the
        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
        // it moves the src into the dst dir resulting in (foo/a/b). If
        // the dst does not exist, and the src is a dir, src becomes dst (foo/b),
        // so move the files individually instead.
        FileStatus[] hfiles = fs.listStatus(src);
        if (hfiles != null && hfiles.length > 0) {
          for (FileStatus hfile : hfiles) {
            success = fs.rename(hfile.getPath(), dst);
            if (!success) {
              String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
              LOG.error(msg);
              throw new IOException(msg);
            }
          }
        }
        LOG.debug("Sideline directory contents:");
        debugLsr(sidelineRegionDir);
      }
    }

    LOG.info("Removing old region dir: " + regionDir);
    success = fs.delete(regionDir, true);
    if (!success) {
      String msg = "Unable to delete dir " + regionDir;
      LOG.error(msg);
      throw new IOException(msg);
    }
    return sidelineRegionDir;
  }

  /**
   * Side line an entire table.
   */
  void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
      Path backupHbaseDir) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
    if (fs.exists(tableDir)) {
      Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName);
      fs.mkdirs(backupTableDir.getParent());
      boolean success = fs.rename(tableDir, backupTableDir);
      if (!success) {
        throw new IOException("Failed to move " + tableName + " from "
            + tableDir + " to " + backupTableDir);
      }
    } else {
      LOG.info("No previous " + tableName + " exists. Continuing.");
    }
  }

  /**
   * @return Path to backup of original directory
   */
  Path sidelineOldMeta() throws IOException {
    // put current hbase:meta aside.
    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    Path backupDir = getSidelineDir();
    fs.mkdirs(backupDir);

    try {
      sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
    } catch (IOException e) {
      LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore "
          + "try to rename hbase:meta in " + backupDir.getName() + " to "
          + hbaseDir.getName() + ".", e);
      throw e; // throw original exception
    }
    return backupDir;
  }

  /**
   * Load the list of disabled tables in ZK into a local set.
   */
  private void loadDisabledTables()
      throws ZooKeeperConnectionException, IOException {
    HConnectionManager.execute(new HConnectable<Void>(getConf()) {
      @Override
      public Void connect(HConnection connection) throws IOException {
        ZooKeeperWatcher zkw = createZooKeeperWatcher();
        try {
          for (TableName tableName :
              ZKTableReadOnly.getDisabledOrDisablingTables(zkw)) {
            disabledTables.add(tableName);
          }
        } catch (KeeperException ke) {
          throw new IOException(ke);
        } finally {
          zkw.close();
        }
        return null;
      }
    });
  }

  /**
   * Check if the specified region's table is disabled.
   */
  private boolean isTableDisabled(HRegionInfo regionInfo) {
    return disabledTables.contains(regionInfo.getTable());
  }

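  /**
   * Scan HDFS for all regions, recording their information into regionInfoMap.
   */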
  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
    Path rootDir = FSUtils.getRootDir(getConf());
    FileSystem fs = rootDir.getFileSystem(getConf());

    // list all tables from HDFS
    List<FileStatus> tableDirs = Lists.newArrayList();

    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));

    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
    for (Path path : paths) {
      TableName tableName = FSUtils.getTableName(path);
      if ((!checkMetaOnly &&
          isTableIncluded(tableName)) ||
          tableName.equals(TableName.META_TABLE_NAME)) {
        tableDirs.add(fs.getFileStatus(path));
      }
    }

    // verify that version file exists
    if (!foundVersionFile) {
      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
          "Version file does not exist in root dir " + rootDir);
      if (shouldFixVersionFile()) {
        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
            + " file.");
        setShouldRerun();
        FSUtils.setVersion(fs, rootDir, getConf().getInt(
            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
      }
    }

    // Scan each included table directory with the shared executor, one work
    // item per table dir, mirroring the parallel pattern used elsewhere in
    // this class (processRegionServers, loadHdfsRegionInfos).
    List<WorkItemHdfsDir> dirs = new ArrayList<WorkItemHdfsDir>(tableDirs.size());
    List<Future<Void>> dirsFutures;

    for (FileStatus tableDir : tableDirs) {
      LOG.debug("Loading region dirs from " + tableDir.getPath());
      dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir));
    }

    // Invoke and wait for Callables to complete
    dirsFutures = executor.invokeAll(dirs);

    for (Future<Void> f : dirsFutures) {
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Could not load region dir ", e.getCause());
      }
    }
    errors.print("");
  }

  /**
   * Record the location of the hbase:meta region as found in ZooKeeper.
   */
  private boolean recordMetaRegion() throws IOException {
    HRegionLocation metaLocation = connection.locateRegion(
        TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW);

    // Check if Meta region is valid and existing
    if (metaLocation == null || metaLocation.getRegionInfo() == null ||
        metaLocation.getHostname() == null) {
      errors.reportError(ERROR_CODE.NULL_META_REGION,
          "META region or some of its attributes are null.");
      return false;
    }
    ServerName sn;
    try {
      sn = getMetaRegionServerName();
    } catch (KeeperException e) {
      throw new IOException(e);
    }
    MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
    HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
    if (hbckInfo == null) {
      regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
    } else {
      hbckInfo.metaEntry = m;
    }
    return true;
  }

  private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
    return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
      @Override
      public void abort(String why, Throwable e) {
        LOG.error(why, e);
        System.exit(1);
      }

      @Override
      public boolean isAborted() {
        return false;
      }

    });
  }

  private ServerName getMetaRegionServerName()
      throws IOException, KeeperException {
    ZooKeeperWatcher zkw = createZooKeeperWatcher();
    ServerName sn = null;
    try {
      sn = MetaRegionTracker.getMetaRegionLocation(zkw);
    } finally {
      zkw.close();
    }
    return sn;
  }

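  /**
   * Contacts each regionserver and fetches metadata about regions.
   *
   * @param regionServerList list of region servers to contact
   */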
  void processRegionServers(Collection<ServerName> regionServerList)
      throws IOException, InterruptedException {

    List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
    List<Future<Void>> workFutures;

    // loop to contact each region server in parallel
    for (ServerName rsinfo: regionServerList) {
      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
    }

    workFutures = executor.invokeAll(workItems);

    for(int i=0; i<workFutures.size(); i++) {
      WorkItemRegion item = workItems.get(i);
      Future<Void> f = workFutures.get(i);
      try {
        f.get();
      } catch(ExecutionException e) {
        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
            e.getCause());
      }
    }
  }

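  /**
   * Checks the consistency of every region recorded in regionInfoMap against
   * hbase:meta, HDFS and the deployed region servers, repairing where requested.
   */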
  private void checkAndFixConsistency()
      throws IOException, KeeperException, InterruptedException {
    List<CheckRegionConsistencyWorkItem> workItems =
        new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
      workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
    }
    checkRegionConsistencyConcurrently(workItems);

    // If some regions could not be checked or repaired, decide whether to give
    // up based on the configured limit of skipped regions.
    int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
    int numOfSkippedRegions = skippedRegions.size();
    if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
      throw new IOException(numOfSkippedRegions
          + " region(s) could not be checked or repaired. See logs for detail.");
    }
  }

  /**
   * Check the consistency of regions using multiple threads concurrently.
   */
  private void checkRegionConsistencyConcurrently(
      final List<CheckRegionConsistencyWorkItem> workItems)
      throws IOException, KeeperException, InterruptedException {
    if (workItems.isEmpty()) {
      return;  // nothing to check
    }

    List<Future<Void>> workFutures = executor.invokeAll(workItems);
    for(Future<Void> f: workFutures) {
      try {
        f.get();
      } catch(ExecutionException e1) {
        LOG.warn("Could not check region consistency " , e1.getCause());
        if (e1.getCause() instanceof IOException) {
          throw (IOException)e1.getCause();
        } else if (e1.getCause() instanceof KeeperException) {
          throw (KeeperException)e1.getCause();
        } else if (e1.getCause() instanceof InterruptedException) {
          throw (InterruptedException)e1.getCause();
        } else {
          throw new IOException(e1.getCause());
        }
      }
    }
  }

  class CheckRegionConsistencyWorkItem implements Callable<Void> {
    private final String key;
    private final HbckInfo hbi;

    CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
      this.key = key;
      this.hbi = hbi;
    }

    @Override
    public synchronized Void call() throws Exception {
      try {
        checkRegionConsistency(key, hbi);
      } catch (Exception e) {
        // If the region is a non-META region, skip this region and send a warning
        // message; if the region is the META region, we should not continue.
        LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
            + "'.", e);
        if (hbi.getHdfsHRI().isMetaRegion()) {
          throw e;
        }
        LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
        addSkippedRegion(hbi);
      }
      return null;
    }
  }

  private void addSkippedRegion(final HbckInfo hbi) {
    Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
    if (skippedRegionNames == null) {
      skippedRegionNames = new HashSet<String>();
    }
    skippedRegionNames.add(hbi.getRegionNameAsString());
    skippedRegions.put(hbi.getTableName(), skippedRegionNames);
  }

  private void preCheckPermission() throws IOException, AccessDeniedException {
    if (shouldIgnorePreCheckPermission()) {
      return;
    }

    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    UserProvider userProvider = UserProvider.instantiate(getConf());
    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
    FileStatus[] files = fs.listStatus(hbaseDir);
    for (FileStatus file : files) {
      try {
        FSUtils.checkAccess(ugi, file, FsAction.WRITE);
      } catch (AccessDeniedException ace) {
        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
            + " does not have write perms to " + file.getPath()
            + ". Please rerun hbck as hdfs user " + file.getOwner());
        throw ace;
      }
    }
  }

  /**
   * Deletes region from meta table
   */
  private void deleteMetaRegion(HbckInfo hi) throws IOException {
    deleteMetaRegion(hi.metaEntry.getRegionName());
  }

  /**
   * Deletes region from meta table
   */
  private void deleteMetaRegion(byte[] metaKey) throws IOException {
    Delete d = new Delete(metaKey);
    meta.delete(d);
    meta.flushCommits();
    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
  }

  /**
   * Reset the split parent region info in meta table
   */
  private void resetSplitParent(HbckInfo hi) throws IOException {
    RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
    Delete d = new Delete(hi.metaEntry.getRegionName());
    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
    mutations.add(d);

    HRegionInfo hri = new HRegionInfo(hi.metaEntry);
    hri.setOffline(false);
    hri.setSplit(false);
    Put p = MetaEditor.makePutFromRegionInfo(hri);
    mutations.add(p);

    meta.mutateRow(mutations);
    meta.flushCommits();
    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
  }

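  /**
   * This backwards-compatibility wrapper permanently offlines a region that
   * should not be alive. If the master does not support the offline method,
   * it falls back to unassigning the region; in that case the unassign may
   * retrigger an assignment, so the HMaster should be restarted after repairs.
   */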
  private void offline(byte[] regionName) throws IOException {
    String regionString = Bytes.toStringBinary(regionName);
    if (!rsSupportsOffline) {
      LOG.warn("Using unassign region " + regionString
          + " instead of using offline method, you should"
          + " restart HMaster after these repairs");
      admin.unassign(regionName, true);
      return;
    }

    // first time we assume the master supports #offline.
    try {
      LOG.info("Offlining region " + regionString);
      admin.offline(regionName);
    } catch (IOException ioe) {
      String notFoundMsg = "java.lang.NoSuchMethodException: " +
          "org.apache.hadoop.hbase.master.HMaster.offline([B)";
      if (ioe.getMessage().contains(notFoundMsg)) {
        LOG.warn("Using unassign region " + regionString
            + " instead of using offline method, you should"
            + " restart HMaster after these repairs");
        rsSupportsOffline = false; // in the future just use unassign
        admin.unassign(regionName, true);
        return;
      }
      throw ioe;
    }
  }

  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
    for (OnlineEntry rse : hi.deployedEntries) {
      LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
      try {
        HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
        offline(rse.hri.getRegionName());
      } catch (IOException ioe) {
        LOG.warn("Got exception when attempting to offline region "
            + Bytes.toString(rse.hri.getRegionName()), ioe);
      }
    }
  }

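  /**
   * Attempts to undeploy a region from a region server using the server name
   * and region info recorded in hbase:meta. If the region is known only from a
   * region server's report, it is closed directly on that server instead.
   */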
  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
    if (hi.metaEntry == null && hi.hdfsEntry == null) {
      undeployRegions(hi);
      return;
    }

    // get assignment info and hregioninfo from meta.
    Get get = new Get(hi.getRegionName());
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
    Result r = meta.get(get);
    ServerName serverName = HRegionInfo.getServerName(r);
    if (serverName == null) {
      errors.reportError("Unable to close region "
          + hi.getRegionNameAsString() + " because meta does not "
          + "have handle to reach it.");
      return;
    }

    HRegionInfo hri = HRegionInfo.getHRegionInfo(r);
    if (hri == null) {
      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
          + " because hbase:meta had invalid or missing "
          + HConstants.CATALOG_FAMILY_STR + ":"
          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
          + " qualifier value.");
      return;
    }

    // close the region -- close files and remove assignment
    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, serverName, hri);
  }

  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
      KeeperException, InterruptedException {
    // If we are trying to fix the errors
    if (shouldFixAssignments()) {
      errors.print(msg);
      undeployRegions(hbi);
      setShouldRerun();
      HRegionInfo hri = hbi.getHdfsHRI();
      if (hri == null) {
        hri = hbi.metaEntry;
      }
      HBaseFsckRepair.fixUnassigned(admin, hri);
      HBaseFsckRepair.waitUntilAssigned(admin, hri);
    }
  }

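  /**
   * Check a single region for consistency between hbase:meta, HDFS and its
   * deployment, and correct the problems found if the relevant fix options are
   * enabled.
   */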
1950 private void checkRegionConsistency(final String key, final HbckInfo hbi)
1951 throws IOException, KeeperException, InterruptedException {
1952 String descriptiveName = hbi.toString();
1953
1954 boolean inMeta = hbi.metaEntry != null;
1955
1956 boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
1957 boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
1958 boolean isDeployed = !hbi.deployedOn.isEmpty();
1959 boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
1960 boolean deploymentMatchesMeta =
1961 hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
1962 hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
1963 boolean splitParent =
1964 (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
1965 boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
1966 boolean recentlyModified = inHdfs &&
1967 hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTimeMillis();
1968
1969
1970 if (hbi.containsOnlyHdfsEdits()) {
1971 return;
1972 }
1973 if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
1974 return;
1975 } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
1976 LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
1977 "tabled that is not deployed");
1978 return;
1979 } else if (recentlyModified) {
1980 LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
1981 return;
1982 }
1983
1984 else if (!inMeta && !inHdfs && !isDeployed) {
1985
1986 assert false : "Entry for region with no data";
1987 } else if (!inMeta && !inHdfs && isDeployed) {
1988 errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
1989 + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
1990 "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1991 if (shouldFixAssignments()) {
1992 undeployRegions(hbi);
1993 }
1994
1995 } else if (!inMeta && inHdfs && !isDeployed) {
1996 if (hbi.isMerged()) {
1997
1998
1999 hbi.setSkipChecks(true);
2000 LOG.info("Region " + descriptiveName
2001             + " was merged recently; its file(s) will be cleaned up by CatalogJanitor later");
2002 return;
2003 }
2004 errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2005 + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2006 "or deployed on any region server");
2007
2008 if (shouldFixMeta()) {
2009 if (!hbi.isHdfsRegioninfoPresent()) {
2010 LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2011 + " in table integrity repair phase if -fixHdfsOrphans was" +
2012 " used.");
2013 return;
2014 }
2015
2016 HRegionInfo hri = hbi.getHdfsHRI();
2017 TableInfo tableInfo = tablesInfo.get(hri.getTable());
2018 for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
2019 if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2020 && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2021 hri.getEndKey()) >= 0)
2022 && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2023 if(region.isSplit() || region.isOffline()) continue;
2024 Path regionDir = hbi.getHdfsRegionDir();
2025 FileSystem fs = regionDir.getFileSystem(getConf());
2026 List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2027 for (Path familyDir : familyDirs) {
2028 List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2029 for (Path referenceFilePath : referenceFilePaths) {
2030 Path parentRegionDir =
2031 StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2032 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2033                 LOG.warn(hri + " start and stop keys are within the range of " + region
2034                     + ". The region might not have been cleaned up from hdfs when the split of region "
2035                     + region + " failed. Hence deleting it from hdfs.");
2036 HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2037 regionDir.getParent(), hri);
2038 return;
2039 }
2040 }
2041 }
2042 }
2043 }
2044
2045 LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2046 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
2047
2048 tryAssignmentRepair(hbi, "Trying to reassign region...");
2049 }
2050
2051 } else if (!inMeta && inHdfs && isDeployed) {
2052 errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2053 + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2054 debugLsr(hbi.getHdfsRegionDir());
2055 if (shouldFixMeta()) {
2056 if (!hbi.isHdfsRegioninfoPresent()) {
2057 LOG.error("This should have been repaired in table integrity repair phase");
2058 return;
2059 }
2060
2061           LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2062 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
2063
2064 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2065 }
2066
2067
2068 } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2069
2070
2071 if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2072
2073 HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2074 HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2075 if (infoA != null && infoB != null) {
2076
2077 hbi.setSkipChecks(true);
2078 return;
2079 }
2080 }
2081 errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2082 + descriptiveName + " is a split parent in META, in HDFS, "
2083 + "and not deployed on any region server. This could be transient.");
2084 if (shouldFixSplitParents()) {
2085 setShouldRerun();
2086 resetSplitParent(hbi);
2087 }
2088 } else if (inMeta && !inHdfs && !isDeployed) {
2089 errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2090 + descriptiveName + " found in META, but not in HDFS "
2091 + "or deployed on any region server.");
2092 if (shouldFixMeta()) {
2093 deleteMetaRegion(hbi);
2094 }
2095 } else if (inMeta && !inHdfs && isDeployed) {
2096 errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2097 + " found in META, but not in HDFS, " +
2098 "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2099
2100
2101
2102 if (shouldFixAssignments()) {
2103 errors.print("Trying to fix unassigned region...");
2104 undeployRegions(hbi);
2105 }
2106 if (shouldFixMeta()) {
2107
2108 deleteMetaRegion(hbi);
2109 }
2110 } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2111 errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2112 + " not deployed on any region server.");
2113 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2114 } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2115 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2116 "Region " + descriptiveName + " should not be deployed according " +
2117 "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2118 if (shouldFixAssignments()) {
2119 errors.print("Trying to close the region " + descriptiveName);
2120 setShouldRerun();
2121 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2122 }
2123 } else if (inMeta && inHdfs && isMultiplyDeployed) {
2124 errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2125 + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2126 + " but is multiply assigned to region servers " +
2127 Joiner.on(", ").join(hbi.deployedOn));
2128
2129 if (shouldFixAssignments()) {
2130 errors.print("Trying to fix assignment error...");
2131 setShouldRerun();
2132 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2133 }
2134 } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2135 errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2136 + descriptiveName + " listed in hbase:meta on region server " +
2137 hbi.metaEntry.regionServer + " but found on region server " +
2138 hbi.deployedOn.get(0));
2139
2140 if (shouldFixAssignments()) {
2141 errors.print("Trying to fix assignment error...");
2142 setShouldRerun();
2143 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2144 HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2145 }
2146 } else {
2147 errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2148 " is in an unforeseen state:" +
2149 " inMeta=" + inMeta +
2150 " inHdfs=" + inHdfs +
2151 " isDeployed=" + isDeployed +
2152 " isMultiplyDeployed=" + isMultiplyDeployed +
2153 " deploymentMatchesMeta=" + deploymentMatchesMeta +
2154 " shouldBeDeployed=" + shouldBeDeployed);
2155 }
2156 }
2157
2158
2159
2160
2161
2162
2163
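  /**
   * Builds the per-table view (tablesInfo) from all regions read from hbase:meta, skipping
   * offline, undeployed and edits-only regions, and then checks each table's region chain
   * for holes, overlaps and other integrity problems. Returns the resulting map of table
   * name to TableInfo.
   */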
2164 SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2165 tablesInfo = new TreeMap<TableName,TableInfo> ();
2166 List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
2167 LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2168 for (HbckInfo hbi : regionInfoMap.values()) {
2169
2170 if (hbi.metaEntry == null) {
2171
2172 noHDFSRegionInfos.add(hbi);
2173 Path p = hbi.getHdfsRegionDir();
2174 if (p == null) {
2175 errors.report("No regioninfo in Meta or HDFS. " + hbi);
2176 }
2177
2178
2179 continue;
2180 }
2181 if (hbi.metaEntry.regionServer == null) {
2182 errors.detail("Skipping region because no region server: " + hbi);
2183 continue;
2184 }
2185 if (hbi.metaEntry.isOffline()) {
2186 errors.detail("Skipping region because it is offline: " + hbi);
2187 continue;
2188 }
2189 if (hbi.containsOnlyHdfsEdits()) {
2190         errors.detail("Skipping region because it only contains edits: " + hbi);
2191 continue;
2192 }
2193
2194
2195
2196
2197
2198
2199 if (hbi.deployedOn.size() == 0) continue;
2200
2201
2202 TableName tableName = hbi.metaEntry.getTable();
2203 TableInfo modTInfo = tablesInfo.get(tableName);
2204 if (modTInfo == null) {
2205 modTInfo = new TableInfo(tableName);
2206 }
2207 for (ServerName server : hbi.deployedOn) {
2208 modTInfo.addServer(server);
2209 }
2210
2211 if (!hbi.isSkipChecks()) {
2212 modTInfo.addRegionInfo(hbi);
2213 }
2214
2215 tablesInfo.put(tableName, modTInfo);
2216 }
2217
2218 loadTableInfosForTablesWithNoRegion();
2219
2220 logParallelMerge();
2221 for (TableInfo tInfo : tablesInfo.values()) {
2222 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2223 if (!tInfo.checkRegionChain(handler)) {
2224 errors.report("Found inconsistency in table " + tInfo.getName());
2225 }
2226 }
2227 return tablesInfo;
2228 }
2229
2230
2231
2232
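  /**
   * Loads table descriptors from the filesystem for tables that have no region entry in
   * hbase:meta yet, so that such tables are still represented in tablesInfo.
   */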
2233 private void loadTableInfosForTablesWithNoRegion() throws IOException {
2234 Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2235 for (HTableDescriptor htd : allTables.values()) {
2236 if (checkMetaOnly && !htd.isMetaTable()) {
2237 continue;
2238 }
2239
2240 TableName tableName = htd.getTableName();
2241 if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2242 TableInfo tableInfo = new TableInfo(tableName);
2243 tableInfo.htds.add(htd);
2244 tablesInfo.put(htd.getTableName(), tableInfo);
2245 }
2246 }
2247 }
2248
2249
2250
2251
2252
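  /**
   * Merges the HDFS contents of an overlapping ("contained") region into the target region
   * directory: store files of each column family are moved across, the .regioninfo file and
   * old WAL directory are skipped, and what remains of the contained region dir is then
   * sidelined. Returns the number of files moved.
   */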
2253 public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2254 int fileMoves = 0;
2255 String thread = Thread.currentThread().getName();
2256 LOG.debug("[" + thread + "] Contained region dir after close and pause");
2257 debugLsr(contained.getHdfsRegionDir());
2258
2259
2260 FileSystem fs = targetRegionDir.getFileSystem(getConf());
2261 FileStatus[] dirs = null;
2262 try {
2263 dirs = fs.listStatus(contained.getHdfsRegionDir());
2264 } catch (FileNotFoundException fnfe) {
2265
2266
2267 if (!fs.exists(contained.getHdfsRegionDir())) {
2268 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2269 + " is missing. Assuming already sidelined or moved.");
2270 } else {
2271 sidelineRegionDir(fs, contained);
2272 }
2273 return fileMoves;
2274 }
2275
2276 if (dirs == null) {
2277 if (!fs.exists(contained.getHdfsRegionDir())) {
2278 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2279 + " already sidelined.");
2280 } else {
2281 sidelineRegionDir(fs, contained);
2282 }
2283 return fileMoves;
2284 }
2285
2286 for (FileStatus cf : dirs) {
2287 Path src = cf.getPath();
2288 Path dst = new Path(targetRegionDir, src.getName());
2289
2290 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2291
2292 continue;
2293 }
2294
2295 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2296
2297 continue;
2298 }
2299
2300 LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2301
2302
2303
2304
2305 for (FileStatus hfile : fs.listStatus(src)) {
2306 boolean success = fs.rename(hfile.getPath(), dst);
2307 if (success) {
2308 fileMoves++;
2309 }
2310 }
2311       LOG.debug("[" + thread + "] Target region dir contents after move:");
2312 debugLsr(targetRegionDir);
2313 }
2314
2315
2316 sidelineRegionDir(fs, contained);
2317 LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2318 getSidelineDir());
2319 debugLsr(contained.getHdfsRegionDir());
2320
2321 return fileMoves;
2322 }
2323
2324
2325 static class WorkItemOverlapMerge implements Callable<Void> {
2326 private TableIntegrityErrorHandler handler;
2327 Collection<HbckInfo> overlapgroup;
2328
2329 WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2330 this.handler = handler;
2331 this.overlapgroup = overlapgroup;
2332 }
2333
2334 @Override
2335 public Void call() throws Exception {
2336 handler.handleOverlapGroup(overlapgroup);
2337 return null;
2338 }
2339 };
2340
2341
2342
2343
2344
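  /**
   * Holds the state gathered for a single table while it is being checked: the servers it
   * is deployed on, its regions (fed into a RegionSplitCalculator), regions with reversed
   * start/end keys, sidelined regions, overlap groups and the table descriptor(s) found.
   */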
2345 public class TableInfo {
2346 TableName tableName;
2347 TreeSet <ServerName> deployedOn;
2348
2349
2350 final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2351
2352
2353 final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2354
2355
2356 final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2357
2358
2359 final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2360
2361
2362 final Multimap<byte[], HbckInfo> overlapGroups =
2363 TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2364
2365
2366 private ImmutableList<HRegionInfo> regionsFromMeta = null;
2367
2368 TableInfo(TableName name) {
2369 this.tableName = name;
2370 deployedOn = new TreeSet <ServerName>();
2371 }
2372
2373
2374
2375
2376 private HTableDescriptor getHTD() {
2377 if (htds.size() == 1) {
2378 return (HTableDescriptor)htds.toArray()[0];
2379 } else {
2380 LOG.error("None/Multiple table descriptors found for table '"
2381 + tableName + "' regions: " + htds);
2382 }
2383 return null;
2384 }
2385
2386 public void addRegionInfo(HbckInfo hir) {
2387 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2388
2389 sc.add(hir);
2390 return;
2391 }
2392
2393
2394 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2395 errors.reportError(
2396 ERROR_CODE.REGION_CYCLE,
2397 String.format("The endkey for this region comes before the "
2398 + "startkey, startkey=%s, endkey=%s",
2399 Bytes.toStringBinary(hir.getStartKey()),
2400 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2401 backwards.add(hir);
2402 return;
2403 }
2404
2405
2406 sc.add(hir);
2407 }
2408
2409 public void addServer(ServerName server) {
2410 this.deployedOn.add(server);
2411 }
2412
2413 public TableName getName() {
2414 return tableName;
2415 }
2416
2417 public int getNumRegions() {
2418 return sc.getStarts().size() + backwards.size();
2419 }
2420
2421 public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2422
2423 if (regionsFromMeta == null) {
2424 List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2425 for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2426 if (tableName.equals(h.getTableName())) {
2427 if (h.metaEntry != null) {
2428 regions.add((HRegionInfo) h.metaEntry);
2429 }
2430 }
2431 }
2432 regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2433 }
2434
2435 return regionsFromMeta;
2436 }
2437
2438 private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2439 ErrorReporter errors;
2440
2441 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2442 this.errors = errors;
2443 setTableInfo(ti);
2444 }
2445
2446 @Override
2447 public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2448 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2449 "First region should start with an empty key. You need to "
2450               + "create a new region and regioninfo in HDFS to plug the hole.",
2451 getTableInfo(), hi);
2452 }
2453
2454 @Override
2455 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2456 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2457 "Last region should end with an empty key. You need to "
2458 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2459 }
2460
2461 @Override
2462 public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2463 errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2464 "Region has the same start and end key.", getTableInfo(), hi);
2465 }
2466
2467 @Override
2468 public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2469 byte[] key = r1.getStartKey();
2470
2471 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2472 "Multiple regions have the same startkey: "
2473 + Bytes.toStringBinary(key), getTableInfo(), r1);
2474 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2475 "Multiple regions have the same startkey: "
2476 + Bytes.toStringBinary(key), getTableInfo(), r2);
2477 }
2478
2479 @Override
2480 public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2481 errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2482 "There is an overlap in the region chain.",
2483 getTableInfo(), hi1, hi2);
2484 }
2485
2486 @Override
2487 public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2488 errors.reportError(
2489 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2490 "There is a hole in the region chain between "
2491 + Bytes.toStringBinary(holeStart) + " and "
2492 + Bytes.toStringBinary(holeStop)
2493 + ". You need to create a new .regioninfo and region "
2494 + "dir in hdfs to plug the hole.");
2495 }
2496 };
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
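    /**
     * Extends IntegrityFixSuggester so that, in addition to reporting region chain problems
     * (missing first/last regions, holes, overlaps), they are actively repaired on HDFS by
     * creating new empty regions or by merging/sidelining the overlapping ones.
     */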
2510 private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2511 Configuration conf;
2512
2513 boolean fixOverlaps = true;
2514
2515 HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2516 boolean fixHoles, boolean fixOverlaps) {
2517 super(ti, errors);
2518 this.conf = conf;
2519 this.fixOverlaps = fixOverlaps;
2520
2521 }
2522
2523
2524
2525
2526
2527
2528 @Override
2529 public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2530 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2531 "First region should start with an empty key. Creating a new " +
2532 "region and regioninfo in HDFS to plug the hole.",
2533 getTableInfo(), next);
2534 HTableDescriptor htd = getTableInfo().getHTD();
2535
2536 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2537 HConstants.EMPTY_START_ROW, next.getStartKey());
2538
2539
2540 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2541 LOG.info("Table region start key was not empty. Created new empty region: "
2542             + newRegion + " " + region);
2543 fixes++;
2544 }
2545
2546 @Override
2547 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2548 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2549 "Last region should end with an empty key. Creating a new "
2550 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2551 HTableDescriptor htd = getTableInfo().getHTD();
2552
2553 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2554 HConstants.EMPTY_START_ROW);
2555
2556 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2557 LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
2558 + " " + region);
2559 fixes++;
2560 }
2561
2562
2563
2564
2565
2566 @Override
2567 public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2568 errors.reportError(
2569 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2570 "There is a hole in the region chain between "
2571 + Bytes.toStringBinary(holeStartKey) + " and "
2572 + Bytes.toStringBinary(holeStopKey)
2573 + ". Creating a new regioninfo and region "
2574 + "dir in hdfs to plug the hole.");
2575 HTableDescriptor htd = getTableInfo().getHTD();
2576 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2577 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2578           LOG.info("Plugged hole by creating new empty region: " + newRegion + " " + region);
2579 fixes++;
2580 }
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
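      /**
       * Handles a group of overlapping regions: groups larger than maxMerge are optionally
       * sidelined (when sidelineBigOverlaps is set), smaller groups are merged into a single
       * new region.
       */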
2593 @Override
2594 public void handleOverlapGroup(Collection<HbckInfo> overlap)
2595 throws IOException {
2596 Preconditions.checkNotNull(overlap);
2597         Preconditions.checkArgument(overlap.size() > 0);
2598
2599 if (!this.fixOverlaps) {
2600 LOG.warn("Not attempting to repair overlaps.");
2601 return;
2602 }
2603
2604 if (overlap.size() > maxMerge) {
2605 LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2606 "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2607 if (sidelineBigOverlaps) {
2608
2609 sidelineBigOverlaps(overlap);
2610 }
2611 return;
2612 }
2613
2614 mergeOverlaps(overlap);
2615 }
2616
2617 void mergeOverlaps(Collection<HbckInfo> overlap)
2618 throws IOException {
2619 String thread = Thread.currentThread().getName();
2620 LOG.info("== [" + thread + "] Merging regions into one region: "
2621 + Joiner.on(",").join(overlap));
2622
2623 Pair<byte[], byte[]> range = null;
2624 for (HbckInfo hi : overlap) {
2625 if (range == null) {
2626 range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2627 } else {
2628 if (RegionSplitCalculator.BYTES_COMPARATOR
2629 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2630 range.setFirst(hi.getStartKey());
2631 }
2632 if (RegionSplitCalculator.BYTES_COMPARATOR
2633 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2634 range.setSecond(hi.getEndKey());
2635 }
2636 }
2637
2638 LOG.debug("[" + thread + "] Closing region before moving data around: " + hi);
2639 LOG.debug("[" + thread + "] Contained region dir before close");
2640 debugLsr(hi.getHdfsRegionDir());
2641 try {
2642 LOG.info("[" + thread + "] Closing region: " + hi);
2643 closeRegion(hi);
2644 } catch (IOException ioe) {
2645 LOG.warn("[" + thread + "] Was unable to close region " + hi
2646 + ". Just continuing... ", ioe);
2647 } catch (InterruptedException e) {
2648 LOG.warn("[" + thread + "] Was unable to close region " + hi
2649 + ". Just continuing... ", e);
2650 }
2651
2652 try {
2653 LOG.info("[" + thread + "] Offlining region: " + hi);
2654 offline(hi.getRegionName());
2655 } catch (IOException ioe) {
2656 LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2657 + ". Just continuing... ", ioe);
2658 }
2659 }
2660
2661
2662 HTableDescriptor htd = getTableInfo().getHTD();
2663
2664 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2665 range.getSecond());
2666 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2667 LOG.info("[" + thread + "] Created new empty container region: " +
2668 newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2669 debugLsr(region.getRegionFileSystem().getRegionDir());
2670
2671
2672         boolean didFix = false;
2673 Path target = region.getRegionFileSystem().getRegionDir();
2674 for (HbckInfo contained : overlap) {
2675           LOG.info("[" + thread + "] Merging " + contained + " into " + target);
2676 int merges = mergeRegionDirs(target, contained);
2677 if (merges > 0) {
2678 didFix = true;
2679 }
2680 }
2681 if (didFix) {
2682 fixes++;
2683 }
2684 }
2685
2686
2687
2688
2689
2690
2691
2692
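      /**
       * Sidelines the largest ranges of an overlap group that is too big to merge, up to
       * maxOverlapsToSideline regions: each selected region is closed, offlined on the
       * master, and its HDFS directory moved to the sideline dir for later bulk loading.
       */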
2693 void sidelineBigOverlaps(
2694 Collection<HbckInfo> bigOverlap) throws IOException {
2695 int overlapsToSideline = bigOverlap.size() - maxMerge;
2696 if (overlapsToSideline > maxOverlapsToSideline) {
2697 overlapsToSideline = maxOverlapsToSideline;
2698 }
2699 List<HbckInfo> regionsToSideline =
2700 RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2701 FileSystem fs = FileSystem.get(conf);
2702 for (HbckInfo regionToSideline: regionsToSideline) {
2703 try {
2704 LOG.info("Closing region: " + regionToSideline);
2705 closeRegion(regionToSideline);
2706 } catch (IOException ioe) {
2707 LOG.warn("Was unable to close region " + regionToSideline
2708 + ". Just continuing... ", ioe);
2709 } catch (InterruptedException e) {
2710 LOG.warn("Was unable to close region " + regionToSideline
2711 + ". Just continuing... ", e);
2712 }
2713
2714 try {
2715 LOG.info("Offlining region: " + regionToSideline);
2716 offline(regionToSideline.getRegionName());
2717 } catch (IOException ioe) {
2718 LOG.warn("Unable to offline region from master: " + regionToSideline
2719 + ". Just continuing... ", ioe);
2720 }
2721
2722 LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2723 Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2724 if (sidelineRegionDir != null) {
2725 sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2726 LOG.info("After sidelined big overlapped region: "
2727 + regionToSideline.getRegionNameAsString()
2728 + " to " + sidelineRegionDir.toString());
2729 fixes++;
2730 }
2731 }
2732 }
2733 }
2734
2735
2736
2737
2738
2739
2740
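    /**
     * Checks that the region chain of this table covers the key space exactly once: reports
     * an empty region chain, a first region whose start key is not empty, a last region whose
     * end key is not empty, degenerate regions, duplicate start keys, overlaps and holes, and
     * hands overlap groups to the handler (serially or in parallel). Returns true if no new
     * errors were reported.
     */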
2741 public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2742
2743
2744
2745 if (disabledTables.contains(this.tableName)) {
2746 return true;
2747 }
2748 int originalErrorsCount = errors.getErrorList().size();
2749 Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2750 SortedSet<byte[]> splits = sc.getSplits();
2751
2752 byte[] prevKey = null;
2753 byte[] problemKey = null;
2754
2755 if (splits.size() == 0) {
2756
2757 handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
2758 }
2759
2760 for (byte[] key : splits) {
2761 Collection<HbckInfo> ranges = regions.get(key);
2762 if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2763 for (HbckInfo rng : ranges) {
2764 handler.handleRegionStartKeyNotEmpty(rng);
2765 }
2766 }
2767
2768
2769 for (HbckInfo rng : ranges) {
2770
2771 byte[] endKey = rng.getEndKey();
2772 endKey = (endKey.length == 0) ? null : endKey;
2773 if (Bytes.equals(rng.getStartKey(),endKey)) {
2774 handler.handleDegenerateRegion(rng);
2775 }
2776 }
2777
2778 if (ranges.size() == 1) {
2779
2780 if (problemKey != null) {
2781 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2782 }
2783 problemKey = null;
2784 } else if (ranges.size() > 1) {
2785
2786
2787 if (problemKey == null) {
2788
2789 LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2790 problemKey = key;
2791 }
2792 overlapGroups.putAll(problemKey, ranges);
2793
2794
2795 ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2796
2797 for (HbckInfo r1 : ranges) {
2798 subRange.remove(r1);
2799 for (HbckInfo r2 : subRange) {
2800             if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey()) == 0) {
2801               handler.handleDuplicateStartKeys(r1, r2);
2802 } else {
2803
2804 handler.handleOverlapInRegionChain(r1, r2);
2805 }
2806 }
2807 }
2808
2809 } else if (ranges.size() == 0) {
2810 if (problemKey != null) {
2811 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2812 }
2813 problemKey = null;
2814
2815 byte[] holeStopKey = sc.getSplits().higher(key);
2816
2817 if (holeStopKey != null) {
2818
2819 handler.handleHoleInRegionChain(key, holeStopKey);
2820 }
2821 }
2822 prevKey = key;
2823 }
2824
2825
2826
2827 if (prevKey != null) {
2828 handler.handleRegionEndKeyNotEmpty(prevKey);
2829 }
2830
2831
2832 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
2833 boolean ok = handleOverlapsParallel(handler, prevKey);
2834 if (!ok) {
2835 return false;
2836 }
2837 } else {
2838 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2839 handler.handleOverlapGroup(overlap);
2840 }
2841 }
2842
2843 if (details) {
2844
2845 errors.print("---- Table '" + this.tableName
2846 + "': region split map");
2847 dump(splits, regions);
2848 errors.print("---- Table '" + this.tableName
2849 + "': overlap groups");
2850 dumpOverlapProblems(overlapGroups);
2851 errors.print("There are " + overlapGroups.keySet().size()
2852 + " overlap groups with " + overlapGroups.size()
2853 + " overlapping regions");
2854 }
2855 if (!sidelinedRegions.isEmpty()) {
2856 LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2857 errors.print("---- Table '" + this.tableName
2858 + "': sidelined big overlapped regions");
2859 dumpSidelinedRegions(sidelinedRegions);
2860 }
2861 return errors.getErrorList().size() == originalErrorsCount;
2862 }
2863
2864 private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
2865 throws IOException {
2866
2867
2868 List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
2869 List<Future<Void>> rets;
2870 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2871
2872 merges.add(new WorkItemOverlapMerge(overlap, handler));
2873 }
2874 try {
2875 rets = executor.invokeAll(merges);
2876 } catch (InterruptedException e) {
2877 LOG.error("Overlap merges were interrupted", e);
2878 return false;
2879 }
2880 for(int i=0; i<merges.size(); i++) {
2881 WorkItemOverlapMerge work = merges.get(i);
2882 Future<Void> f = rets.get(i);
2883 try {
2884 f.get();
2885 } catch(ExecutionException e) {
2886           LOG.warn("Failed to merge overlap group " + work, e.getCause());
2887 } catch (InterruptedException e) {
2888 LOG.error("Waiting for overlap merges was interrupted", e);
2889 return false;
2890 }
2891 }
2892 return true;
2893 }
2894
2895
2896
2897
2898
2899
2900
2901 void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2902
2903 StringBuilder sb = new StringBuilder();
2904 for (byte[] k : splits) {
2905 sb.setLength(0);
2906 sb.append(Bytes.toStringBinary(k) + ":\t");
2907 for (HbckInfo r : regions.get(k)) {
2908 sb.append("[ "+ r.toString() + ", "
2909 + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2910 }
2911 errors.print(sb.toString());
2912 }
2913 }
2914 }
2915
2916 public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2917
2918
2919 for (byte[] k : regions.keySet()) {
2920 errors.print(Bytes.toStringBinary(k) + ":");
2921 for (HbckInfo r : regions.get(k)) {
2922 errors.print("[ " + r.toString() + ", "
2923 + Bytes.toStringBinary(r.getEndKey()) + "]");
2924 }
2925 errors.print("----");
2926 }
2927 }
2928
2929 public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2930 for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
2931 TableName tableName = entry.getValue().getTableName();
2932 Path path = entry.getKey();
2933 errors.print("This sidelined region dir should be bulk loaded: "
2934 + path.toString());
2935 errors.print("Bulk load command looks like: "
2936 + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
2937 + path.toUri().getPath() + " "+ tableName);
2938 }
2939 }
2940
2941 public Multimap<byte[], HbckInfo> getOverlapGroups(
2942 TableName table) {
2943 TableInfo ti = tablesInfo.get(table);
2944 return ti.overlapGroups;
2945 }
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
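  /**
   * Returns the table descriptors of all user tables whose first region (empty start key)
   * has not been modified within the configured timelag; tables whose first region was
   * modified more recently are skipped and counted in numSkipped.
   */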
2956 HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2957 List<TableName> tableNames = new ArrayList<TableName>();
2958 long now = EnvironmentEdgeManager.currentTimeMillis();
2959
2960 for (HbckInfo hbi : regionInfoMap.values()) {
2961 MetaEntry info = hbi.metaEntry;
2962
2963
2964
2965 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2966 if (info.modTime + timelag < now) {
2967 tableNames.add(info.getTable());
2968 } else {
2969 numSkipped.incrementAndGet();
2970 }
2971 }
2972 }
2973 return getHTableDescriptors(tableNames);
2974 }
2975
2976 HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
2977 HTableDescriptor[] htd = new HTableDescriptor[0];
2978 try {
2979 LOG.info("getHTableDescriptors == tableNames => " + tableNames);
2980 htd = new HBaseAdmin(getConf()).getTableDescriptorsByTableName(tableNames);
2981 } catch (IOException e) {
2982 LOG.debug("Exception getting table descriptors", e);
2983 }
2984 return htd;
2985 }
2986
2987
2988
2989
2990
2991
2992 private synchronized HbckInfo getOrCreateInfo(String name) {
2993 HbckInfo hbi = regionInfoMap.get(name);
2994 if (hbi == null) {
2995 hbi = new HbckInfo(null);
2996 regionInfoMap.put(name, hbi);
2997 }
2998 return hbi;
2999 }
3000
3001 private void checkAndFixTableLocks() throws IOException {
3002 ZooKeeperWatcher zkw = createZooKeeperWatcher();
3003
3004 try {
3005 TableLockChecker checker = new TableLockChecker(createZooKeeperWatcher(), errors);
3006 checker.checkTableLocks();
3007
3008 if (this.fixTableLocks) {
3009 checker.fixExpiredTableLocks();
3010 }
3011 } finally {
3012 zkw.close();
3013 }
3014 }
3015
3016
3017
3018
3019
3020
3021
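  /**
   * Looks for tables left in the ENABLING state in ZooKeeper that were not found by the
   * scan, reports them as orphaned table znodes, and, when fixTableZNodes is enabled,
   * moves those znodes to the DISABLED state.
   */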
3022 private void checkAndFixOrphanedTableZNodes()
3023 throws IOException, KeeperException, InterruptedException {
3024 ZooKeeperWatcher zkw = createZooKeeperWatcher();
3025 try {
3026 ZKTable zkTable = new ZKTable(zkw);
3027 Set<TableName> enablingTables = zkTable.getEnablingTables(zkw);
3028 String msg;
3029 TableInfo tableInfo;
3030
3031 for (TableName tableName : enablingTables) {
3032
3033 tableInfo = tablesInfo.get(tableName);
3034 if (tableInfo != null) {
3035
3036 continue;
3037 }
3038
3039 msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
3040 LOG.warn(msg);
3041 orphanedTableZNodes.add(tableName);
3042 errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
3043 }
3044
3045 if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
3046 for (TableName tableName : orphanedTableZNodes) {
3047
3048
3049
3050
3051 zkTable.setDisabledTable(tableName);
3052 }
3053 }
3054 } finally {
3055 zkw.close();
3056 }
3057 }
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
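  /**
   * Checks that hbase:meta is assigned to exactly one region server. If it is unassigned or
   * multiply assigned and -fixAssignments is enabled, attempts to repair the assignment.
   * Returns true only if hbase:meta is correctly deployed.
   */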
3068 boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3069 List<HbckInfo> metaRegions = Lists.newArrayList();
3070 for (HbckInfo value : regionInfoMap.values()) {
3071 if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3072 metaRegions.add(value);
3073 }
3074 }
3075
3076
3077
3078 List<ServerName> servers = new ArrayList<ServerName>();
3079 HbckInfo metaHbckInfo = null;
3080 if (!metaRegions.isEmpty()) {
3081 metaHbckInfo = metaRegions.get(0);
3082 servers = metaHbckInfo.deployedOn;
3083 }
3084 if (servers.size() != 1) {
3085 if (servers.size() == 0) {
3086           errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta is not deployed on any region server.");
3087 if (shouldFixAssignments()) {
3088 errors.print("Trying to fix a problem with hbase:meta..");
3089 setShouldRerun();
3090
3091 HBaseFsckRepair.fixUnassigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
3092 HBaseFsckRepair.waitUntilAssigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
3093 }
3094 } else if (servers.size() > 1) {
3095         errors.reportError(ERROR_CODE.MULTI_META_REGION,
3096             "hbase:meta is deployed on more than one region server.");
3097 if (shouldFixAssignments()) {
3098 if (metaHbckInfo == null) {
3099 errors.print(
3100 "Unable to fix problem with hbase:meta due to hbase:meta region info missing");
3101 return false;
3102 }
3103 errors.print("Trying to fix a problem with hbase:meta..");
3104 setShouldRerun();
3105
3106 HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
3107 }
3108 }
3109
3110 return false;
3111 }
3112
3113 return true;
3114 }
3115
3116
3117
3118
3119
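  /**
   * Scans hbase:meta and records a MetaEntry for every region of the included tables,
   * flagging rows with an empty REGIONINFO_QUALIFIER and remembering regions that were
   * recently merged so their leftover files are not reported as problems.
   */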
3120 boolean loadMetaEntries() throws IOException {
3121 MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
3122 int countRecord = 1;
3123
3124
3125 final Comparator<Cell> comp = new Comparator<Cell>() {
3126 @Override
3127 public int compare(Cell k1, Cell k2) {
3128           return Long.signum(k1.getTimestamp() - k2.getTimestamp()); // avoid int truncation of the long difference
3129 }
3130 };
3131
3132 @Override
3133 public boolean processRow(Result result) throws IOException {
3134 try {
3135
3136
3137 long ts = Collections.max(result.listCells(), comp).getTimestamp();
3138 Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(result);
3139 if (pair == null || pair.getFirst() == null) {
3140 emptyRegionInfoQualifiers.add(result);
3141 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3142 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3143 return true;
3144 }
3145 ServerName sn = null;
3146 if (pair.getSecond() != null) {
3147 sn = pair.getSecond();
3148 }
3149 HRegionInfo hri = pair.getFirst();
3150 if (!(isTableIncluded(hri.getTable())
3151 || hri.isMetaRegion())) {
3152 return true;
3153 }
3154 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
3155 MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3156 HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3157 if (previous == null) {
3158 regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3159 } else if (previous.metaEntry == null) {
3160 previous.metaEntry = m;
3161 } else {
3162             throw new IOException("Two entries in hbase:meta are the same: " + previous);
3163 }
3164
3165 PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
3166 for (HRegionInfo mergeRegion : new HRegionInfo[] {
3167 mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3168 if (mergeRegion != null) {
3169
3170 HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3171 hbInfo.setMerged(true);
3172 }
3173 }
3174
3175
3176 if (countRecord % 100 == 0) {
3177 errors.progress();
3178 }
3179 countRecord++;
3180 return true;
3181 } catch (RuntimeException e) {
3182 LOG.error("Result=" + result);
3183 throw e;
3184 }
3185 }
3186 };
3187 if (!checkMetaOnly) {
3188
3189 MetaScanner.metaScan(getConf(), visitor);
3190 }
3191
3192 errors.print("");
3193 return true;
3194 }
3195
3196
3197
3198
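  /**
   * An HRegionInfo as read from hbase:meta, annotated with the server it is assigned to,
   * the modification time of the row, and the daughter regions if it is a split parent.
   */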
3199 static class MetaEntry extends HRegionInfo {
3200 ServerName regionServer;
3201 long modTime;
3202 HRegionInfo splitA, splitB;
3203
3204 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3205 this(rinfo, regionServer, modTime, null, null);
3206 }
3207
3208 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3209 HRegionInfo splitA, HRegionInfo splitB) {
3210 super(rinfo);
3211 this.regionServer = regionServer;
3212 this.modTime = modTime;
3213 this.splitA = splitA;
3214 this.splitB = splitB;
3215 }
3216
3217 @Override
3218 public boolean equals(Object o) {
3219 boolean superEq = super.equals(o);
3220 if (!superEq) {
3221 return superEq;
3222 }
3223
3224 MetaEntry me = (MetaEntry) o;
3225 if (!regionServer.equals(me.regionServer)) {
3226 return false;
3227 }
3228 return (modTime == me.modTime);
3229 }
3230
3231 @Override
3232 public int hashCode() {
3233 int hash = Arrays.hashCode(getRegionName());
3234 hash ^= getRegionId();
3235 hash ^= Arrays.hashCode(getStartKey());
3236 hash ^= Arrays.hashCode(getEndKey());
3237 hash ^= Boolean.valueOf(isOffline()).hashCode();
3238 hash ^= getTable().hashCode();
3239 if (regionServer != null) {
3240 hash ^= regionServer.hashCode();
3241 }
3242 hash ^= modTime;
3243 return hash;
3244 }
3245 }
3246
3247
3248
3249
3250 static class HdfsEntry {
3251 HRegionInfo hri;
3252 Path hdfsRegionDir = null;
3253 long hdfsRegionDirModTime = 0;
3254 boolean hdfsRegioninfoFilePresent = false;
3255 boolean hdfsOnlyEdits = false;
3256 }
3257
3258
3259
3260
3261 static class OnlineEntry {
3262 HRegionInfo hri;
3263 ServerName hsa;
3264
3265 @Override
3266 public String toString() {
3267 return hsa.toString() + ";" + hri.getRegionNameAsString();
3268 }
3269 }
3270
3271
3272
3273
3274
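  /**
   * Aggregates everything hbck knows about one region: its hbase:meta entry, its HDFS entry
   * and the servers it is reported deployed on. Implements KeyRange so instances can be fed
   * to the RegionSplitCalculator.
   */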
3275 public static class HbckInfo implements KeyRange {
3276 private MetaEntry metaEntry = null;
3277 private HdfsEntry hdfsEntry = null;
3278 private List<OnlineEntry> deployedEntries = Lists.newArrayList();
3279 private List<ServerName> deployedOn = Lists.newArrayList();
3280 private boolean skipChecks = false;
3281 private boolean isMerged = false;
3282
3283 HbckInfo(MetaEntry metaEntry) {
3284 this.metaEntry = metaEntry;
3285 }
3286
3287 public synchronized void addServer(HRegionInfo hri, ServerName server) {
3288 OnlineEntry rse = new OnlineEntry() ;
3289 rse.hri = hri;
3290 rse.hsa = server;
3291 this.deployedEntries.add(rse);
3292 this.deployedOn.add(server);
3293 }
3294
3295 @Override
3296 public synchronized String toString() {
3297 StringBuilder sb = new StringBuilder();
3298 sb.append("{ meta => ");
3299 sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3300       sb.append(", hdfs => " + getHdfsRegionDir());
3301       sb.append(", deployed => " + Joiner.on(", ").join(deployedEntries));
3302 sb.append(" }");
3303 return sb.toString();
3304 }
3305
3306 @Override
3307 public byte[] getStartKey() {
3308 if (this.metaEntry != null) {
3309 return this.metaEntry.getStartKey();
3310 } else if (this.hdfsEntry != null) {
3311 return this.hdfsEntry.hri.getStartKey();
3312 } else {
3313 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3314 return null;
3315 }
3316 }
3317
3318 @Override
3319 public byte[] getEndKey() {
3320 if (this.metaEntry != null) {
3321 return this.metaEntry.getEndKey();
3322 } else if (this.hdfsEntry != null) {
3323 return this.hdfsEntry.hri.getEndKey();
3324 } else {
3325           LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3326 return null;
3327 }
3328 }
3329
3330 public TableName getTableName() {
3331 if (this.metaEntry != null) {
3332 return this.metaEntry.getTable();
3333 } else if (this.hdfsEntry != null) {
3334
3335
3336 Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3337 return FSUtils.getTableName(tableDir);
3338 } else {
3339
3340
3341 return null;
3342 }
3343 }
3344
3345 public String getRegionNameAsString() {
3346 if (metaEntry != null) {
3347 return metaEntry.getRegionNameAsString();
3348 } else if (hdfsEntry != null) {
3349 if (hdfsEntry.hri != null) {
3350 return hdfsEntry.hri.getRegionNameAsString();
3351 }
3352 }
3353 return null;
3354 }
3355
3356 public byte[] getRegionName() {
3357 if (metaEntry != null) {
3358 return metaEntry.getRegionName();
3359 } else if (hdfsEntry != null) {
3360 return hdfsEntry.hri.getRegionName();
3361 } else {
3362 return null;
3363 }
3364 }
3365
3366 Path getHdfsRegionDir() {
3367 if (hdfsEntry == null) {
3368 return null;
3369 }
3370 return hdfsEntry.hdfsRegionDir;
3371 }
3372
3373 boolean containsOnlyHdfsEdits() {
3374 if (hdfsEntry == null) {
3375 return false;
3376 }
3377 return hdfsEntry.hdfsOnlyEdits;
3378 }
3379
3380 boolean isHdfsRegioninfoPresent() {
3381 if (hdfsEntry == null) {
3382 return false;
3383 }
3384 return hdfsEntry.hdfsRegioninfoFilePresent;
3385 }
3386
3387 long getModTime() {
3388 if (hdfsEntry == null) {
3389 return 0;
3390 }
3391 return hdfsEntry.hdfsRegionDirModTime;
3392 }
3393
3394 HRegionInfo getHdfsHRI() {
3395 if (hdfsEntry == null) {
3396 return null;
3397 }
3398 return hdfsEntry.hri;
3399 }
3400
3401 public void setSkipChecks(boolean skipChecks) {
3402 this.skipChecks = skipChecks;
3403 }
3404
3405 public boolean isSkipChecks() {
3406 return skipChecks;
3407 }
3408
3409 public void setMerged(boolean isMerged) {
3410 this.isMerged = isMerged;
3411 }
3412
3413 public boolean isMerged() {
3414 return this.isMerged;
3415 }
3416 }
3417
3418 final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3419 @Override
3420 public int compare(HbckInfo l, HbckInfo r) {
3421 if (l == r) {
3422
3423 return 0;
3424 }
3425
3426 int tableCompare = l.getTableName().compareTo(r.getTableName());
3427 if (tableCompare != 0) {
3428 return tableCompare;
3429 }
3430
3431 int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3432 l.getStartKey(), r.getStartKey());
3433 if (startComparison != 0) {
3434 return startComparison;
3435 }
3436
3437
3438 byte[] endKey = r.getEndKey();
3439 endKey = (endKey.length == 0) ? null : endKey;
3440 byte[] endKey2 = l.getEndKey();
3441 endKey2 = (endKey2.length == 0) ? null : endKey2;
3442 int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3443 endKey2, endKey);
3444
3445 if (endComparison != 0) {
3446 return endComparison;
3447 }
3448
3449
3450
3451 if (l.hdfsEntry == null && r.hdfsEntry == null) {
3452 return 0;
3453 }
3454 if (l.hdfsEntry == null && r.hdfsEntry != null) {
3455 return 1;
3456 }
3457
3458 if (r.hdfsEntry == null) {
3459 return -1;
3460 }
3461
3462       return Long.signum(l.hdfsEntry.hri.getRegionId() - r.hdfsEntry.hri.getRegionId());
3463 }
3464 };
3465
3466
3467
3468
3469 private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3470 StringBuilder sb = new StringBuilder();
3471 int numOfSkippedRegions;
3472 errors.print("Summary:");
3473 for (TableInfo tInfo : tablesInfo.values()) {
3474 numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
3475 skippedRegions.get(tInfo.getName()).size() : 0;
3476
3477 if (errors.tableHasErrors(tInfo)) {
3478 errors.print("Table " + tInfo.getName() + " is inconsistent.");
3479 } else if (numOfSkippedRegions > 0){
3480 errors.print("Table " + tInfo.getName() + " is okay (with "
3481 + numOfSkippedRegions + " skipped regions).");
3482 }
3483 else {
3484 errors.print("Table " + tInfo.getName() + " is okay.");
3485 }
3486 errors.print(" Number of regions: " + tInfo.getNumRegions());
3487 if (numOfSkippedRegions > 0) {
3488 Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
3489         errors.print("      Number of skipped regions: " + numOfSkippedRegions);
3490         errors.print("      List of skipped regions:");
3491         for (String sr : skippedRegionStrings) {
3492           errors.print("        " + sr);
3493 }
3494 }
3495 sb.setLength(0);
3496 sb.append(" Deployed on: ");
3497 for (ServerName server : tInfo.deployedOn) {
3498 sb.append(" " + server.toString());
3499 }
3500 errors.print(sb.toString());
3501 }
3502 }
3503
3504 static ErrorReporter getErrorReporter(
3505 final Configuration conf) throws ClassNotFoundException {
3506 Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3507 return ReflectionUtils.newInstance(reporter, conf);
3508 }
3509
3510 public interface ErrorReporter {
3511 enum ERROR_CODE {
3512 UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3513 NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
3514 MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3515 FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3516 HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3517 ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3518 WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR
3519 }
3520 void clear();
3521 void report(String message);
3522 void reportError(String message);
3523 void reportError(ERROR_CODE errorCode, String message);
3524 void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3525 void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3526 void reportError(
3527 ERROR_CODE errorCode,
3528 String message,
3529 TableInfo table,
3530 HbckInfo info1,
3531 HbckInfo info2
3532 );
3533 int summarize();
3534 void detail(String details);
3535 ArrayList<ERROR_CODE> getErrorList();
3536 void progress();
3537 void print(String message);
3538 void resetErrors();
3539 boolean tableHasErrors(TableInfo table);
3540 }
3541
3542 static class PrintingErrorReporter implements ErrorReporter {
3543 public int errorCount = 0;
3544 private int showProgress;
3545
3546 private static final int progressThreshold = 100;
3547
3548 Set<TableInfo> errorTables = new HashSet<TableInfo>();
3549
3550
3551 private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3552
3553 @Override
3554 public void clear() {
3555 errorTables.clear();
3556 errorList.clear();
3557 errorCount = 0;
3558 }
3559
3560 @Override
3561 public synchronized void reportError(ERROR_CODE errorCode, String message) {
3562 if (errorCode == ERROR_CODE.WRONG_USAGE) {
3563 System.err.println(message);
3564 return;
3565 }
3566
3567 errorList.add(errorCode);
3568 if (!summary) {
3569 System.out.println("ERROR: " + message);
3570 }
3571 errorCount++;
3572 showProgress = 0;
3573 }
3574
3575 @Override
3576 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3577 errorTables.add(table);
3578 reportError(errorCode, message);
3579 }
3580
3581 @Override
3582 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3583 HbckInfo info) {
3584 errorTables.add(table);
3585 String reference = "(region " + info.getRegionNameAsString() + ")";
3586 reportError(errorCode, reference + " " + message);
3587 }
3588
3589 @Override
3590 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3591 HbckInfo info1, HbckInfo info2) {
3592 errorTables.add(table);
3593 String reference = "(regions " + info1.getRegionNameAsString()
3594 + " and " + info2.getRegionNameAsString() + ")";
3595 reportError(errorCode, reference + " " + message);
3596 }
3597
3598 @Override
3599 public synchronized void reportError(String message) {
3600 reportError(ERROR_CODE.UNKNOWN, message);
3601 }
3602
3603
3604
3605
3606
3607
3608 @Override
3609 public synchronized void report(String message) {
3610 if (! summary) {
3611 System.out.println("ERROR: " + message);
3612 }
3613 showProgress = 0;
3614 }
3615
3616 @Override
3617 public synchronized int summarize() {
3618 System.out.println(Integer.toString(errorCount) +
3619 " inconsistencies detected.");
3620 if (errorCount == 0) {
3621 System.out.println("Status: OK");
3622 return 0;
3623 } else {
3624 System.out.println("Status: INCONSISTENT");
3625 return -1;
3626 }
3627 }
3628
3629 @Override
3630 public ArrayList<ERROR_CODE> getErrorList() {
3631 return errorList;
3632 }
3633
3634 @Override
3635 public synchronized void print(String message) {
3636 if (!summary) {
3637 System.out.println(message);
3638 }
3639 }
3640
3641 @Override
3642 public boolean tableHasErrors(TableInfo table) {
3643 return errorTables.contains(table);
3644 }
3645
3646 @Override
3647 public void resetErrors() {
3648 errorCount = 0;
3649 }
3650
3651 @Override
3652 public synchronized void detail(String message) {
3653 if (details) {
3654 System.out.println(message);
3655 }
3656 showProgress = 0;
3657 }
3658
3659 @Override
3660 public synchronized void progress() {
3661 if (showProgress++ == progressThreshold) {
3662 if (!summary) {
3663 System.out.print(".");
3664 }
3665 showProgress = 0;
3666 }
3667 }
3668 }
3669
3670
3671
3672
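  /**
   * Work item that contacts one region server and records every online region it reports
   * (filtered down to the included tables) into the shared region info map.
   */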
3673 static class WorkItemRegion implements Callable<Void> {
3674 private HBaseFsck hbck;
3675 private ServerName rsinfo;
3676 private ErrorReporter errors;
3677 private HConnection connection;
3678
3679 WorkItemRegion(HBaseFsck hbck, ServerName info,
3680 ErrorReporter errors, HConnection connection) {
3681 this.hbck = hbck;
3682 this.rsinfo = info;
3683 this.errors = errors;
3684 this.connection = connection;
3685 }
3686
3687 @Override
3688 public synchronized Void call() throws IOException {
3689 errors.progress();
3690 try {
3691 BlockingInterface server = connection.getAdmin(rsinfo);
3692
3693
3694 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3695 regions = filterRegions(regions);
3696
3697 if (details) {
3698 errors.detail("RegionServer: " + rsinfo.getServerName() +
3699 " number of regions: " + regions.size());
3700 for (HRegionInfo rinfo: regions) {
3701 errors.detail(" " + rinfo.getRegionNameAsString() +
3702 " id: " + rinfo.getRegionId() +
3703 " encoded_name: " + rinfo.getEncodedName() +
3704 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3705 " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3706 }
3707 }
3708
3709
3710 for (HRegionInfo r:regions) {
3711 HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3712 hbi.addServer(r, rsinfo);
3713 }
3714 } catch (IOException e) {
3715 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3716 " Unable to fetch region information. " + e);
3717 throw e;
3718 }
3719 return null;
3720 }
3721
3722 private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3723 List<HRegionInfo> ret = Lists.newArrayList();
3724 for (HRegionInfo hri : regions) {
3725 if (hri.isMetaTable() || (!hbck.checkMetaOnly
3726 && hbck.isTableIncluded(hri.getTable()))) {
3727 ret.add(hri);
3728 }
3729 }
3730 return ret;
3731 }
3732 }
3733
3734
3735
3736
3737
3738 static class WorkItemHdfsDir implements Callable<Void> {
3739 private HBaseFsck hbck;
3740 private FileStatus tableDir;
3741 private ErrorReporter errors;
3742 private FileSystem fs;
3743
3744 WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3745 FileStatus status) {
3746 this.hbck = hbck;
3747 this.fs = fs;
3748 this.tableDir = status;
3749 this.errors = errors;
3750 }
3751
3752 @Override
3753 public synchronized Void call() throws IOException {
3754 try {
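        // Hedged sketch only: the body of this work item is not shown here. Its intent is
        // to walk the table directory on HDFS and record an HdfsEntry (region dir and its
        // modification time) for every region directory found. This sketch is NOT the
        // authoritative implementation; the real version also tracks whether a .regioninfo
        // file is present and whether the region dir holds nothing but recovered edits.
        FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
        for (FileStatus regionDir : regionDirs) {
          errors.progress();
          String encodedName = regionDir.getPath().getName();
          // Skip entries that are clearly not region dirs (e.g. ".tabledesc", ".tmp").
          if (encodedName.startsWith(".")) {
            continue;
          }
          HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
          HdfsEntry he = new HdfsEntry();
          he.hdfsRegionDir = regionDir.getPath();
          he.hdfsRegionDirModTime = regionDir.getModificationTime();
          synchronized (hbi) {
            hbi.hdfsEntry = he;
          }
        }
      } catch (IOException e) {
        // Propagate so that the caller can account for the failed table dir.
        LOG.warn("Could not load region dir info for table dir " + tableDir.getPath(), e);
        throw e;
      }
      return null;
    }
  }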
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
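  /**
   * Work item that loads the .regioninfo file of a single region from HDFS; regions whose
   * .regioninfo cannot be read are reported as orphan HDFS regions and queued in
   * orphanHdfsDirs.
   */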
3811 static class WorkItemHdfsRegionInfo implements Callable<Void> {
3812 private HbckInfo hbi;
3813 private HBaseFsck hbck;
3814 private ErrorReporter errors;
3815
3816 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3817 this.hbi = hbi;
3818 this.hbck = hbck;
3819 this.errors = errors;
3820 }
3821
3822 @Override
3823 public synchronized Void call() throws IOException {
3824
3825 if (hbi.getHdfsHRI() == null) {
3826 try {
3827 errors.progress();
3828 hbck.loadHdfsRegioninfo(hbi);
3829 } catch (IOException ioe) {
3830 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3831 + hbi.getTableName() + " in hdfs dir "
3832 + hbi.getHdfsRegionDir()
3833               + "! The file may be missing, or have an invalid format or version. Treating it as "
3834               + "an orphaned region dir.";
3835 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3836 try {
3837 hbck.debugLsr(hbi.getHdfsRegionDir());
3838 } catch (IOException ioe2) {
3839 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3840 throw ioe2;
3841 }
3842 hbck.orphanHdfsDirs.add(hbi);
3843 throw ioe;
3844 }
3845 }
3846 return null;
3847 }
3848 };
3849
3850
3851
3852
3853
3854 public static void setDisplayFullReport() {
3855 details = true;
3856 }
3857
3858
3859
3860
3861
3862 void setSummary() {
3863 summary = true;
3864 }
3865
3866
3867
3868
3869
3870 void setCheckMetaOnly() {
3871 checkMetaOnly = true;
3872 }
3873
3874
3875
3876
3877 void setRegionBoundariesCheck() {
3878 checkRegionBoundaries = true;
3879 }
3880
3881
3882
3883
3884
3885 public void setFixTableLocks(boolean shouldFix) {
3886 fixTableLocks = shouldFix;
3887 fixAny |= shouldFix;
3888 }
3889
3890
3891
3892
3893
3894 public void setFixTableZNodes(boolean shouldFix) {
3895 fixTableZNodes = shouldFix;
3896 fixAny |= shouldFix;
3897 }
3898
3899
3900
3901
3902
3903
3904
3905 void setShouldRerun() {
3906 rerun = true;
3907 }
3908
3909 boolean shouldRerun() {
3910 return rerun;
3911 }
3912
3913
3914
3915
3916
3917 public void setFixAssignments(boolean shouldFix) {
3918 fixAssignments = shouldFix;
3919 fixAny |= shouldFix;
3920 }
3921
3922 boolean shouldFixAssignments() {
3923 return fixAssignments;
3924 }
3925
3926 public void setFixMeta(boolean shouldFix) {
3927 fixMeta = shouldFix;
3928 fixAny |= shouldFix;
3929 }
3930
3931 boolean shouldFixMeta() {
3932 return fixMeta;
3933 }
3934
3935 public void setFixEmptyMetaCells(boolean shouldFix) {
3936 fixEmptyMetaCells = shouldFix;
3937 fixAny |= shouldFix;
3938 }
3939
3940 boolean shouldFixEmptyMetaCells() {
3941 return fixEmptyMetaCells;
3942 }
3943
3944 public void setCheckHdfs(boolean checking) {
3945 checkHdfs = checking;
3946 }
3947
3948 boolean shouldCheckHdfs() {
3949 return checkHdfs;
3950 }
3951
3952 public void setFixHdfsHoles(boolean shouldFix) {
3953 fixHdfsHoles = shouldFix;
3954 fixAny |= shouldFix;
3955 }
3956
3957 boolean shouldFixHdfsHoles() {
3958 return fixHdfsHoles;
3959 }
3960
3961 public void setFixTableOrphans(boolean shouldFix) {
3962 fixTableOrphans = shouldFix;
3963 fixAny |= shouldFix;
3964 }
3965
3966 boolean shouldFixTableOrphans() {
3967 return fixTableOrphans;
3968 }
3969
3970 public void setFixHdfsOverlaps(boolean shouldFix) {
3971 fixHdfsOverlaps = shouldFix;
3972 fixAny |= shouldFix;
3973 }
3974
3975 boolean shouldFixHdfsOverlaps() {
3976 return fixHdfsOverlaps;
3977 }
3978
3979 public void setFixHdfsOrphans(boolean shouldFix) {
3980 fixHdfsOrphans = shouldFix;
3981 fixAny |= shouldFix;
3982 }
3983
3984 boolean shouldFixHdfsOrphans() {
3985 return fixHdfsOrphans;
3986 }
3987
3988 public void setFixVersionFile(boolean shouldFix) {
3989 fixVersionFile = shouldFix;
3990 fixAny |= shouldFix;
3991 }
3992
3993 public boolean shouldFixVersionFile() {
3994 return fixVersionFile;
3995 }
3996
3997 public void setSidelineBigOverlaps(boolean sbo) {
3998 this.sidelineBigOverlaps = sbo;
3999 }
4000
4001 public boolean shouldSidelineBigOverlaps() {
4002 return sidelineBigOverlaps;
4003 }
4004
4005 public void setFixSplitParents(boolean shouldFix) {
4006 fixSplitParents = shouldFix;
4007 fixAny |= shouldFix;
4008 }
4009
4010 boolean shouldFixSplitParents() {
4011 return fixSplitParents;
4012 }
4013
4014 public void setFixReferenceFiles(boolean shouldFix) {
4015 fixReferenceFiles = shouldFix;
4016 fixAny |= shouldFix;
4017 }
4018
4019 boolean shouldFixReferenceFiles() {
4020 return fixReferenceFiles;
4021 }
4022
4023 public boolean shouldIgnorePreCheckPermission() {
4024 return !fixAny || ignorePreCheckPermission;
4025 }
4026
4027 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4028 this.ignorePreCheckPermission = ignorePreCheckPermission;
4029 }
4030
4031
4032
4033
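/**
 * Maximum number of regions allowed to merge when fixing region overlaps
 * (the -maxMerge option).
 */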
4034 public void setMaxMerge(int mm) {
4035 this.maxMerge = mm;
4036 }
4037
4038 public int getMaxMerge() {
4039 return maxMerge;
4040 }
4041
4042 public void setMaxOverlapsToSideline(int mo) {
4043 this.maxOverlapsToSideline = mo;
4044 }
4045
4046 public int getMaxOverlapsToSideline() {
4047 return maxOverlapsToSideline;
4048 }
4049
4050
4051
4052
4053
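/**
 * Only check/fix the tables specified on the command line. An empty set means
 * all tables are included.
 */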
4054 boolean isTableIncluded(TableName table) {
4055 return tablesIncluded.isEmpty() || tablesIncluded.contains(table);
4056 }
4057
4058 public void includeTable(TableName table) {
4059 tablesIncluded.add(table);
4060 }
4061
4062 Set<TableName> getIncludedTables() {
4063 return new HashSet<TableName>(tablesIncluded);
4064 }
4065
4066
4067
4068
4069
4070
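/**
 * Only process regions that have not seen any metadata updates in the last
 * given number of seconds (the -timelag option).
 * @param seconds time lag in seconds; stored internally in milliseconds
 */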
4071 public void setTimeLag(long seconds) {
4072 timelag = seconds * 1000;
4073 }
4074
4075
4076
4077
4078
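/**
 * @param sidelineDir HDFS path used to back up (sideline) existing meta and region data
 */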
4079 public void setSidelineDir(String sidelineDir) {
4080 this.sidelineDir = new Path(sidelineDir);
4081 }
4082
4083 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4084 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4085 }
4086
4087 public HFileCorruptionChecker getHFilecorruptionChecker() {
4088 return hfcc;
4089 }
4090
4091 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4092 this.hfcc = hfcc;
4093 }
4094
4095 public void setRetCode(int code) {
4096 this.retcode = code;
4097 }
4098
4099 public int getRetCode() {
4100 return retcode;
4101 }
4102
4103 protected HBaseFsck printUsageAndExit() {
4104 StringWriter sw = new StringWriter(2048);
4105 PrintWriter out = new PrintWriter(sw);
4106 out.println("Usage: fsck [opts] {only tables}");
4107 out.println(" where [opts] are:");
4108 out.println(" -help Display help options (this)");
4109 out.println(" -details Display full report of all regions.");
4110 out.println(" -timelag <timeInSeconds> Process only regions that " +
4111 " have not experienced any metadata updates in the last " +
4112 " <timeInSeconds> seconds.");
4113 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4114 " before checking if the fix worked if run with -fix");
4115 out.println(" -summary Print only summary of the tables and status.");
4116 out.println(" -metaonly Only check the state of the hbase:meta table.");
4117 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4118 out.println(" -boundaries Verify that regions boundaries are the same between META and store files.");
4119
4120 out.println("");
4121 out.println(" Metadata Repair options: (expert features, use with caution!)");
4122 out.println(" -fix Try to fix region assignments. This is for backwards compatiblity");
4123 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
4124 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
4125 out.println(" -noHdfsChecking Don't load/check region info from HDFS."
4126 + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4127 out.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
4128 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
4129 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4130 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
4131 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
4132 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4133 out.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
4134 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4135 out.println(" -fixSplitParents Try to force offline split parents to be online.");
4136 out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
4137 out.println(" -fixReferenceFiles Try to offline lingering reference store files");
4138 out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
4139 + " (empty REGIONINFO_QUALIFIER rows)");
4140
4141 out.println("");
4142 out.println(" Datafile Repair options: (expert features, use with caution!)");
4143 out.println(" -checkCorruptHFiles Check all Hfiles by opening them to make sure they are valid");
4144 out.println(" -sidelineCorruptHFiles Quarantine corrupted HFiles. implies -checkCorruptHFiles");
4145
4146 out.println("");
4147 out.println(" Metadata Repair shortcuts");
4148 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4149 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
4150 "-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
4151 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4152
4153 out.println("");
4154 out.println(" Table lock options");
4155 out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4156
4157 out.println("");
4158 out.println(" Table Znode options");
4159 out.println(" -fixOrphanedTableZnodes Set table state in ZNode to disabled if table does not exists");
4160
4161 out.flush();
4162 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4163
4164 setRetCode(-2);
4165 return this;
4166 }
4167
4168
4169
4170
4171
4172
4173
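/**
 * Main program: resolves the HBase root directory, sets it as the default
 * FileSystem and runs the fsck tool through ToolRunner.
 */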
4174 public static void main(String[] args) throws Exception {
4175
4176 Configuration conf = HBaseConfiguration.create();
4177 Path hbasedir = FSUtils.getRootDir(conf);
4178 URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4179 FSUtils.setFsDefault(conf, new Path(defaultFs));
4180 int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4181 System.exit(ret);
4182 }
4183
4184
4185
4186
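/**
 * Tool wrapper so HBaseFsck can be invoked via ToolRunner and pick up standard
 * Hadoop command-line handling.
 */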
4187 static class HBaseFsckTool extends Configured implements Tool {
4188 HBaseFsckTool(Configuration conf) { super(conf); }
4189 @Override
4190 public int run(String[] args) throws Exception {
4191 HBaseFsck hbck = new HBaseFsck(getConf());
4192 hbck.exec(hbck.executor, args);
4193 return hbck.getRetCode();
4194 }
4195 };
4196
4197
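/**
 * Parse the command-line arguments, run the requested checks/fixes and return
 * this HBaseFsck instance; the exit status is available via getRetCode().
 *
 * Illustrative invocations (option names as printed by the usage text above):
 *   hbase hbck -details
 *   hbase hbck -fixAssignments -fixMeta MyTable
 */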
4198 public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4199 ServiceException, InterruptedException {
4200 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4201
4202 boolean checkCorruptHFiles = false;
4203 boolean sidelineCorruptHFiles = false;
4204
4205
4206 for (int i = 0; i < args.length; i++) {
4207 String cmd = args[i];
4208 if (cmd.equals("-help") || cmd.equals("-h")) {
4209 return printUsageAndExit();
4210 } else if (cmd.equals("-details")) {
4211 setDisplayFullReport();
4212 } else if (cmd.equals("-timelag")) {
4213 if (i == args.length - 1) {
4214 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4215 return printUsageAndExit();
4216 }
4217 try {
4218 long timelag = Long.parseLong(args[i+1]);
4219 setTimeLag(timelag);
4220 } catch (NumberFormatException e) {
4221 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4222 return printUsageAndExit();
4223 }
4224 i++;
4225 } else if (cmd.equals("-sleepBeforeRerun")) {
4226 if (i == args.length - 1) {
4227 errors.reportError(ERROR_CODE.WRONG_USAGE,
4228 "HBaseFsck: -sleepBeforeRerun needs a value.");
4229 return printUsageAndExit();
4230 }
4231 try {
4232 sleepBeforeRerun = Long.parseLong(args[i+1]);
4233 } catch (NumberFormatException e) {
4234 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4235 return printUsageAndExit();
4236 }
4237 i++;
4238 } else if (cmd.equals("-sidelineDir")) {
4239 if (i == args.length - 1) {
4240 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4241 return printUsageAndExit();
4242 }
4243 i++;
4244 setSidelineDir(args[i]);
4245 } else if (cmd.equals("-fix")) {
4246 errors.reportError(ERROR_CODE.WRONG_USAGE,
4247 "This option is deprecated, please use -fixAssignments instead.");
4248 setFixAssignments(true);
4249 } else if (cmd.equals("-fixAssignments")) {
4250 setFixAssignments(true);
4251 } else if (cmd.equals("-fixMeta")) {
4252 setFixMeta(true);
4253 } else if (cmd.equals("-noHdfsChecking")) {
4254 setCheckHdfs(false);
4255 } else if (cmd.equals("-fixHdfsHoles")) {
4256 setFixHdfsHoles(true);
4257 } else if (cmd.equals("-fixHdfsOrphans")) {
4258 setFixHdfsOrphans(true);
4259 } else if (cmd.equals("-fixTableOrphans")) {
4260 setFixTableOrphans(true);
4261 } else if (cmd.equals("-fixHdfsOverlaps")) {
4262 setFixHdfsOverlaps(true);
4263 } else if (cmd.equals("-fixVersionFile")) {
4264 setFixVersionFile(true);
4265 } else if (cmd.equals("-sidelineBigOverlaps")) {
4266 setSidelineBigOverlaps(true);
4267 } else if (cmd.equals("-fixSplitParents")) {
4268 setFixSplitParents(true);
4269 } else if (cmd.equals("-ignorePreCheckPermission")) {
4270 setIgnorePreCheckPermission(true);
4271 } else if (cmd.equals("-checkCorruptHFiles")) {
4272 checkCorruptHFiles = true;
4273 } else if (cmd.equals("-sidelineCorruptHFiles")) {
4274 sidelineCorruptHFiles = true;
4275 } else if (cmd.equals("-fixReferenceFiles")) {
4276 setFixReferenceFiles(true);
4277 } else if (cmd.equals("-fixEmptyMetaCells")) {
4278 setFixEmptyMetaCells(true);
4279 } else if (cmd.equals("-repair")) {
4280
4281
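// -repair is a shortcut that enables most repair options (see the usage text);
// note that -fixSplitParents stays disabled here.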
4282 setFixHdfsHoles(true);
4283 setFixHdfsOrphans(true);
4284 setFixMeta(true);
4285 setFixAssignments(true);
4286 setFixHdfsOverlaps(true);
4287 setFixVersionFile(true);
4288 setSidelineBigOverlaps(true);
4289 setFixSplitParents(false);
4290 setCheckHdfs(true);
4291 setFixReferenceFiles(true);
4292 setFixTableLocks(true);
4293 setFixTableZNodes(true);
4294 } else if (cmd.equals("-repairHoles")) {
4295
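// -repairHoles: fix holes, meta and assignments only; HDFS orphan/overlap fixing stays off.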
4296 setFixHdfsHoles(true);
4297 setFixHdfsOrphans(false);
4298 setFixMeta(true);
4299 setFixAssignments(true);
4300 setFixHdfsOverlaps(false);
4301 setSidelineBigOverlaps(false);
4302 setFixSplitParents(false);
4303 setCheckHdfs(true);
4304 } else if (cmd.equals("-maxOverlapsToSideline")) {
4305 if (i == args.length - 1) {
4306 errors.reportError(ERROR_CODE.WRONG_USAGE,
4307 "-maxOverlapsToSideline needs a numeric value argument.");
4308 return printUsageAndExit();
4309 }
4310 try {
4311 int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4312 setMaxOverlapsToSideline(maxOverlapsToSideline);
4313 } catch (NumberFormatException e) {
4314 errors.reportError(ERROR_CODE.WRONG_USAGE,
4315 "-maxOverlapsToSideline needs a numeric value argument.");
4316 return printUsageAndExit();
4317 }
4318 i++;
4319 } else if (cmd.equals("-maxMerge")) {
4320 if (i == args.length - 1) {
4321 errors.reportError(ERROR_CODE.WRONG_USAGE,
4322 "-maxMerge needs a numeric value argument.");
4323 return printUsageAndExit();
4324 }
4325 try {
4326 int maxMerge = Integer.parseInt(args[i+1]);
4327 setMaxMerge(maxMerge);
4328 } catch (NumberFormatException e) {
4329 errors.reportError(ERROR_CODE.WRONG_USAGE,
4330 "-maxMerge needs a numeric value argument.");
4331 return printUsageAndExit();
4332 }
4333 i++;
4334 } else if (cmd.equals("-summary")) {
4335 setSummary();
4336 } else if (cmd.equals("-metaonly")) {
4337 setCheckMetaOnly();
4338 } else if (cmd.equals("-boundaries")) {
4339 setRegionBoundariesCheck();
4340 } else if (cmd.equals("-fixTableLocks")) {
4341 setFixTableLocks(true);
4342 } else if (cmd.equals("-fixOrphanedTableZnodes")) {
4343 setFixTableZNodes(true);
4344 } else if (cmd.startsWith("-")) {
4345 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4346 return printUsageAndExit();
4347 } else {
4348 includeTable(TableName.valueOf(cmd));
4349 errors.print("Allow checking/fixes for table: " + cmd);
4350 }
4351 }
4352
4353 errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4354
4355
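// verify filesystem permissions before attempting any work; bail out if the pre-check fails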
4356 try {
4357 preCheckPermission();
4358 } catch (AccessDeniedException ace) {
4359 Runtime.getRuntime().exit(-1);
4360 } catch (IOException ioe) {
4361 Runtime.getRuntime().exit(-1);
4362 }
4363
4364
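// set up the cluster connection used by the checks below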
4365 connect();
4366
4367 try {
4368
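// if requested, check (and optionally sideline) corrupt HFiles before the main run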
4369 if (checkCorruptHFiles || sidelineCorruptHFiles) {
4370 LOG.info("Checking all hfiles for corruption");
4371 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4372 setHFileCorruptionChecker(hfcc);
4373 Collection<TableName> tables = getIncludedTables();
4374 Collection<Path> tableDirs = new ArrayList<Path>();
4375 Path rootdir = FSUtils.getRootDir(getConf());
4376 if (tables.size() > 0) {
4377 for (TableName t : tables) {
4378 tableDirs.add(FSUtils.getTableDir(rootdir, t));
4379 }
4380 } else {
4381 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4382 }
4383 hfcc.checkTables(tableDirs);
4384 hfcc.report(errors);
4385 }
4386
4387
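// run the main online consistency check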
4388 int code = onlineHbck();
4389 setRetCode(code);
4390
4391
4392
4393
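// if a fix was applied, sleep and then re-run the check with the fix options
// disabled so the second pass only reports the remaining state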
4394 if (shouldRerun()) {
4395 try {
4396 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4397 Thread.sleep(sleepBeforeRerun);
4398 } catch (InterruptedException ie) {
4399 return this;
4400 }
4401
4402 setFixAssignments(false);
4403 setFixMeta(false);
4404 setFixHdfsHoles(false);
4405 setFixHdfsOverlaps(false);
4406 setFixVersionFile(false);
4407 setFixTableOrphans(false);
4408 errors.resetErrors();
4409 code = onlineHbck();
4410 setRetCode(code);
4411 }
4412 } finally {
4413 IOUtils.cleanup(null, connection, meta, admin);
4414 }
4415 return this;
4416 }
4417
4418
4419
4420
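/**
 * Recursively list (ls -r) the given path for debugging, reporting through this
 * instance's error reporter.
 */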
4421 void debugLsr(Path p) throws IOException {
4422 debugLsr(getConf(), p, errors);
4423 }
4424
4425
4426
4427
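/**
 * ls -r for debugging purposes; prints through a fresh PrintingErrorReporter.
 */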
4428 public static void debugLsr(Configuration conf,
4429 Path p) throws IOException {
4430 debugLsr(conf, p, new PrintingErrorReporter());
4431 }
4432
4433
4434
4435
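/**
 * Recursively print the contents of the given path when debug logging is
 * enabled; returns silently if the path is null or does not exist.
 */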
4436 public static void debugLsr(Configuration conf,
4437 Path p, ErrorReporter errors) throws IOException {
4438 if (!LOG.isDebugEnabled() || p == null) {
4439 return;
4440 }
4441 FileSystem fs = p.getFileSystem(conf);
4442
4443 if (!fs.exists(p)) {
4444
4445 return;
4446 }
4447 errors.print(p.toString());
4448
4449 if (fs.isFile(p)) {
4450 return;
4451 }
4452
4453 if (fs.getFileStatus(p).isDir()) {
4454 FileStatus[] fss = fs.listStatus(p);
4455 for (FileStatus status : fss) {
4456 debugLsr(conf, status.getPath(), errors);
4457 }
4458 }
4459 }
4460 }