View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.util;
21  
22  import java.io.IOException;
23  import java.util.*;
24  import java.util.concurrent.atomic.AtomicInteger;
25  import java.util.concurrent.ThreadPoolExecutor;
26  import java.util.concurrent.TimeUnit;
27  import java.util.concurrent.LinkedBlockingQueue;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.fs.FileStatus;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.ClusterStatus;
36  import org.apache.hadoop.hbase.HBaseConfiguration;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.HRegionLocation;
40  import org.apache.hadoop.hbase.HServerAddress;
41  import org.apache.hadoop.hbase.HServerInfo;
42  import org.apache.hadoop.hbase.HTableDescriptor;
43  import org.apache.hadoop.hbase.KeyValue;
44  import org.apache.hadoop.hbase.MasterNotRunningException;
45  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
46  import org.apache.hadoop.hbase.client.HBaseAdmin;
47  import org.apache.hadoop.hbase.client.HConnection;
48  import org.apache.hadoop.hbase.client.HConnectionManager;
49  import org.apache.hadoop.hbase.client.MetaScanner;
50  import org.apache.hadoop.hbase.client.Result;
51  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
52  import org.apache.hadoop.hbase.ipc.HRegionInterface;
53  import org.apache.hadoop.hbase.regionserver.wal.HLog;
54  import org.apache.hadoop.hbase.zookeeper.ZKTable;
55  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
56  import org.apache.zookeeper.KeeperException;
57  
58  import com.google.common.base.Joiner;
59  import com.google.common.collect.Lists;
60  
61  import static org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
62  
63  /**
64   * Check consistency among the in-memory states of the master and the
65   * region server(s) and the state of data in HDFS.
66   */
67  public class HBaseFsck {
68    public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
69    public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
70  
71    private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
72    private static final long THREADS_KEEP_ALIVE_SECONDS = 60;
73  
74    private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
75    private Configuration conf;
76  
77    private ClusterStatus status;
78    private HConnection connection;
79  
80    private TreeMap<String, HbckInfo> regionInfo = new TreeMap<String, HbckInfo>();
81    private TreeMap<String, TInfo> tablesInfo = new TreeMap<String, TInfo>();
82    private TreeSet<byte[]> disabledTables =
83      new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
84    ErrorReporter errors = new PrintingErrorReporter();
85  
86    private static boolean details = false; // do we display the full report
87    private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
88    private boolean fix = false; // do we want to try fixing the errors?
89    private boolean rerun = false; // if we tried to fix something rerun hbck
90    private static boolean summary = false; // if we want to print less output
91    // Empty regioninfo qualifiers in .META.
92    private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
93    private int numThreads = MAX_NUM_THREADS;
94  
95    ThreadPoolExecutor executor; // threads to retrieve data from regionservers
96  
97    /**
98     * Constructor
99     *
100    * @param conf Configuration object
101    * @throws MasterNotRunningException if the master is not running
102    * @throws ZooKeeperConnectionException if unable to connect to zookeeper
103    */
104   public HBaseFsck(Configuration conf)
105     throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
106     this.conf = conf;
107 
108     HBaseAdmin admin = new HBaseAdmin(conf);
109     status = admin.getMaster().getClusterStatus();
110     connection = admin.getConnection();
111 
112     numThreads = conf.getInt("hbasefsck.numthreads", numThreads);
113     executor = new ThreadPoolExecutor(0, numThreads,
114           THREADS_KEEP_ALIVE_SECONDS, TimeUnit.SECONDS,
115           new LinkedBlockingQueue<Runnable>());
116   }
117 
118   /**
119    * Contacts the master and prints out cluster-wide information
120    * @throws IOException if a remote or network exception occurs
121    * @return 0 on success, non-zero on failure
122    * @throws KeeperException
123    * @throws InterruptedException
124    */
125   int doWork() throws IOException, KeeperException, InterruptedException {
126     // print hbase server version
127     errors.print("Version: " + status.getHBaseVersion());
128 
129     // Make sure regionInfo is empty before starting
130     regionInfo.clear();
131     tablesInfo.clear();
132     emptyRegionInfoQualifiers.clear();
133     disabledTables.clear();
134     errors.clear();
135 
136     // get a list of all regions from the master. This involves
137     // scanning the META table
138     if (!recordRootRegion()) {
139       // Will remove later if we can fix it
140       errors.reportError("Encountered fatal error. Exiting...");
141       return -1;
142     }
143     getMetaEntries();
144 
145     // Check if .META. is found only once and in the right place
146     if (!checkMetaEntries()) {
147       // Will remove later if we can fix it
148       errors.reportError("Encountered fatal error. Exiting...");
149       return -1;
150     }
151 
152     // get a list of all tables that have not changed recently.
153     AtomicInteger numSkipped = new AtomicInteger(0);
154     HTableDescriptor[] allTables = getTables(numSkipped);
155     errors.print("Number of Tables: " + allTables.length);
156     if (details) {
157       if (numSkipped.get() > 0) {
158         errors.detail("Number of Tables in flux: " + numSkipped.get());
159       }
160       for (HTableDescriptor td : allTables) {
161         String tableName = td.getNameAsString();
162         errors.detail("  Table: " + tableName + "\t" +
163                            (td.isReadOnly() ? "ro" : "rw") + "\t" +
164                            (td.isRootRegion() ? "ROOT" :
165                             (td.isMetaRegion() ? "META" : "    ")) + "\t" +
166                            " families: " + td.getFamilies().size());
167       }
168     }
169 
170     // From the master, get a list of all known live region servers
171     Collection<HServerInfo> regionServers = status.getServerInfo();
172     errors.print("Number of live region servers: " +
173                        regionServers.size());
174     if (details) {
175       for (HServerInfo rsinfo: regionServers) {
176         errors.print("  " + rsinfo.getServerName());
177       }
178     }
179 
180     // From the master, get a list of all dead region servers
181     Collection<String> deadRegionServers = status.getDeadServerNames();
182     errors.print("Number of dead region servers: " +
183                        deadRegionServers.size());
184     if (details) {
185       for (String name: deadRegionServers) {
186         errors.print("  " + name);
187       }
188     }
189 
190     // Determine what's deployed
191     processRegionServers(regionServers);
192 
193     // Determine what's on HDFS
194     checkHdfs();
195 
196     // Empty cells in .META.?
197     errors.print("Number of empty REGIONINFO_QUALIFIER rows in .META.: " +
198       emptyRegionInfoQualifiers.size());
199     if (details) {
200       for (Result r: emptyRegionInfoQualifiers) {
201         errors.print("  " + r);
202       }
203     }
204 
205     // Get disabled tables from ZooKeeper
206     loadDisabledTables();
207 
208     // Check consistency
209     checkConsistency();
210 
211     // Check integrity
212     checkIntegrity();
213 
214     // Print table summary
215     printTableSummary();
216 
217     return errors.summarize();
218   }
219 
220   public ErrorReporter getErrors() {
221     return errors;
222   }
223 
224   /**
225    * Load the list of disabled tables in ZK into local set.
226    * @throws ZooKeeperConnectionException
227    * @throws IOException
228    * @throws KeeperException
229    */
230   private void loadDisabledTables()
231   throws ZooKeeperConnectionException, IOException, KeeperException {
232     ZooKeeperWatcher zkw =
233       HConnectionManager.getConnection(conf).getZooKeeperWatcher();
234     for (String tableName : ZKTable.getDisabledOrDisablingTables(zkw)) {
235       disabledTables.add(Bytes.toBytes(tableName));
236     }
237   }
238 
239   /**
240    * Check if the specified region's table is disabled.
241    * @throws ZooKeeperConnectionException
242    * @throws IOException
243    * @throws KeeperException
244    */
245   private boolean isTableDisabled(HRegionInfo regionInfo) {
246     return disabledTables.contains(regionInfo.getTableDesc().getName());
247   }
248 
249   /**
250    * Scan HDFS for all regions, recording their information into
251    * regionInfo
252    */
253   void checkHdfs() throws IOException, InterruptedException {
254     Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
255     FileSystem fs = rootDir.getFileSystem(conf);
256 
257     // list all tables from HDFS
258     List<FileStatus> tableDirs = Lists.newArrayList();
259 
260     boolean foundVersionFile = false;
261     FileStatus[] files = fs.listStatus(rootDir);
262     for (FileStatus file : files) {
263       if (file.getPath().getName().equals(HConstants.VERSION_FILE_NAME)) {
264         foundVersionFile = true;
265       } else {
266         tableDirs.add(file);
267       }
268     }
269 
270     // verify that version file exists
271     if (!foundVersionFile) {
272       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
273           "Version file does not exist in root dir " + rootDir);
274     }
275 
276     // level 1:  <HBASE_DIR>/*
277     WorkItemHdfsDir[] dirs = new WorkItemHdfsDir[tableDirs.size()];  
278     int num = 0;
279     for (FileStatus tableDir : tableDirs) {
280       dirs[num] = new WorkItemHdfsDir(this, fs, errors, tableDir); 
281       executor.execute(dirs[num]);
282       num++;
283     }
284 
285     // wait for all directories to be done
286     for (int i = 0; i < num; i++) {
287       synchronized (dirs[i]) {
288         while (!dirs[i].isDone()) {
289           dirs[i].wait();
290         }
291       }
292     }
293   }
294 
295   /**
296    * Record the location of the ROOT region as found in ZooKeeper,
297    * as if it were in a META table. This is so that we can check
298    * deployment of ROOT.
299    */
300   boolean recordRootRegion() throws IOException {
301     HRegionLocation rootLocation = connection.locateRegion(
302       HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
303 
304     // Check if Root region is valid and existing
305     if (rootLocation == null || rootLocation.getRegionInfo() == null ||
306         rootLocation.getServerAddress() == null) {
307       errors.reportError(ERROR_CODE.NULL_ROOT_REGION,
308           "Root Region or some of its attributes are null.");
309       return false;
310     }
311 
312     MetaEntry m = new MetaEntry(rootLocation.getRegionInfo(),
313       rootLocation.getServerAddress(), null, System.currentTimeMillis());
314     HbckInfo hbInfo = new HbckInfo(m);
315     regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
316     return true;
317   }
318 
319   /**
320    * Contacts each regionserver and fetches metadata about regions.
321    * @param regionServerList - the list of region servers to connect to
322    * @throws IOException if a remote or network exception occurs
323    */
324   void processRegionServers(Collection<HServerInfo> regionServerList)
325     throws IOException, InterruptedException {
326 
327     WorkItemRegion[] work = new WorkItemRegion[regionServerList.size()];
328     int num = 0;
329 
330     // loop to contact each region server in parallel
331     for (HServerInfo rsinfo:regionServerList) {
332       work[num] = new WorkItemRegion(this, rsinfo, errors, connection);
333       executor.execute(work[num]);
334       num++;
335     }
336     
337     // wait for all submitted tasks to be done
338     for (int i = 0; i < num; i++) {
339       synchronized (work[i]) {
340         while (!work[i].isDone()) {
341           work[i].wait();
342         }
343       }
344     }
345   }
346 
347   /**
348    * Check consistency of all regions that have been found in previous phases.
349    * @throws KeeperException
350    * @throws InterruptedException
351    */
352   void checkConsistency()
353   throws IOException, KeeperException, InterruptedException {
354     for (java.util.Map.Entry<String, HbckInfo> e: regionInfo.entrySet()) {
355       doConsistencyCheck(e.getKey(), e.getValue());
356     }
357   }
358 
359   /**
360    * Check a single region for consistency and correct deployment.
361    * @throws KeeperException
362    * @throws InterruptedException
363    */
364   void doConsistencyCheck(final String key, final HbckInfo hbi)
365   throws IOException, KeeperException, InterruptedException {
366     String descriptiveName = hbi.toString();
367 
368     boolean inMeta = hbi.metaEntry != null;
369     boolean inHdfs = hbi.foundRegionDir != null;
370     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
371     boolean isDeployed = !hbi.deployedOn.isEmpty();
372     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
373     boolean deploymentMatchesMeta =
374       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
375       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
376     boolean splitParent =
377       (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
378     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
379     boolean recentlyModified = hbi.foundRegionDir != null &&
380       hbi.foundRegionDir.getModificationTime() + timelag > System.currentTimeMillis();
381 
382     // ========== First the healthy cases =============
383     if (hbi.onlyEdits) {
384       return;
385     }
386     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
387       return;
388     } else if (inMeta && !isDeployed && splitParent) {
389       return;
390     } else if (inMeta && !shouldBeDeployed && !isDeployed) {
391       return;
392     } else if (recentlyModified) {
393       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
394       return;
395     }
396     // ========== Cases where the region is not in META =============
397     else if (!inMeta && !inHdfs && !isDeployed) {
398       // We shouldn't have record of this region at all then!
399       assert false : "Entry for region with no data";
400     } else if (!inMeta && !inHdfs && isDeployed) {
401       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
402           + descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
403           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
404     } else if (!inMeta && inHdfs && !isDeployed) {
405       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
406           + descriptiveName + " on HDFS, but not listed in META " +
407           "or deployed on any region server");
408     } else if (!inMeta && inHdfs && isDeployed) {
409       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
410           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
411 
412     // ========== Cases where the region is in META =============
413     } else if (inMeta && !inHdfs && !isDeployed) {
414       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
415           + descriptiveName + " found in META, but not in HDFS "
416           + "or deployed on any region server.");
417     } else if (inMeta && !inHdfs && isDeployed) {
418       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
419           + " found in META, but not in HDFS, " +
420           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
421     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
422       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
423           + " not deployed on any region server.");
424       // If we are trying to fix the errors
425       if (shouldFix()) {
426         errors.print("Trying to fix unassigned region...");
427         setShouldRerun();
428         HBaseFsckRepair.fixUnassigned(this.conf, hbi.metaEntry);
429       }
430     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
431       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, "Region "
432           + descriptiveName + " should not be deployed according " +
433           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
434     } else if (inMeta && inHdfs && isMultiplyDeployed) {
435       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
436           + " is listed in META on region server " + hbi.metaEntry.regionServer
437           + " but is multiply assigned to region servers " +
438           Joiner.on(", ").join(hbi.deployedOn));
439       // If we are trying to fix the errors
440       if (shouldFix()) {
441         errors.print("Trying to fix assignment error...");
442         setShouldRerun();
443         HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
444       }
445     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
446       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
447           + descriptiveName + " listed in META on region server " +
448           hbi.metaEntry.regionServer + " but found on region server " +
449           hbi.deployedOn.get(0));
450       // If we are trying to fix the errors
451       if (shouldFix()) {
452         errors.print("Trying to fix assignment error...");
453         setShouldRerun();
454         HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
455       }
456     } else {
457       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
458           " is in an unforeseen state:" +
459           " inMeta=" + inMeta +
460           " inHdfs=" + inHdfs +
461           " isDeployed=" + isDeployed +
462           " isMultiplyDeployed=" + isMultiplyDeployed +
463           " deploymentMatchesMeta=" + deploymentMatchesMeta +
464           " shouldBeDeployed=" + shouldBeDeployed);
465     }
466   }
467 
468   /**
469    * Checks tables integrity. Goes over all regions and scans the tables.
470    * Collects all the pieces for each table and checks if there are missing,
471    * repeated or overlapping ones.
472    */
473   void checkIntegrity() {
474     for (HbckInfo hbi : regionInfo.values()) {
475       // Check only valid, working regions
476       if (hbi.metaEntry == null) continue;
477       if (hbi.metaEntry.regionServer == null) continue;
478       if (hbi.onlyEdits) continue;
479 
480       // Missing regionDir or over-deployment is checked elsewhere. Include
481       // these cases in modTInfo, so we can evaluate those regions as part of
482       // the region chain in META
483       //if (hbi.foundRegionDir == null) continue;
484       //if (hbi.deployedOn.size() != 1) continue;
485       if (hbi.deployedOn.size() == 0) continue;
486 
487       // We should be safe here
488       String tableName = hbi.metaEntry.getTableDesc().getNameAsString();
489       TInfo modTInfo = tablesInfo.get(tableName);
490       if (modTInfo == null) {
491         modTInfo = new TInfo(tableName);
492       }
493       for (HServerAddress server : hbi.deployedOn) {
494         modTInfo.addServer(server);
495       }
496 
497       //modTInfo.addEdge(hbi.metaEntry.getStartKey(), hbi.metaEntry.getEndKey());
498       modTInfo.addRegionInfo(hbi);
499 
500       tablesInfo.put(tableName, modTInfo);
501     }
502 
503     for (TInfo tInfo : tablesInfo.values()) {
504       if (!tInfo.checkRegionChain()) {
505         errors.report("Found inconsistency in table " + tInfo.getName());
506       }
507     }
508   }
509 
510   /**
511    * Maintain information about a particular table.
512    */
513   private class TInfo {
514     String tableName;
515     TreeSet <HServerAddress> deployedOn;
516 
517     List<HbckInfo> regions = new ArrayList<HbckInfo>();
518 
519     TInfo(String name) {
520       this.tableName = name;
521       deployedOn = new TreeSet <HServerAddress>();
522     }
523 
524     public void addRegionInfo (HbckInfo r) {
525       regions.add(r);
526     }
527 
528     public void addServer(HServerAddress server) {
529       this.deployedOn.add(server);
530     }
531 
532     public String getName() {
533       return tableName;
534     }
535 
536     public int getNumRegions() {
537       return regions.size();
538     }
539 
540     /**
541      * Check the region chain (from META) of this table.  We are looking for
542      * holes, overlaps, and cycles.
543      * @return false if there are errors
544      */
545     public boolean checkRegionChain() {
546       Collections.sort(regions);
547       HbckInfo last = null;
548       int originalErrorsCount = errors.getErrorList().size();
549 
550       for (HbckInfo r : regions) {
551         if (last == null) {
552           // This is the first region, check that the start key is empty
553           if (! Bytes.equals(r.metaEntry.getStartKey(), HConstants.EMPTY_BYTE_ARRAY)) {
554             errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
555                 "First region should start with an empty key.",
556                 this, r);
557           }
558         } else {
559 
560           // Check if endKey < startKey
561           // Previous implementation of this code checked for a cycle in the
562           // region chain.  A cycle would imply that the endKey comes before
563           // the startKey (i.e. endKey < startKey).
564           if (! Bytes.equals(r.metaEntry.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)) {
565             // continue with this check if this is not the last region
566             int cmpRegionKeys = Bytes.compareTo(r.metaEntry.getStartKey(),
567                 r.metaEntry.getEndKey());
568             if (cmpRegionKeys > 0) {
569               errors.reportError(ERROR_CODE.REGION_CYCLE,
570                   String.format("The endkey for this region comes before the "
571                       + "startkey, startkey=%s, endkey=%s",
572                       Bytes.toStringBinary(r.metaEntry.getStartKey()),
573                       Bytes.toStringBinary(r.metaEntry.getEndKey())),
574                   this, r, last);
575             }
576           }
577 
578           // Check if the startkeys are different
579           if (Bytes.equals(r.metaEntry.getStartKey(), last.metaEntry.getStartKey())) {
580             errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
581                 "Two regions have the same startkey: "
582                     + Bytes.toStringBinary(r.metaEntry.getStartKey()),
583                 this, r, last);
584           } else {
585             // Check that the startkey is the same as the previous end key
586             int cmp = Bytes.compareTo(r.metaEntry.getStartKey(),
587                 last.metaEntry.getEndKey());
588             if (cmp > 0) {
589               // hole
590               errors.reportError(ERROR_CODE.HOLE_IN_REGION_CHAIN,
591                   "There is a hole in the region chain.",
592                   this, r, last);
593             } else if (cmp < 0) {
594               // overlap
595               errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
596                   "There is an overlap in the region chain.",
597                   this, r, last);
598             }
599           }
600 
601         }
602 
603         last = r;
604       }
605 
606       return errors.getErrorList().size() == originalErrorsCount;
607     }
608 
609   }
610 
611   /**
612    * Return a list of user-space table names whose metadata have not been
613    * modified in the last few milliseconds specified by timelag
614    * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
615    * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
616    * milliseconds specified by timelag, then the table is a candidate to be returned.
617    * @return tables that have not been modified recently
618    * @throws IOException if an error is encountered
619    */
620   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
621     TreeSet<HTableDescriptor> uniqueTables = new TreeSet<HTableDescriptor>();
622     long now = System.currentTimeMillis();
623 
624     for (HbckInfo hbi : regionInfo.values()) {
625       MetaEntry info = hbi.metaEntry;
626 
627       // if the start key is zero, then we have found the first region of a table.
628       // pick only those tables that were not modified in the last few milliseconds.
629       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
630         if (info.modTime + timelag < now) {
631           uniqueTables.add(info.getTableDesc());
632         } else {
633           numSkipped.incrementAndGet(); // one more in-flux table
634         }
635       }
636     }
637     return uniqueTables.toArray(new HTableDescriptor[uniqueTables.size()]);
638   }
639 
640   /**
641    * Gets the entry in regionInfo corresponding to the the given encoded
642    * region name. If the region has not been seen yet, a new entry is added
643    * and returned.
644    */
645   private synchronized HbckInfo getOrCreateInfo(String name) {
646     HbckInfo hbi = regionInfo.get(name);
647     if (hbi == null) {
648       hbi = new HbckInfo(null);
649       regionInfo.put(name, hbi);
650     }
651     return hbi;
652   }
653 
654   /**
655     * Check values in regionInfo for .META.
656     * Check if zero or more than one regions with META are found.
657     * If there are inconsistencies (i.e. zero or more than one regions
658     * pretend to be holding the .META.) try to fix that and report an error.
659     * @throws IOException from HBaseFsckRepair functions
660    * @throws KeeperException
661    * @throws InterruptedException
662     */
663   boolean checkMetaEntries()
664   throws IOException, KeeperException, InterruptedException {
665     List <HbckInfo> metaRegions = Lists.newArrayList();
666     for (HbckInfo value : regionInfo.values()) {
667       if (value.metaEntry.isMetaTable()) {
668         metaRegions.add(value);
669       }
670     }
671 
672     // If something is wrong
673     if (metaRegions.size() != 1) {
674       HRegionLocation rootLocation = connection.locateRegion(
675         HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
676       HbckInfo root =
677           regionInfo.get(rootLocation.getRegionInfo().getEncodedName());
678 
679       // If there is no region holding .META.
680       if (metaRegions.size() == 0) {
681         errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
682         if (shouldFix()) {
683           errors.print("Trying to fix a problem with .META...");
684           setShouldRerun();
685           // try to fix it (treat it as unassigned region)
686           HBaseFsckRepair.fixUnassigned(conf, root.metaEntry);
687         }
688       }
689       // If there are more than one regions pretending to hold the .META.
690       else if (metaRegions.size() > 1) {
691         errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
692         if (shouldFix()) {
693           errors.print("Trying to fix a problem with .META...");
694           setShouldRerun();
695           // try fix it (treat is a dupe assignment)
696           List <HServerAddress> deployedOn = Lists.newArrayList();
697           for (HbckInfo mRegion : metaRegions) {
698             deployedOn.add(mRegion.metaEntry.regionServer);
699           }
700           HBaseFsckRepair.fixDupeAssignment(conf, root.metaEntry, deployedOn);
701         }
702       }
703       // rerun hbck with hopefully fixed META
704       return false;
705     }
706     // no errors, so continue normally
707     return true;
708   }
709 
710   /**
711    * Scan .META. and -ROOT-, adding all regions found to the regionInfo map.
712    * @throws IOException if an error is encountered
713    */
714   void getMetaEntries() throws IOException {
715     MetaScannerVisitor visitor = new MetaScannerVisitor() {
716       int countRecord = 1;
717 
718       // comparator to sort KeyValues with latest modtime
719       final Comparator<KeyValue> comp = new Comparator<KeyValue>() {
720         public int compare(KeyValue k1, KeyValue k2) {
721           return (int)(k1.getTimestamp() - k2.getTimestamp());
722         }
723       };
724 
725       public boolean processRow(Result result) throws IOException {
726         try {
727 
728           // record the latest modification of this META record
729           long ts =  Collections.max(result.list(), comp).getTimestamp();
730 
731           // record region details
732           byte [] value = result.getValue(HConstants.CATALOG_FAMILY,
733             HConstants.REGIONINFO_QUALIFIER);
734           if (value == null || value.length == 0) {
735             emptyRegionInfoQualifiers.add(result);
736             return true;
737           }
738           HRegionInfo info = Writables.getHRegionInfo(value);
739           HServerAddress server = null;
740           byte[] startCode = null;
741 
742           // record assigned region server
743           value = result.getValue(HConstants.CATALOG_FAMILY,
744                                      HConstants.SERVER_QUALIFIER);
745           if (value != null && value.length > 0) {
746             String address = Bytes.toString(value);
747             server = new HServerAddress(address);
748           }
749 
750           // record region's start key
751           value = result.getValue(HConstants.CATALOG_FAMILY,
752                                   HConstants.STARTCODE_QUALIFIER);
753           if (value != null) {
754             startCode = value;
755           }
756           MetaEntry m = new MetaEntry(info, server, startCode, ts);
757           HbckInfo hbInfo = new HbckInfo(m);
758           HbckInfo previous = regionInfo.put(info.getEncodedName(), hbInfo);
759           if (previous != null) {
760             throw new IOException("Two entries in META are same " + previous);
761           }
762 
763           // show proof of progress to the user, once for every 100 records.
764           if (countRecord % 100 == 0) {
765             errors.progress();
766           }
767           countRecord++;
768           return true;
769         } catch (RuntimeException e) {
770           LOG.error("Result=" + result);
771           throw e;
772         }
773       }
774     };
775 
776     // Scan -ROOT- to pick up META regions
777     MetaScanner.metaScan(conf, visitor, null, null,
778       Integer.MAX_VALUE, HConstants.ROOT_TABLE_NAME);
779 
780     // Scan .META. to pick up user regions
781     MetaScanner.metaScan(conf, visitor);
782     errors.print("");
783   }
784 
785   /**
786    * Stores the entries scanned from META
787    */
788   private static class MetaEntry extends HRegionInfo {
789     HServerAddress regionServer;   // server hosting this region
790     long modTime;          // timestamp of most recent modification metadata
791 
792     public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer,
793                      byte[] startCode, long modTime) {
794       super(rinfo);
795       this.regionServer = regionServer;
796       this.modTime = modTime;
797     }
798   }
799 
800   /**
801    * Maintain information about a particular region.
802    */
803   static class HbckInfo implements Comparable {
804     boolean onlyEdits = false;
805     MetaEntry metaEntry = null;
806     FileStatus foundRegionDir = null;
807     List<HServerAddress> deployedOn = Lists.newArrayList();
808 
809     HbckInfo(MetaEntry metaEntry) {
810       this.metaEntry = metaEntry;
811     }
812 
813     public synchronized void addServer(HServerAddress server) {
814       this.deployedOn.add(server);
815     }
816 
817     public synchronized String toString() {
818       if (metaEntry != null) {
819         return metaEntry.getRegionNameAsString();
820       } else if (foundRegionDir != null) {
821         return foundRegionDir.getPath().toString();
822       } else {
823         return "UNKNOWN_REGION on " + Joiner.on(", ").join(deployedOn);
824       }
825     }
826 
827     @Override
828     public int compareTo(Object o) {
829       HbckInfo other = (HbckInfo) o;
830       int startComparison = Bytes.compareTo(this.metaEntry.getStartKey(), other.metaEntry.getStartKey());
831       if (startComparison != 0)
832         return startComparison;
833       else
834         return Bytes.compareTo(this.metaEntry.getEndKey(), other.metaEntry.getEndKey());
835     }
836   }
837 
838   /**
839    * Prints summary of all tables found on the system.
840    */
841   private void printTableSummary() {
842     System.out.println("Summary:");
843     for (TInfo tInfo : tablesInfo.values()) {
844       if (errors.tableHasErrors(tInfo)) {
845         System.out.println("Table " + tInfo.getName() + " is inconsistent.");
846       } else {
847         System.out.println("  " + tInfo.getName() + " is okay.");
848       }
849       System.out.println("    Number of regions: " + tInfo.getNumRegions());
850       System.out.print("    Deployed on: ");
851       for (HServerAddress server : tInfo.deployedOn) {
852         System.out.print(" " + server.toString());
853       }
854       System.out.println();
855     }
856   }
857 
858   interface ErrorReporter {
859     public static enum ERROR_CODE {
860       UNKNOWN, NO_META_REGION, NULL_ROOT_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
861       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
862       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
863       FIRST_REGION_STARTKEY_NOT_EMPTY, DUPE_STARTKEYS,
864       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE
865     }
866     public void clear();
867     public void report(String message);
868     public void reportError(String message);
869     public void reportError(ERROR_CODE errorCode, String message);
870     public void reportError(ERROR_CODE errorCode, String message, TInfo table, HbckInfo info);
871     public void reportError(ERROR_CODE errorCode, String message, TInfo table, HbckInfo info1, HbckInfo info2);
872     public int summarize();
873     public void detail(String details);
874     public ArrayList<ERROR_CODE> getErrorList();
875     public void progress();
876     public void print(String message);
877     public void resetErrors();
878     public boolean tableHasErrors(TInfo table);
879   }
880 
881   private static class PrintingErrorReporter implements ErrorReporter {
882     public int errorCount = 0;
883     private int showProgress;
884 
885     Set<TInfo> errorTables = new HashSet<TInfo>();
886 
887     // for use by unit tests to verify which errors were discovered
888     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
889 
890     public void clear() {
891       errorTables.clear();
892       errorList.clear();
893       errorCount = 0;
894     }
895 
896     public synchronized void reportError(ERROR_CODE errorCode, String message) {
897       errorList.add(errorCode);
898       if (!summary) {
899         System.out.println("ERROR: " + message);
900       }
901       errorCount++;
902       showProgress = 0;
903     }
904 
905     public synchronized void reportError(ERROR_CODE errorCode, String message, TInfo table,
906                                          HbckInfo info) {
907       errorTables.add(table);
908       String reference = "(region " + info.metaEntry.getRegionNameAsString() + ")";
909       reportError(errorCode, reference + " " + message);
910     }
911 
912     public synchronized void reportError(ERROR_CODE errorCode, String message, TInfo table,
913                                          HbckInfo info1, HbckInfo info2) {
914       errorTables.add(table);
915       String reference = "(regions " + info1.metaEntry.getRegionNameAsString()
916           + " and " + info2.metaEntry.getRegionNameAsString() + ")";
917       reportError(errorCode, reference + " " + message);
918     }
919 
920     public synchronized void reportError(String message) {
921       reportError(ERROR_CODE.UNKNOWN, message);
922     }
923 
924     /**
925      * Report error information, but do not increment the error count.  Intended for cases
926      * where the actual error would have been reported previously.
927      * @param message
928      */
929     public synchronized void report(String message) {
930       if (! summary) {
931         System.out.println("ERROR: " + message);
932       }
933       showProgress = 0;
934     }
935 
936     public synchronized int summarize() {
937       System.out.println(Integer.toString(errorCount) +
938                          " inconsistencies detected.");
939       if (errorCount == 0) {
940         System.out.println("Status: OK");
941         return 0;
942       } else {
943         System.out.println("Status: INCONSISTENT");
944         return -1;
945       }
946     }
947 
948     public ArrayList<ERROR_CODE> getErrorList() {
949       return errorList;
950     }
951 
952     public synchronized void print(String message) {
953       if (!summary) {
954         System.out.println(message);
955       }
956     }
957 
958     @Override
959     public boolean tableHasErrors(TInfo table) {
960       return errorTables.contains(table);
961     }
962 
963     @Override
964     public void resetErrors() {
965       errorCount = 0;
966     }
967 
968     public synchronized void detail(String message) {
969       if (details) {
970         System.out.println(message);
971       }
972       showProgress = 0;
973     }
974 
975     public synchronized void progress() {
976       if (showProgress++ == 10) {
977         if (!summary) {
978           System.out.print(".");
979         }
980         showProgress = 0;
981       }
982     }
983   }
984 
985   /**
986    * Contact a region server and get all information from it
987    */
988   static class WorkItemRegion implements Runnable {
989     private HBaseFsck hbck;
990     private HServerInfo rsinfo;
991     private ErrorReporter errors;
992     private HConnection connection;
993     private boolean done;
994 
995     WorkItemRegion(HBaseFsck hbck, HServerInfo info, 
996                    ErrorReporter errors, HConnection connection) {
997       this.hbck = hbck;
998       this.rsinfo = info;
999       this.errors = errors;
1000       this.connection = connection;
1001       this.done = false;
1002     }
1003 
1004     // is this task done?
1005     synchronized boolean isDone() {
1006       return done;
1007     }
1008 
1009     @Override
1010     public synchronized void run() {
1011       errors.progress();
1012       try {
1013         HRegionInterface server = connection.getHRegionConnection(
1014                                     rsinfo.getServerAddress());
1015 
1016         // list all online regions from this region server
1017         List<HRegionInfo> regions = server.getOnlineRegions();
1018         if (details) {
1019           errors.detail("RegionServer: " + rsinfo.getServerName() +
1020                            " number of regions: " + regions.size());
1021           for (HRegionInfo rinfo: regions) {
1022             errors.detail("  " + rinfo.getRegionNameAsString() +
1023                              " id: " + rinfo.getRegionId() +
1024                              " encoded_name: " + rinfo.getEncodedName() +
1025                              " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
1026                              " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
1027           }
1028         }
1029 
1030         // check to see if the existence of this region matches the region in META
1031         for (HRegionInfo r:regions) {
1032           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
1033           hbi.addServer(rsinfo.getServerAddress());
1034         }
1035       } catch (IOException e) {          // unable to connect to the region server. 
1036         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
1037                       " Unable to fetch region information. " + e);
1038       } finally {
1039         done = true;
1040         notifyAll(); // wakeup anybody waiting for this item to be done
1041       }
1042     }
1043   }
1044 
1045   /**
1046    * Contact hdfs and get all information about spcified table directory.
1047    */
1048   static class WorkItemHdfsDir implements Runnable {
1049     private HBaseFsck hbck;
1050     private FileStatus tableDir;
1051     private ErrorReporter errors;
1052     private FileSystem fs;
1053     private boolean done;
1054 
1055     WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors, 
1056                     FileStatus status) {
1057       this.hbck = hbck;
1058       this.fs = fs;
1059       this.tableDir = status;
1060       this.errors = errors;
1061       this.done = false;
1062     }
1063 
1064     synchronized boolean isDone() {
1065       return done;
1066     } 
1067 
1068     @Override
1069     public synchronized void run() {
1070       try {
1071         String tableName = tableDir.getPath().getName();
1072         // ignore hidden files
1073         if (tableName.startsWith(".") &&
1074             !tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME)))
1075           return;
1076         // level 2: <HBASE_DIR>/<table>/*
1077         FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
1078         for (FileStatus regionDir : regionDirs) {
1079           String encodedName = regionDir.getPath().getName();
1080 
1081           // ignore directories that aren't hexadecimal
1082           if (!encodedName.toLowerCase().matches("[0-9a-f]+")) continue;
1083   
1084           HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
1085           synchronized (hbi) {
1086             if (hbi.foundRegionDir != null) {
1087               errors.print("Directory " + encodedName + " duplicate??" +
1088                            hbi.foundRegionDir);
1089             }
1090             hbi.foundRegionDir = regionDir;
1091         
1092             // Set a flag if this region contains only edits
1093             // This is special case if a region is left after split
1094             hbi.onlyEdits = true;
1095             FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
1096             Path ePath = HLog.getRegionDirRecoveredEditsDir(regionDir.getPath());
1097             for (FileStatus subDir : subDirs) {
1098               String sdName = subDir.getPath().getName();
1099               if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
1100                 hbi.onlyEdits = false;
1101                 break;
1102               }
1103             }
1104           }
1105         }
1106       } catch (IOException e) {          // unable to connect to the region server. 
1107         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: " + tableDir.getPath().getName() +
1108                       " Unable to fetch region information. " + e);
1109       } finally {
1110         done = true;
1111         notifyAll();
1112       }
1113     }
1114   }
1115 
1116   /**
1117    * Display the full report from fsck.
1118    * This displays all live and dead region servers, and all known regions.
1119    */
1120   void displayFullReport() {
1121     details = true;
1122   }
1123 
1124   /**
1125    * Set summary mode.
1126    * Print only summary of the tables and status (OK or INCONSISTENT)
1127    */
1128   void setSummary() {
1129     summary = true;
1130   }
1131 
1132   /**
1133    * Check if we should rerun fsck again. This checks if we've tried to
1134    * fix something and we should rerun fsck tool again.
1135    * Display the full report from fsck. This displays all live and dead
1136    * region servers, and all known regions.
1137    */
1138   void setShouldRerun() {
1139     rerun = true;
1140   }
1141 
1142   boolean shouldRerun() {
1143     return rerun;
1144   }
1145 
1146   /**
1147    * Fix inconsistencies found by fsck. This should try to fix errors (if any)
1148    * found by fsck utility.
1149    */
1150   void setFixErrors(boolean shouldFix) {
1151     fix = shouldFix;
1152   }
1153 
1154   boolean shouldFix() {
1155     return fix;
1156   }
1157 
1158   /**
1159    * We are interested in only those tables that have not changed their state in
1160    * META during the last few seconds specified by hbase.admin.fsck.timelag
1161    * @param seconds - the time in seconds
1162    */
1163   void setTimeLag(long seconds) {
1164     timelag = seconds * 1000; // convert to milliseconds
1165   }
1166 
1167   protected static void printUsageAndExit() {
1168     System.err.println("Usage: fsck [opts] ");
1169     System.err.println(" where [opts] are:");
1170     System.err.println("   -details Display full report of all regions.");
1171     System.err.println("   -timelag {timeInSeconds}  Process only regions that " +
1172                        " have not experienced any metadata updates in the last " +
1173                        " {{timeInSeconds} seconds.");
1174     System.err.println("   -fix Try to fix some of the errors.");
1175     System.err.println("   -sleepBeforeRerun {timeInSeconds} Sleep this many seconds" +
1176                        " before checking if the fix worked if run with -fix");
1177     System.err.println("   -summary Print only summary of the tables and status.");
1178 
1179     Runtime.getRuntime().exit(-2);
1180   }
1181 
1182   /**
1183    * Main program
1184    * @param args
1185    * @throws Exception
1186    */
1187   public static void main(String [] args) throws Exception {
1188 
1189     // create a fsck object
1190     Configuration conf = HBaseConfiguration.create();
1191     conf.set("fs.defaultFS", conf.get("hbase.rootdir"));
1192     HBaseFsck fsck = new HBaseFsck(conf);
1193     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
1194 
1195     // Process command-line args.
1196     for (int i = 0; i < args.length; i++) {
1197       String cmd = args[i];
1198       if (cmd.equals("-details")) {
1199         fsck.displayFullReport();
1200       } else if (cmd.equals("-timelag")) {
1201         if (i == args.length - 1) {
1202           System.err.println("HBaseFsck: -timelag needs a value.");
1203           printUsageAndExit();
1204         }
1205         try {
1206           long timelag = Long.parseLong(args[i+1]);
1207           fsck.setTimeLag(timelag);
1208         } catch (NumberFormatException e) {
1209           System.err.println("-timelag needs a numeric value.");
1210           printUsageAndExit();
1211         }
1212         i++;
1213       } else if (cmd.equals("-sleepBeforeRerun")) {
1214         if (i == args.length - 1) {
1215           System.err.println("HBaseFsck: -sleepBeforeRerun needs a value.");
1216           printUsageAndExit();
1217         }
1218         try {
1219           sleepBeforeRerun = Long.parseLong(args[i+1]);
1220         } catch (NumberFormatException e) {
1221           System.err.println("-sleepBeforeRerun needs a numeric value.");
1222           printUsageAndExit();
1223         }
1224         i++;
1225       } else if (cmd.equals("-fix")) {
1226         fsck.setFixErrors(true);
1227       } else if (cmd.equals("-summary")) {
1228         fsck.setSummary();
1229       } else {
1230         String str = "Unknown command line option : " + cmd;
1231         LOG.info(str);
1232         System.out.println(str);
1233         printUsageAndExit();
1234       }
1235     }
1236     // do the real work of fsck
1237     int code = fsck.doWork();
1238     // If we have changed the HBase state it is better to run fsck again
1239     // to see if we haven't broken something else in the process.
1240     // We run it only once more because otherwise we can easily fall into
1241     // an infinite loop.
1242     if (fsck.shouldRerun()) {
1243       try {
1244         LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
1245         Thread.sleep(sleepBeforeRerun);
1246       } catch (InterruptedException ie) {
1247         Runtime.getRuntime().exit(code);
1248       }
1249       // Just report
1250       fsck.setFixErrors(false);
1251       fsck.errors.resetErrors();
1252       code = fsck.doWork();
1253     }
1254 
1255     Runtime.getRuntime().exit(code);
1256   }
1257 }
1258