/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Tool to detect the presence of any HFileV1 in the given directory. It prints all the regions
 * that contain such files.
 * <p>
 * To print the help section of the tool:
 * <ul>
 * <li>./bin/hbase org.apache.hadoop.hbase.util.HFileV1Detector --h or,
 * <li>java -cp `hbase classpath` org.apache.hadoop.hbase.util.HFileV1Detector --h
 * </ul>
 * It also supports the -h, --help and -help options.
 * </p>
 */
public class HFileV1Detector extends Configured implements Tool {
  private FileSystem fs;
  private static final Log LOG = LogFactory.getLog(HFileV1Detector.class);
  private static final int DEFAULT_NUM_OF_THREADS = 10;
  private int numOfThreads;
  private Path dirToProcess;
  private final Set<Path> corruptedHFiles = Collections
      .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
  private final Set<Path> hFileV1Set = Collections
      .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());

  private Options options = new Options();

  public HFileV1Detector() {
    Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation");
    pathOption.setRequired(false);
    options.addOption(pathOption);
    Option threadOption = new Option("n", "numberOfThreads", true,
        "Number of threads to use while processing HFiles.");
    threadOption.setRequired(false);
    options.addOption(threadOption);
    options.addOption("h", "help", false, "Help");
  }

  private boolean parseOption(String[] args) throws ParseException, IOException {
    if (args.length == 0) {
      return true; // no args will process with default values.
    }
    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);
    if (cmd.hasOption("h")) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("HFileV1Detector", options, true);
      System.out.println("In case no option is provided, it processes hbase.rootdir using 10 threads.");
      System.out.println("Example:");
      System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':");
      System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase");
      System.out.println();
      return false;
    }

    if (cmd.hasOption("p")) {
      dirToProcess = new Path(cmd.getOptionValue("p"));
    }
    try {
      if (cmd.hasOption("n")) {
        int n = Integer.parseInt(cmd.getOptionValue("n"));
        if (n <= 0 || n > 100) {
          System.out.println("Please use a positive number <= 100 for the number of threads."
              + " Continuing with default value " + DEFAULT_NUM_OF_THREADS);
          return true;
        }
        numOfThreads = n;
      }
    } catch (NumberFormatException nfe) {
      System.err.println("Please select a valid number for threads");
      return false;
    }
    return true;
  }

  @Override
  public int run(String[] args) throws IOException, ParseException {
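    // Set defaults up front; parseOption() below may override the thread
    // count and the directory to process from the command line.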
    fs = FileSystem.get(getConf());
    numOfThreads = DEFAULT_NUM_OF_THREADS;
    dirToProcess = FSUtils.getRootDir(getConf());
    if (!parseOption(args)) {
      return 1;
    }
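    // Regions are scanned in parallel: each region directory becomes a
    // separate task on this pool (see processRegion()).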
    ExecutorService exec = Executors.newFixedThreadPool(numOfThreads);
    Set<Path> regionsWithHFileV1;
    try {
      regionsWithHFileV1 = checkForV1Files(dirToProcess, exec);
      printHRegionsWithHFileV1(regionsWithHFileV1);
      printAllHFileV1();
      printCorruptedHFiles();
      if (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) {
        // all clear.
        System.out.println("No HFile V1 Found");
      }
    } catch (Exception e) {
      System.err.println(e);
      return 1;
    } finally {
      exec.shutdown();
      fs.close();
    }
    return 0;
  }

  /**
   * Takes a directory path and lists out any HFileV1, if present.
   * @param targetDir directory to start looking for HFileV1 files in.
   * @param exec executor on which region-level checks are run.
   * @return set of regions that have an HFileV1.
   * @throws IOException if the directory cannot be read.
   */
  private Set<Path> checkForV1Files(Path targetDir, final ExecutorService exec) throws IOException {
    if (isTableDir(fs, targetDir)) {
      return processTable(targetDir, exec);
    }
    // the user has passed an hbase installation directory.
    if (!fs.exists(targetDir)) {
      throw new IOException("The given path does not exist: " + targetDir);
    }
    Set<Path> regionsWithHFileV1 = new HashSet<Path>();
    FileStatus[] fsStats = fs.listStatus(targetDir);
    for (FileStatus fsStat : fsStats) {
      if (isTableDir(fs, fsStat.getPath())) {
        // look for regions and find out any v1 file.
        regionsWithHFileV1.addAll(processTable(fsStat.getPath(), exec));
      } else {
        LOG.info("Ignoring path: " + fsStat.getPath());
      }
    }
    return regionsWithHFileV1;
  }

  /**
   * Finds the regions in the table that have an HFile v1 in them.
   * @param tableDir table directory to scan.
   * @param exec executor on which region-level checks are run.
   * @return the set of regions containing HFile v1.
   * @throws IOException if the table directory cannot be read.
   */
  private Set<Path> processTable(Path tableDir, final ExecutorService exec) throws IOException {
    // list out the regions and then process each file in it.
    LOG.info("processing table: " + tableDir);
    List<Future<Path>> regionLevelResults = new ArrayList<Future<Path>>();
    Set<Path> regionsWithHFileV1 = new HashSet<Path>();

    FileStatus[] fsStats = fs.listStatus(tableDir);
    for (FileStatus fsStat : fsStats) {
      // process each region
      if (isRegionDir(fs, fsStat.getPath())) {
        regionLevelResults.add(processRegion(fsStat.getPath(), exec));
      }
    }
    for (Future<Path> f : regionLevelResults) {
      try {
        Path regionWithHFileV1 = f.get();
        if (regionWithHFileV1 != null) {
          regionsWithHFileV1.add(regionWithHFileV1);
        }
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // restore the interrupt status.
        System.err.println(e);
      } catch (ExecutionException e) {
        System.err.println(e); // might be a bad hfile. We print it at the end.
      }
    }
    return regionsWithHFileV1;
  }

  /**
   * Each region is processed by a separate handler. If an HRegion has an HFileV1, its path is
   * returned as the future result; otherwise, a null value is returned.
   * @param regionDir region to process.
   * @param exec executor the region handler is submitted to.
   * @return corresponding Future object.
   */
  private Future<Path> processRegion(final Path regionDir, final ExecutorService exec) {
    LOG.info("processing region: " + regionDir);
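    // The callable walks every column family directory under the region and
    // inspects each store file's trailer; the first V1 file found is enough
    // to flag the whole region, so the scan short-circuits there.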
    Callable<Path> regionCallable = new Callable<Path>() {
      @Override
      public Path call() throws Exception {
        for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
          FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir);
          if (storeFiles == null || storeFiles.length == 0) continue;
          for (FileStatus storeFile : storeFiles) {
            Path storeFilePath = storeFile.getPath();
            FSDataInputStream fsdis = null;
            long lenToRead = 0;
            try {
              // check whether this path is a reference.
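              // (A reference points at a store file in the parent region,
              // created on split; the underlying HFile is examined when the
              // parent region itself is scanned, so it is safe to skip here.)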
              if (StoreFileInfo.isReference(storeFilePath)) continue;
              // check whether this path is an HFileLink.
              else if (HFileLink.isHFileLink(storeFilePath)) {
                HFileLink fileLink = new HFileLink(getConf(), storeFilePath);
                fsdis = fileLink.open(fs);
                lenToRead = fileLink.getFileStatus(fs).getLen();
              } else {
                // a regular hfile
                fsdis = fs.open(storeFilePath);
                lenToRead = storeFile.getLen();
              }
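              // The fixed trailer at the end of every HFile records the
              // format's major version; a major version of 1 marks a file
              // that must be rewritten (via major compaction) before upgrade.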
              FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis, lenToRead);
              int version = trailer.getMajorVersion();
              if (version == 1) {
                hFileV1Set.add(storeFilePath);
                // return this region path, as it needs to be compacted.
                return regionDir;
              }
            } catch (Exception e) {
              corruptedHFiles.add(storeFilePath);
            } finally {
              if (fsdis != null) fsdis.close();
            }
          }
        }
        return null;
      }
    };
    return exec.submit(regionCallable);
  }

  private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
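    // A table directory is identified by the presence of its table
    // descriptor (.tableinfo) file; getTableInfoPath() returns null when
    // no such file exists.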
    return FSTableDescriptors.getTableInfoPath(fs, path) != null;
  }

  private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
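    // Every region directory carries a .regioninfo file
    // (HRegionFileSystem.REGION_INFO_FILE); its presence marks a region dir.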
    Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
    return fs.exists(regionInfo);
  }

  private void printHRegionsWithHFileV1(Set<Path> regionsHavingHFileV1) {
    if (!regionsHavingHFileV1.isEmpty()) {
      System.out.println();
      System.out.println("The following regions have HFileV1 and need to be major compacted:");
      System.out.println();
      for (Path r : regionsHavingHFileV1) {
        System.out.println(r);
      }
      System.out.println();
    }
  }

  private void printAllHFileV1() {
    if (!hFileV1Set.isEmpty()) {
      System.out.println();
      System.out.println("The following HFileV1 files were found:");
      System.out.println();
      for (Path r : hFileV1Set) {
        System.out.println(r);
      }
      System.out.println();
    }
  }

  private void printCorruptedHFiles() {
    if (!corruptedHFiles.isEmpty()) {
      System.out.println();
      System.out.println("The following HFiles are corrupted, as their version is unknown:");
      System.out.println();
      for (Path r : corruptedHFiles) {
        System.out.println(r);
      }
      System.out.println();
    }
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args));
  }

}