1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.util;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collections;
24 import java.util.HashSet;
25 import java.util.List;
26 import java.util.Set;
27 import java.util.concurrent.Callable;
28 import java.util.concurrent.ConcurrentHashMap;
29 import java.util.concurrent.ExecutionException;
30 import java.util.concurrent.ExecutorService;
31 import java.util.concurrent.Executors;
32 import java.util.concurrent.Future;
33
34 import org.apache.commons.cli.CommandLine;
35 import org.apache.commons.cli.CommandLineParser;
36 import org.apache.commons.cli.GnuParser;
37 import org.apache.commons.cli.HelpFormatter;
38 import org.apache.commons.cli.Option;
39 import org.apache.commons.cli.Options;
40 import org.apache.commons.cli.ParseException;
41 import org.apache.commons.logging.Log;
42 import org.apache.commons.logging.LogFactory;
43 import org.apache.hadoop.conf.Configured;
44 import org.apache.hadoop.fs.FSDataInputStream;
45 import org.apache.hadoop.fs.FileStatus;
46 import org.apache.hadoop.fs.FileSystem;
47 import org.apache.hadoop.fs.Path;
48 import org.apache.hadoop.hbase.HBaseConfiguration;
49 import org.apache.hadoop.hbase.io.FileLink;
50 import org.apache.hadoop.hbase.io.HFileLink;
51 import org.apache.hadoop.hbase.regionserver.StoreFile;
52 import org.apache.hadoop.util.Tool;
53 import org.apache.hadoop.util.ToolRunner;
54
55
56
57
58
59
60
61
62
63
64
65
66
67 public class HFileV1Detector extends Configured implements Tool {
68 private FileSystem fs;
69 private static final Log LOG = LogFactory.getLog(HFileV1Detector.class);
70 private static final int DEFAULT_NUM_OF_THREADS = 10;
71
72
73
74 private static final String PRE_NS_DOT_ARCHIVE = ".archive";
75
76
77
78 private static final String PRE_NS_DOT_TMP = ".tmp";
79 private int numOfThreads;
80
81
82
83 private Path targetDirPath;
84
85
86
87 private ExecutorService exec;
88
89
90
91
92 private final Set<Path> processedTables = new HashSet<Path>();
93
94
95
96 private final Set<Path> corruptedHFiles = Collections
97 .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
98
99
100
101 private final Set<Path> hFileV1Set = Collections
102 .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
103
104 private Options options = new Options();
105
106 private Path defaultRootDir = null;
107 public HFileV1Detector() {
108 Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation");
109 pathOption.setRequired(false);
110 options.addOption(pathOption);
111 Option threadOption = new Option("n", "numberOfThreads", true,
112 "Number of threads to use while processing HFiles.");
113 threadOption.setRequired(false);
114 options.addOption(threadOption);
115 options.addOption("h", "help", false, "Help");
116 }
117
118 private boolean parseOption(String[] args) throws ParseException, IOException {
119 if (args.length == 0) {
120 return true;
121 }
122 CommandLineParser parser = new GnuParser();
123 CommandLine cmd = parser.parse(options, args);
124 if (cmd.hasOption("h")) {
125 HelpFormatter formatter = new HelpFormatter();
126 formatter.printHelp("HFileV1Detector", options, true);
127 System.out
128 .println("In case no option is provided, it processes hbase.rootdir using 10 threads.");
129 System.out.println("Example:");
130 System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':");
131 System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase");
132 System.out.println();
133 return false;
134 }
135
136 if (cmd.hasOption("p")) {
137 this.targetDirPath = new Path(FSUtils.getRootDir(getConf()), cmd.getOptionValue("p"));
138 }
139 try {
140 if (cmd.hasOption("n")) {
141 int n = Integer.parseInt(cmd.getOptionValue("n"));
142 if (n < 0 || n > 100) {
143 LOG.warn("Please use a positive number <= 100 for number of threads."
144 + " Continuing with default value " + DEFAULT_NUM_OF_THREADS);
145 return true;
146 }
147 this.numOfThreads = n;
148 }
149 } catch (NumberFormatException nfe) {
150 LOG.error("Please select a valid number for threads");
151 return false;
152 }
153 return true;
154 }
155
156
157
158
159
160
161
162
163 @Override
164 public int run(String args[]) throws IOException, ParseException {
165 Path root = new Path(FSUtils.getRootDir(getConf()).toUri());
166 getConf().set("fs.defaultFS", root.toString());
167 fs = FileSystem.get(getConf());
168 numOfThreads = DEFAULT_NUM_OF_THREADS;
169 targetDirPath = FSUtils.getRootDir(getConf());
170 if (!parseOption(args)) {
171 System.exit(-1);
172 }
173 this.exec = Executors.newFixedThreadPool(numOfThreads);
174 try {
175 return processResult(checkForV1Files(targetDirPath));
176 } catch (Exception e) {
177 LOG.error(e);
178 } finally {
179 exec.shutdown();
180 fs.close();
181 }
182 return -1;
183 }
184
185 private int processResult(Set<Path> regionsWithHFileV1) {
186 LOG.info("Result: \n");
187 printSet(processedTables, "Tables Processed: ");
188
189 int count = hFileV1Set.size();
190 LOG.info("Count of HFileV1: " + count);
191 if (count > 0) printSet(hFileV1Set, "HFileV1:");
192
193 count = corruptedHFiles.size();
194 LOG.info("Count of corrupted files: " + count);
195 if (count > 0) printSet(corruptedHFiles, "Corrupted Files: ");
196
197 count = regionsWithHFileV1.size();
198 LOG.info("Count of Regions with HFileV1: " + count);
199 if (count > 0) printSet(regionsWithHFileV1, "Regions to Major Compact: ");
200
201 return (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) ? 0 : 1;
202 }
203
204 private void printSet(Set<Path> result, String msg) {
205 LOG.info(msg);
206 for (Path p : result) {
207 LOG.info(p);
208 }
209 }
210
211
212
213
214
215
216
217 private Set<Path> checkForV1Files(Path targetDir) throws IOException {
218 LOG.info("Target dir is: " + targetDir);
219 if (!fs.exists(targetDir)) {
220 throw new IOException("The given path does not exist: " + targetDir);
221 }
222 if (isTableDir(fs, targetDir)) {
223 processedTables.add(targetDir);
224 return processTable(targetDir);
225 }
226 Set<Path> regionsWithHFileV1 = new HashSet<Path>();
227 FileStatus[] fsStats = fs.listStatus(targetDir);
228 for (FileStatus fsStat : fsStats) {
229 if (isTableDir(fs, fsStat.getPath()) && !isRootTable(fsStat.getPath())) {
230 processedTables.add(fsStat.getPath());
231
232 regionsWithHFileV1.addAll(processTable(fsStat.getPath()));
233 } else {
234 LOG.info("Ignoring path: " + fsStat.getPath());
235 }
236 }
237 return regionsWithHFileV1;
238 }
239
240
241
242
243
244 private boolean isRootTable(Path path) {
245 if (path != null && path.toString().endsWith("-ROOT-")) return true;
246 return false;
247 }
248
249
250
251
252
253
254
255 private Set<Path> processTable(Path tableDir) throws IOException {
256
257 LOG.debug("processing table: " + tableDir);
258 List<Future<Path>> regionLevelResults = new ArrayList<Future<Path>>();
259 Set<Path> regionsWithHFileV1 = new HashSet<Path>();
260
261 FileStatus[] fsStats = fs.listStatus(tableDir);
262 for (FileStatus fsStat : fsStats) {
263
264 if (isRegionDir(fs, fsStat.getPath())) {
265 regionLevelResults.add(processRegion(fsStat.getPath()));
266 }
267 }
268 for (Future<Path> f : regionLevelResults) {
269 try {
270 if (f.get() != null) {
271 regionsWithHFileV1.add(f.get());
272 }
273 } catch (InterruptedException e) {
274 LOG.error(e);
275 } catch (ExecutionException e) {
276 LOG.error(e);
277 }
278 }
279 return regionsWithHFileV1;
280 }
281
282
283
284
285
286
287
288 private Future<Path> processRegion(final Path regionDir) {
289 LOG.debug("processing region: " + regionDir);
290 Callable<Path> regionCallable = new Callable<Path>() {
291 @Override
292 public Path call() throws Exception {
293 for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
294 FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir);
295 if (storeFiles == null || storeFiles.length == 0) continue;
296 for (FileStatus storeFile : storeFiles) {
297 Path storeFilePath = storeFile.getPath();
298 FSDataInputStream fsdis = null;
299 long lenToRead = 0;
300 try {
301
302 if (StoreFile.isReference(storeFilePath)) continue;
303
304 else if (HFileLink.isHFileLink(storeFilePath)) {
305 FileLink fLink = getFileLinkWithPreNSPath(storeFilePath);
306 fsdis = fLink.open(fs);
307 lenToRead = fLink.getFileStatus(fs).getLen();
308 } else {
309
310 fsdis = fs.open(storeFilePath);
311 lenToRead = storeFile.getLen();
312 }
313 int majorVersion = computeMajorVersion(fsdis, lenToRead);
314 if (majorVersion == 1) {
315 hFileV1Set.add(storeFilePath);
316
317 return regionDir;
318 }
319 if (majorVersion > 2 || majorVersion < 1) throw new IllegalArgumentException(
320 "Incorrect major version: " + majorVersion);
321 } catch (Exception iae) {
322 corruptedHFiles.add(storeFilePath);
323 LOG.error("Got exception while reading trailer for file: "+ storeFilePath, iae);
324 } finally {
325 if (fsdis != null) fsdis.close();
326 }
327 }
328 }
329 return null;
330 }
331
332 private int computeMajorVersion(FSDataInputStream istream, long fileSize)
333 throws IOException {
334
335 long seekPoint = fileSize - Bytes.SIZEOF_INT;
336 if (seekPoint < 0)
337 throw new IllegalArgumentException("File too small, no major version found");
338
339
340 istream.seek(seekPoint);
341 int version = istream.readInt();
342
343 return version & 0x00ffffff;
344 }
345 };
346 Future<Path> f = exec.submit(regionCallable);
347 return f;
348 }
349
350
351
352
353
354
355
356
357 public FileLink getFileLinkWithPreNSPath(Path storeFilePath) throws IOException {
358 HFileLink link = new HFileLink(getConf(), storeFilePath);
359 List<Path> pathsToProcess = getPreNSPathsForHFileLink(link);
360 pathsToProcess.addAll(Arrays.asList(link.getLocations()));
361 return new FileLink(pathsToProcess);
362 }
363
364 private List<Path> getPreNSPathsForHFileLink(HFileLink fileLink) throws IOException {
365 List<Path> p = new ArrayList<Path>();
366 String relativeTablePath = removeDefaultNSPath(fileLink.getOriginPath());
367 p.add(getPreNSPath(PRE_NS_DOT_ARCHIVE, relativeTablePath));
368 p.add(getPreNSPath(PRE_NS_DOT_TMP, relativeTablePath));
369 p.add(getPreNSPath(null, relativeTablePath));
370 return p;
371 }
372
373
374
375
376
377
378 private String removeDefaultNSPath(Path originalPath) throws IOException {
379 if (defaultRootDir == null) {
380 defaultRootDir = FSUtils.getRootDir(getConf());
381 }
382 String pathStr = originalPath.toString();
383 if (!pathStr.startsWith(defaultRootDir.toString())) return pathStr;
384 return pathStr.substring(defaultRootDir.toString().length() + 1);
385 }
386
387 private Path getPreNSPath(String prefix, String relativeTablePath) throws IOException {
388 String relativePath = (prefix == null ? relativeTablePath : prefix + Path.SEPARATOR
389 + relativeTablePath);
390 return new Path(FSUtils.getRootDir(getConf()), relativePath);
391 }
392
393 private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
394
395
396 if (fs.isFile(path)) return false;
397 return (FSTableDescriptors.getTableInfoPath(fs, path) != null)
398 || path.toString().endsWith(".META.");
399 }
400
401 private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
402 if (fs.isFile(path)) return false;
403 Path regionInfo = new Path(path, ".regioninfo");
404 return fs.exists(regionInfo);
405
406 }
407
408 public static void main(String args[]) throws Exception {
409 System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args));
410 }
411
412 }