/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
18 package org.apache.hadoop.hbase.util;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.HashSet;
24 import java.util.List;
25 import java.util.Set;
26 import java.util.concurrent.Callable;
27 import java.util.concurrent.ConcurrentHashMap;
28 import java.util.concurrent.ExecutionException;
29 import java.util.concurrent.ExecutorService;
30 import java.util.concurrent.Executors;
31 import java.util.concurrent.Future;
32
33 import org.apache.commons.cli.CommandLine;
34 import org.apache.commons.cli.CommandLineParser;
35 import org.apache.commons.cli.GnuParser;
36 import org.apache.commons.cli.HelpFormatter;
37 import org.apache.commons.cli.Option;
38 import org.apache.commons.cli.Options;
39 import org.apache.commons.cli.ParseException;
40 import org.apache.commons.logging.Log;
41 import org.apache.commons.logging.LogFactory;
42 import org.apache.hadoop.conf.Configured;
43 import org.apache.hadoop.fs.FSDataInputStream;
44 import org.apache.hadoop.fs.FileStatus;
45 import org.apache.hadoop.fs.FileSystem;
46 import org.apache.hadoop.fs.Path;
47 import org.apache.hadoop.hbase.HBaseConfiguration;
48 import org.apache.hadoop.hbase.io.HFileLink;
49 import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
50 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
51 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
52 import org.apache.hadoop.util.Tool;
53 import org.apache.hadoop.util.ToolRunner;
54
55
56
57
58
59
60
61
62
63
64
65
66
67 public class HFileV1Detector extends Configured implements Tool {
68 private FileSystem fs;
69 private static final Log LOG = LogFactory.getLog(HFileV1Detector.class);
70 private static final int DEFAULT_NUM_OF_THREADS = 10;
71 private int numOfThreads;
72 private Path dirToProcess;
73 private final Set<Path> corruptedHFiles = Collections
74 .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
75 private final Set<Path> hFileV1Set = Collections
76 .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
77
78 private Options options = new Options();
79
80 public HFileV1Detector() {
81 Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation");
82 pathOption.setRequired(false);
83 options.addOption(pathOption);
84 Option threadOption = new Option("n", "numberOfThreads", true,
85 "Number of threads to use while processing HFiles.");
86 threadOption.setRequired(false);
87 options.addOption(threadOption);
88 options.addOption("h", "help", false, "Help");
89 }
90
91 private boolean parseOption(String[] args) throws ParseException, IOException {
92 if (args.length == 0) {
93 return true;
94 }
95 CommandLineParser parser = new GnuParser();
96 CommandLine cmd = parser.parse(options, args);
97 if (cmd.hasOption("h")) {
98 HelpFormatter formatter = new HelpFormatter();
99 formatter.printHelp("HFileV1Detector", options, true);
100 System.out
101 .println("In case no option is provided, it processes hbase.rootdir using 10 threads.");
102 System.out.println("Example:");
103 System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':");
104 System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase");
105 System.out.println();
106 return false;
107 }
108
109 if (cmd.hasOption("p")) {
110 dirToProcess = new Path(cmd.getOptionValue("p"));
111 }
112 try {
113 if (cmd.hasOption("n")) {
114 int n = Integer.parseInt(cmd.getOptionValue("n"));
115 if (n < 0 || n > 100) {
116 System.out.println("Please use a positive number <= 100 for number of threads."
117 + " Continuing with default value " + DEFAULT_NUM_OF_THREADS);
118 return true;
119 }
120 numOfThreads = n;
121 }
122 } catch (NumberFormatException nfe) {
123 System.err.println("Please select a valid number for threads");
124 return false;
125 }
126 return true;
127 }
128
129 @Override
130 public int run(String args[]) throws IOException, ParseException {
131 fs = FileSystem.get(getConf());
132 numOfThreads = DEFAULT_NUM_OF_THREADS;
133 dirToProcess = FSUtils.getRootDir(getConf());
134 if (!parseOption(args)) {
135 System.exit(1);
136 }
137 ExecutorService exec = Executors.newFixedThreadPool(numOfThreads);
138 Set<Path> regionsWithHFileV1;
139 try {
140 regionsWithHFileV1 = checkForV1Files(dirToProcess, exec);
141 printHRegionsWithHFileV1(regionsWithHFileV1);
142 printAllHFileV1();
143 printCorruptedHFiles();
144 if (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) {
145
146 System.out.println("No HFile V1 Found");
147 }
148 } catch (Exception e) {
149 System.err.println(e);
150 return 1;
151 } finally {
152 exec.shutdown();
153 fs.close();
154 }
155 return 0;
156 }
157
158
159
160
161
162
163
164
165 private Set<Path> checkForV1Files(Path targetDir, final ExecutorService exec) throws IOException {
166 if (isTableDir(fs, targetDir)) {
167 return processTable(targetDir, exec);
168 }
169
170 if (!fs.exists(targetDir)) {
171 throw new IOException("The given path does not exist: " + targetDir);
172 }
173 Set<Path> regionsWithHFileV1 = new HashSet<Path>();
174 FileStatus[] fsStats = fs.listStatus(targetDir);
175 for (FileStatus fsStat : fsStats) {
176 if (isTableDir(fs, fsStat.getPath())) {
177
178 regionsWithHFileV1.addAll(processTable(fsStat.getPath(), exec));
179 } else {
180 LOG.info("Ignoring path: " + fsStat.getPath());
181 }
182 }
183 return regionsWithHFileV1;
184 }
185
186
187
188
189
190
191
192
193 private Set<Path> processTable(Path tableDir, final ExecutorService exec) throws IOException {
194
195 LOG.info("processing table: " + tableDir);
196 List<Future<Path>> regionLevelResults = new ArrayList<Future<Path>>();
197 Set<Path> regionsWithHFileV1 = new HashSet<Path>();
198
199 FileStatus[] fsStats = fs.listStatus(tableDir);
200 for (FileStatus fsStat : fsStats) {
201
202 if (isRegionDir(fs, fsStat.getPath())) {
203 regionLevelResults.add(processRegion(fsStat.getPath(), exec));
204 }
205 }
206 for (Future<Path> f : regionLevelResults) {
207 try {
208 if (f.get() != null) {
209 regionsWithHFileV1.add(f.get());
210 }
211 } catch (InterruptedException e) {
212 System.err.println(e);
213 } catch (ExecutionException e) {
214 System.err.println(e);
215 }
216 }
217 return regionsWithHFileV1;
218 }
219
220
221
222
223
224
225
226
227 private Future<Path> processRegion(final Path regionDir, final ExecutorService exec) {
228 LOG.info("processing region: " + regionDir);
229 Callable<Path> regionCallable = new Callable<Path>() {
230 @Override
231 public Path call() throws Exception {
232 for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
233 FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir);
234 if (storeFiles == null || storeFiles.length == 0) continue;
235 for (FileStatus storeFile : storeFiles) {
236 Path storeFilePath = storeFile.getPath();
237 FSDataInputStream fsdis = null;
238 long lenToRead = 0;
239 try {
240
241 if (StoreFileInfo.isReference(storeFilePath)) continue;
242
243 else if (HFileLink.isHFileLink(storeFilePath)) {
244 HFileLink fileLink = new HFileLink(getConf(), storeFilePath);
245 fsdis = fileLink.open(fs);
246 lenToRead = fileLink.getFileStatus(fs).getLen();
247 } else {
248
249 fsdis = fs.open(storeFilePath);
250 lenToRead = storeFile.getLen();
251 }
252 FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis, lenToRead);
253 int version = trailer.getMajorVersion();
254 if (version == 1) {
255 hFileV1Set.add(storeFilePath);
256
257 return regionDir;
258 }
259 } catch (Exception iae) {
260 corruptedHFiles.add(storeFilePath);
261 } finally {
262 if (fsdis != null) fsdis.close();
263 }
264 }
265 }
266 return null;
267 }
268 };
269 Future<Path> f = exec.submit(regionCallable);
270 return f;
271 }
272
273 private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
274 return FSTableDescriptors.getTableInfoPath(fs, path) != null;
275 }
276
277 private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
278 Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
279 return fs.exists(regionInfo);
280
281 }
282
283 private void printHRegionsWithHFileV1(Set<Path> regionsHavingHFileV1) {
284 if (!regionsHavingHFileV1.isEmpty()) {
285 System.out.println();
286 System.out.println("Following regions has HFileV1 and needs to be Major Compacted:");
287 System.out.println();
288 for (Path r : regionsHavingHFileV1) {
289 System.out.println(r);
290 }
291 System.out.println();
292 }
293 }
294
295 private void printAllHFileV1() {
296 if (!hFileV1Set.isEmpty()) {
297 System.out.println();
298 System.out.println("Following HFileV1 are found:");
299 System.out.println();
300 for (Path r : hFileV1Set) {
301 System.out.println(r);
302 }
303 System.out.println();
304 }
305
306 }
307
308 private void printCorruptedHFiles() {
309 if (!corruptedHFiles.isEmpty()) {
310 System.out.println();
311 System.out.println("Following HFiles are corrupted as their version is unknown:");
312 System.out.println();
313 for (Path r : corruptedHFiles) {
314 System.out.println(r);
315 }
316 System.out.println();
317 }
318 }
319
320 public static void main(String args[]) throws Exception {
321 System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args));
322 }
323
324 }