1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.DataInput;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.List;
26 import java.util.Map;
27
28 import org.apache.commons.cli.CommandLine;
29 import org.apache.commons.cli.CommandLineParser;
30 import org.apache.commons.cli.HelpFormatter;
31 import org.apache.commons.cli.Options;
32 import org.apache.commons.cli.ParseException;
33 import org.apache.commons.cli.PosixParser;
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.classification.InterfaceAudience;
37 import org.apache.hadoop.classification.InterfaceStability;
38 import org.apache.hadoop.conf.Configuration;
39 import org.apache.hadoop.fs.FileSystem;
40 import org.apache.hadoop.fs.Path;
41 import org.apache.hadoop.hbase.TableName;
42 import org.apache.hadoop.hbase.HBaseConfiguration;
43 import org.apache.hadoop.hbase.HRegionInfo;
44 import org.apache.hadoop.hbase.KeyValue;
45 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
46 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
47 import org.apache.hadoop.hbase.util.BloomFilter;
48 import org.apache.hadoop.hbase.util.BloomFilterFactory;
49 import org.apache.hadoop.hbase.util.ByteBloomFilter;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.apache.hadoop.hbase.util.FSUtils;
52 import org.apache.hadoop.hbase.util.Writables;
53
54
55
56
57 @InterfaceAudience.Public
58 @InterfaceStability.Evolving
59 public class HFilePrettyPrinter {
60
61 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
62
63 private Options options = new Options();
64
65 private boolean verbose;
66 private boolean printValue;
67 private boolean printKey;
68 private boolean shouldPrintMeta;
69 private boolean printBlocks;
70 private boolean printStats;
71 private boolean checkRow;
72 private boolean checkFamily;
73 private boolean isSeekToRow = false;
74
75
76
77
78 private byte[] row = null;
79 private Configuration conf;
80
81 private List<Path> files = new ArrayList<Path>();
82 private int count;
83
84 private static final String FOUR_SPACES = " ";
85
86 public HFilePrettyPrinter() {
87 options.addOption("v", "verbose", false,
88 "Verbose output; emits file and meta data delimiters");
89 options.addOption("p", "printkv", false, "Print key/value pairs");
90 options.addOption("e", "printkey", false, "Print keys");
91 options.addOption("m", "printmeta", false, "Print meta data of file");
92 options.addOption("b", "printblocks", false, "Print block index meta data");
93 options.addOption("k", "checkrow", false,
94 "Enable row order check; looks for out-of-order keys");
95 options.addOption("a", "checkfamily", false, "Enable family check");
96 options.addOption("f", "file", true,
97 "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34");
98 options.addOption("w", "seekToRow", true,
99 "Seek to this row and print all the kvs for this row only");
100 options.addOption("r", "region", true,
101 "Region to scan. Pass region name; e.g. '.META.,,1'");
102 options.addOption("s", "stats", false, "Print statistics");
103 }
104
105 public boolean parseOptions(String args[]) throws ParseException,
106 IOException {
107 if (args.length == 0) {
108 HelpFormatter formatter = new HelpFormatter();
109 formatter.printHelp("HFile", options, true);
110 return false;
111 }
112 CommandLineParser parser = new PosixParser();
113 CommandLine cmd = parser.parse(options, args);
114
115 verbose = cmd.hasOption("v");
116 printValue = cmd.hasOption("p");
117 printKey = cmd.hasOption("e") || printValue;
118 shouldPrintMeta = cmd.hasOption("m");
119 printBlocks = cmd.hasOption("b");
120 printStats = cmd.hasOption("s");
121 checkRow = cmd.hasOption("k");
122 checkFamily = cmd.hasOption("a");
123
124 if (cmd.hasOption("f")) {
125 files.add(new Path(cmd.getOptionValue("f")));
126 }
127
128 if (cmd.hasOption("w")) {
129 String key = cmd.getOptionValue("w");
130 if (key != null && key.length() != 0) {
131 row = key.getBytes();
132 isSeekToRow = true;
133 } else {
134 System.err.println("Invalid row is specified.");
135 System.exit(-1);
136 }
137 }
138
139 if (cmd.hasOption("r")) {
140 String regionName = cmd.getOptionValue("r");
141 byte[] rn = Bytes.toBytes(regionName);
142 byte[][] hri = HRegionInfo.parseRegionName(rn);
143 Path rootDir = FSUtils.getRootDir(conf);
144 Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
145 String enc = HRegionInfo.encodeRegionName(rn);
146 Path regionDir = new Path(tableDir, enc);
147 if (verbose)
148 System.out.println("region dir -> " + regionDir);
149 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
150 regionDir);
151 if (verbose)
152 System.out.println("Number of region files found -> "
153 + regionFiles.size());
154 if (verbose) {
155 int i = 1;
156 for (Path p : regionFiles) {
157 if (verbose)
158 System.out.println("Found file[" + i++ + "] -> " + p);
159 }
160 }
161 files.addAll(regionFiles);
162 }
163
164 return true;
165 }
166
167
168
169
170
171 public int run(String[] args) {
172 conf = HBaseConfiguration.create();
173 try {
174 FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
175 if (!parseOptions(args))
176 return 1;
177 } catch (IOException ex) {
178 LOG.error("Error parsing command-line options", ex);
179 return 1;
180 } catch (ParseException ex) {
181 LOG.error("Error parsing command-line options", ex);
182 return 1;
183 }
184
185
186 for (Path fileName : files) {
187 try {
188 processFile(fileName);
189 } catch (IOException ex) {
190 LOG.error("Error reading " + fileName, ex);
191 }
192 }
193
194 if (verbose || printKey) {
195 System.out.println("Scanned kv count -> " + count);
196 }
197
198 return 0;
199 }
200
201 private void processFile(Path file) throws IOException {
202 if (verbose)
203 System.out.println("Scanning -> " + file);
204 FileSystem fs = file.getFileSystem(conf);
205 if (!fs.exists(file)) {
206 System.err.println("ERROR, file doesnt exist: " + file);
207 }
208
209 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
210
211 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
212
213 KeyValueStatsCollector fileStats = null;
214
215 if (verbose || printKey || checkRow || checkFamily || printStats) {
216
217 HFileScanner scanner = reader.getScanner(false, false, false);
218 fileStats = new KeyValueStatsCollector();
219 boolean shouldScanKeysValues = false;
220 if (this.isSeekToRow) {
221
222 shouldScanKeysValues =
223 (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
224 } else {
225 shouldScanKeysValues = scanner.seekTo();
226 }
227 if (shouldScanKeysValues)
228 scanKeysValues(file, fileStats, scanner, row);
229 }
230
231
232 if (shouldPrintMeta) {
233 printMeta(reader, fileInfo);
234 }
235
236 if (printBlocks) {
237 System.out.println("Block Index:");
238 System.out.println(reader.getDataBlockIndexReader());
239 }
240
241 if (printStats) {
242 fileStats.finish();
243 System.out.println("Stats:\n" + fileStats);
244 }
245
246 reader.close();
247 }
248
249 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
250 HFileScanner scanner, byte[] row) throws IOException {
251 KeyValue pkv = null;
252 do {
253 KeyValue kv = scanner.getKeyValue();
254 if (row != null && row.length != 0) {
255 int result = Bytes.compareTo(kv.getRow(), row);
256 if (result > 0) {
257 break;
258 } else if (result < 0) {
259 continue;
260 }
261 }
262
263 if (printStats) {
264 fileStats.collect(kv);
265 }
266
267 if (printKey) {
268 System.out.print("K: " + kv);
269 if (printValue) {
270 System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
271 }
272 System.out.println();
273 }
274
275 if (checkRow && pkv != null) {
276 if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
277 System.err.println("WARNING, previous row is greater then"
278 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
279 + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent -> "
280 + Bytes.toStringBinary(kv.getKey()));
281 }
282 }
283
284 if (checkFamily) {
285 String fam = Bytes.toString(kv.getFamily());
286 if (!file.toString().contains(fam)) {
287 System.err.println("WARNING, filename does not match kv family,"
288 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
289 + Bytes.toStringBinary(kv.getKey()));
290 }
291 if (pkv != null
292 && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
293 System.err.println("WARNING, previous kv has different family"
294 + " compared to current key\n\tfilename -> " + file
295 + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
296 + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
297 }
298 }
299 pkv = kv;
300 ++count;
301 } while (scanner.next());
302 }
303
304
305
306
307
308 private static String asSeparateLines(String keyValueStr) {
309 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
310 ",\n" + FOUR_SPACES + "$1");
311 }
312
313 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
314 throws IOException {
315 System.out.println("Block index size as per heapsize: "
316 + reader.indexSize());
317 System.out.println(asSeparateLines(reader.toString()));
318 System.out.println("Trailer:\n "
319 + asSeparateLines(reader.getTrailer().toString()));
320 System.out.println("Fileinfo:");
321 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
322 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
323 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
324 long seqid = Bytes.toLong(e.getValue());
325 System.out.println(seqid);
326 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
327 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
328 Writables.copyWritable(e.getValue(), timeRangeTracker);
329 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
330 + timeRangeTracker.getMaximumTimestamp());
331 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
332 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
333 System.out.println(Bytes.toInt(e.getValue()));
334 } else {
335 System.out.println(Bytes.toStringBinary(e.getValue()));
336 }
337 }
338
339 System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
340
341
342 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
343 BloomFilter bloomFilter = null;
344 if (bloomMeta != null)
345 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
346
347 System.out.println("Bloom filter:");
348 if (bloomFilter != null) {
349 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
350 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
351 } else {
352 System.out.println(FOUR_SPACES + "Not present");
353 }
354
355
356 bloomMeta = reader.getDeleteBloomFilterMetadata();
357 bloomFilter = null;
358 if (bloomMeta != null)
359 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
360
361 System.out.println("Delete Family Bloom filter:");
362 if (bloomFilter != null) {
363 System.out.println(FOUR_SPACES
364 + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
365 "\n" + FOUR_SPACES));
366 } else {
367 System.out.println(FOUR_SPACES + "Not present");
368 }
369 }
370
371 private static class LongStats {
372 private long min = Long.MAX_VALUE;
373 private long max = Long.MIN_VALUE;
374 private long sum = 0;
375 private long count = 0;
376
377 void collect(long d) {
378 if (d < min) min = d;
379 if (d > max) max = d;
380 sum += d;
381 count++;
382 }
383
384 public String toString() {
385 return "count: " + count +
386 "\tmin: " + min +
387 "\tmax: " + max +
388 "\tmean: " + ((double)sum/count);
389 }
390 }
391
392 private static class KeyValueStatsCollector {
393 LongStats keyLen = new LongStats();
394 LongStats valLen = new LongStats();
395 LongStats rowSizeBytes = new LongStats();
396 LongStats rowSizeCols = new LongStats();
397
398 long curRowBytes = 0;
399 long curRowCols = 0;
400
401 byte[] biggestRow = null;
402
403 private KeyValue prevKV = null;
404 private long maxRowBytes = 0;
405
406 public void collect(KeyValue kv) {
407 keyLen.collect(kv.getKeyLength());
408 valLen.collect(kv.getValueLength());
409 if (prevKV != null &&
410 KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
411
412 collectRow();
413 }
414 curRowBytes += kv.getLength();
415 curRowCols++;
416 prevKV = kv;
417 }
418
419 private void collectRow() {
420 rowSizeBytes.collect(curRowBytes);
421 rowSizeCols.collect(curRowCols);
422
423 if (curRowBytes > maxRowBytes && prevKV != null) {
424 biggestRow = prevKV.getRow();
425 maxRowBytes = curRowBytes;
426 }
427
428 curRowBytes = 0;
429 curRowCols = 0;
430 }
431
432 public void finish() {
433 if (curRowCols > 0) {
434 collectRow();
435 }
436 }
437
438 @Override
439 public String toString() {
440 if (prevKV == null)
441 return "no data available for statistics";
442
443 return
444 "Key length: " + keyLen + "\n" +
445 "Val length: " + valLen + "\n" +
446 "Row size (bytes): " + rowSizeBytes + "\n" +
447 "Row size (columns): " + rowSizeCols + "\n" +
448 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
449 }
450 }
451 }