1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.ByteArrayOutputStream;
23 import java.io.DataInput;
24 import java.io.IOException;
25 import java.io.PrintStream;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.Locale;
29 import java.util.Map;
30 import java.util.SortedMap;
31
32 import com.yammer.metrics.core.*;
33 import com.yammer.metrics.reporting.ConsoleReporter;
34
35 import org.apache.commons.cli.CommandLine;
36 import org.apache.commons.cli.CommandLineParser;
37 import org.apache.commons.cli.HelpFormatter;
38 import org.apache.commons.cli.Option;
39 import org.apache.commons.cli.OptionGroup;
40 import org.apache.commons.cli.Options;
41 import org.apache.commons.cli.ParseException;
42 import org.apache.commons.cli.PosixParser;
43 import org.apache.commons.logging.Log;
44 import org.apache.commons.logging.LogFactory;
45 import org.apache.hadoop.hbase.classification.InterfaceAudience;
46 import org.apache.hadoop.hbase.classification.InterfaceStability;
47 import org.apache.hadoop.conf.Configuration;
48 import org.apache.hadoop.conf.Configured;
49 import org.apache.hadoop.fs.FileSystem;
50 import org.apache.hadoop.fs.Path;
51 import org.apache.hadoop.hbase.HConstants;
52 import org.apache.hadoop.hbase.TableName;
53 import org.apache.hadoop.hbase.HBaseConfiguration;
54 import org.apache.hadoop.hbase.HRegionInfo;
55 import org.apache.hadoop.hbase.KeyValue;
56 import org.apache.hadoop.hbase.Tag;
57 import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
58 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
59 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
60 import org.apache.hadoop.hbase.util.BloomFilter;
61 import org.apache.hadoop.hbase.util.BloomFilterFactory;
62 import org.apache.hadoop.hbase.util.ByteBloomFilter;
63 import org.apache.hadoop.hbase.util.Bytes;
64 import org.apache.hadoop.hbase.util.FSUtils;
65 import org.apache.hadoop.hbase.util.Writables;
66 import org.apache.hadoop.util.Tool;
67 import org.apache.hadoop.util.ToolRunner;
68
69
70
71
72 @InterfaceAudience.Public
73 @InterfaceStability.Evolving
74 public class HFilePrettyPrinter extends Configured implements Tool {
75
76 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
77
78 private Options options = new Options();
79
80 private boolean verbose;
81 private boolean printValue;
82 private boolean printKey;
83 private boolean shouldPrintMeta;
84 private boolean printBlockIndex;
85 private boolean printBlockHeaders;
86 private boolean printStats;
87 private boolean checkRow;
88 private boolean checkFamily;
89 private boolean isSeekToRow = false;
90
91
92
93
94 private byte[] row = null;
95
96 private List<Path> files = new ArrayList<Path>();
97 private int count;
98
99 private static final String FOUR_SPACES = " ";
100
101 public HFilePrettyPrinter() {
102 super();
103 init();
104 }
105
106 public HFilePrettyPrinter(Configuration conf) {
107 super(conf);
108 init();
109 }
110
111 private void init() {
112 options.addOption("v", "verbose", false,
113 "Verbose output; emits file and meta data delimiters");
114 options.addOption("p", "printkv", false, "Print key/value pairs");
115 options.addOption("e", "printkey", false, "Print keys");
116 options.addOption("m", "printmeta", false, "Print meta data of file");
117 options.addOption("b", "printblocks", false, "Print block index meta data");
118 options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
119 options.addOption("k", "checkrow", false,
120 "Enable row order check; looks for out-of-order keys");
121 options.addOption("a", "checkfamily", false, "Enable family check");
122 options.addOption("w", "seekToRow", true,
123 "Seek to this row and print all the kvs for this row only");
124 options.addOption("s", "stats", false, "Print statistics");
125
126 OptionGroup files = new OptionGroup();
127 files.addOption(new Option("f", "file", true,
128 "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
129 files.addOption(new Option("r", "region", true,
130 "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
131 options.addOptionGroup(files);
132 }
133
134 public boolean parseOptions(String args[]) throws ParseException,
135 IOException {
136 if (args.length == 0) {
137 HelpFormatter formatter = new HelpFormatter();
138 formatter.printHelp("HFile", options, true);
139 return false;
140 }
141 CommandLineParser parser = new PosixParser();
142 CommandLine cmd = parser.parse(options, args);
143
144 verbose = cmd.hasOption("v");
145 printValue = cmd.hasOption("p");
146 printKey = cmd.hasOption("e") || printValue;
147 shouldPrintMeta = cmd.hasOption("m");
148 printBlockIndex = cmd.hasOption("b");
149 printBlockHeaders = cmd.hasOption("h");
150 printStats = cmd.hasOption("s");
151 checkRow = cmd.hasOption("k");
152 checkFamily = cmd.hasOption("a");
153
154 if (cmd.hasOption("f")) {
155 files.add(new Path(cmd.getOptionValue("f")));
156 }
157
158 if (cmd.hasOption("w")) {
159 String key = cmd.getOptionValue("w");
160 if (key != null && key.length() != 0) {
161 row = key.getBytes();
162 isSeekToRow = true;
163 } else {
164 System.err.println("Invalid row is specified.");
165 System.exit(-1);
166 }
167 }
168
169 if (cmd.hasOption("r")) {
170 String regionName = cmd.getOptionValue("r");
171 byte[] rn = Bytes.toBytes(regionName);
172 byte[][] hri = HRegionInfo.parseRegionName(rn);
173 Path rootDir = FSUtils.getRootDir(getConf());
174 Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
175 String enc = HRegionInfo.encodeRegionName(rn);
176 Path regionDir = new Path(tableDir, enc);
177 if (verbose)
178 System.out.println("region dir -> " + regionDir);
179 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
180 regionDir);
181 if (verbose)
182 System.out.println("Number of region files found -> "
183 + regionFiles.size());
184 if (verbose) {
185 int i = 1;
186 for (Path p : regionFiles) {
187 if (verbose)
188 System.out.println("Found file[" + i++ + "] -> " + p);
189 }
190 }
191 files.addAll(regionFiles);
192 }
193
194 return true;
195 }
196
197
198
199
200
201 public int run(String[] args) {
202 if (getConf() == null) {
203 throw new RuntimeException("A Configuration instance must be provided.");
204 }
205 try {
206 FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
207 if (!parseOptions(args))
208 return 1;
209 } catch (IOException ex) {
210 LOG.error("Error parsing command-line options", ex);
211 return 1;
212 } catch (ParseException ex) {
213 LOG.error("Error parsing command-line options", ex);
214 return 1;
215 }
216
217
218 for (Path fileName : files) {
219 try {
220 processFile(fileName);
221 } catch (IOException ex) {
222 LOG.error("Error reading " + fileName, ex);
223 }
224 }
225
226 if (verbose || printKey) {
227 System.out.println("Scanned kv count -> " + count);
228 }
229
230 return 0;
231 }
232
233 private void processFile(Path file) throws IOException {
234 if (verbose)
235 System.out.println("Scanning -> " + file);
236 FileSystem fs = file.getFileSystem(getConf());
237 if (!fs.exists(file)) {
238 System.err.println("ERROR, file doesnt exist: " + file);
239 }
240
241 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());
242
243 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
244
245 KeyValueStatsCollector fileStats = null;
246
247 if (verbose || printKey || checkRow || checkFamily || printStats) {
248
249 HFileScanner scanner = reader.getScanner(false, false, false);
250 fileStats = new KeyValueStatsCollector();
251 boolean shouldScanKeysValues = false;
252 if (this.isSeekToRow) {
253
254 shouldScanKeysValues =
255 (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
256 } else {
257 shouldScanKeysValues = scanner.seekTo();
258 }
259 if (shouldScanKeysValues)
260 scanKeysValues(file, fileStats, scanner, row);
261 }
262
263
264 if (shouldPrintMeta) {
265 printMeta(reader, fileInfo);
266 }
267
268 if (printBlockIndex) {
269 System.out.println("Block Index:");
270 System.out.println(reader.getDataBlockIndexReader());
271 }
272
273 if (printBlockHeaders) {
274 System.out.println("Block Headers:");
275
276
277
278
279 FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
280 long fileSize = fs.getFileStatus(file).getLen();
281 FixedFileTrailer trailer =
282 FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
283 long offset = trailer.getFirstDataBlockOffset(),
284 max = trailer.getLastDataBlockOffset();
285 HFileBlock block;
286 while (offset <= max) {
287 block = reader.readBlock(offset, -1,
288
289 offset += block.getOnDiskSizeWithHeader();
290 System.out.println(block);
291 }
292 }
293
294 if (printStats) {
295 fileStats.finish();
296 System.out.println("Stats:\n" + fileStats);
297 }
298
299 reader.close();
300 }
301
302 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
303 HFileScanner scanner, byte[] row) throws IOException {
304 KeyValue pkv = null;
305 do {
306 KeyValue kv = scanner.getKeyValue();
307 if (row != null && row.length != 0) {
308 int result = Bytes.compareTo(kv.getRow(), row);
309 if (result > 0) {
310 break;
311 } else if (result < 0) {
312 continue;
313 }
314 }
315
316 if (printStats) {
317 fileStats.collect(kv);
318 }
319
320 if (printKey) {
321 System.out.print("K: " + kv);
322 if (printValue) {
323 System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
324 int i = 0;
325 List<Tag> tags = kv.getTags();
326 for (Tag tag : tags) {
327 System.out
328 .print(String.format(" T[%d]: %s", i++, Bytes.toStringBinary(tag.getValue())));
329 }
330 }
331 System.out.println();
332 }
333
334 if (checkRow && pkv != null) {
335 if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
336 System.err.println("WARNING, previous row is greater then"
337 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
338 + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent -> "
339 + Bytes.toStringBinary(kv.getKey()));
340 }
341 }
342
343 if (checkFamily) {
344 String fam = Bytes.toString(kv.getFamily());
345 if (!file.toString().contains(fam)) {
346 System.err.println("WARNING, filename does not match kv family,"
347 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
348 + Bytes.toStringBinary(kv.getKey()));
349 }
350 if (pkv != null
351 && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
352 System.err.println("WARNING, previous kv has different family"
353 + " compared to current key\n\tfilename -> " + file
354 + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
355 + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
356 }
357 }
358 pkv = kv;
359 ++count;
360 } while (scanner.next());
361 }
362
363
364
365
366
367 private static String asSeparateLines(String keyValueStr) {
368 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
369 ",\n" + FOUR_SPACES + "$1");
370 }
371
372 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
373 throws IOException {
374 System.out.println("Block index size as per heapsize: "
375 + reader.indexSize());
376 System.out.println(asSeparateLines(reader.toString()));
377 System.out.println("Trailer:\n "
378 + asSeparateLines(reader.getTrailer().toString()));
379 System.out.println("Fileinfo:");
380 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
381 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
382 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
383 long seqid = Bytes.toLong(e.getValue());
384 System.out.println(seqid);
385 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
386 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
387 Writables.copyWritable(e.getValue(), timeRangeTracker);
388 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
389 + timeRangeTracker.getMaximumTimestamp());
390 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
391 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
392 System.out.println(Bytes.toInt(e.getValue()));
393 } else {
394 System.out.println(Bytes.toStringBinary(e.getValue()));
395 }
396 }
397
398 try {
399 System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
400 } catch (Exception e) {
401 System.out.println ("Unable to retrieve the midkey");
402 }
403
404
405 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
406 BloomFilter bloomFilter = null;
407 if (bloomMeta != null)
408 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
409
410 System.out.println("Bloom filter:");
411 if (bloomFilter != null) {
412 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
413 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
414 } else {
415 System.out.println(FOUR_SPACES + "Not present");
416 }
417
418
419 bloomMeta = reader.getDeleteBloomFilterMetadata();
420 bloomFilter = null;
421 if (bloomMeta != null)
422 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
423
424 System.out.println("Delete Family Bloom filter:");
425 if (bloomFilter != null) {
426 System.out.println(FOUR_SPACES
427 + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
428 "\n" + FOUR_SPACES));
429 } else {
430 System.out.println(FOUR_SPACES + "Not present");
431 }
432 }
433
434 private static class KeyValueStatsCollector {
435 private final MetricsRegistry metricsRegistry = new MetricsRegistry();
436 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
437 private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
438 Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
439 Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
440 Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
441 Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
442
443 long curRowBytes = 0;
444 long curRowCols = 0;
445
446 byte[] biggestRow = null;
447
448 private KeyValue prevKV = null;
449 private long maxRowBytes = 0;
450 private long curRowKeyLength;
451
452 public void collect(KeyValue kv) {
453 valLen.update(kv.getValueLength());
454 if (prevKV != null &&
455 KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
456
457 collectRow();
458 }
459 curRowBytes += kv.getLength();
460 curRowKeyLength = kv.getKeyLength();
461 curRowCols++;
462 prevKV = kv;
463 }
464
465 private void collectRow() {
466 rowSizeBytes.update(curRowBytes);
467 rowSizeCols.update(curRowCols);
468 keyLen.update(curRowKeyLength);
469
470 if (curRowBytes > maxRowBytes && prevKV != null) {
471 biggestRow = prevKV.getRow();
472 maxRowBytes = curRowBytes;
473 }
474
475 curRowBytes = 0;
476 curRowCols = 0;
477 }
478
479 public void finish() {
480 if (curRowCols > 0) {
481 collectRow();
482 }
483 }
484
485 @Override
486 public String toString() {
487 if (prevKV == null)
488 return "no data available for statistics";
489
490
491 simpleReporter.shutdown();
492 simpleReporter.run();
493 metricsRegistry.shutdown();
494
495 return
496 metricsOutput.toString() +
497 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
498 }
499 }
500
501 private static class SimpleReporter extends ConsoleReporter {
502 private final PrintStream out;
503
504 public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
505 super(metricsRegistry, out, MetricPredicate.ALL);
506 this.out = out;
507 }
508
509 @Override
510 public void run() {
511 for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
512 MetricPredicate.ALL).entrySet()) {
513 try {
514 for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
515 out.print(" " + subEntry.getKey().getName());
516 out.println(':');
517
518 subEntry.getValue().processWith(this, subEntry.getKey(), out);
519 }
520 } catch (Exception e) {
521 e.printStackTrace(out);
522 }
523 }
524 }
525
526 @Override
527 public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
528 super.processHistogram(name, histogram, stream);
529 stream.printf(Locale.getDefault(), " count = %d\n", histogram.count());
530 }
531 }
532
533 public static void main(String[] args) throws Exception {
534 Configuration conf = HBaseConfiguration.create();
535
536 conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
537 int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
538 System.exit(ret);
539 }
540 }