View Javadoc

1   
2   /*
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataInput;
24  import java.io.IOException;
25  import java.io.PrintStream;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.Locale;
29  import java.util.Map;
30  import java.util.SortedMap;
31  
32  import com.yammer.metrics.core.*;
33  import com.yammer.metrics.reporting.ConsoleReporter;
34  
35  import org.apache.commons.cli.CommandLine;
36  import org.apache.commons.cli.CommandLineParser;
37  import org.apache.commons.cli.HelpFormatter;
38  import org.apache.commons.cli.Option;
39  import org.apache.commons.cli.OptionGroup;
40  import org.apache.commons.cli.Options;
41  import org.apache.commons.cli.ParseException;
42  import org.apache.commons.cli.PosixParser;
43  import org.apache.commons.logging.Log;
44  import org.apache.commons.logging.LogFactory;
45  import org.apache.hadoop.hbase.classification.InterfaceAudience;
46  import org.apache.hadoop.hbase.classification.InterfaceStability;
47  import org.apache.hadoop.conf.Configuration;
48  import org.apache.hadoop.conf.Configured;
49  import org.apache.hadoop.fs.FileSystem;
50  import org.apache.hadoop.fs.Path;
51  import org.apache.hadoop.hbase.HConstants;
52  import org.apache.hadoop.hbase.TableName;
53  import org.apache.hadoop.hbase.HBaseConfiguration;
54  import org.apache.hadoop.hbase.HRegionInfo;
55  import org.apache.hadoop.hbase.KeyValue;
56  import org.apache.hadoop.hbase.Tag;
57  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
58  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
59  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
60  import org.apache.hadoop.hbase.util.BloomFilter;
61  import org.apache.hadoop.hbase.util.BloomFilterFactory;
62  import org.apache.hadoop.hbase.util.ByteBloomFilter;
63  import org.apache.hadoop.hbase.util.Bytes;
64  import org.apache.hadoop.hbase.util.FSUtils;
65  import org.apache.hadoop.hbase.util.Writables;
66  import org.apache.hadoop.util.Tool;
67  import org.apache.hadoop.util.ToolRunner;
68  
69  /**
70   * Implements pretty-printing functionality for {@link HFile}s.
71   */
72  @InterfaceAudience.Public
73  @InterfaceStability.Evolving
74  public class HFilePrettyPrinter extends Configured implements Tool {
75  
  private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);

  // Command-line option definitions, populated once by init().
  private Options options = new Options();

  // Flags mirroring the command-line switches; set in parseOptions().
  private boolean verbose;             // -v: emit file and meta data delimiters
  private boolean printValue;          // -p: print key/value pairs
  private boolean printKey;            // -e: print keys (implied by -p)
  private boolean shouldPrintMeta;     // -m: print file meta data
  private boolean printBlockIndex;     // -b: print block index meta data
  private boolean printBlockHeaders;   // -h: print each block's header
  private boolean printStats;          // -s: print statistics
  private boolean checkRow;            // -k: check row ordering
  private boolean checkFamily;         // -a: check family consistency
  private boolean isSeekToRow = false; // true when -w supplied a row to seek to

  /**
   * The row which the user wants to specify and print all the KeyValues for.
   */
  private byte[] row = null;

  // Files to process, accumulated from -f and/or -r in parseOptions().
  private List<Path> files = new ArrayList<Path>();
  // Total number of KeyValues scanned across all processed files.
  private int count;

  private static final String FOUR_SPACES = "    ";
100 
  /**
   * Creates a printer with no Configuration; run() requires one to be set
   * (via setConf) before it is invoked.
   */
  public HFilePrettyPrinter() {
    super();
    init();
  }
105 
  /**
   * Creates a printer using the given Configuration.
   *
   * @param conf configuration used to resolve the filesystem and root dir
   */
  public HFilePrettyPrinter(Configuration conf) {
    super(conf);
    init();
  }
110 
111   private void init() {
112     options.addOption("v", "verbose", false,
113         "Verbose output; emits file and meta data delimiters");
114     options.addOption("p", "printkv", false, "Print key/value pairs");
115     options.addOption("e", "printkey", false, "Print keys");
116     options.addOption("m", "printmeta", false, "Print meta data of file");
117     options.addOption("b", "printblocks", false, "Print block index meta data");
118     options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
119     options.addOption("k", "checkrow", false,
120         "Enable row order check; looks for out-of-order keys");
121     options.addOption("a", "checkfamily", false, "Enable family check");
122     options.addOption("w", "seekToRow", true,
123       "Seek to this row and print all the kvs for this row only");
124     options.addOption("s", "stats", false, "Print statistics");
125 
126     OptionGroup files = new OptionGroup();
127     files.addOption(new Option("f", "file", true,
128       "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
129     files.addOption(new Option("r", "region", true,
130       "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
131     options.addOptionGroup(files);
132   }
133 
134   public boolean parseOptions(String args[]) throws ParseException,
135       IOException {
136     if (args.length == 0) {
137       HelpFormatter formatter = new HelpFormatter();
138       formatter.printHelp("HFile", options, true);
139       return false;
140     }
141     CommandLineParser parser = new PosixParser();
142     CommandLine cmd = parser.parse(options, args);
143 
144     verbose = cmd.hasOption("v");
145     printValue = cmd.hasOption("p");
146     printKey = cmd.hasOption("e") || printValue;
147     shouldPrintMeta = cmd.hasOption("m");
148     printBlockIndex = cmd.hasOption("b");
149     printBlockHeaders = cmd.hasOption("h");
150     printStats = cmd.hasOption("s");
151     checkRow = cmd.hasOption("k");
152     checkFamily = cmd.hasOption("a");
153 
154     if (cmd.hasOption("f")) {
155       files.add(new Path(cmd.getOptionValue("f")));
156     }
157 
158     if (cmd.hasOption("w")) {
159       String key = cmd.getOptionValue("w");
160       if (key != null && key.length() != 0) {
161         row = key.getBytes();
162         isSeekToRow = true;
163       } else {
164         System.err.println("Invalid row is specified.");
165         System.exit(-1);
166       }
167     }
168 
169     if (cmd.hasOption("r")) {
170       String regionName = cmd.getOptionValue("r");
171       byte[] rn = Bytes.toBytes(regionName);
172       byte[][] hri = HRegionInfo.parseRegionName(rn);
173       Path rootDir = FSUtils.getRootDir(getConf());
174       Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
175       String enc = HRegionInfo.encodeRegionName(rn);
176       Path regionDir = new Path(tableDir, enc);
177       if (verbose)
178         System.out.println("region dir -> " + regionDir);
179       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
180           regionDir);
181       if (verbose)
182         System.out.println("Number of region files found -> "
183             + regionFiles.size());
184       if (verbose) {
185         int i = 1;
186         for (Path p : regionFiles) {
187           if (verbose)
188             System.out.println("Found file[" + i++ + "] -> " + p);
189         }
190       }
191       files.addAll(regionFiles);
192     }
193 
194     return true;
195   }
196 
197   /**
198    * Runs the command-line pretty-printer, and returns the desired command
199    * exit code (zero for success, non-zero for failure).
200    */
201   public int run(String[] args) {
202     if (getConf() == null) {
203       throw new RuntimeException("A Configuration instance must be provided.");
204     }
205     try {
206       FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
207       if (!parseOptions(args))
208         return 1;
209     } catch (IOException ex) {
210       LOG.error("Error parsing command-line options", ex);
211       return 1;
212     } catch (ParseException ex) {
213       LOG.error("Error parsing command-line options", ex);
214       return 1;
215     }
216 
217     // iterate over all files found
218     for (Path fileName : files) {
219       try {
220         processFile(fileName);
221       } catch (IOException ex) {
222         LOG.error("Error reading " + fileName, ex);
223       }
224     }
225 
226     if (verbose || printKey) {
227       System.out.println("Scanned kv count -> " + count);
228     }
229 
230     return 0;
231   }
232 
233   private void processFile(Path file) throws IOException {
234     if (verbose)
235       System.out.println("Scanning -> " + file);
236     FileSystem fs = file.getFileSystem(getConf());
237     if (!fs.exists(file)) {
238       System.err.println("ERROR, file doesnt exist: " + file);
239     }
240 
241     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());
242 
243     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
244 
245     KeyValueStatsCollector fileStats = null;
246 
247     if (verbose || printKey || checkRow || checkFamily || printStats) {
248       // scan over file and read key/value's and check if requested
249       HFileScanner scanner = reader.getScanner(false, false, false);
250       fileStats = new KeyValueStatsCollector();
251       boolean shouldScanKeysValues = false;
252       if (this.isSeekToRow) {
253         // seek to the first kv on this row
254         shouldScanKeysValues = 
255           (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
256       } else {
257         shouldScanKeysValues = scanner.seekTo();
258       }
259       if (shouldScanKeysValues)
260         scanKeysValues(file, fileStats, scanner, row);
261     }
262 
263     // print meta data
264     if (shouldPrintMeta) {
265       printMeta(reader, fileInfo);
266     }
267 
268     if (printBlockIndex) {
269       System.out.println("Block Index:");
270       System.out.println(reader.getDataBlockIndexReader());
271     }
272 
273     if (printBlockHeaders) {
274       System.out.println("Block Headers:");
275       /*
276        * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and
277        * TestLazyDataBlockDecompression. Refactor?
278        */
279       FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
280       long fileSize = fs.getFileStatus(file).getLen();
281       FixedFileTrailer trailer =
282         FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
283       long offset = trailer.getFirstDataBlockOffset(),
284         max = trailer.getLastDataBlockOffset();
285       HFileBlock block;
286       while (offset <= max) {
287         block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
288           /* isCompaction */ false, /* updateCacheMetrics */ false, null);
289         offset += block.getOnDiskSizeWithHeader();
290         System.out.println(block);
291       }
292     }
293 
294     if (printStats) {
295       fileStats.finish();
296       System.out.println("Stats:\n" + fileStats);
297     }
298 
299     reader.close();
300   }
301 
302   private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
303       HFileScanner scanner,  byte[] row) throws IOException {
304     KeyValue pkv = null;
305     do {
306       KeyValue kv = scanner.getKeyValue();
307       if (row != null && row.length != 0) {
308         int result = Bytes.compareTo(kv.getRow(), row);
309         if (result > 0) {
310           break;
311         } else if (result < 0) {
312           continue;
313         }
314       }
315       // collect stats
316       if (printStats) {
317         fileStats.collect(kv);
318       }
319       // dump key value
320       if (printKey) {
321         System.out.print("K: " + kv);
322         if (printValue) {
323           System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
324           int i = 0;
325           List<Tag> tags = kv.getTags();
326           for (Tag tag : tags) {
327             System.out
328                 .print(String.format(" T[%d]: %s", i++, Bytes.toStringBinary(tag.getValue())));
329           }
330         }
331         System.out.println();
332       }
333       // check if rows are in order
334       if (checkRow && pkv != null) {
335         if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
336           System.err.println("WARNING, previous row is greater then"
337               + " current row\n\tfilename -> " + file + "\n\tprevious -> "
338               + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
339               + Bytes.toStringBinary(kv.getKey()));
340         }
341       }
342       // check if families are consistent
343       if (checkFamily) {
344         String fam = Bytes.toString(kv.getFamily());
345         if (!file.toString().contains(fam)) {
346           System.err.println("WARNING, filename does not match kv family,"
347               + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
348               + Bytes.toStringBinary(kv.getKey()));
349         }
350         if (pkv != null
351             && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
352           System.err.println("WARNING, previous kv has different family"
353               + " compared to current key\n\tfilename -> " + file
354               + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
355               + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
356         }
357       }
358       pkv = kv;
359       ++count;
360     } while (scanner.next());
361   }
362 
363   /**
364    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
365    * with a four-space indentation.
366    */
367   private static String asSeparateLines(String keyValueStr) {
368     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
369                                   ",\n" + FOUR_SPACES + "$1");
370   }
371 
372   private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
373       throws IOException {
374     System.out.println("Block index size as per heapsize: "
375         + reader.indexSize());
376     System.out.println(asSeparateLines(reader.toString()));
377     System.out.println("Trailer:\n    "
378         + asSeparateLines(reader.getTrailer().toString()));
379     System.out.println("Fileinfo:");
380     for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
381       System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
382       if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
383         long seqid = Bytes.toLong(e.getValue());
384         System.out.println(seqid);
385       } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
386         TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
387         Writables.copyWritable(e.getValue(), timeRangeTracker);
388         System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
389             + timeRangeTracker.getMaximumTimestamp());
390       } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
391           || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
392         System.out.println(Bytes.toInt(e.getValue()));
393       } else {
394         System.out.println(Bytes.toStringBinary(e.getValue()));
395       }
396     }
397 
398     try {
399       System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
400     } catch (Exception e) {
401       System.out.println ("Unable to retrieve the midkey");
402     }
403 
404     // Printing general bloom information
405     DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
406     BloomFilter bloomFilter = null;
407     if (bloomMeta != null)
408       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
409 
410     System.out.println("Bloom filter:");
411     if (bloomFilter != null) {
412       System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
413           ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
414     } else {
415       System.out.println(FOUR_SPACES + "Not present");
416     }
417 
418     // Printing delete bloom information
419     bloomMeta = reader.getDeleteBloomFilterMetadata();
420     bloomFilter = null;
421     if (bloomMeta != null)
422       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
423 
424     System.out.println("Delete Family Bloom filter:");
425     if (bloomFilter != null) {
426       System.out.println(FOUR_SPACES
427           + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
428               "\n" + FOUR_SPACES));
429     } else {
430       System.out.println(FOUR_SPACES + "Not present");
431     }
432   }
433 
  /**
   * Accumulates per-cell and per-row statistics (value lengths, row sizes in
   * bytes and columns, biggest row) over a single file scan, backed by Yammer
   * metrics histograms. Cells must be fed in scan order via collect(); call
   * finish() after the scan, then toString() to render the report.
   */
  private static class KeyValueStatsCollector {
    private final MetricsRegistry metricsRegistry = new MetricsRegistry();
    private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
    // Reports into metricsOutput so toString() can return the formatted stats.
    private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
    Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
    Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
    Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
    Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");

    // Running totals for the row currently being scanned.
    long curRowBytes = 0;
    long curRowCols = 0;

    // Row key of the largest row seen so far (by total byte size).
    byte[] biggestRow = null;

    // Last cell seen; used to detect row boundaries in collect().
    private KeyValue prevKV = null;
    private long maxRowBytes = 0;
    private long curRowKeyLength;

    /**
     * Folds one KeyValue into the stats. A row boundary is detected by
     * comparing the new cell's row against the previous cell's row.
     */
    public void collect(KeyValue kv) {
      valLen.update(kv.getValueLength());
      if (prevKV != null &&
          KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
        // new row
        collectRow();
      }
      curRowBytes += kv.getLength();
      // NOTE(review): curRowKeyLength is overwritten per cell but only flushed
      // once per row in collectRow(), so the "Key length" histogram samples
      // only the last cell of each row — confirm this is intended.
      curRowKeyLength = kv.getKeyLength();
      curRowCols++;
      prevKV = kv;
    }

    // Flushes the finished row's totals into the histograms and resets them.
    private void collectRow() {
      rowSizeBytes.update(curRowBytes);
      rowSizeCols.update(curRowCols);
      keyLen.update(curRowKeyLength);

      if (curRowBytes > maxRowBytes && prevKV != null) {
        biggestRow = prevKV.getRow();
        maxRowBytes = curRowBytes;
      }

      curRowBytes = 0;
      curRowCols = 0;
    }

    // Must be called after the scan so the final (still-open) row is counted.
    public void finish() {
      if (curRowCols > 0) {
        collectRow();
      }
    }

    @Override
    public String toString() {
      if (prevKV == null)
        return "no data available for statistics";

      // Dump the metrics to the output stream
      simpleReporter.shutdown();
      simpleReporter.run();
      metricsRegistry.shutdown();

      return
              metricsOutput.toString() +
                      "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
    }
  }
500 
501   private static class SimpleReporter extends ConsoleReporter {
502     private final PrintStream out;
503 
504     public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
505       super(metricsRegistry, out, MetricPredicate.ALL);
506       this.out = out;
507     }
508 
509     @Override
510     public void run() {
511       for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
512               MetricPredicate.ALL).entrySet()) {
513         try {
514           for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
515             out.print("   " + subEntry.getKey().getName());
516             out.println(':');
517 
518             subEntry.getValue().processWith(this, subEntry.getKey(), out);
519           }
520         } catch (Exception e) {
521           e.printStackTrace(out);
522         }
523       }
524     }
525 
526     @Override
527     public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
528       super.processHistogram(name, histogram, stream);
529       stream.printf(Locale.getDefault(), "             count = %d\n", histogram.count());
530     }
531   }
532 
533   public static void main(String[] args) throws Exception {
534     Configuration conf = HBaseConfiguration.create();
535     // no need for a block cache
536     conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
537     int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
538     System.exit(ret);
539   }
540 }