View Javadoc

1   
2   /*
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataInput;
24  import java.io.IOException;
25  import java.io.PrintStream;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.Locale;
29  import java.util.Map;
30  import java.util.SortedMap;
31  
32  import com.yammer.metrics.core.*;
33  import com.yammer.metrics.reporting.ConsoleReporter;
34  
35  import org.apache.commons.cli.CommandLine;
36  import org.apache.commons.cli.CommandLineParser;
37  import org.apache.commons.cli.HelpFormatter;
38  import org.apache.commons.cli.Options;
39  import org.apache.commons.cli.ParseException;
40  import org.apache.commons.cli.PosixParser;
41  import org.apache.commons.logging.Log;
42  import org.apache.commons.logging.LogFactory;
43  import org.apache.hadoop.classification.InterfaceAudience;
44  import org.apache.hadoop.classification.InterfaceStability;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.hbase.TableName;
49  import org.apache.hadoop.hbase.HBaseConfiguration;
50  import org.apache.hadoop.hbase.HRegionInfo;
51  import org.apache.hadoop.hbase.KeyValue;
52  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
53  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
54  import org.apache.hadoop.hbase.util.BloomFilter;
55  import org.apache.hadoop.hbase.util.BloomFilterFactory;
56  import org.apache.hadoop.hbase.util.ByteBloomFilter;
57  import org.apache.hadoop.hbase.util.Bytes;
58  import org.apache.hadoop.hbase.util.FSUtils;
59  import org.apache.hadoop.hbase.util.Writables;
60  
61  /**
62   * Implements pretty-printing functionality for {@link HFile}s.
63   */
64  @InterfaceAudience.Public
65  @InterfaceStability.Evolving
66  public class HFilePrettyPrinter {
67  
  private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);

  /** Command-line options understood by this tool; populated in the constructor. */
  private Options options = new Options();

  // Flags parsed from the command line in parseOptions().
  private boolean verbose;          // -v: emit file/meta delimiters and scan progress
  private boolean printValue;       // -p: print values (implies printKey)
  private boolean printKey;         // -e: print keys (also set by -p)
  private boolean shouldPrintMeta;  // -m: print file metadata
  private boolean printBlocks;      // -b: print block index metadata
  private boolean printStats;       // -s: print key/value statistics
  private boolean checkRow;         // -k: warn on out-of-order rows
  private boolean checkFamily;      // -a: warn on family/filename mismatches
  private boolean isSeekToRow = false;  // true when -w supplied a row to seek to

  /**
   * The row which the user wants to specify and print all the KeyValues for.
   */
  private byte[] row = null;
  private Configuration conf;  // created in run(); used to open filesystems/files

  private List<Path> files = new ArrayList<Path>();  // HFiles to process (-f and/or -r)
  private int count;  // total KeyValues scanned across all files

  private static final String FOUR_SPACES = "    ";  // indent unit for meta output
92  
93    public HFilePrettyPrinter() {
94      options.addOption("v", "verbose", false,
95          "Verbose output; emits file and meta data delimiters");
96      options.addOption("p", "printkv", false, "Print key/value pairs");
97      options.addOption("e", "printkey", false, "Print keys");
98      options.addOption("m", "printmeta", false, "Print meta data of file");
99      options.addOption("b", "printblocks", false, "Print block index meta data");
100     options.addOption("k", "checkrow", false,
101         "Enable row order check; looks for out-of-order keys");
102     options.addOption("a", "checkfamily", false, "Enable family check");
103     options.addOption("f", "file", true,
104         "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34");
105     options.addOption("w", "seekToRow", true,
106       "Seek to this row and print all the kvs for this row only");
107     options.addOption("r", "region", true,
108         "Region to scan. Pass region name; e.g. 'hbase:meta,,1'");
109     options.addOption("s", "stats", false, "Print statistics");
110   }
111 
112   public boolean parseOptions(String args[]) throws ParseException,
113       IOException {
114     if (args.length == 0) {
115       HelpFormatter formatter = new HelpFormatter();
116       formatter.printHelp("HFile", options, true);
117       return false;
118     }
119     CommandLineParser parser = new PosixParser();
120     CommandLine cmd = parser.parse(options, args);
121 
122     verbose = cmd.hasOption("v");
123     printValue = cmd.hasOption("p");
124     printKey = cmd.hasOption("e") || printValue;
125     shouldPrintMeta = cmd.hasOption("m");
126     printBlocks = cmd.hasOption("b");
127     printStats = cmd.hasOption("s");
128     checkRow = cmd.hasOption("k");
129     checkFamily = cmd.hasOption("a");
130 
131     if (cmd.hasOption("f")) {
132       files.add(new Path(cmd.getOptionValue("f")));
133     }
134 
135     if (cmd.hasOption("w")) {
136       String key = cmd.getOptionValue("w");
137       if (key != null && key.length() != 0) {
138         row = key.getBytes();
139         isSeekToRow = true;
140       } else {
141         System.err.println("Invalid row is specified.");
142         System.exit(-1);
143       }
144     }
145 
146     if (cmd.hasOption("r")) {
147       String regionName = cmd.getOptionValue("r");
148       byte[] rn = Bytes.toBytes(regionName);
149       byte[][] hri = HRegionInfo.parseRegionName(rn);
150       Path rootDir = FSUtils.getRootDir(conf);
151       Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
152       String enc = HRegionInfo.encodeRegionName(rn);
153       Path regionDir = new Path(tableDir, enc);
154       if (verbose)
155         System.out.println("region dir -> " + regionDir);
156       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
157           regionDir);
158       if (verbose)
159         System.out.println("Number of region files found -> "
160             + regionFiles.size());
161       if (verbose) {
162         int i = 1;
163         for (Path p : regionFiles) {
164           if (verbose)
165             System.out.println("Found file[" + i++ + "] -> " + p);
166         }
167       }
168       files.addAll(regionFiles);
169     }
170 
171     return true;
172   }
173 
174   /**
175    * Runs the command-line pretty-printer, and returns the desired command
176    * exit code (zero for success, non-zero for failure).
177    */
178   public int run(String[] args) {
179     conf = HBaseConfiguration.create();
180     try {
181       FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
182       if (!parseOptions(args))
183         return 1;
184     } catch (IOException ex) {
185       LOG.error("Error parsing command-line options", ex);
186       return 1;
187     } catch (ParseException ex) {
188       LOG.error("Error parsing command-line options", ex);
189       return 1;
190     }
191 
192     // iterate over all files found
193     for (Path fileName : files) {
194       try {
195         processFile(fileName);
196       } catch (IOException ex) {
197         LOG.error("Error reading " + fileName, ex);
198       }
199     }
200 
201     if (verbose || printKey) {
202       System.out.println("Scanned kv count -> " + count);
203     }
204 
205     return 0;
206   }
207 
208   private void processFile(Path file) throws IOException {
209     if (verbose)
210       System.out.println("Scanning -> " + file);
211     FileSystem fs = file.getFileSystem(conf);
212     if (!fs.exists(file)) {
213       System.err.println("ERROR, file doesnt exist: " + file);
214     }
215 
216     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
217 
218     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
219 
220     KeyValueStatsCollector fileStats = null;
221 
222     if (verbose || printKey || checkRow || checkFamily || printStats) {
223       // scan over file and read key/value's and check if requested
224       HFileScanner scanner = reader.getScanner(false, false, false);
225       fileStats = new KeyValueStatsCollector();
226       boolean shouldScanKeysValues = false;
227       if (this.isSeekToRow) {
228         // seek to the first kv on this row
229         shouldScanKeysValues = 
230           (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
231       } else {
232         shouldScanKeysValues = scanner.seekTo();
233       }
234       if (shouldScanKeysValues)
235         scanKeysValues(file, fileStats, scanner, row);
236     }
237 
238     // print meta data
239     if (shouldPrintMeta) {
240       printMeta(reader, fileInfo);
241     }
242 
243     if (printBlocks) {
244       System.out.println("Block Index:");
245       System.out.println(reader.getDataBlockIndexReader());
246     }
247 
248     if (printStats) {
249       fileStats.finish();
250       System.out.println("Stats:\n" + fileStats);
251     }
252 
253     reader.close();
254   }
255 
256   private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
257       HFileScanner scanner,  byte[] row) throws IOException {
258     KeyValue pkv = null;
259     do {
260       KeyValue kv = scanner.getKeyValue();
261       if (row != null && row.length != 0) {
262         int result = Bytes.compareTo(kv.getRow(), row);
263         if (result > 0) {
264           break;
265         } else if (result < 0) {
266           continue;
267         }
268       }
269       // collect stats
270       if (printStats) {
271         fileStats.collect(kv);
272       }
273       // dump key value
274       if (printKey) {
275         System.out.print("K: " + kv);
276         if (printValue) {
277           System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
278         }
279         System.out.println();
280       }
281       // check if rows are in order
282       if (checkRow && pkv != null) {
283         if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
284           System.err.println("WARNING, previous row is greater then"
285               + " current row\n\tfilename -> " + file + "\n\tprevious -> "
286               + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
287               + Bytes.toStringBinary(kv.getKey()));
288         }
289       }
290       // check if families are consistent
291       if (checkFamily) {
292         String fam = Bytes.toString(kv.getFamily());
293         if (!file.toString().contains(fam)) {
294           System.err.println("WARNING, filename does not match kv family,"
295               + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
296               + Bytes.toStringBinary(kv.getKey()));
297         }
298         if (pkv != null
299             && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
300           System.err.println("WARNING, previous kv has different family"
301               + " compared to current key\n\tfilename -> " + file
302               + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
303               + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
304         }
305       }
306       pkv = kv;
307       ++count;
308     } while (scanner.next());
309   }
310 
311   /**
312    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
313    * with a four-space indentation.
314    */
315   private static String asSeparateLines(String keyValueStr) {
316     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
317                                   ",\n" + FOUR_SPACES + "$1");
318   }
319 
  /**
   * Prints the file's metadata: block index size, reader/trailer summaries,
   * the file-info map (decoding well-known keys into readable form), the
   * mid-key, and both the general and delete-family bloom filters.
   *
   * @param reader an open reader for the file
   * @param fileInfo the file-info map previously loaded from the reader
   * @throws IOException if bloom filter metadata cannot be read
   */
  private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
      throws IOException {
    System.out.println("Block index size as per heapsize: "
        + reader.indexSize());
    System.out.println(asSeparateLines(reader.toString()));
    System.out.println("Trailer:\n    "
        + asSeparateLines(reader.getTrailer().toString()));
    System.out.println("Fileinfo:");
    for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
      System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
      // Well-known keys get decoded to readable values; anything else is
      // dumped as a binary-escaped string.
      if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
        long seqid = Bytes.toLong(e.getValue());
        System.out.println(seqid);
      } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
        TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
        Writables.copyWritable(e.getValue(), timeRangeTracker);
        System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
            + timeRangeTracker.getMaximumTimestamp());
      } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
          || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
        System.out.println(Bytes.toInt(e.getValue()));
      } else {
        System.out.println(Bytes.toStringBinary(e.getValue()));
      }
    }

    try {
      System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
    } catch (Exception e) {
      // midkey() may fail (e.g. empty file); report rather than abort.
      System.out.println ("Unable to retrieve the midkey");
    }

    // Printing general bloom information
    DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
    BloomFilter bloomFilter = null;
    if (bloomMeta != null)
      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

    System.out.println("Bloom filter:");
    if (bloomFilter != null) {
      // Re-indent the bloom filter's multi-record stats output.
      System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
          ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
    } else {
      System.out.println(FOUR_SPACES + "Not present");
    }

    // Printing delete bloom information
    bloomMeta = reader.getDeleteBloomFilterMetadata();
    bloomFilter = null;
    if (bloomMeta != null)
      bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

    System.out.println("Delete Family Bloom filter:");
    if (bloomFilter != null) {
      System.out.println(FOUR_SPACES
          + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
              "\n" + FOUR_SPACES));
    } else {
      System.out.println(FOUR_SPACES + "Not present");
    }
  }
381 
382   private static class KeyValueStatsCollector {
383     private final MetricsRegistry metricsRegistry = new MetricsRegistry();
384     private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
385     private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
386     Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
387     Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
388     Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
389     Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
390 
391     long curRowBytes = 0;
392     long curRowCols = 0;
393 
394     byte[] biggestRow = null;
395 
396     private KeyValue prevKV = null;
397     private long maxRowBytes = 0;
398     private long curRowKeyLength;
399 
400     public void collect(KeyValue kv) {
401       valLen.update(kv.getValueLength());
402       if (prevKV != null &&
403           KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
404         // new row
405         collectRow();
406       }
407       curRowBytes += kv.getLength();
408       curRowKeyLength = kv.getKeyLength();
409       curRowCols++;
410       prevKV = kv;
411     }
412 
413     private void collectRow() {
414       rowSizeBytes.update(curRowBytes);
415       rowSizeCols.update(curRowCols);
416       keyLen.update(curRowKeyLength);
417 
418       if (curRowBytes > maxRowBytes && prevKV != null) {
419         biggestRow = prevKV.getRow();
420         maxRowBytes = curRowBytes;
421       }
422 
423       curRowBytes = 0;
424       curRowCols = 0;
425     }
426 
427     public void finish() {
428       if (curRowCols > 0) {
429         collectRow();
430       }
431     }
432 
433     @Override
434     public String toString() {
435       if (prevKV == null)
436         return "no data available for statistics";
437 
438       // Dump the metrics to the output stream
439       simpleReporter.shutdown();
440       simpleReporter.run();
441       metricsRegistry.shutdown();
442 
443       return
444               metricsOutput.toString() +
445                       "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
446     }
447   }
448 
449   private static class SimpleReporter extends ConsoleReporter {
450     private final PrintStream out;
451 
452     public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
453       super(metricsRegistry, out, MetricPredicate.ALL);
454       this.out = out;
455     }
456 
457     @Override
458     public void run() {
459       for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
460               MetricPredicate.ALL).entrySet()) {
461         try {
462           for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
463             out.print("   " + subEntry.getKey().getName());
464             out.println(':');
465 
466             subEntry.getValue().processWith(this, subEntry.getKey(), out);
467           }
468         } catch (Exception e) {
469           e.printStackTrace(out);
470         }
471       }
472     }
473 
474     @Override
475     public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
476       super.processHistogram(name, histogram, stream);
477       stream.printf(Locale.getDefault(), "             count = %d\n", histogram.count());
478     }
479   }
480 }