View Javadoc

1   
2   /*
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import java.io.DataInput;
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.List;
26  import java.util.Map;
27  
28  import org.apache.commons.cli.CommandLine;
29  import org.apache.commons.cli.CommandLineParser;
30  import org.apache.commons.cli.HelpFormatter;
31  import org.apache.commons.cli.Options;
32  import org.apache.commons.cli.ParseException;
33  import org.apache.commons.cli.PosixParser;
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.classification.InterfaceAudience;
37  import org.apache.hadoop.classification.InterfaceStability;
38  import org.apache.hadoop.conf.Configuration;
39  import org.apache.hadoop.fs.FileSystem;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.hbase.HBaseConfiguration;
42  import org.apache.hadoop.hbase.HRegionInfo;
43  import org.apache.hadoop.hbase.KeyValue;
44  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
45  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
46  import org.apache.hadoop.hbase.util.BloomFilter;
47  import org.apache.hadoop.hbase.util.BloomFilterFactory;
48  import org.apache.hadoop.hbase.util.ByteBloomFilter;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.FSUtils;
51  import org.apache.hadoop.hbase.util.Writables;
52  
53  /**
54   * Implements pretty-printing functionality for {@link HFile}s.
55   */
56  @InterfaceAudience.Public
57  @InterfaceStability.Evolving
58  public class HFilePrettyPrinter {
59  
60    private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
61  
62    private Options options = new Options();
63  
64    private boolean verbose;
65    private boolean printValue;
66    private boolean printKey;
67    private boolean shouldPrintMeta;
68    private boolean printBlocks;
69    private boolean printStats;
70    private boolean checkRow;
71    private boolean checkFamily;
72    private boolean isSeekToRow = false;
73  
74    /**
75     * The row which the user wants to specify and print all the KeyValues for.
76     */
77    private byte[] row = null;
78    private Configuration conf;
79  
80    private List<Path> files = new ArrayList<Path>();
81    private int count;
82  
83    private static final String FOUR_SPACES = "    ";
84  
85    public HFilePrettyPrinter() {
86      options.addOption("v", "verbose", false,
87          "Verbose output; emits file and meta data delimiters");
88      options.addOption("p", "printkv", false, "Print key/value pairs");
89      options.addOption("e", "printkey", false, "Print keys");
90      options.addOption("m", "printmeta", false, "Print meta data of file");
91      options.addOption("b", "printblocks", false, "Print block index meta data");
92      options.addOption("k", "checkrow", false,
93          "Enable row order check; looks for out-of-order keys");
94      options.addOption("a", "checkfamily", false, "Enable family check");
95      options.addOption("f", "file", true,
96          "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34");
97      options.addOption("w", "seekToRow", true,
98        "Seek to this row and print all the kvs for this row only");
99      options.addOption("r", "region", true,
100         "Region to scan. Pass region name; e.g. '.META.,,1'");
101     options.addOption("s", "stats", false, "Print statistics");
102   }
103 
104   public boolean parseOptions(String args[]) throws ParseException,
105       IOException {
106     if (args.length == 0) {
107       HelpFormatter formatter = new HelpFormatter();
108       formatter.printHelp("HFile", options, true);
109       return false;
110     }
111     CommandLineParser parser = new PosixParser();
112     CommandLine cmd = parser.parse(options, args);
113 
114     verbose = cmd.hasOption("v");
115     printValue = cmd.hasOption("p");
116     printKey = cmd.hasOption("e") || printValue;
117     shouldPrintMeta = cmd.hasOption("m");
118     printBlocks = cmd.hasOption("b");
119     printStats = cmd.hasOption("s");
120     checkRow = cmd.hasOption("k");
121     checkFamily = cmd.hasOption("a");
122 
123     if (cmd.hasOption("f")) {
124       files.add(new Path(cmd.getOptionValue("f")));
125     }
126 
127     if (cmd.hasOption("w")) {
128       String key = cmd.getOptionValue("w");
129       if (key != null && key.length() != 0) {
130         row = key.getBytes();
131         isSeekToRow = true;
132       } else {
133         System.err.println("Invalid row is specified.");
134         System.exit(-1);
135       }
136     }
137 
138     if (cmd.hasOption("r")) {
139       String regionName = cmd.getOptionValue("r");
140       byte[] rn = Bytes.toBytes(regionName);
141       byte[][] hri = HRegionInfo.parseRegionName(rn);
142       Path rootDir = FSUtils.getRootDir(conf);
143       Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
144       String enc = HRegionInfo.encodeRegionName(rn);
145       Path regionDir = new Path(tableDir, enc);
146       if (verbose)
147         System.out.println("region dir -> " + regionDir);
148       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
149           regionDir);
150       if (verbose)
151         System.out.println("Number of region files found -> "
152             + regionFiles.size());
153       if (verbose) {
154         int i = 1;
155         for (Path p : regionFiles) {
156           if (verbose)
157             System.out.println("Found file[" + i++ + "] -> " + p);
158         }
159       }
160       files.addAll(regionFiles);
161     }
162 
163     return true;
164   }
165 
166   /**
167    * Runs the command-line pretty-printer, and returns the desired command
168    * exit code (zero for success, non-zero for failure).
169    */
170   public int run(String[] args) {
171     conf = HBaseConfiguration.create();
172     try {
173       FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
174       if (!parseOptions(args))
175         return 1;
176     } catch (IOException ex) {
177       LOG.error("Error parsing command-line options", ex);
178       return 1;
179     } catch (ParseException ex) {
180       LOG.error("Error parsing command-line options", ex);
181       return 1;
182     }
183 
184     // iterate over all files found
185     for (Path fileName : files) {
186       try {
187         processFile(fileName);
188       } catch (IOException ex) {
189         LOG.error("Error reading " + fileName, ex);
190       }
191     }
192 
193     if (verbose || printKey) {
194       System.out.println("Scanned kv count -> " + count);
195     }
196 
197     return 0;
198   }
199 
200   private void processFile(Path file) throws IOException {
201     if (verbose)
202       System.out.println("Scanning -> " + file);
203     FileSystem fs = file.getFileSystem(conf);
204     if (!fs.exists(file)) {
205       System.err.println("ERROR, file doesnt exist: " + file);
206     }
207 
208     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
209 
210     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
211 
212     KeyValueStatsCollector fileStats = null;
213 
214     if (verbose || printKey || checkRow || checkFamily || printStats) {
215       // scan over file and read key/value's and check if requested
216       HFileScanner scanner = reader.getScanner(false, false, false);
217       fileStats = new KeyValueStatsCollector();
218       boolean shouldScanKeysValues = false;
219       if (this.isSeekToRow) {
220         // seek to the first kv on this row
221         shouldScanKeysValues = 
222           (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
223       } else {
224         shouldScanKeysValues = scanner.seekTo();
225       }
226       if (shouldScanKeysValues)
227         scanKeysValues(file, fileStats, scanner, row);
228     }
229 
230     // print meta data
231     if (shouldPrintMeta) {
232       printMeta(reader, fileInfo);
233     }
234 
235     if (printBlocks) {
236       System.out.println("Block Index:");
237       System.out.println(reader.getDataBlockIndexReader());
238     }
239 
240     if (printStats) {
241       fileStats.finish();
242       System.out.println("Stats:\n" + fileStats);
243     }
244 
245     reader.close();
246   }
247 
248   private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
249       HFileScanner scanner,  byte[] row) throws IOException {
250     KeyValue pkv = null;
251     do {
252       KeyValue kv = scanner.getKeyValue();
253       if (row != null && row.length != 0) {
254         int result = Bytes.compareTo(kv.getRow(), row);
255         if (result > 0) {
256           break;
257         } else if (result < 0) {
258           continue;
259         }
260       }
261       // collect stats
262       if (printStats) {
263         fileStats.collect(kv);
264       }
265       // dump key value
266       if (printKey) {
267         System.out.print("K: " + kv);
268         if (printValue) {
269           System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
270         }
271         System.out.println();
272       }
273       // check if rows are in order
274       if (checkRow && pkv != null) {
275         if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
276           System.err.println("WARNING, previous row is greater then"
277               + " current row\n\tfilename -> " + file + "\n\tprevious -> "
278               + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
279               + Bytes.toStringBinary(kv.getKey()));
280         }
281       }
282       // check if families are consistent
283       if (checkFamily) {
284         String fam = Bytes.toString(kv.getFamily());
285         if (!file.toString().contains(fam)) {
286           System.err.println("WARNING, filename does not match kv family,"
287               + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
288               + Bytes.toStringBinary(kv.getKey()));
289         }
290         if (pkv != null
291             && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
292           System.err.println("WARNING, previous kv has different family"
293               + " compared to current key\n\tfilename -> " + file
294               + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
295               + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
296         }
297       }
298       pkv = kv;
299       ++count;
300     } while (scanner.next());
301   }
302 
303   /**
304    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
305    * with a four-space indentation.
306    */
307   private static String asSeparateLines(String keyValueStr) {
308     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
309                                   ",\n" + FOUR_SPACES + "$1");
310   }
311 
312   private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
313       throws IOException {
314     System.out.println("Block index size as per heapsize: "
315         + reader.indexSize());
316     System.out.println(asSeparateLines(reader.toString()));
317     System.out.println("Trailer:\n    "
318         + asSeparateLines(reader.getTrailer().toString()));
319     System.out.println("Fileinfo:");
320     for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
321       System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
322       if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
323         long seqid = Bytes.toLong(e.getValue());
324         System.out.println(seqid);
325       } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
326         TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
327         Writables.copyWritable(e.getValue(), timeRangeTracker);
328         System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
329             + timeRangeTracker.getMaximumTimestamp());
330       } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
331           || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
332         System.out.println(Bytes.toInt(e.getValue()));
333       } else {
334         System.out.println(Bytes.toStringBinary(e.getValue()));
335       }
336     }
337 
338     System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
339 
340     // Printing general bloom information
341     DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
342     BloomFilter bloomFilter = null;
343     if (bloomMeta != null)
344       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
345 
346     System.out.println("Bloom filter:");
347     if (bloomFilter != null) {
348       System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
349           ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
350     } else {
351       System.out.println(FOUR_SPACES + "Not present");
352     }
353 
354     // Printing delete bloom information
355     bloomMeta = reader.getDeleteBloomFilterMetadata();
356     bloomFilter = null;
357     if (bloomMeta != null)
358       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
359 
360     System.out.println("Delete Family Bloom filter:");
361     if (bloomFilter != null) {
362       System.out.println(FOUR_SPACES
363           + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
364               "\n" + FOUR_SPACES));
365     } else {
366       System.out.println(FOUR_SPACES + "Not present");
367     }
368   }
369 
370   private static class LongStats {
371     private long min = Long.MAX_VALUE;
372     private long max = Long.MIN_VALUE;
373     private long sum = 0;
374     private long count = 0;
375 
376     void collect(long d) {
377       if (d < min) min = d;
378       if (d > max) max = d;
379       sum += d;
380       count++;
381     }
382 
383     public String toString() {
384       return "count: " + count +
385         "\tmin: " + min +
386         "\tmax: " + max +
387         "\tmean: " + ((double)sum/count);
388     }
389   }
390 
391   private static class KeyValueStatsCollector {
392     LongStats keyLen = new LongStats();
393     LongStats valLen = new LongStats();
394     LongStats rowSizeBytes = new LongStats();
395     LongStats rowSizeCols = new LongStats();
396 
397     long curRowBytes = 0;
398     long curRowCols = 0;
399 
400     byte[] biggestRow = null;
401 
402     private KeyValue prevKV = null;
403     private long maxRowBytes = 0;
404 
405     public void collect(KeyValue kv) {
406       keyLen.collect(kv.getKeyLength());
407       valLen.collect(kv.getValueLength());
408       if (prevKV != null &&
409           KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
410         // new row
411         collectRow();
412       }
413       curRowBytes += kv.getLength();
414       curRowCols++;
415       prevKV = kv;
416     }
417 
418     private void collectRow() {
419       rowSizeBytes.collect(curRowBytes);
420       rowSizeCols.collect(curRowCols);
421 
422       if (curRowBytes > maxRowBytes && prevKV != null) {
423         biggestRow = prevKV.getRow();
424       }
425 
426       curRowBytes = 0;
427       curRowCols = 0;
428     }
429 
430     public void finish() {
431       if (curRowCols > 0) {
432         collectRow();
433       }
434     }
435 
436     @Override
437     public String toString() {
438       if (prevKV == null)
439         return "no data available for statistics";
440 
441       return
442         "Key length: " + keyLen + "\n" +
443         "Val length: " + valLen + "\n" +
444         "Row size (bytes): " + rowSizeBytes + "\n" +
445         "Row size (columns): " + rowSizeCols + "\n" +
446         "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
447     }
448   }
449 }