View Javadoc

1   
2   /*
3    * Copyright 2011 The Apache Software Foundation
4    *
5    * Licensed to the Apache Software Foundation (ASF) under one
6    * or more contributor license agreements.  See the NOTICE file
7    * distributed with this work for additional information
8    * regarding copyright ownership.  The ASF licenses this file
9    * to you under the Apache License, Version 2.0 (the
10   * "License"); you may not use this file except in compliance
11   * with the License.  You may obtain a copy of the License at
12   *
13   *     http://www.apache.org/licenses/LICENSE-2.0
14   *
15   * Unless required by applicable law or agreed to in writing, software
16   * distributed under the License is distributed on an "AS IS" BASIS,
17   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18   * See the License for the specific language governing permissions and
19   * limitations under the License.
20   */
21  package org.apache.hadoop.hbase.io.hfile;
22  
23  import java.io.DataInput;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.List;
27  import java.util.Map;
28  
29  import org.apache.commons.cli.CommandLine;
30  import org.apache.commons.cli.CommandLineParser;
31  import org.apache.commons.cli.HelpFormatter;
32  import org.apache.commons.cli.Options;
33  import org.apache.commons.cli.ParseException;
34  import org.apache.commons.cli.PosixParser;
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.hbase.HBaseConfiguration;
41  import org.apache.hadoop.hbase.HRegionInfo;
42  import org.apache.hadoop.hbase.KeyValue;
43  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
44  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
45  import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
46  import org.apache.hadoop.hbase.util.BloomFilter;
47  import org.apache.hadoop.hbase.util.BloomFilterFactory;
48  import org.apache.hadoop.hbase.util.ByteBloomFilter;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.FSUtils;
51  import org.apache.hadoop.hbase.util.Writables;
52  
53  /**
54   * Implements pretty-printing functionality for {@link HFile}s.
55   */
56  public class HFilePrettyPrinter {
57  
58    private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
59  
60    private Options options = new Options();
61  
62    private boolean verbose;
63    private boolean printValue;
64    private boolean printKey;
65    private boolean shouldPrintMeta;
66    private boolean printBlocks;
67    private boolean printStats;
68    private boolean checkRow;
69    private boolean checkFamily;
70    private boolean isSeekToRow = false;
71  
72    /**
73     * The row which the user wants to specify and print all the KeyValues for.
74     */
75    private byte[] row = null;
76    private Configuration conf;
77  
78    private List<Path> files = new ArrayList<Path>();
79    private int count;
80  
81    private static final String FOUR_SPACES = "    ";
82  
83    public HFilePrettyPrinter() {
84      options.addOption("v", "verbose", false,
85          "Verbose output; emits file and meta data delimiters");
86      options.addOption("p", "printkv", false, "Print key/value pairs");
87      options.addOption("e", "printkey", false, "Print keys");
88      options.addOption("m", "printmeta", false, "Print meta data of file");
89      options.addOption("b", "printblocks", false, "Print block index meta data");
90      options.addOption("k", "checkrow", false,
91          "Enable row order check; looks for out-of-order keys");
92      options.addOption("a", "checkfamily", false, "Enable family check");
93      options.addOption("f", "file", true,
94          "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34");
95      options.addOption("w", "seekToRow", true,
96        "Seek to this row and print all the kvs for this row only");
97      options.addOption("r", "region", true,
98          "Region to scan. Pass region name; e.g. '.META.,,1'");
99      options.addOption("s", "stats", false, "Print statistics");
100   }
101 
102   public boolean parseOptions(String args[]) throws ParseException,
103       IOException {
104     if (args.length == 0) {
105       HelpFormatter formatter = new HelpFormatter();
106       formatter.printHelp("HFile", options, true);
107       return false;
108     }
109     CommandLineParser parser = new PosixParser();
110     CommandLine cmd = parser.parse(options, args);
111 
112     verbose = cmd.hasOption("v");
113     printValue = cmd.hasOption("p");
114     printKey = cmd.hasOption("e") || printValue;
115     shouldPrintMeta = cmd.hasOption("m");
116     printBlocks = cmd.hasOption("b");
117     printStats = cmd.hasOption("s");
118     checkRow = cmd.hasOption("k");
119     checkFamily = cmd.hasOption("a");
120 
121     if (cmd.hasOption("f")) {
122       files.add(new Path(cmd.getOptionValue("f")));
123     }
124 
125     if (cmd.hasOption("w")) {
126       String key = cmd.getOptionValue("w");
127       if (key != null && key.length() != 0) {
128         row = key.getBytes();
129         isSeekToRow = true;
130       } else {
131         System.err.println("Invalid row is specified.");
132         System.exit(-1);
133       }
134     }
135 
136     if (cmd.hasOption("r")) {
137       String regionName = cmd.getOptionValue("r");
138       byte[] rn = Bytes.toBytes(regionName);
139       byte[][] hri = HRegionInfo.parseRegionName(rn);
140       Path rootDir = FSUtils.getRootDir(conf);
141       Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
142       String enc = HRegionInfo.encodeRegionName(rn);
143       Path regionDir = new Path(tableDir, enc);
144       if (verbose)
145         System.out.println("region dir -> " + regionDir);
146       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
147           regionDir);
148       if (verbose)
149         System.out.println("Number of region files found -> "
150             + regionFiles.size());
151       if (verbose) {
152         int i = 1;
153         for (Path p : regionFiles) {
154           if (verbose)
155             System.out.println("Found file[" + i++ + "] -> " + p);
156         }
157       }
158       files.addAll(regionFiles);
159     }
160 
161     return true;
162   }
163 
164   /**
165    * Runs the command-line pretty-printer, and returns the desired command
166    * exit code (zero for success, non-zero for failure).
167    */
168   public int run(String[] args) {
169     conf = HBaseConfiguration.create();
170     conf.set("fs.defaultFS",
171         conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
172     conf.set("fs.default.name",
173         conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
174     SchemaMetrics.configureGlobally(conf);
175     try {
176       if (!parseOptions(args))
177         return 1;
178     } catch (IOException ex) {
179       LOG.error("Error parsing command-line options", ex);
180       return 1;
181     } catch (ParseException ex) {
182       LOG.error("Error parsing command-line options", ex);
183       return 1;
184     }
185 
186     // iterate over all files found
187     for (Path fileName : files) {
188       try {
189         processFile(fileName);
190       } catch (IOException ex) {
191         LOG.error("Error reading " + fileName, ex);
192       }
193     }
194 
195     if (verbose || printKey) {
196       System.out.println("Scanned kv count -> " + count);
197     }
198 
199     return 0;
200   }
201 
202   private void processFile(Path file) throws IOException {
203     if (verbose)
204       System.out.println("Scanning -> " + file);
205     FileSystem fs = file.getFileSystem(conf);
206     if (!fs.exists(file)) {
207       System.err.println("ERROR, file doesnt exist: " + file);
208     }
209 
210     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
211 
212     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
213 
214     KeyValueStatsCollector fileStats = null;
215 
216     if (verbose || printKey || checkRow || checkFamily || printStats) {
217       // scan over file and read key/value's and check if requested
218       HFileScanner scanner = reader.getScanner(false, false, false);
219       fileStats = new KeyValueStatsCollector();
220       boolean shouldScanKeysValues = false;
221       if (this.isSeekToRow) {
222         // seek to the first kv on this row
223         shouldScanKeysValues = 
224           (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
225       } else {
226         shouldScanKeysValues = scanner.seekTo();
227       }
228       if (shouldScanKeysValues)
229         scanKeysValues(file, fileStats, scanner, row);
230     }
231 
232     // print meta data
233     if (shouldPrintMeta) {
234       printMeta(reader, fileInfo);
235     }
236 
237     if (printBlocks) {
238       System.out.println("Block Index:");
239       System.out.println(reader.getDataBlockIndexReader());
240     }
241 
242     if (printStats) {
243       fileStats.finish();
244       System.out.println("Stats:\n" + fileStats);
245     }
246 
247     reader.close();
248   }
249 
250   private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
251       HFileScanner scanner,  byte[] row) throws IOException {
252     KeyValue pkv = null;
253     do {
254       KeyValue kv = scanner.getKeyValue();
255       if (row != null && row.length != 0) {
256         int result = Bytes.compareTo(kv.getRow(), row);
257         if (result > 0) {
258           break;
259         } else if (result < 0) {
260           continue;
261         }
262       }
263       // collect stats
264       if (printStats) {
265         fileStats.collect(kv);
266       }
267       // dump key value
268       if (printKey) {
269         System.out.print("K: " + kv);
270         if (printValue) {
271           System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
272         }
273         System.out.println();
274       }
275       // check if rows are in order
276       if (checkRow && pkv != null) {
277         if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
278           System.err.println("WARNING, previous row is greater then"
279               + " current row\n\tfilename -> " + file + "\n\tprevious -> "
280               + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
281               + Bytes.toStringBinary(kv.getKey()));
282         }
283       }
284       // check if families are consistent
285       if (checkFamily) {
286         String fam = Bytes.toString(kv.getFamily());
287         if (!file.toString().contains(fam)) {
288           System.err.println("WARNING, filename does not match kv family,"
289               + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
290               + Bytes.toStringBinary(kv.getKey()));
291         }
292         if (pkv != null
293             && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
294           System.err.println("WARNING, previous kv has different family"
295               + " compared to current key\n\tfilename -> " + file
296               + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
297               + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
298         }
299       }
300       pkv = kv;
301       ++count;
302     } while (scanner.next());
303   }
304 
305   /**
306    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
307    * with a four-space indentation.
308    */
309   private static String asSeparateLines(String keyValueStr) {
310     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
311                                   ",\n" + FOUR_SPACES + "$1");
312   }
313 
314   private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
315       throws IOException {
316     System.out.println("Block index size as per heapsize: "
317         + reader.indexSize());
318     System.out.println(asSeparateLines(reader.toString()));
319     System.out.println("Trailer:\n    "
320         + asSeparateLines(reader.getTrailer().toString()));
321     System.out.println("Fileinfo:");
322     for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
323       System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
324       if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
325         long seqid = Bytes.toLong(e.getValue());
326         System.out.println(seqid);
327       } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
328         TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
329         Writables.copyWritable(e.getValue(), timeRangeTracker);
330         System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
331             + timeRangeTracker.getMaximumTimestamp());
332       } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
333           || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
334         System.out.println(Bytes.toInt(e.getValue()));
335       } else {
336         System.out.println(Bytes.toStringBinary(e.getValue()));
337       }
338     }
339 
340     System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
341 
342     // Printing general bloom information
343     DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
344     BloomFilter bloomFilter = null;
345     if (bloomMeta != null)
346       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
347 
348     System.out.println("Bloom filter:");
349     if (bloomFilter != null) {
350       System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
351           ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
352     } else {
353       System.out.println(FOUR_SPACES + "Not present");
354     }
355 
356     // Printing delete bloom information
357     bloomMeta = reader.getDeleteBloomFilterMetadata();
358     bloomFilter = null;
359     if (bloomMeta != null)
360       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
361 
362     System.out.println("Delete Family Bloom filter:");
363     if (bloomFilter != null) {
364       System.out.println(FOUR_SPACES
365           + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
366               "\n" + FOUR_SPACES));
367     } else {
368       System.out.println(FOUR_SPACES + "Not present");
369     }
370   }
371 
372   private static class LongStats {
373     private long min = Long.MAX_VALUE;
374     private long max = Long.MIN_VALUE;
375     private long sum = 0;
376     private long count = 0;
377 
378     void collect(long d) {
379       if (d < min) min = d;
380       if (d > max) max = d;
381       sum += d;
382       count++;
383     }
384 
385     public String toString() {
386       return "count: " + count +
387         "\tmin: " + min +
388         "\tmax: " + max +
389         "\tmean: " + ((double)sum/count);
390     }
391   }
392 
393   private static class KeyValueStatsCollector {
394     LongStats keyLen = new LongStats();
395     LongStats valLen = new LongStats();
396     LongStats rowSizeBytes = new LongStats();
397     LongStats rowSizeCols = new LongStats();
398 
399     long curRowBytes = 0;
400     long curRowCols = 0;
401 
402     byte[] biggestRow = null;
403 
404     private KeyValue prevKV = null;
405     private long maxRowBytes = 0;
406 
407     public void collect(KeyValue kv) {
408       keyLen.collect(kv.getKeyLength());
409       valLen.collect(kv.getValueLength());
410       if (prevKV != null &&
411           KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
412         // new row
413         collectRow();
414       }
415       curRowBytes += kv.getLength();
416       curRowCols++;
417       prevKV = kv;
418     }
419 
420     private void collectRow() {
421       rowSizeBytes.collect(curRowBytes);
422       rowSizeCols.collect(curRowCols);
423 
424       if (curRowBytes > maxRowBytes && prevKV != null) {
425         biggestRow = prevKV.getRow();
426       }
427 
428       curRowBytes = 0;
429       curRowCols = 0;
430     }
431 
432     public void finish() {
433       if (curRowCols > 0) {
434         collectRow();
435       }
436     }
437 
438     @Override
439     public String toString() {
440       if (prevKV == null)
441         return "no data available for statistics";
442 
443       return
444         "Key length: " + keyLen + "\n" +
445         "Val length: " + valLen + "\n" +
446         "Row size (bytes): " + rowSizeBytes + "\n" +
447         "Row size (columns): " + rowSizeCols + "\n" +
448         "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
449     }
450   }
451 }