View Javadoc

1   
2   /*
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import java.io.DataInput;
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.List;
26  import java.util.Map;
27  
28  import org.apache.commons.cli.CommandLine;
29  import org.apache.commons.cli.CommandLineParser;
30  import org.apache.commons.cli.HelpFormatter;
31  import org.apache.commons.cli.Options;
32  import org.apache.commons.cli.ParseException;
33  import org.apache.commons.cli.PosixParser;
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.classification.InterfaceAudience;
37  import org.apache.hadoop.classification.InterfaceStability;
38  import org.apache.hadoop.conf.Configuration;
39  import org.apache.hadoop.fs.FileSystem;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.HBaseConfiguration;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.KeyValue;
45  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
46  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
47  import org.apache.hadoop.hbase.util.BloomFilter;
48  import org.apache.hadoop.hbase.util.BloomFilterFactory;
49  import org.apache.hadoop.hbase.util.ByteBloomFilter;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.FSUtils;
52  import org.apache.hadoop.hbase.util.Writables;
53  
54  /**
55   * Implements pretty-printing functionality for {@link HFile}s.
56   */
57  @InterfaceAudience.Public
58  @InterfaceStability.Evolving
59  public class HFilePrettyPrinter {
60  
61    private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
62  
63    private Options options = new Options();
64  
65    private boolean verbose;
66    private boolean printValue;
67    private boolean printKey;
68    private boolean shouldPrintMeta;
69    private boolean printBlocks;
70    private boolean printStats;
71    private boolean checkRow;
72    private boolean checkFamily;
73    private boolean isSeekToRow = false;
74  
75    /**
76     * The row which the user wants to specify and print all the KeyValues for.
77     */
78    private byte[] row = null;
79    private Configuration conf;
80  
81    private List<Path> files = new ArrayList<Path>();
82    private int count;
83  
84    private static final String FOUR_SPACES = "    ";
85  
86    public HFilePrettyPrinter() {
87      options.addOption("v", "verbose", false,
88          "Verbose output; emits file and meta data delimiters");
89      options.addOption("p", "printkv", false, "Print key/value pairs");
90      options.addOption("e", "printkey", false, "Print keys");
91      options.addOption("m", "printmeta", false, "Print meta data of file");
92      options.addOption("b", "printblocks", false, "Print block index meta data");
93      options.addOption("k", "checkrow", false,
94          "Enable row order check; looks for out-of-order keys");
95      options.addOption("a", "checkfamily", false, "Enable family check");
96      options.addOption("f", "file", true,
97          "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34");
98      options.addOption("w", "seekToRow", true,
99        "Seek to this row and print all the kvs for this row only");
100     options.addOption("r", "region", true,
101         "Region to scan. Pass region name; e.g. '.META.,,1'");
102     options.addOption("s", "stats", false, "Print statistics");
103   }
104 
105   public boolean parseOptions(String args[]) throws ParseException,
106       IOException {
107     if (args.length == 0) {
108       HelpFormatter formatter = new HelpFormatter();
109       formatter.printHelp("HFile", options, true);
110       return false;
111     }
112     CommandLineParser parser = new PosixParser();
113     CommandLine cmd = parser.parse(options, args);
114 
115     verbose = cmd.hasOption("v");
116     printValue = cmd.hasOption("p");
117     printKey = cmd.hasOption("e") || printValue;
118     shouldPrintMeta = cmd.hasOption("m");
119     printBlocks = cmd.hasOption("b");
120     printStats = cmd.hasOption("s");
121     checkRow = cmd.hasOption("k");
122     checkFamily = cmd.hasOption("a");
123 
124     if (cmd.hasOption("f")) {
125       files.add(new Path(cmd.getOptionValue("f")));
126     }
127 
128     if (cmd.hasOption("w")) {
129       String key = cmd.getOptionValue("w");
130       if (key != null && key.length() != 0) {
131         row = key.getBytes();
132         isSeekToRow = true;
133       } else {
134         System.err.println("Invalid row is specified.");
135         System.exit(-1);
136       }
137     }
138 
139     if (cmd.hasOption("r")) {
140       String regionName = cmd.getOptionValue("r");
141       byte[] rn = Bytes.toBytes(regionName);
142       byte[][] hri = HRegionInfo.parseRegionName(rn);
143       Path rootDir = FSUtils.getRootDir(conf);
144       Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
145       String enc = HRegionInfo.encodeRegionName(rn);
146       Path regionDir = new Path(tableDir, enc);
147       if (verbose)
148         System.out.println("region dir -> " + regionDir);
149       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
150           regionDir);
151       if (verbose)
152         System.out.println("Number of region files found -> "
153             + regionFiles.size());
154       if (verbose) {
155         int i = 1;
156         for (Path p : regionFiles) {
157           if (verbose)
158             System.out.println("Found file[" + i++ + "] -> " + p);
159         }
160       }
161       files.addAll(regionFiles);
162     }
163 
164     return true;
165   }
166 
167   /**
168    * Runs the command-line pretty-printer, and returns the desired command
169    * exit code (zero for success, non-zero for failure).
170    */
171   public int run(String[] args) {
172     conf = HBaseConfiguration.create();
173     try {
174       FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
175       if (!parseOptions(args))
176         return 1;
177     } catch (IOException ex) {
178       LOG.error("Error parsing command-line options", ex);
179       return 1;
180     } catch (ParseException ex) {
181       LOG.error("Error parsing command-line options", ex);
182       return 1;
183     }
184 
185     // iterate over all files found
186     for (Path fileName : files) {
187       try {
188         processFile(fileName);
189       } catch (IOException ex) {
190         LOG.error("Error reading " + fileName, ex);
191       }
192     }
193 
194     if (verbose || printKey) {
195       System.out.println("Scanned kv count -> " + count);
196     }
197 
198     return 0;
199   }
200 
201   private void processFile(Path file) throws IOException {
202     if (verbose)
203       System.out.println("Scanning -> " + file);
204     FileSystem fs = file.getFileSystem(conf);
205     if (!fs.exists(file)) {
206       System.err.println("ERROR, file doesnt exist: " + file);
207     }
208 
209     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
210 
211     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
212 
213     KeyValueStatsCollector fileStats = null;
214 
215     if (verbose || printKey || checkRow || checkFamily || printStats) {
216       // scan over file and read key/value's and check if requested
217       HFileScanner scanner = reader.getScanner(false, false, false);
218       fileStats = new KeyValueStatsCollector();
219       boolean shouldScanKeysValues = false;
220       if (this.isSeekToRow) {
221         // seek to the first kv on this row
222         shouldScanKeysValues = 
223           (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
224       } else {
225         shouldScanKeysValues = scanner.seekTo();
226       }
227       if (shouldScanKeysValues)
228         scanKeysValues(file, fileStats, scanner, row);
229     }
230 
231     // print meta data
232     if (shouldPrintMeta) {
233       printMeta(reader, fileInfo);
234     }
235 
236     if (printBlocks) {
237       System.out.println("Block Index:");
238       System.out.println(reader.getDataBlockIndexReader());
239     }
240 
241     if (printStats) {
242       fileStats.finish();
243       System.out.println("Stats:\n" + fileStats);
244     }
245 
246     reader.close();
247   }
248 
249   private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
250       HFileScanner scanner,  byte[] row) throws IOException {
251     KeyValue pkv = null;
252     do {
253       KeyValue kv = scanner.getKeyValue();
254       if (row != null && row.length != 0) {
255         int result = Bytes.compareTo(kv.getRow(), row);
256         if (result > 0) {
257           break;
258         } else if (result < 0) {
259           continue;
260         }
261       }
262       // collect stats
263       if (printStats) {
264         fileStats.collect(kv);
265       }
266       // dump key value
267       if (printKey) {
268         System.out.print("K: " + kv);
269         if (printValue) {
270           System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
271         }
272         System.out.println();
273       }
274       // check if rows are in order
275       if (checkRow && pkv != null) {
276         if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
277           System.err.println("WARNING, previous row is greater then"
278               + " current row\n\tfilename -> " + file + "\n\tprevious -> "
279               + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
280               + Bytes.toStringBinary(kv.getKey()));
281         }
282       }
283       // check if families are consistent
284       if (checkFamily) {
285         String fam = Bytes.toString(kv.getFamily());
286         if (!file.toString().contains(fam)) {
287           System.err.println("WARNING, filename does not match kv family,"
288               + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
289               + Bytes.toStringBinary(kv.getKey()));
290         }
291         if (pkv != null
292             && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
293           System.err.println("WARNING, previous kv has different family"
294               + " compared to current key\n\tfilename -> " + file
295               + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
296               + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
297         }
298       }
299       pkv = kv;
300       ++count;
301     } while (scanner.next());
302   }
303 
304   /**
305    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
306    * with a four-space indentation.
307    */
308   private static String asSeparateLines(String keyValueStr) {
309     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
310                                   ",\n" + FOUR_SPACES + "$1");
311   }
312 
313   private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
314       throws IOException {
315     System.out.println("Block index size as per heapsize: "
316         + reader.indexSize());
317     System.out.println(asSeparateLines(reader.toString()));
318     System.out.println("Trailer:\n    "
319         + asSeparateLines(reader.getTrailer().toString()));
320     System.out.println("Fileinfo:");
321     for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
322       System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
323       if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
324         long seqid = Bytes.toLong(e.getValue());
325         System.out.println(seqid);
326       } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
327         TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
328         Writables.copyWritable(e.getValue(), timeRangeTracker);
329         System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
330             + timeRangeTracker.getMaximumTimestamp());
331       } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
332           || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
333         System.out.println(Bytes.toInt(e.getValue()));
334       } else {
335         System.out.println(Bytes.toStringBinary(e.getValue()));
336       }
337     }
338 
339     System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
340 
341     // Printing general bloom information
342     DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
343     BloomFilter bloomFilter = null;
344     if (bloomMeta != null)
345       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
346 
347     System.out.println("Bloom filter:");
348     if (bloomFilter != null) {
349       System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
350           ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
351     } else {
352       System.out.println(FOUR_SPACES + "Not present");
353     }
354 
355     // Printing delete bloom information
356     bloomMeta = reader.getDeleteBloomFilterMetadata();
357     bloomFilter = null;
358     if (bloomMeta != null)
359       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
360 
361     System.out.println("Delete Family Bloom filter:");
362     if (bloomFilter != null) {
363       System.out.println(FOUR_SPACES
364           + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
365               "\n" + FOUR_SPACES));
366     } else {
367       System.out.println(FOUR_SPACES + "Not present");
368     }
369   }
370 
371   private static class LongStats {
372     private long min = Long.MAX_VALUE;
373     private long max = Long.MIN_VALUE;
374     private long sum = 0;
375     private long count = 0;
376 
377     void collect(long d) {
378       if (d < min) min = d;
379       if (d > max) max = d;
380       sum += d;
381       count++;
382     }
383 
384     public String toString() {
385       return "count: " + count +
386         "\tmin: " + min +
387         "\tmax: " + max +
388         "\tmean: " + ((double)sum/count);
389     }
390   }
391 
392   private static class KeyValueStatsCollector {
393     LongStats keyLen = new LongStats();
394     LongStats valLen = new LongStats();
395     LongStats rowSizeBytes = new LongStats();
396     LongStats rowSizeCols = new LongStats();
397 
398     long curRowBytes = 0;
399     long curRowCols = 0;
400 
401     byte[] biggestRow = null;
402 
403     private KeyValue prevKV = null;
404     private long maxRowBytes = 0;
405 
406     public void collect(KeyValue kv) {
407       keyLen.collect(kv.getKeyLength());
408       valLen.collect(kv.getValueLength());
409       if (prevKV != null &&
410           KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
411         // new row
412         collectRow();
413       }
414       curRowBytes += kv.getLength();
415       curRowCols++;
416       prevKV = kv;
417     }
418 
419     private void collectRow() {
420       rowSizeBytes.collect(curRowBytes);
421       rowSizeCols.collect(curRowCols);
422 
423       if (curRowBytes > maxRowBytes && prevKV != null) {
424         biggestRow = prevKV.getRow();
425         maxRowBytes = curRowBytes;
426       }
427 
428       curRowBytes = 0;
429       curRowCols = 0;
430     }
431 
432     public void finish() {
433       if (curRowCols > 0) {
434         collectRow();
435       }
436     }
437 
438     @Override
439     public String toString() {
440       if (prevKV == null)
441         return "no data available for statistics";
442 
443       return
444         "Key length: " + keyLen + "\n" +
445         "Val length: " + valLen + "\n" +
446         "Row size (bytes): " + rowSizeBytes + "\n" +
447         "Row size (columns): " + rowSizeCols + "\n" +
448         "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
449     }
450   }
451 }