1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.DataInput;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.List;
26 import java.util.Map;
27
28 import org.apache.commons.cli.CommandLine;
29 import org.apache.commons.cli.CommandLineParser;
30 import org.apache.commons.cli.HelpFormatter;
31 import org.apache.commons.cli.Options;
32 import org.apache.commons.cli.ParseException;
33 import org.apache.commons.cli.PosixParser;
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.classification.InterfaceAudience;
37 import org.apache.hadoop.classification.InterfaceStability;
38 import org.apache.hadoop.conf.Configuration;
39 import org.apache.hadoop.fs.FileSystem;
40 import org.apache.hadoop.fs.Path;
41 import org.apache.hadoop.hbase.HBaseConfiguration;
42 import org.apache.hadoop.hbase.HRegionInfo;
43 import org.apache.hadoop.hbase.KeyValue;
44 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
45 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
46 import org.apache.hadoop.hbase.util.BloomFilter;
47 import org.apache.hadoop.hbase.util.BloomFilterFactory;
48 import org.apache.hadoop.hbase.util.ByteBloomFilter;
49 import org.apache.hadoop.hbase.util.Bytes;
50 import org.apache.hadoop.hbase.util.FSUtils;
51 import org.apache.hadoop.hbase.util.Writables;
52
53
54
55
56 @InterfaceAudience.Public
57 @InterfaceStability.Evolving
58 public class HFilePrettyPrinter {
59
60 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
61
62 private Options options = new Options();
63
64 private boolean verbose;
65 private boolean printValue;
66 private boolean printKey;
67 private boolean shouldPrintMeta;
68 private boolean printBlocks;
69 private boolean printStats;
70 private boolean checkRow;
71 private boolean checkFamily;
72 private boolean isSeekToRow = false;
73
74
75
76
77 private byte[] row = null;
78 private Configuration conf;
79
80 private List<Path> files = new ArrayList<Path>();
81 private int count;
82
83 private static final String FOUR_SPACES = " ";
84
/**
 * Creates a pretty printer and registers every command-line switch it
 * understands. Switches taking an argument are -f, -w and -r; all others
 * are plain flags.
 */
public HFilePrettyPrinter() {
  options
      .addOption("v", "verbose", false,
          "Verbose output; emits file and meta data delimiters")
      .addOption("p", "printkv", false, "Print key/value pairs")
      .addOption("e", "printkey", false, "Print keys")
      .addOption("m", "printmeta", false, "Print meta data of file")
      .addOption("b", "printblocks", false, "Print block index meta data")
      .addOption("k", "checkrow", false,
          "Enable row order check; looks for out-of-order keys")
      .addOption("a", "checkfamily", false, "Enable family check")
      .addOption("f", "file", true,
          "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34")
      .addOption("w", "seekToRow", true,
          "Seek to this row and print all the kvs for this row only")
      .addOption("r", "region", true,
          "Region to scan. Pass region name; e.g. '.META.,,1'")
      .addOption("s", "stats", false, "Print statistics");
}
103
104 public boolean parseOptions(String args[]) throws ParseException,
105 IOException {
106 if (args.length == 0) {
107 HelpFormatter formatter = new HelpFormatter();
108 formatter.printHelp("HFile", options, true);
109 return false;
110 }
111 CommandLineParser parser = new PosixParser();
112 CommandLine cmd = parser.parse(options, args);
113
114 verbose = cmd.hasOption("v");
115 printValue = cmd.hasOption("p");
116 printKey = cmd.hasOption("e") || printValue;
117 shouldPrintMeta = cmd.hasOption("m");
118 printBlocks = cmd.hasOption("b");
119 printStats = cmd.hasOption("s");
120 checkRow = cmd.hasOption("k");
121 checkFamily = cmd.hasOption("a");
122
123 if (cmd.hasOption("f")) {
124 files.add(new Path(cmd.getOptionValue("f")));
125 }
126
127 if (cmd.hasOption("w")) {
128 String key = cmd.getOptionValue("w");
129 if (key != null && key.length() != 0) {
130 row = key.getBytes();
131 isSeekToRow = true;
132 } else {
133 System.err.println("Invalid row is specified.");
134 System.exit(-1);
135 }
136 }
137
138 if (cmd.hasOption("r")) {
139 String regionName = cmd.getOptionValue("r");
140 byte[] rn = Bytes.toBytes(regionName);
141 byte[][] hri = HRegionInfo.parseRegionName(rn);
142 Path rootDir = FSUtils.getRootDir(conf);
143 Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
144 String enc = HRegionInfo.encodeRegionName(rn);
145 Path regionDir = new Path(tableDir, enc);
146 if (verbose)
147 System.out.println("region dir -> " + regionDir);
148 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
149 regionDir);
150 if (verbose)
151 System.out.println("Number of region files found -> "
152 + regionFiles.size());
153 if (verbose) {
154 int i = 1;
155 for (Path p : regionFiles) {
156 if (verbose)
157 System.out.println("Found file[" + i++ + "] -> " + p);
158 }
159 }
160 files.addAll(regionFiles);
161 }
162
163 return true;
164 }
165
166
167
168
169
170 public int run(String[] args) {
171 conf = HBaseConfiguration.create();
172 try {
173 FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
174 if (!parseOptions(args))
175 return 1;
176 } catch (IOException ex) {
177 LOG.error("Error parsing command-line options", ex);
178 return 1;
179 } catch (ParseException ex) {
180 LOG.error("Error parsing command-line options", ex);
181 return 1;
182 }
183
184
185 for (Path fileName : files) {
186 try {
187 processFile(fileName);
188 } catch (IOException ex) {
189 LOG.error("Error reading " + fileName, ex);
190 }
191 }
192
193 if (verbose || printKey) {
194 System.out.println("Scanned kv count -> " + count);
195 }
196
197 return 0;
198 }
199
200 private void processFile(Path file) throws IOException {
201 if (verbose)
202 System.out.println("Scanning -> " + file);
203 FileSystem fs = file.getFileSystem(conf);
204 if (!fs.exists(file)) {
205 System.err.println("ERROR, file doesnt exist: " + file);
206 }
207
208 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
209
210 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
211
212 KeyValueStatsCollector fileStats = null;
213
214 if (verbose || printKey || checkRow || checkFamily || printStats) {
215
216 HFileScanner scanner = reader.getScanner(false, false, false);
217 fileStats = new KeyValueStatsCollector();
218 boolean shouldScanKeysValues = false;
219 if (this.isSeekToRow) {
220
221 shouldScanKeysValues =
222 (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
223 } else {
224 shouldScanKeysValues = scanner.seekTo();
225 }
226 if (shouldScanKeysValues)
227 scanKeysValues(file, fileStats, scanner, row);
228 }
229
230
231 if (shouldPrintMeta) {
232 printMeta(reader, fileInfo);
233 }
234
235 if (printBlocks) {
236 System.out.println("Block Index:");
237 System.out.println(reader.getDataBlockIndexReader());
238 }
239
240 if (printStats) {
241 fileStats.finish();
242 System.out.println("Stats:\n" + fileStats);
243 }
244
245 reader.close();
246 }
247
248 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
249 HFileScanner scanner, byte[] row) throws IOException {
250 KeyValue pkv = null;
251 do {
252 KeyValue kv = scanner.getKeyValue();
253 if (row != null && row.length != 0) {
254 int result = Bytes.compareTo(kv.getRow(), row);
255 if (result > 0) {
256 break;
257 } else if (result < 0) {
258 continue;
259 }
260 }
261
262 if (printStats) {
263 fileStats.collect(kv);
264 }
265
266 if (printKey) {
267 System.out.print("K: " + kv);
268 if (printValue) {
269 System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
270 }
271 System.out.println();
272 }
273
274 if (checkRow && pkv != null) {
275 if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
276 System.err.println("WARNING, previous row is greater then"
277 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
278 + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent -> "
279 + Bytes.toStringBinary(kv.getKey()));
280 }
281 }
282
283 if (checkFamily) {
284 String fam = Bytes.toString(kv.getFamily());
285 if (!file.toString().contains(fam)) {
286 System.err.println("WARNING, filename does not match kv family,"
287 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
288 + Bytes.toStringBinary(kv.getKey()));
289 }
290 if (pkv != null
291 && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
292 System.err.println("WARNING, previous kv has different family"
293 + " compared to current key\n\tfilename -> " + file
294 + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
295 + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
296 }
297 }
298 pkv = kv;
299 ++count;
300 } while (scanner.next());
301 }
302
303
304
305
306
307 private static String asSeparateLines(String keyValueStr) {
308 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
309 ",\n" + FOUR_SPACES + "$1");
310 }
311
312 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
313 throws IOException {
314 System.out.println("Block index size as per heapsize: "
315 + reader.indexSize());
316 System.out.println(asSeparateLines(reader.toString()));
317 System.out.println("Trailer:\n "
318 + asSeparateLines(reader.getTrailer().toString()));
319 System.out.println("Fileinfo:");
320 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
321 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
322 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
323 long seqid = Bytes.toLong(e.getValue());
324 System.out.println(seqid);
325 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
326 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
327 Writables.copyWritable(e.getValue(), timeRangeTracker);
328 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
329 + timeRangeTracker.getMaximumTimestamp());
330 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
331 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
332 System.out.println(Bytes.toInt(e.getValue()));
333 } else {
334 System.out.println(Bytes.toStringBinary(e.getValue()));
335 }
336 }
337
338 System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
339
340
341 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
342 BloomFilter bloomFilter = null;
343 if (bloomMeta != null)
344 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
345
346 System.out.println("Bloom filter:");
347 if (bloomFilter != null) {
348 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
349 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
350 } else {
351 System.out.println(FOUR_SPACES + "Not present");
352 }
353
354
355 bloomMeta = reader.getDeleteBloomFilterMetadata();
356 bloomFilter = null;
357 if (bloomMeta != null)
358 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
359
360 System.out.println("Delete Family Bloom filter:");
361 if (bloomFilter != null) {
362 System.out.println(FOUR_SPACES
363 + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
364 "\n" + FOUR_SPACES));
365 } else {
366 System.out.println(FOUR_SPACES + "Not present");
367 }
368 }
369
370 private static class LongStats {
371 private long min = Long.MAX_VALUE;
372 private long max = Long.MIN_VALUE;
373 private long sum = 0;
374 private long count = 0;
375
376 void collect(long d) {
377 if (d < min) min = d;
378 if (d > max) max = d;
379 sum += d;
380 count++;
381 }
382
383 public String toString() {
384 return "count: " + count +
385 "\tmin: " + min +
386 "\tmax: " + max +
387 "\tmean: " + ((double)sum/count);
388 }
389 }
390
391 private static class KeyValueStatsCollector {
392 LongStats keyLen = new LongStats();
393 LongStats valLen = new LongStats();
394 LongStats rowSizeBytes = new LongStats();
395 LongStats rowSizeCols = new LongStats();
396
397 long curRowBytes = 0;
398 long curRowCols = 0;
399
400 byte[] biggestRow = null;
401
402 private KeyValue prevKV = null;
403 private long maxRowBytes = 0;
404
405 public void collect(KeyValue kv) {
406 keyLen.collect(kv.getKeyLength());
407 valLen.collect(kv.getValueLength());
408 if (prevKV != null &&
409 KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
410
411 collectRow();
412 }
413 curRowBytes += kv.getLength();
414 curRowCols++;
415 prevKV = kv;
416 }
417
418 private void collectRow() {
419 rowSizeBytes.collect(curRowBytes);
420 rowSizeCols.collect(curRowCols);
421
422 if (curRowBytes > maxRowBytes && prevKV != null) {
423 biggestRow = prevKV.getRow();
424 }
425
426 curRowBytes = 0;
427 curRowCols = 0;
428 }
429
430 public void finish() {
431 if (curRowCols > 0) {
432 collectRow();
433 }
434 }
435
436 @Override
437 public String toString() {
438 if (prevKV == null)
439 return "no data available for statistics";
440
441 return
442 "Key length: " + keyLen + "\n" +
443 "Val length: " + valLen + "\n" +
444 "Row size (bytes): " + rowSizeBytes + "\n" +
445 "Row size (columns): " + rowSizeCols + "\n" +
446 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
447 }
448 }
449 }