1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.apache.hadoop.hbase.io.hfile;
22
23 import java.io.DataInput;
24 import java.io.IOException;
25 import java.util.ArrayList;
26 import java.util.List;
27 import java.util.Map;
28
29 import org.apache.commons.cli.CommandLine;
30 import org.apache.commons.cli.CommandLineParser;
31 import org.apache.commons.cli.HelpFormatter;
32 import org.apache.commons.cli.Options;
33 import org.apache.commons.cli.ParseException;
34 import org.apache.commons.cli.PosixParser;
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.hbase.HBaseConfiguration;
41 import org.apache.hadoop.hbase.HRegionInfo;
42 import org.apache.hadoop.hbase.KeyValue;
43 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
44 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
45 import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
46 import org.apache.hadoop.hbase.util.BloomFilter;
47 import org.apache.hadoop.hbase.util.BloomFilterFactory;
48 import org.apache.hadoop.hbase.util.ByteBloomFilter;
49 import org.apache.hadoop.hbase.util.Bytes;
50 import org.apache.hadoop.hbase.util.FSUtils;
51 import org.apache.hadoop.hbase.util.Writables;
52
53
54
55
56 public class HFilePrettyPrinter {
57
58 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
59
60 private Options options = new Options();
61
62 private boolean verbose;
63 private boolean printValue;
64 private boolean printKey;
65 private boolean shouldPrintMeta;
66 private boolean printBlocks;
67 private boolean printStats;
68 private boolean checkRow;
69 private boolean checkFamily;
70 private boolean isSeekToRow = false;
71
72
73
74
75 private byte[] row = null;
76 private Configuration conf;
77
78 private List<Path> files = new ArrayList<Path>();
79 private int count;
80
81 private static final String FOUR_SPACES = " ";
82
83 public HFilePrettyPrinter() {
84 options.addOption("v", "verbose", false,
85 "Verbose output; emits file and meta data delimiters");
86 options.addOption("p", "printkv", false, "Print key/value pairs");
87 options.addOption("e", "printkey", false, "Print keys");
88 options.addOption("m", "printmeta", false, "Print meta data of file");
89 options.addOption("b", "printblocks", false, "Print block index meta data");
90 options.addOption("k", "checkrow", false,
91 "Enable row order check; looks for out-of-order keys");
92 options.addOption("a", "checkfamily", false, "Enable family check");
93 options.addOption("f", "file", true,
94 "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34");
95 options.addOption("w", "seekToRow", true,
96 "Seek to this row and print all the kvs for this row only");
97 options.addOption("r", "region", true,
98 "Region to scan. Pass region name; e.g. '.META.,,1'");
99 options.addOption("s", "stats", false, "Print statistics");
100 }
101
102 public boolean parseOptions(String args[]) throws ParseException,
103 IOException {
104 if (args.length == 0) {
105 HelpFormatter formatter = new HelpFormatter();
106 formatter.printHelp("HFile", options, true);
107 return false;
108 }
109 CommandLineParser parser = new PosixParser();
110 CommandLine cmd = parser.parse(options, args);
111
112 verbose = cmd.hasOption("v");
113 printValue = cmd.hasOption("p");
114 printKey = cmd.hasOption("e") || printValue;
115 shouldPrintMeta = cmd.hasOption("m");
116 printBlocks = cmd.hasOption("b");
117 printStats = cmd.hasOption("s");
118 checkRow = cmd.hasOption("k");
119 checkFamily = cmd.hasOption("a");
120
121 if (cmd.hasOption("f")) {
122 files.add(new Path(cmd.getOptionValue("f")));
123 }
124
125 if (cmd.hasOption("w")) {
126 String key = cmd.getOptionValue("w");
127 if (key != null && key.length() != 0) {
128 row = key.getBytes();
129 isSeekToRow = true;
130 } else {
131 System.err.println("Invalid row is specified.");
132 System.exit(-1);
133 }
134 }
135
136 if (cmd.hasOption("r")) {
137 String regionName = cmd.getOptionValue("r");
138 byte[] rn = Bytes.toBytes(regionName);
139 byte[][] hri = HRegionInfo.parseRegionName(rn);
140 Path rootDir = FSUtils.getRootDir(conf);
141 Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
142 String enc = HRegionInfo.encodeRegionName(rn);
143 Path regionDir = new Path(tableDir, enc);
144 if (verbose)
145 System.out.println("region dir -> " + regionDir);
146 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
147 regionDir);
148 if (verbose)
149 System.out.println("Number of region files found -> "
150 + regionFiles.size());
151 if (verbose) {
152 int i = 1;
153 for (Path p : regionFiles) {
154 if (verbose)
155 System.out.println("Found file[" + i++ + "] -> " + p);
156 }
157 }
158 files.addAll(regionFiles);
159 }
160
161 return true;
162 }
163
164
165
166
167
168 public int run(String[] args) {
169 conf = HBaseConfiguration.create();
170 conf.set("fs.defaultFS",
171 conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
172 conf.set("fs.default.name",
173 conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
174 SchemaMetrics.configureGlobally(conf);
175 try {
176 if (!parseOptions(args))
177 return 1;
178 } catch (IOException ex) {
179 LOG.error("Error parsing command-line options", ex);
180 return 1;
181 } catch (ParseException ex) {
182 LOG.error("Error parsing command-line options", ex);
183 return 1;
184 }
185
186
187 for (Path fileName : files) {
188 try {
189 processFile(fileName);
190 } catch (IOException ex) {
191 LOG.error("Error reading " + fileName, ex);
192 }
193 }
194
195 if (verbose || printKey) {
196 System.out.println("Scanned kv count -> " + count);
197 }
198
199 return 0;
200 }
201
202 private void processFile(Path file) throws IOException {
203 if (verbose)
204 System.out.println("Scanning -> " + file);
205 FileSystem fs = file.getFileSystem(conf);
206 if (!fs.exists(file)) {
207 System.err.println("ERROR, file doesnt exist: " + file);
208 }
209
210 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
211
212 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
213
214 KeyValueStatsCollector fileStats = null;
215
216 if (verbose || printKey || checkRow || checkFamily || printStats) {
217
218 HFileScanner scanner = reader.getScanner(false, false, false);
219 fileStats = new KeyValueStatsCollector();
220 boolean shouldScanKeysValues = false;
221 if (this.isSeekToRow) {
222
223 shouldScanKeysValues =
224 (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
225 } else {
226 shouldScanKeysValues = scanner.seekTo();
227 }
228 if (shouldScanKeysValues)
229 scanKeysValues(file, fileStats, scanner, row);
230 }
231
232
233 if (shouldPrintMeta) {
234 printMeta(reader, fileInfo);
235 }
236
237 if (printBlocks) {
238 System.out.println("Block Index:");
239 System.out.println(reader.getDataBlockIndexReader());
240 }
241
242 if (printStats) {
243 fileStats.finish();
244 System.out.println("Stats:\n" + fileStats);
245 }
246
247 reader.close();
248 }
249
250 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
251 HFileScanner scanner, byte[] row) throws IOException {
252 KeyValue pkv = null;
253 do {
254 KeyValue kv = scanner.getKeyValue();
255 if (row != null && row.length != 0) {
256 int result = Bytes.compareTo(kv.getRow(), row);
257 if (result > 0) {
258 break;
259 } else if (result < 0) {
260 continue;
261 }
262 }
263
264 if (printStats) {
265 fileStats.collect(kv);
266 }
267
268 if (printKey) {
269 System.out.print("K: " + kv);
270 if (printValue) {
271 System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
272 }
273 System.out.println();
274 }
275
276 if (checkRow && pkv != null) {
277 if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
278 System.err.println("WARNING, previous row is greater then"
279 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
280 + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent -> "
281 + Bytes.toStringBinary(kv.getKey()));
282 }
283 }
284
285 if (checkFamily) {
286 String fam = Bytes.toString(kv.getFamily());
287 if (!file.toString().contains(fam)) {
288 System.err.println("WARNING, filename does not match kv family,"
289 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
290 + Bytes.toStringBinary(kv.getKey()));
291 }
292 if (pkv != null
293 && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
294 System.err.println("WARNING, previous kv has different family"
295 + " compared to current key\n\tfilename -> " + file
296 + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
297 + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
298 }
299 }
300 pkv = kv;
301 ++count;
302 } while (scanner.next());
303 }
304
305
306
307
308
309 private static String asSeparateLines(String keyValueStr) {
310 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
311 ",\n" + FOUR_SPACES + "$1");
312 }
313
314 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
315 throws IOException {
316 System.out.println("Block index size as per heapsize: "
317 + reader.indexSize());
318 System.out.println(asSeparateLines(reader.toString()));
319 System.out.println("Trailer:\n "
320 + asSeparateLines(reader.getTrailer().toString()));
321 System.out.println("Fileinfo:");
322 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
323 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
324 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
325 long seqid = Bytes.toLong(e.getValue());
326 System.out.println(seqid);
327 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
328 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
329 Writables.copyWritable(e.getValue(), timeRangeTracker);
330 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
331 + timeRangeTracker.getMaximumTimestamp());
332 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
333 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
334 System.out.println(Bytes.toInt(e.getValue()));
335 } else {
336 System.out.println(Bytes.toStringBinary(e.getValue()));
337 }
338 }
339
340 System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
341
342
343 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
344 BloomFilter bloomFilter = null;
345 if (bloomMeta != null)
346 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
347
348 System.out.println("Bloom filter:");
349 if (bloomFilter != null) {
350 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
351 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
352 } else {
353 System.out.println(FOUR_SPACES + "Not present");
354 }
355
356
357 bloomMeta = reader.getDeleteBloomFilterMetadata();
358 bloomFilter = null;
359 if (bloomMeta != null)
360 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
361
362 System.out.println("Delete Family Bloom filter:");
363 if (bloomFilter != null) {
364 System.out.println(FOUR_SPACES
365 + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
366 "\n" + FOUR_SPACES));
367 } else {
368 System.out.println(FOUR_SPACES + "Not present");
369 }
370 }
371
372 private static class LongStats {
373 private long min = Long.MAX_VALUE;
374 private long max = Long.MIN_VALUE;
375 private long sum = 0;
376 private long count = 0;
377
378 void collect(long d) {
379 if (d < min) min = d;
380 if (d > max) max = d;
381 sum += d;
382 count++;
383 }
384
385 public String toString() {
386 return "count: " + count +
387 "\tmin: " + min +
388 "\tmax: " + max +
389 "\tmean: " + ((double)sum/count);
390 }
391 }
392
393 private static class KeyValueStatsCollector {
394 LongStats keyLen = new LongStats();
395 LongStats valLen = new LongStats();
396 LongStats rowSizeBytes = new LongStats();
397 LongStats rowSizeCols = new LongStats();
398
399 long curRowBytes = 0;
400 long curRowCols = 0;
401
402 byte[] biggestRow = null;
403
404 private KeyValue prevKV = null;
405 private long maxRowBytes = 0;
406
407 public void collect(KeyValue kv) {
408 keyLen.collect(kv.getKeyLength());
409 valLen.collect(kv.getValueLength());
410 if (prevKV != null &&
411 KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
412
413 collectRow();
414 }
415 curRowBytes += kv.getLength();
416 curRowCols++;
417 prevKV = kv;
418 }
419
420 private void collectRow() {
421 rowSizeBytes.collect(curRowBytes);
422 rowSizeCols.collect(curRowCols);
423
424 if (curRowBytes > maxRowBytes && prevKV != null) {
425 biggestRow = prevKV.getRow();
426 maxRowBytes = curRowBytes;
427 }
428
429 curRowBytes = 0;
430 curRowCols = 0;
431 }
432
433 public void finish() {
434 if (curRowCols > 0) {
435 collectRow();
436 }
437 }
438
439 @Override
440 public String toString() {
441 if (prevKV == null)
442 return "no data available for statistics";
443
444 return
445 "Key length: " + keyLen + "\n" +
446 "Val length: " + valLen + "\n" +
447 "Row size (bytes): " + rowSizeBytes + "\n" +
448 "Row size (columns): " + rowSizeCols + "\n" +
449 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
450 }
451 }
452 }