1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.ByteArrayOutputStream;
23 import java.io.DataInput;
24 import java.io.IOException;
25 import java.io.PrintStream;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.Locale;
29 import java.util.Map;
30 import java.util.SortedMap;
31
32 import com.yammer.metrics.core.*;
33 import com.yammer.metrics.reporting.ConsoleReporter;
34
35 import org.apache.commons.cli.CommandLine;
36 import org.apache.commons.cli.CommandLineParser;
37 import org.apache.commons.cli.HelpFormatter;
38 import org.apache.commons.cli.Options;
39 import org.apache.commons.cli.ParseException;
40 import org.apache.commons.cli.PosixParser;
41 import org.apache.commons.logging.Log;
42 import org.apache.commons.logging.LogFactory;
43 import org.apache.hadoop.classification.InterfaceAudience;
44 import org.apache.hadoop.classification.InterfaceStability;
45 import org.apache.hadoop.conf.Configuration;
46 import org.apache.hadoop.fs.FileSystem;
47 import org.apache.hadoop.fs.Path;
48 import org.apache.hadoop.hbase.TableName;
49 import org.apache.hadoop.hbase.HBaseConfiguration;
50 import org.apache.hadoop.hbase.HRegionInfo;
51 import org.apache.hadoop.hbase.KeyValue;
52 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
53 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
54 import org.apache.hadoop.hbase.util.BloomFilter;
55 import org.apache.hadoop.hbase.util.BloomFilterFactory;
56 import org.apache.hadoop.hbase.util.ByteBloomFilter;
57 import org.apache.hadoop.hbase.util.Bytes;
58 import org.apache.hadoop.hbase.util.FSUtils;
59 import org.apache.hadoop.hbase.util.Writables;
60
61
62
63
64 @InterfaceAudience.Public
65 @InterfaceStability.Evolving
66 public class HFilePrettyPrinter {
67
68 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
69
70 private Options options = new Options();
71
72 private boolean verbose;
73 private boolean printValue;
74 private boolean printKey;
75 private boolean shouldPrintMeta;
76 private boolean printBlocks;
77 private boolean printStats;
78 private boolean checkRow;
79 private boolean checkFamily;
80 private boolean isSeekToRow = false;
81
82
83
84
85 private byte[] row = null;
86 private Configuration conf;
87
88 private List<Path> files = new ArrayList<Path>();
89 private int count;
90
91 private static final String FOUR_SPACES = " ";
92
/**
 * Creates a printer and registers every command-line option it understands.
 * Options declared with {@code true} take an argument; the others are flags.
 */
public HFilePrettyPrinter() {
  options
      .addOption("v", "verbose", false,
          "Verbose output; emits file and meta data delimiters")
      .addOption("p", "printkv", false, "Print key/value pairs")
      .addOption("e", "printkey", false, "Print keys")
      .addOption("m", "printmeta", false, "Print meta data of file")
      .addOption("b", "printblocks", false, "Print block index meta data")
      .addOption("k", "checkrow", false,
          "Enable row order check; looks for out-of-order keys")
      .addOption("a", "checkfamily", false, "Enable family check")
      .addOption("f", "file", true,
          "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34")
      .addOption("w", "seekToRow", true,
          "Seek to this row and print all the kvs for this row only")
      .addOption("r", "region", true,
          "Region to scan. Pass region name; e.g. 'hbase:meta,,1'")
      .addOption("s", "stats", false, "Print statistics");
}
111
112 public boolean parseOptions(String args[]) throws ParseException,
113 IOException {
114 if (args.length == 0) {
115 HelpFormatter formatter = new HelpFormatter();
116 formatter.printHelp("HFile", options, true);
117 return false;
118 }
119 CommandLineParser parser = new PosixParser();
120 CommandLine cmd = parser.parse(options, args);
121
122 verbose = cmd.hasOption("v");
123 printValue = cmd.hasOption("p");
124 printKey = cmd.hasOption("e") || printValue;
125 shouldPrintMeta = cmd.hasOption("m");
126 printBlocks = cmd.hasOption("b");
127 printStats = cmd.hasOption("s");
128 checkRow = cmd.hasOption("k");
129 checkFamily = cmd.hasOption("a");
130
131 if (cmd.hasOption("f")) {
132 files.add(new Path(cmd.getOptionValue("f")));
133 }
134
135 if (cmd.hasOption("w")) {
136 String key = cmd.getOptionValue("w");
137 if (key != null && key.length() != 0) {
138 row = key.getBytes();
139 isSeekToRow = true;
140 } else {
141 System.err.println("Invalid row is specified.");
142 System.exit(-1);
143 }
144 }
145
146 if (cmd.hasOption("r")) {
147 String regionName = cmd.getOptionValue("r");
148 byte[] rn = Bytes.toBytes(regionName);
149 byte[][] hri = HRegionInfo.parseRegionName(rn);
150 Path rootDir = FSUtils.getRootDir(conf);
151 Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
152 String enc = HRegionInfo.encodeRegionName(rn);
153 Path regionDir = new Path(tableDir, enc);
154 if (verbose)
155 System.out.println("region dir -> " + regionDir);
156 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
157 regionDir);
158 if (verbose)
159 System.out.println("Number of region files found -> "
160 + regionFiles.size());
161 if (verbose) {
162 int i = 1;
163 for (Path p : regionFiles) {
164 if (verbose)
165 System.out.println("Found file[" + i++ + "] -> " + p);
166 }
167 }
168 files.addAll(regionFiles);
169 }
170
171 return true;
172 }
173
174
175
176
177
178 public int run(String[] args) {
179 conf = HBaseConfiguration.create();
180 try {
181 FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
182 if (!parseOptions(args))
183 return 1;
184 } catch (IOException ex) {
185 LOG.error("Error parsing command-line options", ex);
186 return 1;
187 } catch (ParseException ex) {
188 LOG.error("Error parsing command-line options", ex);
189 return 1;
190 }
191
192
193 for (Path fileName : files) {
194 try {
195 processFile(fileName);
196 } catch (IOException ex) {
197 LOG.error("Error reading " + fileName, ex);
198 }
199 }
200
201 if (verbose || printKey) {
202 System.out.println("Scanned kv count -> " + count);
203 }
204
205 return 0;
206 }
207
208 private void processFile(Path file) throws IOException {
209 if (verbose)
210 System.out.println("Scanning -> " + file);
211 FileSystem fs = file.getFileSystem(conf);
212 if (!fs.exists(file)) {
213 System.err.println("ERROR, file doesnt exist: " + file);
214 }
215
216 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
217
218 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
219
220 KeyValueStatsCollector fileStats = null;
221
222 if (verbose || printKey || checkRow || checkFamily || printStats) {
223
224 HFileScanner scanner = reader.getScanner(false, false, false);
225 fileStats = new KeyValueStatsCollector();
226 boolean shouldScanKeysValues = false;
227 if (this.isSeekToRow) {
228
229 shouldScanKeysValues =
230 (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
231 } else {
232 shouldScanKeysValues = scanner.seekTo();
233 }
234 if (shouldScanKeysValues)
235 scanKeysValues(file, fileStats, scanner, row);
236 }
237
238
239 if (shouldPrintMeta) {
240 printMeta(reader, fileInfo);
241 }
242
243 if (printBlocks) {
244 System.out.println("Block Index:");
245 System.out.println(reader.getDataBlockIndexReader());
246 }
247
248 if (printStats) {
249 fileStats.finish();
250 System.out.println("Stats:\n" + fileStats);
251 }
252
253 reader.close();
254 }
255
256 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
257 HFileScanner scanner, byte[] row) throws IOException {
258 KeyValue pkv = null;
259 do {
260 KeyValue kv = scanner.getKeyValue();
261 if (row != null && row.length != 0) {
262 int result = Bytes.compareTo(kv.getRow(), row);
263 if (result > 0) {
264 break;
265 } else if (result < 0) {
266 continue;
267 }
268 }
269
270 if (printStats) {
271 fileStats.collect(kv);
272 }
273
274 if (printKey) {
275 System.out.print("K: " + kv);
276 if (printValue) {
277 System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
278 }
279 System.out.println();
280 }
281
282 if (checkRow && pkv != null) {
283 if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
284 System.err.println("WARNING, previous row is greater then"
285 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
286 + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent -> "
287 + Bytes.toStringBinary(kv.getKey()));
288 }
289 }
290
291 if (checkFamily) {
292 String fam = Bytes.toString(kv.getFamily());
293 if (!file.toString().contains(fam)) {
294 System.err.println("WARNING, filename does not match kv family,"
295 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
296 + Bytes.toStringBinary(kv.getKey()));
297 }
298 if (pkv != null
299 && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
300 System.err.println("WARNING, previous kv has different family"
301 + " compared to current key\n\tfilename -> " + file
302 + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
303 + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
304 }
305 }
306 pkv = kv;
307 ++count;
308 } while (scanner.next());
309 }
310
311
312
313
314
315 private static String asSeparateLines(String keyValueStr) {
316 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
317 ",\n" + FOUR_SPACES + "$1");
318 }
319
320 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
321 throws IOException {
322 System.out.println("Block index size as per heapsize: "
323 + reader.indexSize());
324 System.out.println(asSeparateLines(reader.toString()));
325 System.out.println("Trailer:\n "
326 + asSeparateLines(reader.getTrailer().toString()));
327 System.out.println("Fileinfo:");
328 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
329 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
330 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
331 long seqid = Bytes.toLong(e.getValue());
332 System.out.println(seqid);
333 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
334 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
335 Writables.copyWritable(e.getValue(), timeRangeTracker);
336 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
337 + timeRangeTracker.getMaximumTimestamp());
338 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
339 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
340 System.out.println(Bytes.toInt(e.getValue()));
341 } else {
342 System.out.println(Bytes.toStringBinary(e.getValue()));
343 }
344 }
345
346 try {
347 System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
348 } catch (Exception e) {
349 System.out.println ("Unable to retrieve the midkey");
350 }
351
352
353 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
354 BloomFilter bloomFilter = null;
355 if (bloomMeta != null)
356 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
357
358 System.out.println("Bloom filter:");
359 if (bloomFilter != null) {
360 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
361 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
362 } else {
363 System.out.println(FOUR_SPACES + "Not present");
364 }
365
366
367 bloomMeta = reader.getDeleteBloomFilterMetadata();
368 bloomFilter = null;
369 if (bloomMeta != null)
370 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
371
372 System.out.println("Delete Family Bloom filter:");
373 if (bloomFilter != null) {
374 System.out.println(FOUR_SPACES
375 + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
376 "\n" + FOUR_SPACES));
377 } else {
378 System.out.println(FOUR_SPACES + "Not present");
379 }
380 }
381
382 private static class KeyValueStatsCollector {
383 private final MetricsRegistry metricsRegistry = new MetricsRegistry();
384 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
385 private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
386 Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
387 Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
388 Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
389 Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
390
391 long curRowBytes = 0;
392 long curRowCols = 0;
393
394 byte[] biggestRow = null;
395
396 private KeyValue prevKV = null;
397 private long maxRowBytes = 0;
398 private long curRowKeyLength;
399
400 public void collect(KeyValue kv) {
401 valLen.update(kv.getValueLength());
402 if (prevKV != null &&
403 KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
404
405 collectRow();
406 }
407 curRowBytes += kv.getLength();
408 curRowKeyLength = kv.getKeyLength();
409 curRowCols++;
410 prevKV = kv;
411 }
412
413 private void collectRow() {
414 rowSizeBytes.update(curRowBytes);
415 rowSizeCols.update(curRowCols);
416 keyLen.update(curRowKeyLength);
417
418 if (curRowBytes > maxRowBytes && prevKV != null) {
419 biggestRow = prevKV.getRow();
420 maxRowBytes = curRowBytes;
421 }
422
423 curRowBytes = 0;
424 curRowCols = 0;
425 }
426
427 public void finish() {
428 if (curRowCols > 0) {
429 collectRow();
430 }
431 }
432
433 @Override
434 public String toString() {
435 if (prevKV == null)
436 return "no data available for statistics";
437
438
439 simpleReporter.shutdown();
440 simpleReporter.run();
441 metricsRegistry.shutdown();
442
443 return
444 metricsOutput.toString() +
445 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
446 }
447 }
448
449 private static class SimpleReporter extends ConsoleReporter {
450 private final PrintStream out;
451
452 public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
453 super(metricsRegistry, out, MetricPredicate.ALL);
454 this.out = out;
455 }
456
457 @Override
458 public void run() {
459 for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
460 MetricPredicate.ALL).entrySet()) {
461 try {
462 for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
463 out.print(" " + subEntry.getKey().getName());
464 out.println(':');
465
466 subEntry.getValue().processWith(this, subEntry.getKey(), out);
467 }
468 } catch (Exception e) {
469 e.printStackTrace(out);
470 }
471 }
472 }
473
474 @Override
475 public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
476 super.processHistogram(name, histogram, stream);
477 stream.printf(Locale.getDefault(), " count = %d\n", histogram.count());
478 }
479 }
480 }