1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.apache.hadoop.hbase.io.hfile;
22
23 import java.io.ByteArrayOutputStream;
24 import java.io.DataInput;
25 import java.io.IOException;
26 import java.io.PrintStream;
27 import java.util.ArrayList;
28 import java.util.List;
29 import java.util.Locale;
30 import java.util.Map;
31 import java.util.SortedMap;
32
33 import com.yammer.metrics.core.*;
34 import com.yammer.metrics.reporting.ConsoleReporter;
35
36 import org.apache.commons.cli.CommandLine;
37 import org.apache.commons.cli.CommandLineParser;
38 import org.apache.commons.cli.HelpFormatter;
39 import org.apache.commons.cli.Options;
40 import org.apache.commons.cli.ParseException;
41 import org.apache.commons.cli.PosixParser;
42 import org.apache.commons.logging.Log;
43 import org.apache.commons.logging.LogFactory;
44 import org.apache.hadoop.conf.Configuration;
45 import org.apache.hadoop.fs.FileSystem;
46 import org.apache.hadoop.fs.Path;
47 import org.apache.hadoop.hbase.HBaseConfiguration;
48 import org.apache.hadoop.hbase.HRegionInfo;
49 import org.apache.hadoop.hbase.KeyValue;
50 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
51 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
52 import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
53 import org.apache.hadoop.hbase.util.BloomFilter;
54 import org.apache.hadoop.hbase.util.BloomFilterFactory;
55 import org.apache.hadoop.hbase.util.ByteBloomFilter;
56 import org.apache.hadoop.hbase.util.Bytes;
57 import org.apache.hadoop.hbase.util.FSUtils;
58 import org.apache.hadoop.hbase.util.Writables;
59
60
61
62
63 public class HFilePrettyPrinter {
64
65 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
66
67 private Options options = new Options();
68
69 private boolean verbose;
70 private boolean printValue;
71 private boolean printKey;
72 private boolean shouldPrintMeta;
73 private boolean printBlocks;
74 private boolean printStats;
75 private boolean checkRow;
76 private boolean checkFamily;
77 private boolean isSeekToRow = false;
78
79
80
81
82 private byte[] row = null;
83 private Configuration conf;
84
85 private List<Path> files = new ArrayList<Path>();
86 private int count;
87
88 private static final String FOUR_SPACES = " ";
89
/**
 * Creates a pretty printer and registers every command-line option it
 * understands. Options taking an argument are -f, -w and -r; all others are
 * plain flags.
 */
public HFilePrettyPrinter() {
  options
      .addOption("v", "verbose", false,
          "Verbose output; emits file and meta data delimiters")
      .addOption("p", "printkv", false, "Print key/value pairs")
      .addOption("e", "printkey", false, "Print keys")
      .addOption("m", "printmeta", false, "Print meta data of file")
      .addOption("b", "printblocks", false, "Print block index meta data")
      .addOption("k", "checkrow", false,
          "Enable row order check; looks for out-of-order keys")
      .addOption("a", "checkfamily", false, "Enable family check")
      .addOption("f", "file", true,
          "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34")
      .addOption("w", "seekToRow", true,
          "Seek to this row and print all the kvs for this row only")
      .addOption("r", "region", true,
          "Region to scan. Pass region name; e.g. '.META.,,1'")
      .addOption("s", "stats", false, "Print statistics");
}
108
109 public boolean parseOptions(String args[]) throws ParseException,
110 IOException {
111 if (args.length == 0) {
112 HelpFormatter formatter = new HelpFormatter();
113 formatter.printHelp("HFile", options, true);
114 return false;
115 }
116 CommandLineParser parser = new PosixParser();
117 CommandLine cmd = parser.parse(options, args);
118
119 verbose = cmd.hasOption("v");
120 printValue = cmd.hasOption("p");
121 printKey = cmd.hasOption("e") || printValue;
122 shouldPrintMeta = cmd.hasOption("m");
123 printBlocks = cmd.hasOption("b");
124 printStats = cmd.hasOption("s");
125 checkRow = cmd.hasOption("k");
126 checkFamily = cmd.hasOption("a");
127
128 if (cmd.hasOption("f")) {
129 files.add(new Path(cmd.getOptionValue("f")));
130 }
131
132 if (cmd.hasOption("w")) {
133 String key = cmd.getOptionValue("w");
134 if (key != null && key.length() != 0) {
135 row = key.getBytes();
136 isSeekToRow = true;
137 } else {
138 System.err.println("Invalid row is specified.");
139 System.exit(-1);
140 }
141 }
142
143 if (cmd.hasOption("r")) {
144 String regionName = cmd.getOptionValue("r");
145 byte[] rn = Bytes.toBytes(regionName);
146 byte[][] hri = HRegionInfo.parseRegionName(rn);
147 Path rootDir = FSUtils.getRootDir(conf);
148 Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
149 String enc = HRegionInfo.encodeRegionName(rn);
150 Path regionDir = new Path(tableDir, enc);
151 if (verbose)
152 System.out.println("region dir -> " + regionDir);
153 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
154 regionDir);
155 if (verbose)
156 System.out.println("Number of region files found -> "
157 + regionFiles.size());
158 if (verbose) {
159 int i = 1;
160 for (Path p : regionFiles) {
161 if (verbose)
162 System.out.println("Found file[" + i++ + "] -> " + p);
163 }
164 }
165 files.addAll(regionFiles);
166 }
167
168 return true;
169 }
170
171
172
173
174
175 public int run(String[] args) {
176 conf = HBaseConfiguration.create();
177 conf.set("fs.defaultFS",
178 conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
179 conf.set("fs.default.name",
180 conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
181 SchemaMetrics.configureGlobally(conf);
182 try {
183 if (!parseOptions(args))
184 return 1;
185 } catch (IOException ex) {
186 LOG.error("Error parsing command-line options", ex);
187 return 1;
188 } catch (ParseException ex) {
189 LOG.error("Error parsing command-line options", ex);
190 return 1;
191 }
192
193
194 for (Path fileName : files) {
195 try {
196 processFile(fileName);
197 } catch (IOException ex) {
198 LOG.error("Error reading " + fileName, ex);
199 }
200 }
201
202 if (verbose || printKey) {
203 System.out.println("Scanned kv count -> " + count);
204 }
205
206 return 0;
207 }
208
209 private void processFile(Path file) throws IOException {
210 if (verbose)
211 System.out.println("Scanning -> " + file);
212 FileSystem fs = file.getFileSystem(conf);
213 if (!fs.exists(file)) {
214 System.err.println("ERROR, file doesnt exist: " + file);
215 }
216
217 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
218
219 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
220
221 KeyValueStatsCollector fileStats = null;
222
223 if (verbose || printKey || checkRow || checkFamily || printStats) {
224
225 HFileScanner scanner = reader.getScanner(false, false, false);
226 fileStats = new KeyValueStatsCollector();
227 boolean shouldScanKeysValues = false;
228 if (this.isSeekToRow) {
229
230 shouldScanKeysValues =
231 (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
232 } else {
233 shouldScanKeysValues = scanner.seekTo();
234 }
235 if (shouldScanKeysValues)
236 scanKeysValues(file, fileStats, scanner, row);
237 }
238
239
240 if (shouldPrintMeta) {
241 printMeta(reader, fileInfo);
242 }
243
244 if (printBlocks) {
245 System.out.println("Block Index:");
246 System.out.println(reader.getDataBlockIndexReader());
247 }
248
249 if (printStats) {
250 fileStats.finish();
251 System.out.println("Stats:\n" + fileStats);
252 }
253
254 reader.close();
255 }
256
257 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
258 HFileScanner scanner, byte[] row) throws IOException {
259 KeyValue pkv = null;
260 do {
261 KeyValue kv = scanner.getKeyValue();
262 if (row != null && row.length != 0) {
263 int result = Bytes.compareTo(kv.getRow(), row);
264 if (result > 0) {
265 break;
266 } else if (result < 0) {
267 continue;
268 }
269 }
270
271 if (printStats) {
272 fileStats.collect(kv);
273 }
274
275 if (printKey) {
276 System.out.print("K: " + kv);
277 if (printValue) {
278 System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
279 }
280 System.out.println();
281 }
282
283 if (checkRow && pkv != null) {
284 if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
285 System.err.println("WARNING, previous row is greater then"
286 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
287 + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent -> "
288 + Bytes.toStringBinary(kv.getKey()));
289 }
290 }
291
292 if (checkFamily) {
293 String fam = Bytes.toString(kv.getFamily());
294 if (!file.toString().contains(fam)) {
295 System.err.println("WARNING, filename does not match kv family,"
296 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
297 + Bytes.toStringBinary(kv.getKey()));
298 }
299 if (pkv != null
300 && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
301 System.err.println("WARNING, previous kv has different family"
302 + " compared to current key\n\tfilename -> " + file
303 + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
304 + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
305 }
306 }
307 pkv = kv;
308 ++count;
309 } while (scanner.next());
310 }
311
312
313
314
315
316 private static String asSeparateLines(String keyValueStr) {
317 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
318 ",\n" + FOUR_SPACES + "$1");
319 }
320
321 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
322 throws IOException {
323 System.out.println("Block index size as per heapsize: "
324 + reader.indexSize());
325 System.out.println(asSeparateLines(reader.toString()));
326 System.out.println("Trailer:\n "
327 + asSeparateLines(reader.getTrailer().toString()));
328 System.out.println("Fileinfo:");
329 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
330 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
331 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
332 long seqid = Bytes.toLong(e.getValue());
333 System.out.println(seqid);
334 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
335 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
336 Writables.copyWritable(e.getValue(), timeRangeTracker);
337 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
338 + timeRangeTracker.getMaximumTimestamp());
339 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
340 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
341 System.out.println(Bytes.toInt(e.getValue()));
342 } else {
343 System.out.println(Bytes.toStringBinary(e.getValue()));
344 }
345 }
346
347 try {
348 System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
349 } catch (Exception e) {
350 System.out.println("Unable to retrieve the midkey");
351 }
352
353
354 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
355 BloomFilter bloomFilter = null;
356 if (bloomMeta != null)
357 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
358
359 System.out.println("Bloom filter:");
360 if (bloomFilter != null) {
361 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
362 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
363 } else {
364 System.out.println(FOUR_SPACES + "Not present");
365 }
366
367
368 bloomMeta = reader.getDeleteBloomFilterMetadata();
369 bloomFilter = null;
370 if (bloomMeta != null)
371 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
372
373 System.out.println("Delete Family Bloom filter:");
374 if (bloomFilter != null) {
375 System.out.println(FOUR_SPACES
376 + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
377 "\n" + FOUR_SPACES));
378 } else {
379 System.out.println(FOUR_SPACES + "Not present");
380 }
381 }
382
383 private static class KeyValueStatsCollector {
384 private final MetricsRegistry metricsRegistry = new MetricsRegistry();
385 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
386 private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
387 Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
388 Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
389 Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
390 Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
391
392 long curRowBytes = 0;
393 long curRowCols = 0;
394
395 byte[] biggestRow = null;
396
397 private KeyValue prevKV = null;
398 private long maxRowBytes = 0;
399 private long curRowKeyLength;
400
401 public void collect(KeyValue kv) {
402 valLen.update(kv.getValueLength());
403 if (prevKV != null &&
404 KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
405
406 collectRow();
407 }
408 curRowBytes += kv.getLength();
409 curRowKeyLength = kv.getKeyLength();
410 curRowCols++;
411 prevKV = kv;
412 }
413
414 private void collectRow() {
415 rowSizeBytes.update(curRowBytes);
416 rowSizeCols.update(curRowCols);
417 keyLen.update(curRowKeyLength);
418
419 if (curRowBytes > maxRowBytes && prevKV != null) {
420 biggestRow = prevKV.getRow();
421 maxRowBytes = curRowBytes;
422 }
423
424 curRowBytes = 0;
425 curRowCols = 0;
426 }
427
428 public void finish() {
429 if (curRowCols > 0) {
430 collectRow();
431 }
432 }
433
434 @Override
435 public String toString() {
436 if (prevKV == null)
437 return "no data available for statistics";
438
439
440 simpleReporter.shutdown();
441 simpleReporter.run();
442 metricsRegistry.shutdown();
443
444 return
445 metricsOutput.toString() +
446 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
447 }
448 }
449
450 private static class SimpleReporter extends ConsoleReporter {
451 private final PrintStream out;
452
453 public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
454 super(metricsRegistry, out, MetricPredicate.ALL);
455 this.out = out;
456 }
457
458 @Override
459 public void run() {
460 for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
461 MetricPredicate.ALL).entrySet()) {
462 try {
463 for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
464 out.print(" " + subEntry.getKey().getName());
465 out.println(':');
466
467 subEntry.getValue().processWith(this, subEntry.getKey(), out);
468 }
469 } catch (Exception e) {
470 e.printStackTrace(out);
471 }
472 }
473 }
474
475 @Override
476 public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
477 super.processHistogram(name, histogram, stream);
478 stream.printf(Locale.getDefault(), " count = %d\n", histogram.count());
479 }
480 }
481 }