View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.regionserver;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.text.DecimalFormat;
24  import java.util.ArrayList;
25  import java.util.Iterator;
26  import java.util.List;
27  
28  import org.apache.commons.cli.CommandLine;
29  import org.apache.commons.cli.CommandLineParser;
30  import org.apache.commons.cli.Option;
31  import org.apache.commons.cli.Options;
32  import org.apache.commons.cli.ParseException;
33  import org.apache.commons.cli.PosixParser;
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.Path;
39  import org.apache.hadoop.hbase.HBaseConfiguration;
40  import org.apache.hadoop.hbase.KeyValue;
41  import org.apache.hadoop.hbase.io.compress.Compression;
42  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
43  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
44  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
45  import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;
46  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
47  import org.apache.hadoop.hbase.io.hfile.HFileBlock;
48  import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.io.compress.CompressionOutputStream;
51  import org.apache.hadoop.io.compress.Compressor;
52  import org.apache.hadoop.io.compress.Decompressor;
53  
54  /**
55   * Tests various algorithms for key compression on an existing HFile. Useful
56   * for testing, debugging and benchmarking.
57   */
58  public class DataBlockEncodingTool {
  /** Logger used for progress reporting and codec-mismatch diagnostics. */
  private static final Log LOG = LogFactory.getLog(
      DataBlockEncodingTool.class);

  /** Whether scanned KeyValues carry a memstore timestamp; fixed on here. */
  private static final boolean includesMemstoreTS = true;

  /**
   * How many times to run the benchmark. More times means better data in terms
   * of statistics but slower execution. Has to be strictly larger than
   * {@link #DEFAULT_BENCHMARK_N_OMIT}.
   */
  private static final int DEFAULT_BENCHMARK_N_TIMES = 12;

  /**
   * How many first runs should not be included in the benchmark. Done in order
   * to exclude setup cost.
   */
  private static final int DEFAULT_BENCHMARK_N_OMIT = 2;

  /** HFile name to be used in benchmark */
  private static final String OPT_HFILE_NAME = "f";

  /** Maximum number of key/value pairs to process in a single benchmark run */
  private static final String OPT_KV_LIMIT = "n";

  /** Whether to run a benchmark to measure read throughput */
  private static final String OPT_MEASURE_THROUGHPUT = "b";

  /** If this is specified, no correctness testing will be done */
  private static final String OPT_OMIT_CORRECTNESS_TEST = "c";

  /** What encoding algorithm to test */
  private static final String OPT_ENCODING_ALGORITHM = "a";

  /** Number of times to run each benchmark */
  private static final String OPT_BENCHMARK_N_TIMES = "t";

  /** Number of first runs of every benchmark to omit from statistics */
  private static final String OPT_BENCHMARK_N_OMIT = "omit";

  /** Compression algorithm to use if not specified on the command line */
  private static final Algorithm DEFAULT_COMPRESSION =
      Compression.Algorithm.GZ;

  /** Formats integers with grouping separators (e.g. 1,234,567). */
  private static final DecimalFormat DELIMITED_DECIMAL_FORMAT =
      new DecimalFormat();

  static {
    // Group digits in threes for readable byte counts in the report.
    DELIMITED_DECIMAL_FORMAT.setGroupingSize(3);
  }

  /** printf-style format for percentages in the statistics report. */
  private static final String PCT_FORMAT = "%.2f %%";
  /** printf-style format for integer values; see outputTuple(). */
  private static final String INT_FORMAT = "%d";

  // Benchmark repetition counts; overridable from the command line in main().
  private static int benchmarkNTimes = DEFAULT_BENCHMARK_N_TIMES;
  private static int benchmarkNOmit = DEFAULT_BENCHMARK_N_OMIT;

  /** One encoded copy of the scanned data per encoding under test. */
  private List<EncodedDataBlock> codecs = new ArrayList<EncodedDataBlock>();
  // Aggregate byte counts accumulated by checkStatistics():
  private long totalPrefixLength = 0;      // KV infrastructure (length fields etc.)
  private long totalKeyLength = 0;         // sum of key bytes
  private long totalValueLength = 0;       // sum of value bytes
  private long totalKeyRedundancyLength = 0; // shared prefix bytes between consecutive keys
  private long totalCFLength = 0;          // sum of column family name bytes

  /** Raw concatenation of all scanned KeyValues; input to every codec. */
  private byte[] rawKVs;
  /** HFile minor version of the file under test; set by testCodecs(). */
  private int minorVersion = 0;

  // Baseline compression algorithm used for comparison against encodings.
  private final String compressionAlgorithmName;
  private final Algorithm compressionAlgorithm;
  private final Compressor compressor;
  private final Decompressor decompressor;
129 
130   private static enum Manipulation {
131     ENCODING,
132     DECODING,
133     COMPRESSION,
134     DECOMPRESSION;
135 
136     @Override
137     public String toString() {
138       String s = super.toString();
139       StringBuilder sb = new StringBuilder();
140       sb.append(s.charAt(0));
141       sb.append(s.substring(1).toLowerCase());
142       return sb.toString();
143     }
144   }
145 
146   /**
147    * @param compressionAlgorithmName What kind of algorithm should be used
148    *                                 as baseline for comparison (e.g. lzo, gz).
149    */
150   public DataBlockEncodingTool(String compressionAlgorithmName) {
151     this.compressionAlgorithmName = compressionAlgorithmName;
152     this.compressionAlgorithm = Compression.getCompressionAlgorithmByName(
153         compressionAlgorithmName);
154     this.compressor = this.compressionAlgorithm.getCompressor();
155     this.decompressor = this.compressionAlgorithm.getDecompressor();
156   }
157 
158   /**
159    * Check statistics for given HFile for different data block encoders.
160    * @param scanner Of file which will be compressed.
161    * @param kvLimit Maximal count of KeyValue which will be processed.
162    * @throws IOException thrown if scanner is invalid
163    */
164   public void checkStatistics(final KeyValueScanner scanner, final int kvLimit)
165       throws IOException {
166     scanner.seek(KeyValue.LOWESTKEY);
167 
168     KeyValue currentKV;
169 
170     byte[] previousKey = null;
171     byte[] currentKey;
172 
173     DataBlockEncoding[] encodings = DataBlockEncoding.values();
174 
175     ByteArrayOutputStream uncompressedOutputStream =
176         new ByteArrayOutputStream();
177 
178     int j = 0;
179     while ((currentKV = scanner.next()) != null && j < kvLimit) {
180       // Iterates through key/value pairs
181       j++;
182       currentKey = currentKV.getKey();
183       if (previousKey != null) {
184         for (int i = 0; i < previousKey.length && i < currentKey.length &&
185             previousKey[i] == currentKey[i]; ++i) {
186           totalKeyRedundancyLength++;
187         }
188       }
189 
190       uncompressedOutputStream.write(currentKV.getBuffer(),
191           currentKV.getOffset(), currentKV.getLength());
192 
193       previousKey = currentKey;
194 
195       int kLen = currentKV.getKeyLength();
196       int vLen = currentKV.getValueLength();
197       int cfLen = currentKV.getFamilyLength(currentKV.getFamilyOffset());
198       int restLen = currentKV.getLength() - kLen - vLen;
199 
200       totalKeyLength += kLen;
201       totalValueLength += vLen;
202       totalPrefixLength += restLen;
203       totalCFLength += cfLen;
204     }
205 
206     rawKVs = uncompressedOutputStream.toByteArray();
207 
208     for (DataBlockEncoding encoding : encodings) {
209       if (encoding == DataBlockEncoding.NONE) {
210         continue;
211       }
212       DataBlockEncoder d = encoding.getEncoder();
213       codecs.add(new EncodedDataBlock(d, includesMemstoreTS, encoding, rawKVs));
214     }
215   }
216 
217   /**
218    * Verify if all data block encoders are working properly.
219    *
220    * @param scanner Of file which was compressed.
221    * @param kvLimit Maximal count of KeyValue which will be processed.
222    * @return true if all data block encoders compressed/decompressed correctly.
223    * @throws IOException thrown if scanner is invalid
224    */
225   public boolean verifyCodecs(final KeyValueScanner scanner, final int kvLimit)
226       throws IOException {
227     KeyValue currentKv;
228 
229     scanner.seek(KeyValue.LOWESTKEY);
230     List<Iterator<KeyValue>> codecIterators =
231         new ArrayList<Iterator<KeyValue>>();
232     for(EncodedDataBlock codec : codecs) {
233       codecIterators.add(codec.getIterator(HFileBlock.headerSize(minorVersion)));
234     }
235 
236     int j = 0;
237     while ((currentKv = scanner.next()) != null && j < kvLimit) {
238       // Iterates through key/value pairs
239       ++j;
240       for (Iterator<KeyValue> it : codecIterators) {
241         KeyValue codecKv = it.next();
242         if (codecKv == null || 0 != Bytes.compareTo(
243             codecKv.getBuffer(), codecKv.getOffset(), codecKv.getLength(),
244             currentKv.getBuffer(), currentKv.getOffset(),
245             currentKv.getLength())) {
246           if (codecKv == null) {
247             LOG.error("There is a bug in codec " + it +
248                 " it returned null KeyValue,");
249           } else {
250             int prefix = 0;
251             int limitLength = 2 * Bytes.SIZEOF_INT +
252                 Math.min(codecKv.getLength(), currentKv.getLength());
253             while (prefix < limitLength &&
254                 codecKv.getBuffer()[prefix + codecKv.getOffset()] ==
255                 currentKv.getBuffer()[prefix + currentKv.getOffset()]) {
256               prefix++;
257             }
258 
259             LOG.error("There is bug in codec " + it.toString() +
260                 "\n on element " + j +
261                 "\n codecKv.getKeyLength() " + codecKv.getKeyLength() +
262                 "\n codecKv.getValueLength() " + codecKv.getValueLength() +
263                 "\n codecKv.getLength() " + codecKv.getLength() +
264                 "\n currentKv.getKeyLength() " + currentKv.getKeyLength() +
265                 "\n currentKv.getValueLength() " + currentKv.getValueLength() +
266                 "\n codecKv.getLength() " + currentKv.getLength() +
267                 "\n currentKV rowLength " + currentKv.getRowLength() +
268                 " familyName " + currentKv.getFamilyLength() +
269                 " qualifier " + currentKv.getQualifierLength() +
270                 "\n prefix " + prefix +
271                 "\n codecKv   '" + Bytes.toStringBinary(codecKv.getBuffer(),
272                     codecKv.getOffset(), prefix) + "' diff '" +
273                     Bytes.toStringBinary(codecKv.getBuffer(),
274                         codecKv.getOffset() + prefix, codecKv.getLength() -
275                         prefix) + "'" +
276                 "\n currentKv '" + Bytes.toStringBinary(
277                    currentKv.getBuffer(),
278                    currentKv.getOffset(), prefix) + "' diff '" +
279                    Bytes.toStringBinary(currentKv.getBuffer(),
280                        currentKv.getOffset() + prefix, currentKv.getLength() -
281                        prefix) + "'"
282                 );
283           }
284           return false;
285         }
286       }
287     }
288 
289     LOG.info("Verification was successful!");
290 
291     return true;
292   }
293 
294   /**
295    * Benchmark codec's speed.
296    */
297   public void benchmarkCodecs() throws IOException {
298     LOG.info("Starting a throughput benchmark for data block encoding codecs");
299     int prevTotalSize = -1;
300     for (EncodedDataBlock codec : codecs) {
301       prevTotalSize = benchmarkEncoder(prevTotalSize, codec);
302     }
303 
304     benchmarkDefaultCompression(prevTotalSize, rawKVs);
305   }
306 
307   /**
308    * Benchmark compression/decompression throughput.
309    * @param previousTotalSize Total size used for verification. Use -1 if
310    *          unknown.
311    * @param codec Tested encoder.
312    * @return Size of uncompressed data.
313    */
314   private int benchmarkEncoder(int previousTotalSize, EncodedDataBlock codec) {
315     int prevTotalSize = previousTotalSize;
316     int totalSize = 0;
317 
318     // decompression time
319     List<Long> durations = new ArrayList<Long>();
320     for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
321       totalSize = 0;
322 
323       Iterator<KeyValue> it;
324 
325       it = codec.getIterator(HFileBlock.headerSize(minorVersion));
326 
327       // count only the algorithm time, without memory allocations
328       // (expect first time)
329       final long startTime = System.nanoTime();
330       while (it.hasNext()) {
331         totalSize += it.next().getLength();
332       }
333       final long finishTime = System.nanoTime();
334       if (itTime >= benchmarkNOmit) {
335         durations.add(finishTime - startTime);
336       }
337 
338       if (prevTotalSize != -1 && prevTotalSize != totalSize) {
339         throw new IllegalStateException(String.format(
340             "Algorithm '%s' decoded data to different size", codec.toString()));
341       }
342       prevTotalSize = totalSize;
343     }
344 
345     List<Long> encodingDurations = new ArrayList<Long>();
346     for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
347       final long startTime = System.nanoTime();
348       codec.encodeData();
349       final long finishTime = System.nanoTime();
350       if (itTime >= benchmarkNOmit) {
351         encodingDurations.add(finishTime - startTime);
352       }
353     }
354 
355     System.out.println(codec.toString() + ":");
356     printBenchmarkResult(totalSize, encodingDurations, Manipulation.ENCODING);
357     printBenchmarkResult(totalSize, durations, Manipulation.DECODING);
358     System.out.println();
359 
360     return prevTotalSize;
361   }
362 
363   private void benchmarkDefaultCompression(int totalSize, byte[] rawBuffer)
364       throws IOException {
365     benchmarkAlgorithm(compressionAlgorithm,
366         compressionAlgorithmName.toUpperCase(), rawBuffer, 0, totalSize);
367   }
368 
369   /**
370    * Check decompress performance of a given algorithm and print it.
371    * @param algorithm Compression algorithm.
372    * @param name Name of algorithm.
373    * @param buffer Buffer to be compressed.
374    * @param offset Position of the beginning of the data.
375    * @param length Length of data in buffer.
376    * @throws IOException
377    */
378   public void benchmarkAlgorithm(Compression.Algorithm algorithm, String name,
379       byte[] buffer, int offset, int length) throws IOException {
380     System.out.println(name + ":");
381 
382     // compress it
383     List<Long> compressDurations = new ArrayList<Long>();
384     ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
385     CompressionOutputStream compressingStream =
386         algorithm.createPlainCompressionStream(compressedStream, compressor);
387     try {
388       for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
389         final long startTime = System.nanoTime();
390         compressingStream.resetState();
391         compressedStream.reset();
392         compressingStream.write(buffer, offset, length);
393         compressingStream.flush();
394         compressedStream.toByteArray();
395 
396         final long finishTime = System.nanoTime();
397 
398         // add time record
399         if (itTime >= benchmarkNOmit) {
400           compressDurations.add(finishTime - startTime);
401         }
402       }
403     } catch (IOException e) {
404       throw new RuntimeException(String.format(
405           "Benchmark, or encoding algorithm '%s' cause some stream problems",
406           name), e);
407     }
408     compressingStream.close();
409     printBenchmarkResult(length, compressDurations, Manipulation.COMPRESSION);
410 
411     byte[] compBuffer = compressedStream.toByteArray();
412 
413     // uncompress it several times and measure performance
414     List<Long> durations = new ArrayList<Long>();
415     for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
416       final long startTime = System.nanoTime();
417       byte[] newBuf = new byte[length + 1];
418 
419       try {
420         ByteArrayInputStream downStream = new ByteArrayInputStream(compBuffer,
421             0, compBuffer.length);
422         InputStream decompressedStream = algorithm.createDecompressionStream(
423             downStream, decompressor, 0);
424 
425         int destOffset = 0;
426         int nextChunk;
427         while ((nextChunk = decompressedStream.available()) > 0) {
428           destOffset += decompressedStream.read(newBuf, destOffset, nextChunk);
429         }
430         decompressedStream.close();
431 
432         // iterate over KeyValues
433         KeyValue kv;
434         for (int pos = 0; pos < length; pos += kv.getLength()) {
435           kv = new KeyValue(newBuf, pos);
436         }
437 
438       } catch (IOException e) {
439         throw new RuntimeException(String.format(
440             "Decoding path in '%s' algorithm cause exception ", name), e);
441       }
442 
443       final long finishTime = System.nanoTime();
444 
445       // check correctness
446       if (0 != Bytes.compareTo(buffer, 0, length, newBuf, 0, length)) {
447         int prefix = 0;
448         for(; prefix < buffer.length && prefix < newBuf.length; ++prefix) {
449           if (buffer[prefix] != newBuf[prefix]) {
450             break;
451           }
452         }
453         throw new RuntimeException(String.format(
454             "Algorithm '%s' is corrupting the data", name));
455       }
456 
457       // add time record
458       if (itTime >= benchmarkNOmit) {
459         durations.add(finishTime - startTime);
460       }
461     }
462     printBenchmarkResult(length, durations, Manipulation.DECOMPRESSION);
463     System.out.println();
464   }
465 
  /** Bytes in one megabyte, as a double for throughput arithmetic. */
  private static final double BYTES_IN_MB = 1024 * 1024.0;
  /** Nanoseconds in one second. */
  private static final double NS_IN_SEC = 1000.0 * 1000.0 * 1000.0;
  /** Multiplier converting (bytes / nanoseconds) into MB per second. */
  private static final double MB_SEC_COEF = NS_IN_SEC / BYTES_IN_MB;
469 
470   private static void printBenchmarkResult(int totalSize,
471       List<Long> durationsInNanoSec, Manipulation manipulation) {
472     final int n = durationsInNanoSec.size();
473     long meanTime = 0;
474     for (long time : durationsInNanoSec) {
475       meanTime += time;
476     }
477     meanTime /= n;
478 
479     double meanMBPerSec = totalSize * MB_SEC_COEF / meanTime;
480     double mbPerSecSTD = 0;
481     if (n > 0) {
482       for (long time : durationsInNanoSec) {
483         double mbPerSec = totalSize * MB_SEC_COEF / time;
484         double dev = mbPerSec - meanMBPerSec;
485         mbPerSecSTD += dev * dev;
486       }
487       mbPerSecSTD = Math.sqrt(mbPerSecSTD / n);
488     }
489 
490     outputTuple(manipulation + " performance", "%6.2f MB/s (+/- %.2f MB/s)",
491          meanMBPerSec, mbPerSecSTD);
492   }
493 
494   private static void outputTuple(String caption, String format,
495       Object... values) {
496     if (format.startsWith(INT_FORMAT)) {
497       format = "%s" + format.substring(INT_FORMAT.length());
498       values[0] = DELIMITED_DECIMAL_FORMAT.format(values[0]);
499     }
500 
501     StringBuilder sb = new StringBuilder();
502     sb.append("  ");
503     sb.append(caption);
504     sb.append(":");
505 
506     String v = String.format(format, values);
507     int padding = 60 - sb.length() - v.length();
508     for (int i = 0; i < padding; ++i) {
509       sb.append(' ');
510     }
511     sb.append(v);
512     System.out.println(sb);
513   }
514 
515   /**
516    * Display statistics of different compression algorithms.
517    * @throws IOException
518    */
519   public void displayStatistics() throws IOException {
520     final String comprAlgo = compressionAlgorithmName.toUpperCase();
521     long rawBytes = totalKeyLength + totalPrefixLength + totalValueLength;
522 
523     System.out.println("Raw data size:");
524     outputTuple("Raw bytes", INT_FORMAT, rawBytes);
525     outputTuplePct("Key bytes", totalKeyLength);
526     outputTuplePct("Value bytes", totalValueLength);
527     outputTuplePct("KV infrastructure", totalPrefixLength);
528     outputTuplePct("CF overhead", totalCFLength);
529     outputTuplePct("Total key redundancy", totalKeyRedundancyLength);
530 
531     int compressedSize = EncodedDataBlock.getCompressedSize(
532         compressionAlgorithm, compressor, rawKVs, 0, rawKVs.length);
533     outputTuple(comprAlgo + " only size", INT_FORMAT,
534         compressedSize);
535     outputSavings(comprAlgo + " only", compressedSize, rawBytes);
536     System.out.println();
537 
538     for (EncodedDataBlock codec : codecs) {
539       System.out.println(codec.toString());
540       long encodedBytes = codec.getSize();
541       outputTuple("Encoded bytes", INT_FORMAT, encodedBytes);
542       outputSavings("Key encoding", encodedBytes - totalValueLength,
543           rawBytes - totalValueLength);
544       outputSavings("Total encoding", encodedBytes, rawBytes);
545 
546       int encodedCompressedSize = codec.getEncodedCompressedSize(
547           compressionAlgorithm, compressor);
548       outputTuple("Encoding + " + comprAlgo + " size", INT_FORMAT,
549           encodedCompressedSize);
550       outputSavings("Encoding + " + comprAlgo, encodedCompressedSize, rawBytes);
551       outputSavings("Encoding with " + comprAlgo, encodedCompressedSize,
552           compressedSize);
553 
554       System.out.println();
555     }
556   }
557 
558   private void outputTuplePct(String caption, long size) {
559     outputTuple(caption, INT_FORMAT + " (" + PCT_FORMAT + ")",
560         size, size * 100.0 / rawKVs.length);
561   }
562 
563   private void outputSavings(String caption, long part, long whole) {
564     double pct = 100.0 * (1 - 1.0 * part / whole);
565     double times = whole * 1.0 / part;
566     outputTuple(caption + " savings", PCT_FORMAT + " (%.2f x)",
567         pct, times);
568   }
569 
570   /**
571    * Test a data block encoder on the given HFile. Output results to console.
572    * @param kvLimit The limit of KeyValue which will be analyzed.
573    * @param hfilePath an HFile path on the file system.
574    * @param compressionName Compression algorithm used for comparison.
575    * @param doBenchmark Run performance benchmarks.
576    * @param doVerify Verify correctness.
577    * @throws IOException When pathName is incorrect.
578    */
579   public static void testCodecs(Configuration conf, int kvLimit,
580       String hfilePath, String compressionName, boolean doBenchmark,
581       boolean doVerify) throws IOException {
582     // create environment
583     Path path = new Path(hfilePath);
584     CacheConfig cacheConf = new CacheConfig(conf);
585     FileSystem fs = FileSystem.get(conf);
586     StoreFile hsf = new StoreFile(fs, path, conf, cacheConf,
587         BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
588 
589     StoreFile.Reader reader = hsf.createReader();
590     reader.loadFileInfo();
591     KeyValueScanner scanner = reader.getStoreFileScanner(true, true);
592 
593     // run the utilities
594     DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName);
595     comp.minorVersion = reader.getHFileMinorVersion();
596     comp.checkStatistics(scanner, kvLimit);
597     if (doVerify) {
598       comp.verifyCodecs(scanner, kvLimit);
599     }
600     if (doBenchmark) {
601       comp.benchmarkCodecs();
602     }
603     comp.displayStatistics();
604 
605     // cleanup
606     scanner.close();
607     reader.close(cacheConf.shouldEvictOnClose());
608   }
609 
610   private static void printUsage(Options options) {
611     System.err.println("Usage:");
612     System.err.println(String.format("./hbase %s <options>",
613         DataBlockEncodingTool.class.getName()));
614     System.err.println("Options:");
615     for (Object it : options.getOptions()) {
616       Option opt = (Option) it;
617       if (opt.hasArg()) {
618         System.err.println(String.format("-%s %s: %s", opt.getOpt(),
619             opt.getArgName(), opt.getDescription()));
620       } else {
621         System.err.println(String.format("-%s: %s", opt.getOpt(),
622             opt.getDescription()));
623       }
624     }
625   }
626 
627   /**
628    * A command line interface to benchmarks. Parses command-line arguments and
629    * runs the appropriate benchmarks.
630    * @param args Should have length at least 1 and holds the file path to HFile.
631    * @throws IOException If you specified the wrong file.
632    */
633   public static void main(final String[] args) throws IOException {
634     // set up user arguments
635     Options options = new Options();
636     options.addOption(OPT_HFILE_NAME, true, "HFile to analyse (REQUIRED)");
637     options.getOption(OPT_HFILE_NAME).setArgName("FILENAME");
638     options.addOption(OPT_KV_LIMIT, true,
639         "Maximum number of KeyValues to process. A benchmark stops running " +
640         "after iterating over this many KV pairs.");
641     options.getOption(OPT_KV_LIMIT).setArgName("NUMBER");
642     options.addOption(OPT_MEASURE_THROUGHPUT, false,
643         "Measure read throughput");
644     options.addOption(OPT_OMIT_CORRECTNESS_TEST, false,
645         "Omit corectness tests.");
646     options.addOption(OPT_ENCODING_ALGORITHM, true,
647         "What kind of compression algorithm use for comparison.");
648     options.addOption(OPT_BENCHMARK_N_TIMES,
649         true, "Number of times to run each benchmark. Default value: " +
650             DEFAULT_BENCHMARK_N_TIMES);
651     options.addOption(OPT_BENCHMARK_N_OMIT, true,
652         "Number of first runs of every benchmark to exclude from "
653             + "statistics (" + DEFAULT_BENCHMARK_N_OMIT
654             + " by default, so that " + "only the last "
655             + (DEFAULT_BENCHMARK_N_TIMES - DEFAULT_BENCHMARK_N_OMIT)
656             + " times are included in statistics.)");
657 
658     // parse arguments
659     CommandLineParser parser = new PosixParser();
660     CommandLine cmd = null;
661     try {
662       cmd = parser.parse(options, args);
663     } catch (ParseException e) {
664       System.err.println("Could not parse arguments!");
665       System.exit(-1);
666       return; // avoid warning
667     }
668 
669     int kvLimit = Integer.MAX_VALUE;
670     if (cmd.hasOption(OPT_KV_LIMIT)) {
671       kvLimit = Integer.parseInt(cmd.getOptionValue(OPT_KV_LIMIT));
672     }
673 
674     // basic argument sanity checks
675     if (!cmd.hasOption(OPT_HFILE_NAME)) {
676       LOG.error("Please specify HFile name using the " + OPT_HFILE_NAME
677           + " option");
678       printUsage(options);
679       System.exit(-1);
680     }
681 
682     String pathName = cmd.getOptionValue(OPT_HFILE_NAME);
683     String compressionName = DEFAULT_COMPRESSION.getName();
684     if (cmd.hasOption(OPT_ENCODING_ALGORITHM)) {
685       compressionName =
686           cmd.getOptionValue(OPT_ENCODING_ALGORITHM).toLowerCase();
687     }
688     boolean doBenchmark = cmd.hasOption(OPT_MEASURE_THROUGHPUT);
689     boolean doVerify = !cmd.hasOption(OPT_OMIT_CORRECTNESS_TEST);
690 
691     if (cmd.hasOption(OPT_BENCHMARK_N_TIMES)) {
692       benchmarkNTimes = Integer.valueOf(cmd.getOptionValue(
693           OPT_BENCHMARK_N_TIMES));
694     }
695     if (cmd.hasOption(OPT_BENCHMARK_N_OMIT)) {
696       benchmarkNOmit =
697           Integer.valueOf(cmd.getOptionValue(OPT_BENCHMARK_N_OMIT));
698     }
699     if (benchmarkNTimes < benchmarkNOmit) {
700       LOG.error("The number of times to run each benchmark ("
701           + benchmarkNTimes
702           + ") must be greater than the number of benchmark runs to exclude "
703           + "from statistics (" + benchmarkNOmit + ")");
704       System.exit(1);
705     }
706     LOG.info("Running benchmark " + benchmarkNTimes + " times. " +
707         "Excluding the first " + benchmarkNOmit + " times from statistics.");
708 
709     final Configuration conf = HBaseConfiguration.create();
710     try {
711       testCodecs(conf, kvLimit, pathName, compressionName, doBenchmark,
712           doVerify);
713     } finally {
714       (new CacheConfig(conf)).getBlockCache().shutdown();
715     }
716   }
717 
718 }