View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.util.Random;
23  import java.util.StringTokenizer;
24  
25  import junit.framework.TestCase;
26  
27  import org.apache.commons.cli.CommandLine;
28  import org.apache.commons.cli.CommandLineParser;
29  import org.apache.commons.cli.GnuParser;
30  import org.apache.commons.cli.HelpFormatter;
31  import org.apache.commons.cli.Option;
32  import org.apache.commons.cli.OptionBuilder;
33  import org.apache.commons.cli.Options;
34  import org.apache.commons.cli.ParseException;
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FSDataInputStream;
39  import org.apache.hadoop.fs.FSDataOutputStream;
40  import org.apache.hadoop.fs.FileSystem;
41  import org.apache.hadoop.fs.Path;
42  import org.apache.hadoop.fs.RawLocalFileSystem;
43  import org.apache.hadoop.hbase.HBaseTestingUtility;
44  import org.apache.hadoop.hbase.MediumTests;
45  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
46  import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
47  import org.apache.hadoop.io.BytesWritable;
48  import org.junit.experimental.categories.Category;
49  
50  /**
51   * test the performance for seek.
52   * <p>
53   * Copied from
54   * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
55   * Remove after tfile is committed and use the tfile version of this class
56   * instead.</p>
57   */
58  @Category(MediumTests.class)
59  public class TestHFileSeek extends TestCase {
60    private static final boolean USE_PREAD = true;
61    private MyOptions options;
62    private Configuration conf;
63    private Path path;
64    private FileSystem fs;
65    private NanoTimer timer;
66    private Random rng;
67    private RandomDistribution.DiscreteRNG keyLenGen;
68    private KVGenerator kvGen;
69  
70    private static final Log LOG = LogFactory.getLog(TestHFileSeek.class);
71  
72    private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
73  
74    @Override
75    public void setUp() throws IOException {
76      if (options == null) {
77        options = new MyOptions(new String[0]);
78      }
79  
80      conf = new Configuration();
81      
82      if (options.useRawFs) {
83        conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
84      }
85      
86      conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
87      conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
88      path = new Path(new Path(options.rootDir), options.file);
89      fs = path.getFileSystem(conf);
90      timer = new NanoTimer(false);
91      rng = new Random(options.seed);
92      keyLenGen =
93          new RandomDistribution.Zipf(new Random(rng.nextLong()),
94              options.minKeyLen, options.maxKeyLen, 1.2);
95      RandomDistribution.DiscreteRNG valLenGen =
96          new RandomDistribution.Flat(new Random(rng.nextLong()),
97              options.minValLength, options.maxValLength);
98      RandomDistribution.DiscreteRNG wordLenGen =
99          new RandomDistribution.Flat(new Random(rng.nextLong()),
100             options.minWordLen, options.maxWordLen);
101     kvGen =
102         new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
103             options.dictSize);
104   }
105 
106   @Override
107   public void tearDown() {
108     try {
109       fs.close();
110     }
111     catch (Exception e) {
112       // Nothing
113     }
114   }
115 
116   private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
117     throws IOException {
118     if (fs.exists(name)) {
119       fs.delete(name, true);
120     }
121     FSDataOutputStream fout = fs.create(name);
122     return fout;
123   }
124 
125   private void createTFile() throws IOException {
126     long totalBytes = 0;
127     FSDataOutputStream fout = createFSOutput(path, fs);
128     try {
129       Writer writer = HFile.getWriterFactoryNoCache(conf)
130           .withOutputStream(fout)
131           .withBlockSize(options.minBlockSize)
132           .withCompression(options.compress)
133           .create();
134       try {
135         BytesWritable key = new BytesWritable();
136         BytesWritable val = new BytesWritable();
137         timer.start();
138         for (long i = 0; true; ++i) {
139           if (i % 1000 == 0) { // test the size for every 1000 rows.
140             if (fs.getFileStatus(path).getLen() >= options.fileSize) {
141               break;
142             }
143           }
144           kvGen.next(key, val, false);
145           byte [] k = new byte [key.getLength()];
146           System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
147           byte [] v = new byte [val.getLength()];
148           System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
149           writer.append(k, v);
150           totalBytes += key.getLength();
151           totalBytes += val.getLength();
152         }
153         timer.stop();
154       }
155       finally {
156         writer.close();
157       }
158     }
159     finally {
160       fout.close();
161     }
162     double duration = (double)timer.read()/1000; // in us.
163     long fsize = fs.getFileStatus(path).getLen();
164 
165     System.out.printf(
166         "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
167         timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
168             / duration);
169     System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
170         timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
171   }
172 
173   public void seekTFile() throws IOException {
174     int miss = 0;
175     long totalBytes = 0;
176     FSDataInputStream fsdis = fs.open(path);
177     Reader reader = HFile.createReaderFromStream(path, fsdis,
178         fs.getFileStatus(path).getLen(), new CacheConfig(conf));
179     reader.loadFileInfo();
180     KeySampler kSampler =
181         new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
182             keyLenGen);
183     HFileScanner scanner = reader.getScanner(false, USE_PREAD);
184     BytesWritable key = new BytesWritable();
185     timer.reset();
186     timer.start();
187     for (int i = 0; i < options.seekCount; ++i) {
188       kSampler.next(key);
189       byte [] k = new byte [key.getLength()];
190       System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
191       if (scanner.seekTo(k) >= 0) {
192         ByteBuffer bbkey = scanner.getKey();
193         ByteBuffer bbval = scanner.getValue();
194         totalBytes += bbkey.limit();
195         totalBytes += bbval.limit();
196       }
197       else {
198         ++miss;
199       }
200     }
201     timer.stop();
202     System.out.printf(
203         "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
204         timer.toString(), NanoTimer.nanoTimeToString(timer.read()
205             / options.seekCount), options.seekCount - miss, miss,
206         (double) totalBytes / 1024 / (options.seekCount - miss));
207 
208   }
209 
210   public void testSeeks() throws IOException {
211     if (options.doCreate()) {
212       createTFile();
213     }
214 
215     if (options.doRead()) {
216       seekTFile();
217     }
218 
219     if (options.doCreate()) {
220       fs.delete(path, true);
221     }
222   }
223 
224   private static class IntegerRange {
225     private final int from, to;
226 
227     public IntegerRange(int from, int to) {
228       this.from = from;
229       this.to = to;
230     }
231 
232     public static IntegerRange parse(String s) throws ParseException {
233       StringTokenizer st = new StringTokenizer(s, " \t,");
234       if (st.countTokens() != 2) {
235         throw new ParseException("Bad integer specification: " + s);
236       }
237       int from = Integer.parseInt(st.nextToken());
238       int to = Integer.parseInt(st.nextToken());
239       return new IntegerRange(from, to);
240     }
241 
242     public int from() {
243       return from;
244     }
245 
246     public int to() {
247       return to;
248     }
249   }
250 
251   private static class MyOptions {
252     // hard coded constants
253     int dictSize = 1000;
254     int minWordLen = 5;
255     int maxWordLen = 20;
256 
257     private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
258     String rootDir =
259       TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
260     String file = "TestTFileSeek";
261     // String compress = "lzo"; DISABLED
262     String compress = "none";
263     int minKeyLen = 10;
264     int maxKeyLen = 50;
265     int minValLength = 1024;
266     int maxValLength = 2 * 1024;
267     int minBlockSize = 1 * 1024 * 1024;
268     int fsOutputBufferSize = 1;
269     int fsInputBufferSize = 0;
270     // Default writing 10MB.
271     long fileSize = 10 * 1024 * 1024;
272     long seekCount = 1000;
273     long trialCount = 1;
274     long seed;
275     boolean useRawFs = false;
276 
277     static final int OP_CREATE = 1;
278     static final int OP_READ = 2;
279     int op = OP_CREATE | OP_READ;
280 
281     boolean proceed = false;
282 
283     public MyOptions(String[] args) {
284       seed = System.nanoTime();
285 
286       try {
287         Options opts = buildOptions();
288         CommandLineParser parser = new GnuParser();
289         CommandLine line = parser.parse(opts, args, true);
290         processOptions(line, opts);
291         validateOptions();
292       }
293       catch (ParseException e) {
294         System.out.println(e.getMessage());
295         System.out.println("Try \"--help\" option for details.");
296         setStopProceed();
297       }
298     }
299 
300     public boolean proceed() {
301       return proceed;
302     }
303 
304     private Options buildOptions() {
305       Option compress =
306           OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
307               .hasArg().withDescription("compression scheme").create('c');
308 
309       Option fileSize =
310           OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
311               .hasArg().withDescription("target size of the file (in MB).")
312               .create('s');
313 
314       Option fsInputBufferSz =
315           OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
316               .hasArg().withDescription(
317                   "size of the file system input buffer (in bytes).").create(
318                   'i');
319 
320       Option fsOutputBufferSize =
321           OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
322               .hasArg().withDescription(
323                   "size of the file system output buffer (in bytes).").create(
324                   'o');
325 
326       Option keyLen =
327           OptionBuilder
328               .withLongOpt("key-length")
329               .withArgName("min,max")
330               .hasArg()
331               .withDescription(
332                   "the length range of the key (in bytes)")
333               .create('k');
334 
335       Option valueLen =
336           OptionBuilder
337               .withLongOpt("value-length")
338               .withArgName("min,max")
339               .hasArg()
340               .withDescription(
341                   "the length range of the value (in bytes)")
342               .create('v');
343 
344       Option blockSz =
345           OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
346               .withDescription("minimum block size (in KB)").create('b');
347 
348       Option operation =
349           OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
350               .withDescription(
351                   "action: seek-only, create-only, seek-after-create").create(
352                   'x');
353 
354       Option rootDir =
355           OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
356               .withDescription(
357                   "specify root directory where files will be created.")
358               .create('r');
359 
360       Option file =
361           OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
362               .withDescription("specify the file name to be created or read.")
363               .create('f');
364 
365       Option seekCount =
366           OptionBuilder
367               .withLongOpt("seek")
368               .withArgName("count")
369               .hasArg()
370               .withDescription(
371                   "specify how many seek operations we perform (requires -x r or -x rw.")
372               .create('n');
373       
374       Option trialCount =
375           OptionBuilder 
376               .withLongOpt("trials")
377               .withArgName("n")
378               .hasArg()
379               .withDescription(
380                   "specify how many times to run the whole benchmark")
381               .create('t');
382 
383       Option useRawFs =
384           OptionBuilder
385             .withLongOpt("rawfs")
386             .withDescription("use raw instead of checksummed file system")
387             .create();
388       
389       Option help =
390           OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
391               "show this screen").create("h");
392 
393       return new Options().addOption(compress).addOption(fileSize).addOption(
394           fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
395           .addOption(blockSz).addOption(rootDir).addOption(valueLen)
396           .addOption(operation).addOption(seekCount).addOption(file)
397           .addOption(trialCount).addOption(useRawFs).addOption(help);
398 
399     }
400 
401     private void processOptions(CommandLine line, Options opts)
402         throws ParseException {
403       // --help -h and --version -V must be processed first.
404       if (line.hasOption('h')) {
405         HelpFormatter formatter = new HelpFormatter();
406         System.out.println("TFile and SeqFile benchmark.");
407         System.out.println();
408         formatter.printHelp(100,
409             "java ... TestTFileSeqFileComparison [options]",
410             "\nSupported options:", opts, "");
411         return;
412       }
413 
414       if (line.hasOption('c')) {
415         compress = line.getOptionValue('c');
416       }
417 
418       if (line.hasOption('d')) {
419         dictSize = Integer.parseInt(line.getOptionValue('d'));
420       }
421 
422       if (line.hasOption('s')) {
423         fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
424       }
425 
426       if (line.hasOption('i')) {
427         fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
428       }
429 
430       if (line.hasOption('o')) {
431         fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
432       }
433 
434       if (line.hasOption('n')) {
435         seekCount = Integer.parseInt(line.getOptionValue('n'));
436       }
437       
438       if (line.hasOption('t')) {
439         trialCount = Integer.parseInt(line.getOptionValue('t'));
440       }
441 
442       if (line.hasOption('k')) {
443         IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
444         minKeyLen = ir.from();
445         maxKeyLen = ir.to();
446       }
447 
448       if (line.hasOption('v')) {
449         IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
450         minValLength = ir.from();
451         maxValLength = ir.to();
452       }
453 
454       if (line.hasOption('b')) {
455         minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
456       }
457 
458       if (line.hasOption('r')) {
459         rootDir = line.getOptionValue('r');
460       }
461 
462       if (line.hasOption('f')) {
463         file = line.getOptionValue('f');
464       }
465 
466       if (line.hasOption('S')) {
467         seed = Long.parseLong(line.getOptionValue('S'));
468       }
469 
470       if (line.hasOption('x')) {
471         String strOp = line.getOptionValue('x');
472         if (strOp.equals("r")) {
473           op = OP_READ;
474         }
475         else if (strOp.equals("w")) {
476           op = OP_CREATE;
477         }
478         else if (strOp.equals("rw")) {
479           op = OP_CREATE | OP_READ;
480         }
481         else {
482           throw new ParseException("Unknown action specifier: " + strOp);
483         }
484       }
485       
486       useRawFs = line.hasOption("rawfs");
487 
488       proceed = true;
489     }
490 
491     private void validateOptions() throws ParseException {
492       if (!compress.equals("none") && !compress.equals("lzo")
493           && !compress.equals("gz") && !compress.equals("snappy")) {
494         throw new ParseException("Unknown compression scheme: " + compress);
495       }
496 
497       if (minKeyLen >= maxKeyLen) {
498         throw new ParseException(
499             "Max key length must be greater than min key length.");
500       }
501 
502       if (minValLength >= maxValLength) {
503         throw new ParseException(
504             "Max value length must be greater than min value length.");
505       }
506 
507       if (minWordLen >= maxWordLen) {
508         throw new ParseException(
509             "Max word length must be greater than min word length.");
510       }
511       return;
512     }
513 
514     private void setStopProceed() {
515       proceed = false;
516     }
517 
518     public boolean doCreate() {
519       return (op & OP_CREATE) != 0;
520     }
521 
522     public boolean doRead() {
523       return (op & OP_READ) != 0;
524     }
525   }
526 
527   public static void main(String[] argv) throws IOException {
528     TestHFileSeek testCase = new TestHFileSeek();
529     MyOptions options = new MyOptions(argv);
530 
531     if (options.proceed == false) {
532       return;
533     }
534 
535     testCase.options = options;
536     for (int i = 0; i < options.trialCount; i++) {
537       LOG.info("Beginning trial " + (i+1));
538       testCase.setUp();
539       testCase.testSeeks();
540       testCase.tearDown();
541     }
542   }
543 
544 }
545