1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.util.Random;
23  import java.util.StringTokenizer;
24  
25  import junit.framework.TestCase;
26  
27  import org.apache.commons.cli.CommandLine;
28  import org.apache.commons.cli.CommandLineParser;
29  import org.apache.commons.cli.GnuParser;
30  import org.apache.commons.cli.HelpFormatter;
31  import org.apache.commons.cli.Option;
32  import org.apache.commons.cli.OptionBuilder;
33  import org.apache.commons.cli.Options;
34  import org.apache.commons.cli.ParseException;
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FSDataInputStream;
39  import org.apache.hadoop.fs.FSDataOutputStream;
40  import org.apache.hadoop.fs.FileSystem;
41  import org.apache.hadoop.fs.Path;
42  import org.apache.hadoop.fs.RawLocalFileSystem;
43  import org.apache.hadoop.hbase.HBaseTestingUtility;
44  import org.apache.hadoop.hbase.MediumTests;
45  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
46  import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
47  import org.apache.hadoop.io.BytesWritable;
48  import org.junit.experimental.categories.Category;
49  
50  /**
 * Tests the performance of HFile seeks.
52   * <p>
53   * Copied from
54   * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
55   * Remove after tfile is committed and use the tfile version of this class
56   * instead.</p>
57   */
58  @Category(MediumTests.class)
59  public class TestHFileSeek extends TestCase {
  /** Logger for this class; {@link #main(String[])} uses it to announce each trial. */
  private static final Log LOG = LogFactory.getLog(TestHFileSeek.class);
  /** Second argument handed to {@code reader.getScanner(...)} in {@link #seekTFile()}. */
  private static final boolean USE_PREAD = true;
  /** Benchmark settings; {@link #setUp()} builds a default set when this is still null. */
  private MyOptions options;
  /** Configuration created in {@link #setUp()} and used to resolve {@link #fs}. */
  private Configuration conf;
  /** Location of the benchmark file: {@code options.file} under {@code options.rootDir}. */
  private Path path;
  /** File system that owns {@link #path}; closed in {@link #tearDown()}. */
  private FileSystem fs;
  /** Timer used to measure the write phase and the seek phase. */
  private NanoTimer timer;
  /** Random source seeded from {@code options.seed} in {@link #setUp()}. */
  private Random rng;
  /** Key-length generator; also handed to the key sampler in {@link #seekTFile()}. */
  private RandomDistribution.DiscreteRNG keyLenGen;
  /** Generates the key/value pairs written by {@link #createTFile()}. */
  private KVGenerator kvGen;
70  
71    @Override
72    public void setUp() throws IOException {
73      if (options == null) {
74        options = new MyOptions(new String[0]);
75      }
76  
77      conf = new Configuration();
78      
79      if (options.useRawFs) {
80        conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
81      }
82      
83      conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
84      conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
85      path = new Path(new Path(options.rootDir), options.file);
86      fs = path.getFileSystem(conf);
87      timer = new NanoTimer(false);
88      rng = new Random(options.seed);
89      keyLenGen =
90          new RandomDistribution.Zipf(new Random(rng.nextLong()),
91              options.minKeyLen, options.maxKeyLen, 1.2);
92      RandomDistribution.DiscreteRNG valLenGen =
93          new RandomDistribution.Flat(new Random(rng.nextLong()),
94              options.minValLength, options.maxValLength);
95      RandomDistribution.DiscreteRNG wordLenGen =
96          new RandomDistribution.Flat(new Random(rng.nextLong()),
97              options.minWordLen, options.maxWordLen);
98      kvGen =
99          new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
100             options.dictSize);
101   }
102 
103   @Override
104   public void tearDown() {
105     try {
106       fs.close();
107     }
108     catch (Exception e) {
109       // Nothing
110     }
111   }
112 
113   private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
114     throws IOException {
115     if (fs.exists(name)) {
116       fs.delete(name, true);
117     }
118     FSDataOutputStream fout = fs.create(name);
119     return fout;
120   }
121 
122   private void createTFile() throws IOException {
123     long totalBytes = 0;
124     FSDataOutputStream fout = createFSOutput(path, fs);
125     try {
126       Writer writer = HFile.getWriterFactoryNoCache(conf)
127           .withOutputStream(fout)
128           .withBlockSize(options.minBlockSize)
129           .withCompression(options.compress)
130           .create();
131       try {
132         BytesWritable key = new BytesWritable();
133         BytesWritable val = new BytesWritable();
134         timer.start();
135         for (long i = 0; true; ++i) {
136           if (i % 1000 == 0) { // test the size for every 1000 rows.
137             if (fs.getFileStatus(path).getLen() >= options.fileSize) {
138               break;
139             }
140           }
141           kvGen.next(key, val, false);
142           byte [] k = new byte [key.getLength()];
143           System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
144           byte [] v = new byte [val.getLength()];
145           System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
146           writer.append(k, v);
147           totalBytes += key.getLength();
148           totalBytes += val.getLength();
149         }
150         timer.stop();
151       }
152       finally {
153         writer.close();
154       }
155     }
156     finally {
157       fout.close();
158     }
159     double duration = (double)timer.read()/1000; // in us.
160     long fsize = fs.getFileStatus(path).getLen();
161 
162     System.out.printf(
163         "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
164         timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
165             / duration);
166     System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
167         timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
168   }
169 
170   public void seekTFile() throws IOException {
171     int miss = 0;
172     long totalBytes = 0;
173     FSDataInputStream fsdis = fs.open(path);
174     Reader reader = HFile.createReaderFromStream(path, fsdis,
175         fs.getFileStatus(path).getLen(), new CacheConfig(conf));
176     reader.loadFileInfo();
177     KeySampler kSampler =
178         new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
179             keyLenGen);
180     HFileScanner scanner = reader.getScanner(false, USE_PREAD);
181     BytesWritable key = new BytesWritable();
182     timer.reset();
183     timer.start();
184     for (int i = 0; i < options.seekCount; ++i) {
185       kSampler.next(key);
186       byte [] k = new byte [key.getLength()];
187       System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
188       if (scanner.seekTo(k) >= 0) {
189         ByteBuffer bbkey = scanner.getKey();
190         ByteBuffer bbval = scanner.getValue();
191         totalBytes += bbkey.limit();
192         totalBytes += bbval.limit();
193       }
194       else {
195         ++miss;
196       }
197     }
198     timer.stop();
199     System.out.printf(
200         "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
201         timer.toString(), NanoTimer.nanoTimeToString(timer.read()
202             / options.seekCount), options.seekCount - miss, miss,
203         (double) totalBytes / 1024 / (options.seekCount - miss));
204 
205   }
206 
207   public void testSeeks() throws IOException {
208     if (options.doCreate()) {
209       createTFile();
210     }
211 
212     if (options.doRead()) {
213       seekTFile();
214     }
215 
216     if (options.doCreate()) {
217       fs.delete(path, true);
218     }
219   }
220 
221   private static class IntegerRange {
222     private final int from, to;
223 
224     public IntegerRange(int from, int to) {
225       this.from = from;
226       this.to = to;
227     }
228 
229     public static IntegerRange parse(String s) throws ParseException {
230       StringTokenizer st = new StringTokenizer(s, " \t,");
231       if (st.countTokens() != 2) {
232         throw new ParseException("Bad integer specification: " + s);
233       }
234       int from = Integer.parseInt(st.nextToken());
235       int to = Integer.parseInt(st.nextToken());
236       return new IntegerRange(from, to);
237     }
238 
239     public int from() {
240       return from;
241     }
242 
243     public int to() {
244       return to;
245     }
246   }
247 
248   private static class MyOptions {
    // hard coded constants
    // Dictionary size handed to the KVGenerator in setUp().
    int dictSize = 1000;
    // Bounds of the word-length generator built in setUp().
    int minWordLen = 5;
    int maxWordLen = 20;

    // Only used here to derive the default root directory.
    private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
    // Directory under which the benchmark file is created (-r).
    String rootDir =
      TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
    // Name of the benchmark file inside rootDir (-f).
    String file = "TestTFileSeek";
    // String compress = "lzo"; DISABLED
    // Compression scheme passed to the HFile writer (-c).
    String compress = "none";
    // Bounds of the key-length generator (-k min,max).
    int minKeyLen = 10;
    int maxKeyLen = 50;
    // Bounds of the value-length generator (-v min,max).
    int minValLength = 1024;
    int maxValLength = 2 * 1024;
    // Block size passed to the HFile writer, in bytes (-b takes KB).
    int minBlockSize = 1 * 1024 * 1024;
    // Written to "tfile.fs.output.buffer.size" / "tfile.fs.input.buffer.size" in setUp().
    int fsOutputBufferSize = 1;
    int fsInputBufferSize = 0;
    // Default writing 10MB.
    long fileSize = 10 * 1024 * 1024;
    // Number of seeks performed by seekTFile() (-n).
    long seekCount = 1000;
    // Number of times main() repeats the benchmark (-t).
    long trialCount = 1;
    // Seed for the benchmark's Random; the constructor sets it to System.nanoTime().
    long seed;
    // When true, setUp() maps "fs.file.impl" to RawLocalFileSystem (--rawfs).
    boolean useRawFs = false;

    // Bit flags combined in op to select the phases to run (-x r|w|rw).
    static final int OP_CREATE = 1;
    static final int OP_READ = 2;
    int op = OP_CREATE | OP_READ;

    // Set to true at the end of processOptions(); stays false after --help or a parse error.
    boolean proceed = false;
279 
280     public MyOptions(String[] args) {
281       seed = System.nanoTime();
282 
283       try {
284         Options opts = buildOptions();
285         CommandLineParser parser = new GnuParser();
286         CommandLine line = parser.parse(opts, args, true);
287         processOptions(line, opts);
288         validateOptions();
289       }
290       catch (ParseException e) {
291         System.out.println(e.getMessage());
292         System.out.println("Try \"--help\" option for details.");
293         setStopProceed();
294       }
295     }
296 
297     public boolean proceed() {
298       return proceed;
299     }
300 
301     private Options buildOptions() {
302       Option compress =
303           OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
304               .hasArg().withDescription("compression scheme").create('c');
305 
306       Option fileSize =
307           OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
308               .hasArg().withDescription("target size of the file (in MB).")
309               .create('s');
310 
311       Option fsInputBufferSz =
312           OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
313               .hasArg().withDescription(
314                   "size of the file system input buffer (in bytes).").create(
315                   'i');
316 
317       Option fsOutputBufferSize =
318           OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
319               .hasArg().withDescription(
320                   "size of the file system output buffer (in bytes).").create(
321                   'o');
322 
323       Option keyLen =
324           OptionBuilder
325               .withLongOpt("key-length")
326               .withArgName("min,max")
327               .hasArg()
328               .withDescription(
329                   "the length range of the key (in bytes)")
330               .create('k');
331 
332       Option valueLen =
333           OptionBuilder
334               .withLongOpt("value-length")
335               .withArgName("min,max")
336               .hasArg()
337               .withDescription(
338                   "the length range of the value (in bytes)")
339               .create('v');
340 
341       Option blockSz =
342           OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
343               .withDescription("minimum block size (in KB)").create('b');
344 
345       Option operation =
346           OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
347               .withDescription(
348                   "action: seek-only, create-only, seek-after-create").create(
349                   'x');
350 
351       Option rootDir =
352           OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
353               .withDescription(
354                   "specify root directory where files will be created.")
355               .create('r');
356 
357       Option file =
358           OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
359               .withDescription("specify the file name to be created or read.")
360               .create('f');
361 
362       Option seekCount =
363           OptionBuilder
364               .withLongOpt("seek")
365               .withArgName("count")
366               .hasArg()
367               .withDescription(
368                   "specify how many seek operations we perform (requires -x r or -x rw.")
369               .create('n');
370       
371       Option trialCount =
372           OptionBuilder 
373               .withLongOpt("trials")
374               .withArgName("n")
375               .hasArg()
376               .withDescription(
377                   "specify how many times to run the whole benchmark")
378               .create('t');
379 
380       Option useRawFs =
381           OptionBuilder
382             .withLongOpt("rawfs")
383             .withDescription("use raw instead of checksummed file system")
384             .create();
385       
386       Option help =
387           OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
388               "show this screen").create("h");
389 
390       return new Options().addOption(compress).addOption(fileSize).addOption(
391           fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
392           .addOption(blockSz).addOption(rootDir).addOption(valueLen)
393           .addOption(operation).addOption(seekCount).addOption(file)
394           .addOption(trialCount).addOption(useRawFs).addOption(help);
395 
396     }
397 
398     private void processOptions(CommandLine line, Options opts)
399         throws ParseException {
400       // --help -h and --version -V must be processed first.
401       if (line.hasOption('h')) {
402         HelpFormatter formatter = new HelpFormatter();
403         System.out.println("TFile and SeqFile benchmark.");
404         System.out.println();
405         formatter.printHelp(100,
406             "java ... TestTFileSeqFileComparison [options]",
407             "\nSupported options:", opts, "");
408         return;
409       }
410 
411       if (line.hasOption('c')) {
412         compress = line.getOptionValue('c');
413       }
414 
415       if (line.hasOption('d')) {
416         dictSize = Integer.parseInt(line.getOptionValue('d'));
417       }
418 
419       if (line.hasOption('s')) {
420         fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
421       }
422 
423       if (line.hasOption('i')) {
424         fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
425       }
426 
427       if (line.hasOption('o')) {
428         fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
429       }
430 
431       if (line.hasOption('n')) {
432         seekCount = Integer.parseInt(line.getOptionValue('n'));
433       }
434       
435       if (line.hasOption('t')) {
436         trialCount = Integer.parseInt(line.getOptionValue('t'));
437       }
438 
439       if (line.hasOption('k')) {
440         IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
441         minKeyLen = ir.from();
442         maxKeyLen = ir.to();
443       }
444 
445       if (line.hasOption('v')) {
446         IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
447         minValLength = ir.from();
448         maxValLength = ir.to();
449       }
450 
451       if (line.hasOption('b')) {
452         minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
453       }
454 
455       if (line.hasOption('r')) {
456         rootDir = line.getOptionValue('r');
457       }
458 
459       if (line.hasOption('f')) {
460         file = line.getOptionValue('f');
461       }
462 
463       if (line.hasOption('S')) {
464         seed = Long.parseLong(line.getOptionValue('S'));
465       }
466 
467       if (line.hasOption('x')) {
468         String strOp = line.getOptionValue('x');
469         if (strOp.equals("r")) {
470           op = OP_READ;
471         }
472         else if (strOp.equals("w")) {
473           op = OP_CREATE;
474         }
475         else if (strOp.equals("rw")) {
476           op = OP_CREATE | OP_READ;
477         }
478         else {
479           throw new ParseException("Unknown action specifier: " + strOp);
480         }
481       }
482       
483       useRawFs = line.hasOption("rawfs");
484 
485       proceed = true;
486     }
487 
488     private void validateOptions() throws ParseException {
489       if (!compress.equals("none") && !compress.equals("lzo")
490           && !compress.equals("gz") && !compress.equals("snappy")) {
491         throw new ParseException("Unknown compression scheme: " + compress);
492       }
493 
494       if (minKeyLen >= maxKeyLen) {
495         throw new ParseException(
496             "Max key length must be greater than min key length.");
497       }
498 
499       if (minValLength >= maxValLength) {
500         throw new ParseException(
501             "Max value length must be greater than min value length.");
502       }
503 
504       if (minWordLen >= maxWordLen) {
505         throw new ParseException(
506             "Max word length must be greater than min word length.");
507       }
508       return;
509     }
510 
511     private void setStopProceed() {
512       proceed = false;
513     }
514 
515     public boolean doCreate() {
516       return (op & OP_CREATE) != 0;
517     }
518 
519     public boolean doRead() {
520       return (op & OP_READ) != 0;
521     }
522   }
523 
524   public static void main(String[] argv) throws IOException {
525     TestHFileSeek testCase = new TestHFileSeek();
526     MyOptions options = new MyOptions(argv);
527 
528     if (options.proceed == false) {
529       return;
530     }
531 
532     testCase.options = options;
533     for (int i = 0; i < options.trialCount; i++) {
534       LOG.info("Beginning trial " + (i+1));
535       testCase.setUp();
536       testCase.testSeeks();
537       testCase.tearDown();
538     }
539   }
540 
  // Resource-checker rule from the HBase test utilities.
  // NOTE(review): this class extends junit.framework.TestCase; confirm that
  // the runner in use actually honors @Rule fields for it.
  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
544 }
545