1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.util.Random;
23  import java.util.StringTokenizer;
24  
25  import junit.framework.TestCase;
26  
27  import org.apache.commons.cli.CommandLine;
28  import org.apache.commons.cli.CommandLineParser;
29  import org.apache.commons.cli.GnuParser;
30  import org.apache.commons.cli.HelpFormatter;
31  import org.apache.commons.cli.Option;
32  import org.apache.commons.cli.OptionBuilder;
33  import org.apache.commons.cli.Options;
34  import org.apache.commons.cli.ParseException;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FSDataInputStream;
37  import org.apache.hadoop.fs.FSDataOutputStream;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.fs.RawLocalFileSystem;
41  import org.apache.hadoop.hbase.HBaseTestingUtility;
42  import org.apache.hadoop.hbase.MediumTests;
43  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
44  import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
45  import org.apache.hadoop.io.BytesWritable;
46  import org.junit.experimental.categories.Category;
47  import org.mortbay.log.Log;
48  
49  /**
50   * test the performance for seek.
51   * <p>
52   * Copied from
53   * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
54   * Remove after tfile is committed and use the tfile version of this class
55   * instead.</p>
56   */
57  @Category(MediumTests.class)
58  public class TestHFileSeek extends TestCase {
  // Passed as the second argument of reader.getScanner() in seekTFile().
  private static final boolean USE_PREAD = true;
  // Benchmark settings; setUp() falls back to defaults when this is still
  // null, and main() injects the command-line values before calling setUp().
  private MyOptions options;
  // Hadoop configuration rebuilt by every setUp() call.
  private Configuration conf;
  // Location of the benchmark file: options.rootDir / options.file.
  private Path path;
  // File system resolved from path in setUp() and closed in tearDown().
  private FileSystem fs;
  // Measures the write loop in createTFile() and the seek loop in seekTFile().
  private NanoTimer timer;
  // Master random source, seeded from options.seed in setUp().
  private Random rng;
  // Key-length distribution; shared by the key/value generator and the
  // KeySampler built in seekTFile().
  private RandomDistribution.DiscreteRNG keyLenGen;
  // Produces the key/value pairs appended in createTFile().
  private KVGenerator kvGen;
68  
69    @Override
70    public void setUp() throws IOException {
71      if (options == null) {
72        options = new MyOptions(new String[0]);
73      }
74  
75      conf = new Configuration();
76      
77      if (options.useRawFs) {
78        conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
79      }
80      
81      conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
82      conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
83      path = new Path(new Path(options.rootDir), options.file);
84      fs = path.getFileSystem(conf);
85      timer = new NanoTimer(false);
86      rng = new Random(options.seed);
87      keyLenGen =
88          new RandomDistribution.Zipf(new Random(rng.nextLong()),
89              options.minKeyLen, options.maxKeyLen, 1.2);
90      RandomDistribution.DiscreteRNG valLenGen =
91          new RandomDistribution.Flat(new Random(rng.nextLong()),
92              options.minValLength, options.maxValLength);
93      RandomDistribution.DiscreteRNG wordLenGen =
94          new RandomDistribution.Flat(new Random(rng.nextLong()),
95              options.minWordLen, options.maxWordLen);
96      kvGen =
97          new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
98              options.dictSize);
99    }
100 
101   @Override
102   public void tearDown() {
103     try {
104       fs.close();
105     }
106     catch (Exception e) {
107       // Nothing
108     }
109   }
110 
111   private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
112     throws IOException {
113     if (fs.exists(name)) {
114       fs.delete(name, true);
115     }
116     FSDataOutputStream fout = fs.create(name);
117     return fout;
118   }
119 
120   private void createTFile() throws IOException {
121     long totalBytes = 0;
122     FSDataOutputStream fout = createFSOutput(path, fs);
123     try {
124       Writer writer = HFile.getWriterFactoryNoCache(conf)
125           .withOutputStream(fout)
126           .withBlockSize(options.minBlockSize)
127           .withCompression(options.compress)
128           .create();
129       try {
130         BytesWritable key = new BytesWritable();
131         BytesWritable val = new BytesWritable();
132         timer.start();
133         for (long i = 0; true; ++i) {
134           if (i % 1000 == 0) { // test the size for every 1000 rows.
135             if (fs.getFileStatus(path).getLen() >= options.fileSize) {
136               break;
137             }
138           }
139           kvGen.next(key, val, false);
140           byte [] k = new byte [key.getLength()];
141           System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
142           byte [] v = new byte [val.getLength()];
143           System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
144           writer.append(k, v);
145           totalBytes += key.getLength();
146           totalBytes += val.getLength();
147         }
148         timer.stop();
149       }
150       finally {
151         writer.close();
152       }
153     }
154     finally {
155       fout.close();
156     }
157     double duration = (double)timer.read()/1000; // in us.
158     long fsize = fs.getFileStatus(path).getLen();
159 
160     System.out.printf(
161         "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
162         timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
163             / duration);
164     System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
165         timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
166   }
167 
168   public void seekTFile() throws IOException {
169     int miss = 0;
170     long totalBytes = 0;
171     FSDataInputStream fsdis = fs.open(path);
172     Reader reader = HFile.createReaderFromStream(path, fsdis,
173         fs.getFileStatus(path).getLen(), new CacheConfig(conf));
174     reader.loadFileInfo();
175     KeySampler kSampler =
176         new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
177             keyLenGen);
178     HFileScanner scanner = reader.getScanner(false, USE_PREAD);
179     BytesWritable key = new BytesWritable();
180     timer.reset();
181     timer.start();
182     for (int i = 0; i < options.seekCount; ++i) {
183       kSampler.next(key);
184       byte [] k = new byte [key.getLength()];
185       System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
186       if (scanner.seekTo(k) >= 0) {
187         ByteBuffer bbkey = scanner.getKey();
188         ByteBuffer bbval = scanner.getValue();
189         totalBytes += bbkey.limit();
190         totalBytes += bbval.limit();
191       }
192       else {
193         ++miss;
194       }
195     }
196     timer.stop();
197     System.out.printf(
198         "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
199         timer.toString(), NanoTimer.nanoTimeToString(timer.read()
200             / options.seekCount), options.seekCount - miss, miss,
201         (double) totalBytes / 1024 / (options.seekCount - miss));
202 
203   }
204 
205   public void testSeeks() throws IOException {
206     if (options.doCreate()) {
207       createTFile();
208     }
209 
210     if (options.doRead()) {
211       seekTFile();
212     }
213 
214     if (options.doCreate()) {
215       fs.delete(path, true);
216     }
217   }
218 
219   private static class IntegerRange {
220     private final int from, to;
221 
222     public IntegerRange(int from, int to) {
223       this.from = from;
224       this.to = to;
225     }
226 
227     public static IntegerRange parse(String s) throws ParseException {
228       StringTokenizer st = new StringTokenizer(s, " \t,");
229       if (st.countTokens() != 2) {
230         throw new ParseException("Bad integer specification: " + s);
231       }
232       int from = Integer.parseInt(st.nextToken());
233       int to = Integer.parseInt(st.nextToken());
234       return new IntegerRange(from, to);
235     }
236 
237     public int from() {
238       return from;
239     }
240 
241     public int to() {
242       return to;
243     }
244   }
245 
246   private static class MyOptions {
    // hard coded constants
    // (processOptions() looks for a -d override of dictSize, but
    // buildOptions() does not register such an option.)
    int dictSize = 1000;
    int minWordLen = 5;
    int maxWordLen = 20;

    // Supplies the default data directory used for rootDir below.
    private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
    // Directory the benchmark file is created in (-r).
    String rootDir =
      TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
    // Name of the benchmark file inside rootDir (-f).
    String file = "TestTFileSeek";
    // String compress = "lzo"; DISABLED
    // Compression scheme name (-c); checked in validateOptions().
    String compress = "none";
    // Key length range (-k min,max).
    int minKeyLen = 10;
    int maxKeyLen = 50;
    // Value length range (-v min,max).
    int minValLength = 1024;
    int maxValLength = 2 * 1024;
    // Block size in bytes; -b is given in KB and multiplied by 1024.
    int minBlockSize = 1 * 1024 * 1024;
    // Written to the "tfile.fs.output.buffer.size" / "tfile.fs.input.buffer.size"
    // configuration keys in setUp() (-o / -i).
    int fsOutputBufferSize = 1;
    int fsInputBufferSize = 0;
    // Default writing 10MB.
    // Stored in bytes; -s is given in MB and multiplied by 1024 * 1024.
    long fileSize = 10 * 1024 * 1024;
    // Number of seeks performed by seekTFile() (-n).
    long seekCount = 1000;
    // Number of times main() repeats the whole benchmark (-t).
    long trialCount = 1;
    // Seed for the master Random; set to System.nanoTime() in the constructor.
    long seed;
    // When true, setUp() maps "fs.file.impl" to RawLocalFileSystem (--rawfs).
    boolean useRawFs = false;

    // Bit flags combined in op to select which phases run (-x r|w|rw).
    static final int OP_CREATE = 1;
    static final int OP_READ = 2;
    int op = OP_CREATE | OP_READ;

    // Set to true only when processOptions() runs to completion; stays false
    // after --help or a parse error.
    boolean proceed = false;
277 
278     public MyOptions(String[] args) {
279       seed = System.nanoTime();
280 
281       try {
282         Options opts = buildOptions();
283         CommandLineParser parser = new GnuParser();
284         CommandLine line = parser.parse(opts, args, true);
285         processOptions(line, opts);
286         validateOptions();
287       }
288       catch (ParseException e) {
289         System.out.println(e.getMessage());
290         System.out.println("Try \"--help\" option for details.");
291         setStopProceed();
292       }
293     }
294 
295     public boolean proceed() {
296       return proceed;
297     }
298 
299     private Options buildOptions() {
300       Option compress =
301           OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
302               .hasArg().withDescription("compression scheme").create('c');
303 
304       Option fileSize =
305           OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
306               .hasArg().withDescription("target size of the file (in MB).")
307               .create('s');
308 
309       Option fsInputBufferSz =
310           OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
311               .hasArg().withDescription(
312                   "size of the file system input buffer (in bytes).").create(
313                   'i');
314 
315       Option fsOutputBufferSize =
316           OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
317               .hasArg().withDescription(
318                   "size of the file system output buffer (in bytes).").create(
319                   'o');
320 
321       Option keyLen =
322           OptionBuilder
323               .withLongOpt("key-length")
324               .withArgName("min,max")
325               .hasArg()
326               .withDescription(
327                   "the length range of the key (in bytes)")
328               .create('k');
329 
330       Option valueLen =
331           OptionBuilder
332               .withLongOpt("value-length")
333               .withArgName("min,max")
334               .hasArg()
335               .withDescription(
336                   "the length range of the value (in bytes)")
337               .create('v');
338 
339       Option blockSz =
340           OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
341               .withDescription("minimum block size (in KB)").create('b');
342 
343       Option operation =
344           OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
345               .withDescription(
346                   "action: seek-only, create-only, seek-after-create").create(
347                   'x');
348 
349       Option rootDir =
350           OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
351               .withDescription(
352                   "specify root directory where files will be created.")
353               .create('r');
354 
355       Option file =
356           OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
357               .withDescription("specify the file name to be created or read.")
358               .create('f');
359 
360       Option seekCount =
361           OptionBuilder
362               .withLongOpt("seek")
363               .withArgName("count")
364               .hasArg()
365               .withDescription(
366                   "specify how many seek operations we perform (requires -x r or -x rw.")
367               .create('n');
368       
369       Option trialCount =
370           OptionBuilder 
371               .withLongOpt("trials")
372               .withArgName("n")
373               .hasArg()
374               .withDescription(
375                   "specify how many times to run the whole benchmark")
376               .create('t');
377 
378       Option useRawFs =
379           OptionBuilder
380             .withLongOpt("rawfs")
381             .withDescription("use raw instead of checksummed file system")
382             .create();
383       
384       Option help =
385           OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
386               "show this screen").create("h");
387 
388       return new Options().addOption(compress).addOption(fileSize).addOption(
389           fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
390           .addOption(blockSz).addOption(rootDir).addOption(valueLen)
391           .addOption(operation).addOption(seekCount).addOption(file)
392           .addOption(trialCount).addOption(useRawFs).addOption(help);
393 
394     }
395 
396     private void processOptions(CommandLine line, Options opts)
397         throws ParseException {
398       // --help -h and --version -V must be processed first.
399       if (line.hasOption('h')) {
400         HelpFormatter formatter = new HelpFormatter();
401         System.out.println("TFile and SeqFile benchmark.");
402         System.out.println();
403         formatter.printHelp(100,
404             "java ... TestTFileSeqFileComparison [options]",
405             "\nSupported options:", opts, "");
406         return;
407       }
408 
409       if (line.hasOption('c')) {
410         compress = line.getOptionValue('c');
411       }
412 
413       if (line.hasOption('d')) {
414         dictSize = Integer.parseInt(line.getOptionValue('d'));
415       }
416 
417       if (line.hasOption('s')) {
418         fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
419       }
420 
421       if (line.hasOption('i')) {
422         fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
423       }
424 
425       if (line.hasOption('o')) {
426         fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
427       }
428 
429       if (line.hasOption('n')) {
430         seekCount = Integer.parseInt(line.getOptionValue('n'));
431       }
432       
433       if (line.hasOption('t')) {
434         trialCount = Integer.parseInt(line.getOptionValue('t'));
435       }
436 
437       if (line.hasOption('k')) {
438         IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
439         minKeyLen = ir.from();
440         maxKeyLen = ir.to();
441       }
442 
443       if (line.hasOption('v')) {
444         IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
445         minValLength = ir.from();
446         maxValLength = ir.to();
447       }
448 
449       if (line.hasOption('b')) {
450         minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
451       }
452 
453       if (line.hasOption('r')) {
454         rootDir = line.getOptionValue('r');
455       }
456 
457       if (line.hasOption('f')) {
458         file = line.getOptionValue('f');
459       }
460 
461       if (line.hasOption('S')) {
462         seed = Long.parseLong(line.getOptionValue('S'));
463       }
464 
465       if (line.hasOption('x')) {
466         String strOp = line.getOptionValue('x');
467         if (strOp.equals("r")) {
468           op = OP_READ;
469         }
470         else if (strOp.equals("w")) {
471           op = OP_CREATE;
472         }
473         else if (strOp.equals("rw")) {
474           op = OP_CREATE | OP_READ;
475         }
476         else {
477           throw new ParseException("Unknown action specifier: " + strOp);
478         }
479       }
480       
481       useRawFs = line.hasOption("rawfs");
482 
483       proceed = true;
484     }
485 
486     private void validateOptions() throws ParseException {
487       if (!compress.equals("none") && !compress.equals("lzo")
488           && !compress.equals("gz") && !compress.equals("snappy")) {
489         throw new ParseException("Unknown compression scheme: " + compress);
490       }
491 
492       if (minKeyLen >= maxKeyLen) {
493         throw new ParseException(
494             "Max key length must be greater than min key length.");
495       }
496 
497       if (minValLength >= maxValLength) {
498         throw new ParseException(
499             "Max value length must be greater than min value length.");
500       }
501 
502       if (minWordLen >= maxWordLen) {
503         throw new ParseException(
504             "Max word length must be greater than min word length.");
505       }
506       return;
507     }
508 
509     private void setStopProceed() {
510       proceed = false;
511     }
512 
513     public boolean doCreate() {
514       return (op & OP_CREATE) != 0;
515     }
516 
517     public boolean doRead() {
518       return (op & OP_READ) != 0;
519     }
520   }
521 
522   public static void main(String[] argv) throws IOException {
523     TestHFileSeek testCase = new TestHFileSeek();
524     MyOptions options = new MyOptions(argv);
525 
526     if (options.proceed == false) {
527       return;
528     }
529 
530     testCase.options = options;
531     for (int i = 0; i < options.trialCount; i++) {
532       Log.info("Beginning trial " + (i+1));
533       testCase.setUp();
534       testCase.testSeeks();
535       testCase.tearDown();
536     }
537   }
538 
  // Declared as a JUnit 4 rule; presumably checks resource usage around each
  // test (per its class name) -- TODO confirm.
  // NOTE(review): this class extends the JUnit 3 junit.framework.TestCase;
  // verify that the runner in use actually applies @Rule fields here.
  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
542 }
543