1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.util.Random;
23  import java.util.StringTokenizer;
24  
25  import junit.framework.TestCase;
26  
27  import org.apache.commons.cli.CommandLine;
28  import org.apache.commons.cli.CommandLineParser;
29  import org.apache.commons.cli.GnuParser;
30  import org.apache.commons.cli.HelpFormatter;
31  import org.apache.commons.cli.Option;
32  import org.apache.commons.cli.OptionBuilder;
33  import org.apache.commons.cli.Options;
34  import org.apache.commons.cli.ParseException;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FSDataInputStream;
37  import org.apache.hadoop.fs.FSDataOutputStream;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.hbase.HBaseTestingUtility;
41  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
42  import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
43  import org.apache.hadoop.io.BytesWritable;
44  
45  /**
46   * test the performance for seek.
47   * <p>
48   * Copied from
49   * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
50   * Remove after tfile is committed and use the tfile version of this class
51   * instead.</p>
52   */
53  public class TestHFileSeek extends TestCase {
54    private MyOptions options;
55    private Configuration conf;
56    private Path path;
57    private FileSystem fs;
58    private NanoTimer timer;
59    private Random rng;
60    private RandomDistribution.DiscreteRNG keyLenGen;
61    private KVGenerator kvGen;
62  
63    @Override
64    public void setUp() throws IOException {
65      if (options == null) {
66        options = new MyOptions(new String[0]);
67      }
68  
69      conf = new Configuration();
70      conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
71      conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
72      path = new Path(new Path(options.rootDir), options.file);
73      fs = path.getFileSystem(conf);
74      timer = new NanoTimer(false);
75      rng = new Random(options.seed);
76      keyLenGen =
77          new RandomDistribution.Zipf(new Random(rng.nextLong()),
78              options.minKeyLen, options.maxKeyLen, 1.2);
79      RandomDistribution.DiscreteRNG valLenGen =
80          new RandomDistribution.Flat(new Random(rng.nextLong()),
81              options.minValLength, options.maxValLength);
82      RandomDistribution.DiscreteRNG wordLenGen =
83          new RandomDistribution.Flat(new Random(rng.nextLong()),
84              options.minWordLen, options.maxWordLen);
85      kvGen =
86          new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
87              options.dictSize);
88    }
89  
90    @Override
91    public void tearDown() {
92      try {
93        fs.close();
94      }
95      catch (Exception e) {
96        // Nothing
97      }
98    }
99  
100   private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
101     throws IOException {
102     if (fs.exists(name)) {
103       fs.delete(name, true);
104     }
105     FSDataOutputStream fout = fs.create(name);
106     return fout;
107   }
108 
109   private void createTFile() throws IOException {
110     long totalBytes = 0;
111     FSDataOutputStream fout = createFSOutput(path, fs);
112     try {
113       Writer writer =
114           new Writer(fout, options.minBlockSize, options.compress, null);
115       try {
116         BytesWritable key = new BytesWritable();
117         BytesWritable val = new BytesWritable();
118         timer.start();
119         for (long i = 0; true; ++i) {
120           if (i % 1000 == 0) { // test the size for every 1000 rows.
121             if (fs.getFileStatus(path).getLen() >= options.fileSize) {
122               break;
123             }
124           }
125           kvGen.next(key, val, false);
126           byte [] k = new byte [key.getLength()];
127           System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
128           byte [] v = new byte [val.getLength()];
129           System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
130           writer.append(k, v);
131           totalBytes += key.getLength();
132           totalBytes += val.getLength();
133         }
134         timer.stop();
135       }
136       finally {
137         writer.close();
138       }
139     }
140     finally {
141       fout.close();
142     }
143     double duration = (double)timer.read()/1000; // in us.
144     long fsize = fs.getFileStatus(path).getLen();
145 
146     System.out.printf(
147         "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
148         timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
149             / duration);
150     System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
151         timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
152   }
153 
154   public void seekTFile() throws IOException {
155     int miss = 0;
156     long totalBytes = 0;
157     FSDataInputStream fsdis = fs.open(path);
158     Reader reader =
159       new Reader(fsdis, fs.getFileStatus(path).getLen(), null, false);
160     reader.loadFileInfo();
161     KeySampler kSampler =
162         new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
163             keyLenGen);
164     HFileScanner scanner = reader.getScanner(false, false);
165     BytesWritable key = new BytesWritable();
166     timer.reset();
167     timer.start();
168     for (int i = 0; i < options.seekCount; ++i) {
169       kSampler.next(key);
170       byte [] k = new byte [key.getLength()];
171       System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
172       if (scanner.seekTo(k) >= 0) {
173         ByteBuffer bbkey = scanner.getKey();
174         ByteBuffer bbval = scanner.getValue();
175         totalBytes += bbkey.limit();
176         totalBytes += bbval.limit();
177       }
178       else {
179         ++miss;
180       }
181     }
182     timer.stop();
183     System.out.printf(
184         "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
185         timer.toString(), NanoTimer.nanoTimeToString(timer.read()
186             / options.seekCount), options.seekCount - miss, miss,
187         (double) totalBytes / 1024 / (options.seekCount - miss));
188 
189   }
190 
191   public void testSeeks() throws IOException {
192     if (options.doCreate()) {
193       createTFile();
194     }
195 
196     if (options.doRead()) {
197       seekTFile();
198     }
199 
200     if (options.doCreate()) {
201       fs.delete(path, true);
202     }
203   }
204 
205   private static class IntegerRange {
206     private final int from, to;
207 
208     public IntegerRange(int from, int to) {
209       this.from = from;
210       this.to = to;
211     }
212 
213     public static IntegerRange parse(String s) throws ParseException {
214       StringTokenizer st = new StringTokenizer(s, " \t,");
215       if (st.countTokens() != 2) {
216         throw new ParseException("Bad integer specification: " + s);
217       }
218       int from = Integer.parseInt(st.nextToken());
219       int to = Integer.parseInt(st.nextToken());
220       return new IntegerRange(from, to);
221     }
222 
223     public int from() {
224       return from;
225     }
226 
227     public int to() {
228       return to;
229     }
230   }
231 
232   private static class MyOptions {
233     // hard coded constants
234     int dictSize = 1000;
235     int minWordLen = 5;
236     int maxWordLen = 20;
237 
238     String rootDir =
239       HBaseTestingUtility.getTestDir("TestTFileSeek").toString();
240     String file = "TestTFileSeek";
241     // String compress = "lzo"; DISABLED
242     String compress = "none";
243     int minKeyLen = 10;
244     int maxKeyLen = 50;
245     int minValLength = 1024;
246     int maxValLength = 2 * 1024;
247     int minBlockSize = 1 * 1024 * 1024;
248     int fsOutputBufferSize = 1;
249     int fsInputBufferSize = 0;
250     // Default writing 10MB.
251     long fileSize = 10 * 1024 * 1024;
252     long seekCount = 1000;
253     long seed;
254 
255     static final int OP_CREATE = 1;
256     static final int OP_READ = 2;
257     int op = OP_CREATE | OP_READ;
258 
259     boolean proceed = false;
260 
261     public MyOptions(String[] args) {
262       seed = System.nanoTime();
263 
264       try {
265         Options opts = buildOptions();
266         CommandLineParser parser = new GnuParser();
267         CommandLine line = parser.parse(opts, args, true);
268         processOptions(line, opts);
269         validateOptions();
270       }
271       catch (ParseException e) {
272         System.out.println(e.getMessage());
273         System.out.println("Try \"--help\" option for details.");
274         setStopProceed();
275       }
276     }
277 
278     public boolean proceed() {
279       return proceed;
280     }
281 
282     private Options buildOptions() {
283       Option compress =
284           OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz]")
285               .hasArg().withDescription("compression scheme").create('c');
286 
287       Option fileSize =
288           OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
289               .hasArg().withDescription("target size of the file (in MB).")
290               .create('s');
291 
292       Option fsInputBufferSz =
293           OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
294               .hasArg().withDescription(
295                   "size of the file system input buffer (in bytes).").create(
296                   'i');
297 
298       Option fsOutputBufferSize =
299           OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
300               .hasArg().withDescription(
301                   "size of the file system output buffer (in bytes).").create(
302                   'o');
303 
304       Option keyLen =
305           OptionBuilder
306               .withLongOpt("key-length")
307               .withArgName("min,max")
308               .hasArg()
309               .withDescription(
310                   "the length range of the key (in bytes)")
311               .create('k');
312 
313       Option valueLen =
314           OptionBuilder
315               .withLongOpt("value-length")
316               .withArgName("min,max")
317               .hasArg()
318               .withDescription(
319                   "the length range of the value (in bytes)")
320               .create('v');
321 
322       Option blockSz =
323           OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
324               .withDescription("minimum block size (in KB)").create('b');
325 
326       Option operation =
327           OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
328               .withDescription(
329                   "action: seek-only, create-only, seek-after-create").create(
330                   'x');
331 
332       Option rootDir =
333           OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
334               .withDescription(
335                   "specify root directory where files will be created.")
336               .create('r');
337 
338       Option file =
339           OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
340               .withDescription("specify the file name to be created or read.")
341               .create('f');
342 
343       Option seekCount =
344           OptionBuilder
345               .withLongOpt("seek")
346               .withArgName("count")
347               .hasArg()
348               .withDescription(
349                   "specify how many seek operations we perform (requires -x r or -x rw.")
350               .create('n');
351 
352       Option help =
353           OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
354               "show this screen").create("h");
355 
356       return new Options().addOption(compress).addOption(fileSize).addOption(
357           fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
358           .addOption(blockSz).addOption(rootDir).addOption(valueLen).addOption(
359               operation).addOption(seekCount).addOption(file).addOption(help);
360 
361     }
362 
363     private void processOptions(CommandLine line, Options opts)
364         throws ParseException {
365       // --help -h and --version -V must be processed first.
366       if (line.hasOption('h')) {
367         HelpFormatter formatter = new HelpFormatter();
368         System.out.println("TFile and SeqFile benchmark.");
369         System.out.println();
370         formatter.printHelp(100,
371             "java ... TestTFileSeqFileComparison [options]",
372             "\nSupported options:", opts, "");
373         return;
374       }
375 
376       if (line.hasOption('c')) {
377         compress = line.getOptionValue('c');
378       }
379 
380       if (line.hasOption('d')) {
381         dictSize = Integer.parseInt(line.getOptionValue('d'));
382       }
383 
384       if (line.hasOption('s')) {
385         fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
386       }
387 
388       if (line.hasOption('i')) {
389         fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
390       }
391 
392       if (line.hasOption('o')) {
393         fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
394       }
395 
396       if (line.hasOption('n')) {
397         seekCount = Integer.parseInt(line.getOptionValue('n'));
398       }
399 
400       if (line.hasOption('k')) {
401         IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
402         minKeyLen = ir.from();
403         maxKeyLen = ir.to();
404       }
405 
406       if (line.hasOption('v')) {
407         IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
408         minValLength = ir.from();
409         maxValLength = ir.to();
410       }
411 
412       if (line.hasOption('b')) {
413         minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
414       }
415 
416       if (line.hasOption('r')) {
417         rootDir = line.getOptionValue('r');
418       }
419 
420       if (line.hasOption('f')) {
421         file = line.getOptionValue('f');
422       }
423 
424       if (line.hasOption('S')) {
425         seed = Long.parseLong(line.getOptionValue('S'));
426       }
427 
428       if (line.hasOption('x')) {
429         String strOp = line.getOptionValue('x');
430         if (strOp.equals("r")) {
431           op = OP_READ;
432         }
433         else if (strOp.equals("w")) {
434           op = OP_CREATE;
435         }
436         else if (strOp.equals("rw")) {
437           op = OP_CREATE | OP_READ;
438         }
439         else {
440           throw new ParseException("Unknown action specifier: " + strOp);
441         }
442       }
443 
444       proceed = true;
445     }
446 
447     private void validateOptions() throws ParseException {
448       if (!compress.equals("none") && !compress.equals("lzo")
449           && !compress.equals("gz")) {
450         throw new ParseException("Unknown compression scheme: " + compress);
451       }
452 
453       if (minKeyLen >= maxKeyLen) {
454         throw new ParseException(
455             "Max key length must be greater than min key length.");
456       }
457 
458       if (minValLength >= maxValLength) {
459         throw new ParseException(
460             "Max value length must be greater than min value length.");
461       }
462 
463       if (minWordLen >= maxWordLen) {
464         throw new ParseException(
465             "Max word length must be greater than min word length.");
466       }
467       return;
468     }
469 
470     private void setStopProceed() {
471       proceed = false;
472     }
473 
474     public boolean doCreate() {
475       return (op & OP_CREATE) != 0;
476     }
477 
478     public boolean doRead() {
479       return (op & OP_READ) != 0;
480     }
481   }
482 
483   public static void main(String[] argv) throws IOException {
484     TestHFileSeek testCase = new TestHFileSeek();
485     MyOptions options = new MyOptions(argv);
486 
487     if (options.proceed == false) {
488       return;
489     }
490 
491     testCase.options = options;
492     testCase.setUp();
493     testCase.testSeeks();
494     testCase.tearDown();
495   }
496 }