1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.util.Random;
23 import java.util.StringTokenizer;
24
25 import junit.framework.TestCase;
26
27 import org.apache.commons.cli.CommandLine;
28 import org.apache.commons.cli.CommandLineParser;
29 import org.apache.commons.cli.GnuParser;
30 import org.apache.commons.cli.HelpFormatter;
31 import org.apache.commons.cli.Option;
32 import org.apache.commons.cli.OptionBuilder;
33 import org.apache.commons.cli.Options;
34 import org.apache.commons.cli.ParseException;
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.fs.FSDataInputStream;
39 import org.apache.hadoop.fs.FSDataOutputStream;
40 import org.apache.hadoop.fs.FileSystem;
41 import org.apache.hadoop.fs.Path;
42 import org.apache.hadoop.fs.RawLocalFileSystem;
43 import org.apache.hadoop.hbase.HBaseTestingUtility;
44 import org.apache.hadoop.hbase.MediumTests;
45 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
46 import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
47 import org.apache.hadoop.io.BytesWritable;
48 import org.junit.experimental.categories.Category;
49
50
51
52
53
54
55
56
57
58 @Category(MediumTests.class)
59 public class TestHFileSeek extends TestCase {
60 private static final boolean USE_PREAD = true;
61 private MyOptions options;
62 private Configuration conf;
63 private Path path;
64 private FileSystem fs;
65 private NanoTimer timer;
66 private Random rng;
67 private RandomDistribution.DiscreteRNG keyLenGen;
68 private KVGenerator kvGen;
69
70 private static final Log LOG = LogFactory.getLog(TestHFileSeek.class);
71
72 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
73
74 @Override
75 public void setUp() throws IOException {
76 if (options == null) {
77 options = new MyOptions(new String[0]);
78 }
79
80 conf = new Configuration();
81
82 if (options.useRawFs) {
83 conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
84 }
85
86 conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
87 conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
88 path = new Path(new Path(options.rootDir), options.file);
89 fs = path.getFileSystem(conf);
90 timer = new NanoTimer(false);
91 rng = new Random(options.seed);
92 keyLenGen =
93 new RandomDistribution.Zipf(new Random(rng.nextLong()),
94 options.minKeyLen, options.maxKeyLen, 1.2);
95 RandomDistribution.DiscreteRNG valLenGen =
96 new RandomDistribution.Flat(new Random(rng.nextLong()),
97 options.minValLength, options.maxValLength);
98 RandomDistribution.DiscreteRNG wordLenGen =
99 new RandomDistribution.Flat(new Random(rng.nextLong()),
100 options.minWordLen, options.maxWordLen);
101 kvGen =
102 new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
103 options.dictSize);
104 }
105
106 @Override
107 public void tearDown() {
108 try {
109 fs.close();
110 }
111 catch (Exception e) {
112
113 }
114 }
115
116 private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
117 throws IOException {
118 if (fs.exists(name)) {
119 fs.delete(name, true);
120 }
121 FSDataOutputStream fout = fs.create(name);
122 return fout;
123 }
124
125 private void createTFile() throws IOException {
126 long totalBytes = 0;
127 FSDataOutputStream fout = createFSOutput(path, fs);
128 try {
129 Writer writer = HFile.getWriterFactoryNoCache(conf)
130 .withOutputStream(fout)
131 .withBlockSize(options.minBlockSize)
132 .withCompression(options.compress)
133 .create();
134 try {
135 BytesWritable key = new BytesWritable();
136 BytesWritable val = new BytesWritable();
137 timer.start();
138 for (long i = 0; true; ++i) {
139 if (i % 1000 == 0) {
140 if (fs.getFileStatus(path).getLen() >= options.fileSize) {
141 break;
142 }
143 }
144 kvGen.next(key, val, false);
145 byte [] k = new byte [key.getLength()];
146 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
147 byte [] v = new byte [val.getLength()];
148 System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
149 writer.append(k, v);
150 totalBytes += key.getLength();
151 totalBytes += val.getLength();
152 }
153 timer.stop();
154 }
155 finally {
156 writer.close();
157 }
158 }
159 finally {
160 fout.close();
161 }
162 double duration = (double)timer.read()/1000;
163 long fsize = fs.getFileStatus(path).getLen();
164
165 System.out.printf(
166 "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
167 timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
168 / duration);
169 System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
170 timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
171 }
172
173 public void seekTFile() throws IOException {
174 int miss = 0;
175 long totalBytes = 0;
176 FSDataInputStream fsdis = fs.open(path);
177 Reader reader = HFile.createReaderFromStream(path, fsdis,
178 fs.getFileStatus(path).getLen(), new CacheConfig(conf));
179 reader.loadFileInfo();
180 KeySampler kSampler =
181 new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
182 keyLenGen);
183 HFileScanner scanner = reader.getScanner(false, USE_PREAD);
184 BytesWritable key = new BytesWritable();
185 timer.reset();
186 timer.start();
187 for (int i = 0; i < options.seekCount; ++i) {
188 kSampler.next(key);
189 byte [] k = new byte [key.getLength()];
190 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
191 if (scanner.seekTo(k) >= 0) {
192 ByteBuffer bbkey = scanner.getKey();
193 ByteBuffer bbval = scanner.getValue();
194 totalBytes += bbkey.limit();
195 totalBytes += bbval.limit();
196 }
197 else {
198 ++miss;
199 }
200 }
201 timer.stop();
202 System.out.printf(
203 "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
204 timer.toString(), NanoTimer.nanoTimeToString(timer.read()
205 / options.seekCount), options.seekCount - miss, miss,
206 (double) totalBytes / 1024 / (options.seekCount - miss));
207
208 }
209
210 public void testSeeks() throws IOException {
211 if (options.doCreate()) {
212 createTFile();
213 }
214
215 if (options.doRead()) {
216 seekTFile();
217 }
218
219 if (options.doCreate()) {
220 fs.delete(path, true);
221 }
222 }
223
224 private static class IntegerRange {
225 private final int from, to;
226
227 public IntegerRange(int from, int to) {
228 this.from = from;
229 this.to = to;
230 }
231
232 public static IntegerRange parse(String s) throws ParseException {
233 StringTokenizer st = new StringTokenizer(s, " \t,");
234 if (st.countTokens() != 2) {
235 throw new ParseException("Bad integer specification: " + s);
236 }
237 int from = Integer.parseInt(st.nextToken());
238 int to = Integer.parseInt(st.nextToken());
239 return new IntegerRange(from, to);
240 }
241
242 public int from() {
243 return from;
244 }
245
246 public int to() {
247 return to;
248 }
249 }
250
251 private static class MyOptions {
252
253 int dictSize = 1000;
254 int minWordLen = 5;
255 int maxWordLen = 20;
256
257 private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
258 String rootDir =
259 TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
260 String file = "TestTFileSeek";
261
262 String compress = "none";
263 int minKeyLen = 10;
264 int maxKeyLen = 50;
265 int minValLength = 1024;
266 int maxValLength = 2 * 1024;
267 int minBlockSize = 1 * 1024 * 1024;
268 int fsOutputBufferSize = 1;
269 int fsInputBufferSize = 0;
270
271 long fileSize = 10 * 1024 * 1024;
272 long seekCount = 1000;
273 long trialCount = 1;
274 long seed;
275 boolean useRawFs = false;
276
277 static final int OP_CREATE = 1;
278 static final int OP_READ = 2;
279 int op = OP_CREATE | OP_READ;
280
281 boolean proceed = false;
282
283 public MyOptions(String[] args) {
284 seed = System.nanoTime();
285
286 try {
287 Options opts = buildOptions();
288 CommandLineParser parser = new GnuParser();
289 CommandLine line = parser.parse(opts, args, true);
290 processOptions(line, opts);
291 validateOptions();
292 }
293 catch (ParseException e) {
294 System.out.println(e.getMessage());
295 System.out.println("Try \"--help\" option for details.");
296 setStopProceed();
297 }
298 }
299
300 public boolean proceed() {
301 return proceed;
302 }
303
304 private Options buildOptions() {
305 Option compress =
306 OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
307 .hasArg().withDescription("compression scheme").create('c');
308
309 Option fileSize =
310 OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
311 .hasArg().withDescription("target size of the file (in MB).")
312 .create('s');
313
314 Option fsInputBufferSz =
315 OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
316 .hasArg().withDescription(
317 "size of the file system input buffer (in bytes).").create(
318 'i');
319
320 Option fsOutputBufferSize =
321 OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
322 .hasArg().withDescription(
323 "size of the file system output buffer (in bytes).").create(
324 'o');
325
326 Option keyLen =
327 OptionBuilder
328 .withLongOpt("key-length")
329 .withArgName("min,max")
330 .hasArg()
331 .withDescription(
332 "the length range of the key (in bytes)")
333 .create('k');
334
335 Option valueLen =
336 OptionBuilder
337 .withLongOpt("value-length")
338 .withArgName("min,max")
339 .hasArg()
340 .withDescription(
341 "the length range of the value (in bytes)")
342 .create('v');
343
344 Option blockSz =
345 OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
346 .withDescription("minimum block size (in KB)").create('b');
347
348 Option operation =
349 OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
350 .withDescription(
351 "action: seek-only, create-only, seek-after-create").create(
352 'x');
353
354 Option rootDir =
355 OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
356 .withDescription(
357 "specify root directory where files will be created.")
358 .create('r');
359
360 Option file =
361 OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
362 .withDescription("specify the file name to be created or read.")
363 .create('f');
364
365 Option seekCount =
366 OptionBuilder
367 .withLongOpt("seek")
368 .withArgName("count")
369 .hasArg()
370 .withDescription(
371 "specify how many seek operations we perform (requires -x r or -x rw.")
372 .create('n');
373
374 Option trialCount =
375 OptionBuilder
376 .withLongOpt("trials")
377 .withArgName("n")
378 .hasArg()
379 .withDescription(
380 "specify how many times to run the whole benchmark")
381 .create('t');
382
383 Option useRawFs =
384 OptionBuilder
385 .withLongOpt("rawfs")
386 .withDescription("use raw instead of checksummed file system")
387 .create();
388
389 Option help =
390 OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
391 "show this screen").create("h");
392
393 return new Options().addOption(compress).addOption(fileSize).addOption(
394 fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
395 .addOption(blockSz).addOption(rootDir).addOption(valueLen)
396 .addOption(operation).addOption(seekCount).addOption(file)
397 .addOption(trialCount).addOption(useRawFs).addOption(help);
398
399 }
400
401 private void processOptions(CommandLine line, Options opts)
402 throws ParseException {
403
404 if (line.hasOption('h')) {
405 HelpFormatter formatter = new HelpFormatter();
406 System.out.println("TFile and SeqFile benchmark.");
407 System.out.println();
408 formatter.printHelp(100,
409 "java ... TestTFileSeqFileComparison [options]",
410 "\nSupported options:", opts, "");
411 return;
412 }
413
414 if (line.hasOption('c')) {
415 compress = line.getOptionValue('c');
416 }
417
418 if (line.hasOption('d')) {
419 dictSize = Integer.parseInt(line.getOptionValue('d'));
420 }
421
422 if (line.hasOption('s')) {
423 fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
424 }
425
426 if (line.hasOption('i')) {
427 fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
428 }
429
430 if (line.hasOption('o')) {
431 fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
432 }
433
434 if (line.hasOption('n')) {
435 seekCount = Integer.parseInt(line.getOptionValue('n'));
436 }
437
438 if (line.hasOption('t')) {
439 trialCount = Integer.parseInt(line.getOptionValue('t'));
440 }
441
442 if (line.hasOption('k')) {
443 IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
444 minKeyLen = ir.from();
445 maxKeyLen = ir.to();
446 }
447
448 if (line.hasOption('v')) {
449 IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
450 minValLength = ir.from();
451 maxValLength = ir.to();
452 }
453
454 if (line.hasOption('b')) {
455 minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
456 }
457
458 if (line.hasOption('r')) {
459 rootDir = line.getOptionValue('r');
460 }
461
462 if (line.hasOption('f')) {
463 file = line.getOptionValue('f');
464 }
465
466 if (line.hasOption('S')) {
467 seed = Long.parseLong(line.getOptionValue('S'));
468 }
469
470 if (line.hasOption('x')) {
471 String strOp = line.getOptionValue('x');
472 if (strOp.equals("r")) {
473 op = OP_READ;
474 }
475 else if (strOp.equals("w")) {
476 op = OP_CREATE;
477 }
478 else if (strOp.equals("rw")) {
479 op = OP_CREATE | OP_READ;
480 }
481 else {
482 throw new ParseException("Unknown action specifier: " + strOp);
483 }
484 }
485
486 useRawFs = line.hasOption("rawfs");
487
488 proceed = true;
489 }
490
491 private void validateOptions() throws ParseException {
492 if (!compress.equals("none") && !compress.equals("lzo")
493 && !compress.equals("gz") && !compress.equals("snappy")) {
494 throw new ParseException("Unknown compression scheme: " + compress);
495 }
496
497 if (minKeyLen >= maxKeyLen) {
498 throw new ParseException(
499 "Max key length must be greater than min key length.");
500 }
501
502 if (minValLength >= maxValLength) {
503 throw new ParseException(
504 "Max value length must be greater than min value length.");
505 }
506
507 if (minWordLen >= maxWordLen) {
508 throw new ParseException(
509 "Max word length must be greater than min word length.");
510 }
511 return;
512 }
513
514 private void setStopProceed() {
515 proceed = false;
516 }
517
518 public boolean doCreate() {
519 return (op & OP_CREATE) != 0;
520 }
521
522 public boolean doRead() {
523 return (op & OP_READ) != 0;
524 }
525 }
526
527 public static void main(String[] argv) throws IOException {
528 TestHFileSeek testCase = new TestHFileSeek();
529 MyOptions options = new MyOptions(argv);
530
531 if (options.proceed == false) {
532 return;
533 }
534
535 testCase.options = options;
536 for (int i = 0; i < options.trialCount; i++) {
537 LOG.info("Beginning trial " + (i+1));
538 testCase.setUp();
539 testCase.testSeeks();
540 testCase.tearDown();
541 }
542 }
543
544 }
545