1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.util.Random;
23 import java.util.StringTokenizer;
24
25 import junit.framework.TestCase;
26
27 import org.apache.commons.cli.CommandLine;
28 import org.apache.commons.cli.CommandLineParser;
29 import org.apache.commons.cli.GnuParser;
30 import org.apache.commons.cli.HelpFormatter;
31 import org.apache.commons.cli.Option;
32 import org.apache.commons.cli.OptionBuilder;
33 import org.apache.commons.cli.Options;
34 import org.apache.commons.cli.ParseException;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FSDataInputStream;
37 import org.apache.hadoop.fs.FSDataOutputStream;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.fs.RawLocalFileSystem;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.MediumTests;
43 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
44 import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
45 import org.apache.hadoop.io.BytesWritable;
46 import org.junit.experimental.categories.Category;
47 import org.mortbay.log.Log;
48
49
50
51
52
53
54
55
56
57 @Category(MediumTests.class)
58 public class TestHFileSeek extends TestCase {
/** Passed as the second argument of reader.getScanner(...) in seekTFile(). */
private static final boolean USE_PREAD = true;
/** Benchmark settings; injected by main(), otherwise setUp() builds defaults. */
private MyOptions options;
// Everything below is (re)initialized by setUp().
private Configuration conf;
/** Location of the benchmark file: options.rootDir joined with options.file. */
private Path path;
private FileSystem fs;
private NanoTimer timer;
/** Seeded from options.seed. */
private Random rng;
/** Key-length distribution; used by the key/value generator and by the seek key sampler. */
private RandomDistribution.DiscreteRNG keyLenGen;
private KVGenerator kvGen;
68
69 @Override
70 public void setUp() throws IOException {
71 if (options == null) {
72 options = new MyOptions(new String[0]);
73 }
74
75 conf = new Configuration();
76
77 if (options.useRawFs) {
78 conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
79 }
80
81 conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
82 conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
83 path = new Path(new Path(options.rootDir), options.file);
84 fs = path.getFileSystem(conf);
85 timer = new NanoTimer(false);
86 rng = new Random(options.seed);
87 keyLenGen =
88 new RandomDistribution.Zipf(new Random(rng.nextLong()),
89 options.minKeyLen, options.maxKeyLen, 1.2);
90 RandomDistribution.DiscreteRNG valLenGen =
91 new RandomDistribution.Flat(new Random(rng.nextLong()),
92 options.minValLength, options.maxValLength);
93 RandomDistribution.DiscreteRNG wordLenGen =
94 new RandomDistribution.Flat(new Random(rng.nextLong()),
95 options.minWordLen, options.maxWordLen);
96 kvGen =
97 new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
98 options.dictSize);
99 }
100
101 @Override
102 public void tearDown() {
103 try {
104 fs.close();
105 }
106 catch (Exception e) {
107
108 }
109 }
110
111 private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
112 throws IOException {
113 if (fs.exists(name)) {
114 fs.delete(name, true);
115 }
116 FSDataOutputStream fout = fs.create(name);
117 return fout;
118 }
119
120 private void createTFile() throws IOException {
121 long totalBytes = 0;
122 FSDataOutputStream fout = createFSOutput(path, fs);
123 try {
124 Writer writer = HFile.getWriterFactoryNoCache(conf)
125 .withOutputStream(fout)
126 .withBlockSize(options.minBlockSize)
127 .withCompression(options.compress)
128 .create();
129 try {
130 BytesWritable key = new BytesWritable();
131 BytesWritable val = new BytesWritable();
132 timer.start();
133 for (long i = 0; true; ++i) {
134 if (i % 1000 == 0) {
135 if (fs.getFileStatus(path).getLen() >= options.fileSize) {
136 break;
137 }
138 }
139 kvGen.next(key, val, false);
140 byte [] k = new byte [key.getLength()];
141 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
142 byte [] v = new byte [val.getLength()];
143 System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
144 writer.append(k, v);
145 totalBytes += key.getLength();
146 totalBytes += val.getLength();
147 }
148 timer.stop();
149 }
150 finally {
151 writer.close();
152 }
153 }
154 finally {
155 fout.close();
156 }
157 double duration = (double)timer.read()/1000;
158 long fsize = fs.getFileStatus(path).getLen();
159
160 System.out.printf(
161 "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
162 timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
163 / duration);
164 System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
165 timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
166 }
167
168 public void seekTFile() throws IOException {
169 int miss = 0;
170 long totalBytes = 0;
171 FSDataInputStream fsdis = fs.open(path);
172 Reader reader = HFile.createReaderFromStream(path, fsdis,
173 fs.getFileStatus(path).getLen(), new CacheConfig(conf));
174 reader.loadFileInfo();
175 KeySampler kSampler =
176 new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
177 keyLenGen);
178 HFileScanner scanner = reader.getScanner(false, USE_PREAD);
179 BytesWritable key = new BytesWritable();
180 timer.reset();
181 timer.start();
182 for (int i = 0; i < options.seekCount; ++i) {
183 kSampler.next(key);
184 byte [] k = new byte [key.getLength()];
185 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
186 if (scanner.seekTo(k) >= 0) {
187 ByteBuffer bbkey = scanner.getKey();
188 ByteBuffer bbval = scanner.getValue();
189 totalBytes += bbkey.limit();
190 totalBytes += bbval.limit();
191 }
192 else {
193 ++miss;
194 }
195 }
196 timer.stop();
197 System.out.printf(
198 "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
199 timer.toString(), NanoTimer.nanoTimeToString(timer.read()
200 / options.seekCount), options.seekCount - miss, miss,
201 (double) totalBytes / 1024 / (options.seekCount - miss));
202
203 }
204
205 public void testSeeks() throws IOException {
206 if (options.doCreate()) {
207 createTFile();
208 }
209
210 if (options.doRead()) {
211 seekTFile();
212 }
213
214 if (options.doCreate()) {
215 fs.delete(path, true);
216 }
217 }
218
219 private static class IntegerRange {
220 private final int from, to;
221
222 public IntegerRange(int from, int to) {
223 this.from = from;
224 this.to = to;
225 }
226
227 public static IntegerRange parse(String s) throws ParseException {
228 StringTokenizer st = new StringTokenizer(s, " \t,");
229 if (st.countTokens() != 2) {
230 throw new ParseException("Bad integer specification: " + s);
231 }
232 int from = Integer.parseInt(st.nextToken());
233 int to = Integer.parseInt(st.nextToken());
234 return new IntegerRange(from, to);
235 }
236
237 public int from() {
238 return from;
239 }
240
241 public int to() {
242 return to;
243 }
244 }
245
246 private static class MyOptions {
247
248 int dictSize = 1000;
249 int minWordLen = 5;
250 int maxWordLen = 20;
251
252 private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
253 String rootDir =
254 TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
255 String file = "TestTFileSeek";
256
257 String compress = "none";
258 int minKeyLen = 10;
259 int maxKeyLen = 50;
260 int minValLength = 1024;
261 int maxValLength = 2 * 1024;
262 int minBlockSize = 1 * 1024 * 1024;
263 int fsOutputBufferSize = 1;
264 int fsInputBufferSize = 0;
265
266 long fileSize = 10 * 1024 * 1024;
267 long seekCount = 1000;
268 long trialCount = 1;
269 long seed;
270 boolean useRawFs = false;
271
272 static final int OP_CREATE = 1;
273 static final int OP_READ = 2;
274 int op = OP_CREATE | OP_READ;
275
276 boolean proceed = false;
277
278 public MyOptions(String[] args) {
279 seed = System.nanoTime();
280
281 try {
282 Options opts = buildOptions();
283 CommandLineParser parser = new GnuParser();
284 CommandLine line = parser.parse(opts, args, true);
285 processOptions(line, opts);
286 validateOptions();
287 }
288 catch (ParseException e) {
289 System.out.println(e.getMessage());
290 System.out.println("Try \"--help\" option for details.");
291 setStopProceed();
292 }
293 }
294
295 public boolean proceed() {
296 return proceed;
297 }
298
299 private Options buildOptions() {
300 Option compress =
301 OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
302 .hasArg().withDescription("compression scheme").create('c');
303
304 Option fileSize =
305 OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
306 .hasArg().withDescription("target size of the file (in MB).")
307 .create('s');
308
309 Option fsInputBufferSz =
310 OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
311 .hasArg().withDescription(
312 "size of the file system input buffer (in bytes).").create(
313 'i');
314
315 Option fsOutputBufferSize =
316 OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
317 .hasArg().withDescription(
318 "size of the file system output buffer (in bytes).").create(
319 'o');
320
321 Option keyLen =
322 OptionBuilder
323 .withLongOpt("key-length")
324 .withArgName("min,max")
325 .hasArg()
326 .withDescription(
327 "the length range of the key (in bytes)")
328 .create('k');
329
330 Option valueLen =
331 OptionBuilder
332 .withLongOpt("value-length")
333 .withArgName("min,max")
334 .hasArg()
335 .withDescription(
336 "the length range of the value (in bytes)")
337 .create('v');
338
339 Option blockSz =
340 OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
341 .withDescription("minimum block size (in KB)").create('b');
342
343 Option operation =
344 OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
345 .withDescription(
346 "action: seek-only, create-only, seek-after-create").create(
347 'x');
348
349 Option rootDir =
350 OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
351 .withDescription(
352 "specify root directory where files will be created.")
353 .create('r');
354
355 Option file =
356 OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
357 .withDescription("specify the file name to be created or read.")
358 .create('f');
359
360 Option seekCount =
361 OptionBuilder
362 .withLongOpt("seek")
363 .withArgName("count")
364 .hasArg()
365 .withDescription(
366 "specify how many seek operations we perform (requires -x r or -x rw.")
367 .create('n');
368
369 Option trialCount =
370 OptionBuilder
371 .withLongOpt("trials")
372 .withArgName("n")
373 .hasArg()
374 .withDescription(
375 "specify how many times to run the whole benchmark")
376 .create('t');
377
378 Option useRawFs =
379 OptionBuilder
380 .withLongOpt("rawfs")
381 .withDescription("use raw instead of checksummed file system")
382 .create();
383
384 Option help =
385 OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
386 "show this screen").create("h");
387
388 return new Options().addOption(compress).addOption(fileSize).addOption(
389 fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
390 .addOption(blockSz).addOption(rootDir).addOption(valueLen)
391 .addOption(operation).addOption(seekCount).addOption(file)
392 .addOption(trialCount).addOption(useRawFs).addOption(help);
393
394 }
395
396 private void processOptions(CommandLine line, Options opts)
397 throws ParseException {
398
399 if (line.hasOption('h')) {
400 HelpFormatter formatter = new HelpFormatter();
401 System.out.println("TFile and SeqFile benchmark.");
402 System.out.println();
403 formatter.printHelp(100,
404 "java ... TestTFileSeqFileComparison [options]",
405 "\nSupported options:", opts, "");
406 return;
407 }
408
409 if (line.hasOption('c')) {
410 compress = line.getOptionValue('c');
411 }
412
413 if (line.hasOption('d')) {
414 dictSize = Integer.parseInt(line.getOptionValue('d'));
415 }
416
417 if (line.hasOption('s')) {
418 fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
419 }
420
421 if (line.hasOption('i')) {
422 fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
423 }
424
425 if (line.hasOption('o')) {
426 fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
427 }
428
429 if (line.hasOption('n')) {
430 seekCount = Integer.parseInt(line.getOptionValue('n'));
431 }
432
433 if (line.hasOption('t')) {
434 trialCount = Integer.parseInt(line.getOptionValue('t'));
435 }
436
437 if (line.hasOption('k')) {
438 IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
439 minKeyLen = ir.from();
440 maxKeyLen = ir.to();
441 }
442
443 if (line.hasOption('v')) {
444 IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
445 minValLength = ir.from();
446 maxValLength = ir.to();
447 }
448
449 if (line.hasOption('b')) {
450 minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
451 }
452
453 if (line.hasOption('r')) {
454 rootDir = line.getOptionValue('r');
455 }
456
457 if (line.hasOption('f')) {
458 file = line.getOptionValue('f');
459 }
460
461 if (line.hasOption('S')) {
462 seed = Long.parseLong(line.getOptionValue('S'));
463 }
464
465 if (line.hasOption('x')) {
466 String strOp = line.getOptionValue('x');
467 if (strOp.equals("r")) {
468 op = OP_READ;
469 }
470 else if (strOp.equals("w")) {
471 op = OP_CREATE;
472 }
473 else if (strOp.equals("rw")) {
474 op = OP_CREATE | OP_READ;
475 }
476 else {
477 throw new ParseException("Unknown action specifier: " + strOp);
478 }
479 }
480
481 useRawFs = line.hasOption("rawfs");
482
483 proceed = true;
484 }
485
486 private void validateOptions() throws ParseException {
487 if (!compress.equals("none") && !compress.equals("lzo")
488 && !compress.equals("gz") && !compress.equals("snappy")) {
489 throw new ParseException("Unknown compression scheme: " + compress);
490 }
491
492 if (minKeyLen >= maxKeyLen) {
493 throw new ParseException(
494 "Max key length must be greater than min key length.");
495 }
496
497 if (minValLength >= maxValLength) {
498 throw new ParseException(
499 "Max value length must be greater than min value length.");
500 }
501
502 if (minWordLen >= maxWordLen) {
503 throw new ParseException(
504 "Max word length must be greater than min word length.");
505 }
506 return;
507 }
508
509 private void setStopProceed() {
510 proceed = false;
511 }
512
513 public boolean doCreate() {
514 return (op & OP_CREATE) != 0;
515 }
516
517 public boolean doRead() {
518 return (op & OP_READ) != 0;
519 }
520 }
521
522 public static void main(String[] argv) throws IOException {
523 TestHFileSeek testCase = new TestHFileSeek();
524 MyOptions options = new MyOptions(argv);
525
526 if (options.proceed == false) {
527 return;
528 }
529
530 testCase.options = options;
531 for (int i = 0; i < options.trialCount; i++) {
532 Log.info("Beginning trial " + (i+1));
533 testCase.setUp();
534 testCase.testSeeks();
535 testCase.tearDown();
536 }
537 }
538
// JUnit rule instance of ResourceCheckerJUnitRule attached to this test.
// NOTE(review): this class extends junit.framework.TestCase (JUnit 3 style);
// confirm that the runner in use actually applies @Rule fields here.
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
542 }
543