1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.util.Random;
23 import java.util.StringTokenizer;
24
25 import junit.framework.TestCase;
26
27 import org.apache.commons.cli.CommandLine;
28 import org.apache.commons.cli.CommandLineParser;
29 import org.apache.commons.cli.GnuParser;
30 import org.apache.commons.cli.HelpFormatter;
31 import org.apache.commons.cli.Option;
32 import org.apache.commons.cli.OptionBuilder;
33 import org.apache.commons.cli.Options;
34 import org.apache.commons.cli.ParseException;
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.fs.FSDataInputStream;
39 import org.apache.hadoop.fs.FSDataOutputStream;
40 import org.apache.hadoop.fs.FileSystem;
41 import org.apache.hadoop.fs.Path;
42 import org.apache.hadoop.fs.RawLocalFileSystem;
43 import org.apache.hadoop.hbase.HBaseTestingUtility;
44 import org.apache.hadoop.hbase.MediumTests;
45 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
46 import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
47 import org.apache.hadoop.io.BytesWritable;
48 import org.junit.experimental.categories.Category;
49
50
51
52
53
54
55
56
57
58 @Category(MediumTests.class)
59 public class TestHFileSeek extends TestCase {
60 private static final Log LOG = LogFactory.getLog(TestHFileSeek.class);
61 private static final boolean USE_PREAD = true;
62 private MyOptions options;
63 private Configuration conf;
64 private Path path;
65 private FileSystem fs;
66 private NanoTimer timer;
67 private Random rng;
68 private RandomDistribution.DiscreteRNG keyLenGen;
69 private KVGenerator kvGen;
70
71 @Override
72 public void setUp() throws IOException {
73 if (options == null) {
74 options = new MyOptions(new String[0]);
75 }
76
77 conf = new Configuration();
78
79 if (options.useRawFs) {
80 conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
81 }
82
83 conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
84 conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
85 path = new Path(new Path(options.rootDir), options.file);
86 fs = path.getFileSystem(conf);
87 timer = new NanoTimer(false);
88 rng = new Random(options.seed);
89 keyLenGen =
90 new RandomDistribution.Zipf(new Random(rng.nextLong()),
91 options.minKeyLen, options.maxKeyLen, 1.2);
92 RandomDistribution.DiscreteRNG valLenGen =
93 new RandomDistribution.Flat(new Random(rng.nextLong()),
94 options.minValLength, options.maxValLength);
95 RandomDistribution.DiscreteRNG wordLenGen =
96 new RandomDistribution.Flat(new Random(rng.nextLong()),
97 options.minWordLen, options.maxWordLen);
98 kvGen =
99 new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
100 options.dictSize);
101 }
102
103 @Override
104 public void tearDown() {
105 try {
106 fs.close();
107 }
108 catch (Exception e) {
109
110 }
111 }
112
113 private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
114 throws IOException {
115 if (fs.exists(name)) {
116 fs.delete(name, true);
117 }
118 FSDataOutputStream fout = fs.create(name);
119 return fout;
120 }
121
122 private void createTFile() throws IOException {
123 long totalBytes = 0;
124 FSDataOutputStream fout = createFSOutput(path, fs);
125 try {
126 Writer writer = HFile.getWriterFactoryNoCache(conf)
127 .withOutputStream(fout)
128 .withBlockSize(options.minBlockSize)
129 .withCompression(options.compress)
130 .create();
131 try {
132 BytesWritable key = new BytesWritable();
133 BytesWritable val = new BytesWritable();
134 timer.start();
135 for (long i = 0; true; ++i) {
136 if (i % 1000 == 0) {
137 if (fs.getFileStatus(path).getLen() >= options.fileSize) {
138 break;
139 }
140 }
141 kvGen.next(key, val, false);
142 byte [] k = new byte [key.getLength()];
143 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
144 byte [] v = new byte [val.getLength()];
145 System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
146 writer.append(k, v);
147 totalBytes += key.getLength();
148 totalBytes += val.getLength();
149 }
150 timer.stop();
151 }
152 finally {
153 writer.close();
154 }
155 }
156 finally {
157 fout.close();
158 }
159 double duration = (double)timer.read()/1000;
160 long fsize = fs.getFileStatus(path).getLen();
161
162 System.out.printf(
163 "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
164 timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
165 / duration);
166 System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
167 timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
168 }
169
170 public void seekTFile() throws IOException {
171 int miss = 0;
172 long totalBytes = 0;
173 FSDataInputStream fsdis = fs.open(path);
174 Reader reader = HFile.createReaderFromStream(path, fsdis,
175 fs.getFileStatus(path).getLen(), new CacheConfig(conf));
176 reader.loadFileInfo();
177 KeySampler kSampler =
178 new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
179 keyLenGen);
180 HFileScanner scanner = reader.getScanner(false, USE_PREAD);
181 BytesWritable key = new BytesWritable();
182 timer.reset();
183 timer.start();
184 for (int i = 0; i < options.seekCount; ++i) {
185 kSampler.next(key);
186 byte [] k = new byte [key.getLength()];
187 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
188 if (scanner.seekTo(k) >= 0) {
189 ByteBuffer bbkey = scanner.getKey();
190 ByteBuffer bbval = scanner.getValue();
191 totalBytes += bbkey.limit();
192 totalBytes += bbval.limit();
193 }
194 else {
195 ++miss;
196 }
197 }
198 timer.stop();
199 System.out.printf(
200 "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
201 timer.toString(), NanoTimer.nanoTimeToString(timer.read()
202 / options.seekCount), options.seekCount - miss, miss,
203 (double) totalBytes / 1024 / (options.seekCount - miss));
204
205 }
206
207 public void testSeeks() throws IOException {
208 if (options.doCreate()) {
209 createTFile();
210 }
211
212 if (options.doRead()) {
213 seekTFile();
214 }
215
216 if (options.doCreate()) {
217 fs.delete(path, true);
218 }
219 }
220
221 private static class IntegerRange {
222 private final int from, to;
223
224 public IntegerRange(int from, int to) {
225 this.from = from;
226 this.to = to;
227 }
228
229 public static IntegerRange parse(String s) throws ParseException {
230 StringTokenizer st = new StringTokenizer(s, " \t,");
231 if (st.countTokens() != 2) {
232 throw new ParseException("Bad integer specification: " + s);
233 }
234 int from = Integer.parseInt(st.nextToken());
235 int to = Integer.parseInt(st.nextToken());
236 return new IntegerRange(from, to);
237 }
238
239 public int from() {
240 return from;
241 }
242
243 public int to() {
244 return to;
245 }
246 }
247
248 private static class MyOptions {
249
250 int dictSize = 1000;
251 int minWordLen = 5;
252 int maxWordLen = 20;
253
254 private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
255 String rootDir =
256 TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
257 String file = "TestTFileSeek";
258
259 String compress = "none";
260 int minKeyLen = 10;
261 int maxKeyLen = 50;
262 int minValLength = 1024;
263 int maxValLength = 2 * 1024;
264 int minBlockSize = 1 * 1024 * 1024;
265 int fsOutputBufferSize = 1;
266 int fsInputBufferSize = 0;
267
268 long fileSize = 10 * 1024 * 1024;
269 long seekCount = 1000;
270 long trialCount = 1;
271 long seed;
272 boolean useRawFs = false;
273
274 static final int OP_CREATE = 1;
275 static final int OP_READ = 2;
276 int op = OP_CREATE | OP_READ;
277
278 boolean proceed = false;
279
280 public MyOptions(String[] args) {
281 seed = System.nanoTime();
282
283 try {
284 Options opts = buildOptions();
285 CommandLineParser parser = new GnuParser();
286 CommandLine line = parser.parse(opts, args, true);
287 processOptions(line, opts);
288 validateOptions();
289 }
290 catch (ParseException e) {
291 System.out.println(e.getMessage());
292 System.out.println("Try \"--help\" option for details.");
293 setStopProceed();
294 }
295 }
296
297 public boolean proceed() {
298 return proceed;
299 }
300
301 private Options buildOptions() {
302 Option compress =
303 OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
304 .hasArg().withDescription("compression scheme").create('c');
305
306 Option fileSize =
307 OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
308 .hasArg().withDescription("target size of the file (in MB).")
309 .create('s');
310
311 Option fsInputBufferSz =
312 OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
313 .hasArg().withDescription(
314 "size of the file system input buffer (in bytes).").create(
315 'i');
316
317 Option fsOutputBufferSize =
318 OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
319 .hasArg().withDescription(
320 "size of the file system output buffer (in bytes).").create(
321 'o');
322
323 Option keyLen =
324 OptionBuilder
325 .withLongOpt("key-length")
326 .withArgName("min,max")
327 .hasArg()
328 .withDescription(
329 "the length range of the key (in bytes)")
330 .create('k');
331
332 Option valueLen =
333 OptionBuilder
334 .withLongOpt("value-length")
335 .withArgName("min,max")
336 .hasArg()
337 .withDescription(
338 "the length range of the value (in bytes)")
339 .create('v');
340
341 Option blockSz =
342 OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
343 .withDescription("minimum block size (in KB)").create('b');
344
345 Option operation =
346 OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
347 .withDescription(
348 "action: seek-only, create-only, seek-after-create").create(
349 'x');
350
351 Option rootDir =
352 OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
353 .withDescription(
354 "specify root directory where files will be created.")
355 .create('r');
356
357 Option file =
358 OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
359 .withDescription("specify the file name to be created or read.")
360 .create('f');
361
362 Option seekCount =
363 OptionBuilder
364 .withLongOpt("seek")
365 .withArgName("count")
366 .hasArg()
367 .withDescription(
368 "specify how many seek operations we perform (requires -x r or -x rw.")
369 .create('n');
370
371 Option trialCount =
372 OptionBuilder
373 .withLongOpt("trials")
374 .withArgName("n")
375 .hasArg()
376 .withDescription(
377 "specify how many times to run the whole benchmark")
378 .create('t');
379
380 Option useRawFs =
381 OptionBuilder
382 .withLongOpt("rawfs")
383 .withDescription("use raw instead of checksummed file system")
384 .create();
385
386 Option help =
387 OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
388 "show this screen").create("h");
389
390 return new Options().addOption(compress).addOption(fileSize).addOption(
391 fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
392 .addOption(blockSz).addOption(rootDir).addOption(valueLen)
393 .addOption(operation).addOption(seekCount).addOption(file)
394 .addOption(trialCount).addOption(useRawFs).addOption(help);
395
396 }
397
398 private void processOptions(CommandLine line, Options opts)
399 throws ParseException {
400
401 if (line.hasOption('h')) {
402 HelpFormatter formatter = new HelpFormatter();
403 System.out.println("TFile and SeqFile benchmark.");
404 System.out.println();
405 formatter.printHelp(100,
406 "java ... TestTFileSeqFileComparison [options]",
407 "\nSupported options:", opts, "");
408 return;
409 }
410
411 if (line.hasOption('c')) {
412 compress = line.getOptionValue('c');
413 }
414
415 if (line.hasOption('d')) {
416 dictSize = Integer.parseInt(line.getOptionValue('d'));
417 }
418
419 if (line.hasOption('s')) {
420 fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
421 }
422
423 if (line.hasOption('i')) {
424 fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
425 }
426
427 if (line.hasOption('o')) {
428 fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
429 }
430
431 if (line.hasOption('n')) {
432 seekCount = Integer.parseInt(line.getOptionValue('n'));
433 }
434
435 if (line.hasOption('t')) {
436 trialCount = Integer.parseInt(line.getOptionValue('t'));
437 }
438
439 if (line.hasOption('k')) {
440 IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
441 minKeyLen = ir.from();
442 maxKeyLen = ir.to();
443 }
444
445 if (line.hasOption('v')) {
446 IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
447 minValLength = ir.from();
448 maxValLength = ir.to();
449 }
450
451 if (line.hasOption('b')) {
452 minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
453 }
454
455 if (line.hasOption('r')) {
456 rootDir = line.getOptionValue('r');
457 }
458
459 if (line.hasOption('f')) {
460 file = line.getOptionValue('f');
461 }
462
463 if (line.hasOption('S')) {
464 seed = Long.parseLong(line.getOptionValue('S'));
465 }
466
467 if (line.hasOption('x')) {
468 String strOp = line.getOptionValue('x');
469 if (strOp.equals("r")) {
470 op = OP_READ;
471 }
472 else if (strOp.equals("w")) {
473 op = OP_CREATE;
474 }
475 else if (strOp.equals("rw")) {
476 op = OP_CREATE | OP_READ;
477 }
478 else {
479 throw new ParseException("Unknown action specifier: " + strOp);
480 }
481 }
482
483 useRawFs = line.hasOption("rawfs");
484
485 proceed = true;
486 }
487
488 private void validateOptions() throws ParseException {
489 if (!compress.equals("none") && !compress.equals("lzo")
490 && !compress.equals("gz") && !compress.equals("snappy")) {
491 throw new ParseException("Unknown compression scheme: " + compress);
492 }
493
494 if (minKeyLen >= maxKeyLen) {
495 throw new ParseException(
496 "Max key length must be greater than min key length.");
497 }
498
499 if (minValLength >= maxValLength) {
500 throw new ParseException(
501 "Max value length must be greater than min value length.");
502 }
503
504 if (minWordLen >= maxWordLen) {
505 throw new ParseException(
506 "Max word length must be greater than min word length.");
507 }
508 return;
509 }
510
511 private void setStopProceed() {
512 proceed = false;
513 }
514
515 public boolean doCreate() {
516 return (op & OP_CREATE) != 0;
517 }
518
519 public boolean doRead() {
520 return (op & OP_READ) != 0;
521 }
522 }
523
524 public static void main(String[] argv) throws IOException {
525 TestHFileSeek testCase = new TestHFileSeek();
526 MyOptions options = new MyOptions(argv);
527
528 if (options.proceed == false) {
529 return;
530 }
531
532 testCase.options = options;
533 for (int i = 0; i < options.trialCount; i++) {
534 LOG.info("Beginning trial " + (i+1));
535 testCase.setUp();
536 testCase.testSeeks();
537 testCase.tearDown();
538 }
539 }
540
541 @org.junit.Rule
542 public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
543 new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
544 }
545