1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.util.Random;
23 import java.util.StringTokenizer;
24
25 import junit.framework.TestCase;
26
27 import org.apache.commons.cli.CommandLine;
28 import org.apache.commons.cli.CommandLineParser;
29 import org.apache.commons.cli.GnuParser;
30 import org.apache.commons.cli.HelpFormatter;
31 import org.apache.commons.cli.Option;
32 import org.apache.commons.cli.OptionBuilder;
33 import org.apache.commons.cli.Options;
34 import org.apache.commons.cli.ParseException;
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.fs.FSDataInputStream;
39 import org.apache.hadoop.fs.FSDataOutputStream;
40 import org.apache.hadoop.fs.FileSystem;
41 import org.apache.hadoop.fs.Path;
42 import org.apache.hadoop.fs.RawLocalFileSystem;
43 import org.apache.hadoop.hbase.HBaseTestingUtility;
44 import org.apache.hadoop.hbase.KeyValue;
45 import org.apache.hadoop.hbase.MediumTests;
46 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
47 import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
48 import org.apache.hadoop.io.BytesWritable;
49 import org.junit.experimental.categories.Category;
50
51
52
53
54
55
56
57
58
59 @Category(MediumTests.class)
60 public class TestHFileSeek extends TestCase {
61 private static final boolean USE_PREAD = true;
62 private MyOptions options;
63 private Configuration conf;
64 private Path path;
65 private FileSystem fs;
66 private NanoTimer timer;
67 private Random rng;
68 private RandomDistribution.DiscreteRNG keyLenGen;
69 private KVGenerator kvGen;
70
71 private static final Log LOG = LogFactory.getLog(TestHFileSeek.class);
72
73 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
74
75 @Override
76 public void setUp() throws IOException {
77 if (options == null) {
78 options = new MyOptions(new String[0]);
79 }
80
81 conf = new Configuration();
82
83 if (options.useRawFs) {
84 conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
85 }
86
87 conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
88 conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
89 path = new Path(new Path(options.rootDir), options.file);
90 fs = path.getFileSystem(conf);
91 timer = new NanoTimer(false);
92 rng = new Random(options.seed);
93 keyLenGen =
94 new RandomDistribution.Zipf(new Random(rng.nextLong()),
95 options.minKeyLen, options.maxKeyLen, 1.2);
96 RandomDistribution.DiscreteRNG valLenGen =
97 new RandomDistribution.Flat(new Random(rng.nextLong()),
98 options.minValLength, options.maxValLength);
99 RandomDistribution.DiscreteRNG wordLenGen =
100 new RandomDistribution.Flat(new Random(rng.nextLong()),
101 options.minWordLen, options.maxWordLen);
102 kvGen =
103 new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
104 options.dictSize);
105 }
106
107 @Override
108 public void tearDown() {
109 try {
110 fs.close();
111 }
112 catch (Exception e) {
113
114 }
115 }
116
117 private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
118 throws IOException {
119 if (fs.exists(name)) {
120 fs.delete(name, true);
121 }
122 FSDataOutputStream fout = fs.create(name);
123 return fout;
124 }
125
126 private void createTFile() throws IOException {
127 long totalBytes = 0;
128 FSDataOutputStream fout = createFSOutput(path, fs);
129 try {
130 Writer writer = HFile.getWriterFactoryNoCache(conf)
131 .withOutputStream(fout)
132 .withBlockSize(options.minBlockSize)
133 .withCompression(options.compress)
134 .withComparator(new KeyValue.RawBytesComparator())
135 .create();
136 try {
137 BytesWritable key = new BytesWritable();
138 BytesWritable val = new BytesWritable();
139 timer.start();
140 for (long i = 0; true; ++i) {
141 if (i % 1000 == 0) {
142 if (fs.getFileStatus(path).getLen() >= options.fileSize) {
143 break;
144 }
145 }
146 kvGen.next(key, val, false);
147 byte [] k = new byte [key.getLength()];
148 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
149 byte [] v = new byte [val.getLength()];
150 System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
151 writer.append(k, v);
152 totalBytes += key.getLength();
153 totalBytes += val.getLength();
154 }
155 timer.stop();
156 }
157 finally {
158 writer.close();
159 }
160 }
161 finally {
162 fout.close();
163 }
164 double duration = (double)timer.read()/1000;
165 long fsize = fs.getFileStatus(path).getLen();
166
167 System.out.printf(
168 "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
169 timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
170 / duration);
171 System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
172 timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
173 }
174
175 public void seekTFile() throws IOException {
176 int miss = 0;
177 long totalBytes = 0;
178 FSDataInputStream fsdis = fs.open(path);
179 Reader reader = HFile.createReaderFromStream(path, fsdis,
180 fs.getFileStatus(path).getLen(), new CacheConfig(conf));
181 reader.loadFileInfo();
182 KeySampler kSampler =
183 new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
184 keyLenGen);
185 HFileScanner scanner = reader.getScanner(false, USE_PREAD);
186 BytesWritable key = new BytesWritable();
187 timer.reset();
188 timer.start();
189 for (int i = 0; i < options.seekCount; ++i) {
190 kSampler.next(key);
191 byte [] k = new byte [key.getLength()];
192 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
193 if (scanner.seekTo(k) >= 0) {
194 ByteBuffer bbkey = scanner.getKey();
195 ByteBuffer bbval = scanner.getValue();
196 totalBytes += bbkey.limit();
197 totalBytes += bbval.limit();
198 }
199 else {
200 ++miss;
201 }
202 }
203 timer.stop();
204 System.out.printf(
205 "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
206 timer.toString(), NanoTimer.nanoTimeToString(timer.read()
207 / options.seekCount), options.seekCount - miss, miss,
208 (double) totalBytes / 1024 / (options.seekCount - miss));
209
210 }
211
212 public void testSeeks() throws IOException {
213 if (options.doCreate()) {
214 createTFile();
215 }
216
217 if (options.doRead()) {
218 seekTFile();
219 }
220
221 if (options.doCreate()) {
222 fs.delete(path, true);
223 }
224 }
225
226 private static class IntegerRange {
227 private final int from, to;
228
229 public IntegerRange(int from, int to) {
230 this.from = from;
231 this.to = to;
232 }
233
234 public static IntegerRange parse(String s) throws ParseException {
235 StringTokenizer st = new StringTokenizer(s, " \t,");
236 if (st.countTokens() != 2) {
237 throw new ParseException("Bad integer specification: " + s);
238 }
239 int from = Integer.parseInt(st.nextToken());
240 int to = Integer.parseInt(st.nextToken());
241 return new IntegerRange(from, to);
242 }
243
244 public int from() {
245 return from;
246 }
247
248 public int to() {
249 return to;
250 }
251 }
252
253 private static class MyOptions {
254
255 int dictSize = 1000;
256 int minWordLen = 5;
257 int maxWordLen = 20;
258
259 private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
260 String rootDir =
261 TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
262 String file = "TestTFileSeek";
263
264 String compress = "none";
265 int minKeyLen = 10;
266 int maxKeyLen = 50;
267 int minValLength = 1024;
268 int maxValLength = 2 * 1024;
269 int minBlockSize = 1 * 1024 * 1024;
270 int fsOutputBufferSize = 1;
271 int fsInputBufferSize = 0;
272
273 long fileSize = 10 * 1024 * 1024;
274 long seekCount = 1000;
275 long trialCount = 1;
276 long seed;
277 boolean useRawFs = false;
278
279 static final int OP_CREATE = 1;
280 static final int OP_READ = 2;
281 int op = OP_CREATE | OP_READ;
282
283 boolean proceed = false;
284
285 public MyOptions(String[] args) {
286 seed = System.nanoTime();
287
288 try {
289 Options opts = buildOptions();
290 CommandLineParser parser = new GnuParser();
291 CommandLine line = parser.parse(opts, args, true);
292 processOptions(line, opts);
293 validateOptions();
294 }
295 catch (ParseException e) {
296 System.out.println(e.getMessage());
297 System.out.println("Try \"--help\" option for details.");
298 setStopProceed();
299 }
300 }
301
302 public boolean proceed() {
303 return proceed;
304 }
305
306 private Options buildOptions() {
307 Option compress =
308 OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
309 .hasArg().withDescription("compression scheme").create('c');
310
311 Option fileSize =
312 OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
313 .hasArg().withDescription("target size of the file (in MB).")
314 .create('s');
315
316 Option fsInputBufferSz =
317 OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
318 .hasArg().withDescription(
319 "size of the file system input buffer (in bytes).").create(
320 'i');
321
322 Option fsOutputBufferSize =
323 OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
324 .hasArg().withDescription(
325 "size of the file system output buffer (in bytes).").create(
326 'o');
327
328 Option keyLen =
329 OptionBuilder
330 .withLongOpt("key-length")
331 .withArgName("min,max")
332 .hasArg()
333 .withDescription(
334 "the length range of the key (in bytes)")
335 .create('k');
336
337 Option valueLen =
338 OptionBuilder
339 .withLongOpt("value-length")
340 .withArgName("min,max")
341 .hasArg()
342 .withDescription(
343 "the length range of the value (in bytes)")
344 .create('v');
345
346 Option blockSz =
347 OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
348 .withDescription("minimum block size (in KB)").create('b');
349
350 Option operation =
351 OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
352 .withDescription(
353 "action: seek-only, create-only, seek-after-create").create(
354 'x');
355
356 Option rootDir =
357 OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
358 .withDescription(
359 "specify root directory where files will be created.")
360 .create('r');
361
362 Option file =
363 OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
364 .withDescription("specify the file name to be created or read.")
365 .create('f');
366
367 Option seekCount =
368 OptionBuilder
369 .withLongOpt("seek")
370 .withArgName("count")
371 .hasArg()
372 .withDescription(
373 "specify how many seek operations we perform (requires -x r or -x rw.")
374 .create('n');
375
376 Option trialCount =
377 OptionBuilder
378 .withLongOpt("trials")
379 .withArgName("n")
380 .hasArg()
381 .withDescription(
382 "specify how many times to run the whole benchmark")
383 .create('t');
384
385 Option useRawFs =
386 OptionBuilder
387 .withLongOpt("rawfs")
388 .withDescription("use raw instead of checksummed file system")
389 .create();
390
391 Option help =
392 OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
393 "show this screen").create("h");
394
395 return new Options().addOption(compress).addOption(fileSize).addOption(
396 fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
397 .addOption(blockSz).addOption(rootDir).addOption(valueLen)
398 .addOption(operation).addOption(seekCount).addOption(file)
399 .addOption(trialCount).addOption(useRawFs).addOption(help);
400
401 }
402
403 private void processOptions(CommandLine line, Options opts)
404 throws ParseException {
405
406 if (line.hasOption('h')) {
407 HelpFormatter formatter = new HelpFormatter();
408 System.out.println("TFile and SeqFile benchmark.");
409 System.out.println();
410 formatter.printHelp(100,
411 "java ... TestTFileSeqFileComparison [options]",
412 "\nSupported options:", opts, "");
413 return;
414 }
415
416 if (line.hasOption('c')) {
417 compress = line.getOptionValue('c');
418 }
419
420 if (line.hasOption('d')) {
421 dictSize = Integer.parseInt(line.getOptionValue('d'));
422 }
423
424 if (line.hasOption('s')) {
425 fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
426 }
427
428 if (line.hasOption('i')) {
429 fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
430 }
431
432 if (line.hasOption('o')) {
433 fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
434 }
435
436 if (line.hasOption('n')) {
437 seekCount = Integer.parseInt(line.getOptionValue('n'));
438 }
439
440 if (line.hasOption('t')) {
441 trialCount = Integer.parseInt(line.getOptionValue('t'));
442 }
443
444 if (line.hasOption('k')) {
445 IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
446 minKeyLen = ir.from();
447 maxKeyLen = ir.to();
448 }
449
450 if (line.hasOption('v')) {
451 IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
452 minValLength = ir.from();
453 maxValLength = ir.to();
454 }
455
456 if (line.hasOption('b')) {
457 minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
458 }
459
460 if (line.hasOption('r')) {
461 rootDir = line.getOptionValue('r');
462 }
463
464 if (line.hasOption('f')) {
465 file = line.getOptionValue('f');
466 }
467
468 if (line.hasOption('S')) {
469 seed = Long.parseLong(line.getOptionValue('S'));
470 }
471
472 if (line.hasOption('x')) {
473 String strOp = line.getOptionValue('x');
474 if (strOp.equals("r")) {
475 op = OP_READ;
476 }
477 else if (strOp.equals("w")) {
478 op = OP_CREATE;
479 }
480 else if (strOp.equals("rw")) {
481 op = OP_CREATE | OP_READ;
482 }
483 else {
484 throw new ParseException("Unknown action specifier: " + strOp);
485 }
486 }
487
488 useRawFs = line.hasOption("rawfs");
489
490 proceed = true;
491 }
492
493 private void validateOptions() throws ParseException {
494 if (!compress.equals("none") && !compress.equals("lzo")
495 && !compress.equals("gz") && !compress.equals("snappy")) {
496 throw new ParseException("Unknown compression scheme: " + compress);
497 }
498
499 if (minKeyLen >= maxKeyLen) {
500 throw new ParseException(
501 "Max key length must be greater than min key length.");
502 }
503
504 if (minValLength >= maxValLength) {
505 throw new ParseException(
506 "Max value length must be greater than min value length.");
507 }
508
509 if (minWordLen >= maxWordLen) {
510 throw new ParseException(
511 "Max word length must be greater than min word length.");
512 }
513 return;
514 }
515
516 private void setStopProceed() {
517 proceed = false;
518 }
519
520 public boolean doCreate() {
521 return (op & OP_CREATE) != 0;
522 }
523
524 public boolean doRead() {
525 return (op & OP_READ) != 0;
526 }
527 }
528
529 public static void main(String[] argv) throws IOException {
530 TestHFileSeek testCase = new TestHFileSeek();
531 MyOptions options = new MyOptions(argv);
532
533 if (options.proceed == false) {
534 return;
535 }
536
537 testCase.options = options;
538 for (int i = 0; i < options.trialCount; i++) {
539 LOG.info("Beginning trial " + (i+1));
540 testCase.setUp();
541 testCase.testSeeks();
542 testCase.tearDown();
543 }
544 }
545
546 }
547