1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.util.Random;
23 import java.util.StringTokenizer;
24
25 import junit.framework.TestCase;
26
27 import org.apache.commons.cli.CommandLine;
28 import org.apache.commons.cli.CommandLineParser;
29 import org.apache.commons.cli.GnuParser;
30 import org.apache.commons.cli.HelpFormatter;
31 import org.apache.commons.cli.Option;
32 import org.apache.commons.cli.OptionBuilder;
33 import org.apache.commons.cli.Options;
34 import org.apache.commons.cli.ParseException;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FSDataInputStream;
37 import org.apache.hadoop.fs.FSDataOutputStream;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.hbase.HBaseTestingUtility;
41 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
42 import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
43 import org.apache.hadoop.io.BytesWritable;
44
45
46
47
48
49
50
51
52
53 public class TestHFileSeek extends TestCase {
54 private MyOptions options;
55 private Configuration conf;
56 private Path path;
57 private FileSystem fs;
58 private NanoTimer timer;
59 private Random rng;
60 private RandomDistribution.DiscreteRNG keyLenGen;
61 private KVGenerator kvGen;
62
63 @Override
64 public void setUp() throws IOException {
65 if (options == null) {
66 options = new MyOptions(new String[0]);
67 }
68
69 conf = new Configuration();
70 conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
71 conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
72 path = new Path(new Path(options.rootDir), options.file);
73 fs = path.getFileSystem(conf);
74 timer = new NanoTimer(false);
75 rng = new Random(options.seed);
76 keyLenGen =
77 new RandomDistribution.Zipf(new Random(rng.nextLong()),
78 options.minKeyLen, options.maxKeyLen, 1.2);
79 RandomDistribution.DiscreteRNG valLenGen =
80 new RandomDistribution.Flat(new Random(rng.nextLong()),
81 options.minValLength, options.maxValLength);
82 RandomDistribution.DiscreteRNG wordLenGen =
83 new RandomDistribution.Flat(new Random(rng.nextLong()),
84 options.minWordLen, options.maxWordLen);
85 kvGen =
86 new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
87 options.dictSize);
88 }
89
90 @Override
91 public void tearDown() {
92 try {
93 fs.close();
94 }
95 catch (Exception e) {
96
97 }
98 }
99
100 private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
101 throws IOException {
102 if (fs.exists(name)) {
103 fs.delete(name, true);
104 }
105 FSDataOutputStream fout = fs.create(name);
106 return fout;
107 }
108
109 private void createTFile() throws IOException {
110 long totalBytes = 0;
111 FSDataOutputStream fout = createFSOutput(path, fs);
112 try {
113 Writer writer =
114 new Writer(fout, options.minBlockSize, options.compress, null);
115 try {
116 BytesWritable key = new BytesWritable();
117 BytesWritable val = new BytesWritable();
118 timer.start();
119 for (long i = 0; true; ++i) {
120 if (i % 1000 == 0) {
121 if (fs.getFileStatus(path).getLen() >= options.fileSize) {
122 break;
123 }
124 }
125 kvGen.next(key, val, false);
126 byte [] k = new byte [key.getLength()];
127 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
128 byte [] v = new byte [val.getLength()];
129 System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
130 writer.append(k, v);
131 totalBytes += key.getLength();
132 totalBytes += val.getLength();
133 }
134 timer.stop();
135 }
136 finally {
137 writer.close();
138 }
139 }
140 finally {
141 fout.close();
142 }
143 double duration = (double)timer.read()/1000;
144 long fsize = fs.getFileStatus(path).getLen();
145
146 System.out.printf(
147 "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
148 timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
149 / duration);
150 System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
151 timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
152 }
153
154 public void seekTFile() throws IOException {
155 int miss = 0;
156 long totalBytes = 0;
157 FSDataInputStream fsdis = fs.open(path);
158 Reader reader =
159 new Reader(fsdis, fs.getFileStatus(path).getLen(), null, false);
160 reader.loadFileInfo();
161 KeySampler kSampler =
162 new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
163 keyLenGen);
164 HFileScanner scanner = reader.getScanner(false, false);
165 BytesWritable key = new BytesWritable();
166 timer.reset();
167 timer.start();
168 for (int i = 0; i < options.seekCount; ++i) {
169 kSampler.next(key);
170 byte [] k = new byte [key.getLength()];
171 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
172 if (scanner.seekTo(k) >= 0) {
173 ByteBuffer bbkey = scanner.getKey();
174 ByteBuffer bbval = scanner.getValue();
175 totalBytes += bbkey.limit();
176 totalBytes += bbval.limit();
177 }
178 else {
179 ++miss;
180 }
181 }
182 timer.stop();
183 System.out.printf(
184 "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
185 timer.toString(), NanoTimer.nanoTimeToString(timer.read()
186 / options.seekCount), options.seekCount - miss, miss,
187 (double) totalBytes / 1024 / (options.seekCount - miss));
188
189 }
190
191 public void testSeeks() throws IOException {
192 if (options.doCreate()) {
193 createTFile();
194 }
195
196 if (options.doRead()) {
197 seekTFile();
198 }
199
200 if (options.doCreate()) {
201 fs.delete(path, true);
202 }
203 }
204
205 private static class IntegerRange {
206 private final int from, to;
207
208 public IntegerRange(int from, int to) {
209 this.from = from;
210 this.to = to;
211 }
212
213 public static IntegerRange parse(String s) throws ParseException {
214 StringTokenizer st = new StringTokenizer(s, " \t,");
215 if (st.countTokens() != 2) {
216 throw new ParseException("Bad integer specification: " + s);
217 }
218 int from = Integer.parseInt(st.nextToken());
219 int to = Integer.parseInt(st.nextToken());
220 return new IntegerRange(from, to);
221 }
222
223 public int from() {
224 return from;
225 }
226
227 public int to() {
228 return to;
229 }
230 }
231
232 private static class MyOptions {
233
234 int dictSize = 1000;
235 int minWordLen = 5;
236 int maxWordLen = 20;
237
238 String rootDir =
239 HBaseTestingUtility.getTestDir("TestTFileSeek").toString();
240 String file = "TestTFileSeek";
241
242 String compress = "none";
243 int minKeyLen = 10;
244 int maxKeyLen = 50;
245 int minValLength = 1024;
246 int maxValLength = 2 * 1024;
247 int minBlockSize = 1 * 1024 * 1024;
248 int fsOutputBufferSize = 1;
249 int fsInputBufferSize = 0;
250
251 long fileSize = 10 * 1024 * 1024;
252 long seekCount = 1000;
253 long seed;
254
255 static final int OP_CREATE = 1;
256 static final int OP_READ = 2;
257 int op = OP_CREATE | OP_READ;
258
259 boolean proceed = false;
260
261 public MyOptions(String[] args) {
262 seed = System.nanoTime();
263
264 try {
265 Options opts = buildOptions();
266 CommandLineParser parser = new GnuParser();
267 CommandLine line = parser.parse(opts, args, true);
268 processOptions(line, opts);
269 validateOptions();
270 }
271 catch (ParseException e) {
272 System.out.println(e.getMessage());
273 System.out.println("Try \"--help\" option for details.");
274 setStopProceed();
275 }
276 }
277
278 public boolean proceed() {
279 return proceed;
280 }
281
282 private Options buildOptions() {
283 Option compress =
284 OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz]")
285 .hasArg().withDescription("compression scheme").create('c');
286
287 Option fileSize =
288 OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
289 .hasArg().withDescription("target size of the file (in MB).")
290 .create('s');
291
292 Option fsInputBufferSz =
293 OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
294 .hasArg().withDescription(
295 "size of the file system input buffer (in bytes).").create(
296 'i');
297
298 Option fsOutputBufferSize =
299 OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
300 .hasArg().withDescription(
301 "size of the file system output buffer (in bytes).").create(
302 'o');
303
304 Option keyLen =
305 OptionBuilder
306 .withLongOpt("key-length")
307 .withArgName("min,max")
308 .hasArg()
309 .withDescription(
310 "the length range of the key (in bytes)")
311 .create('k');
312
313 Option valueLen =
314 OptionBuilder
315 .withLongOpt("value-length")
316 .withArgName("min,max")
317 .hasArg()
318 .withDescription(
319 "the length range of the value (in bytes)")
320 .create('v');
321
322 Option blockSz =
323 OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
324 .withDescription("minimum block size (in KB)").create('b');
325
326 Option operation =
327 OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
328 .withDescription(
329 "action: seek-only, create-only, seek-after-create").create(
330 'x');
331
332 Option rootDir =
333 OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
334 .withDescription(
335 "specify root directory where files will be created.")
336 .create('r');
337
338 Option file =
339 OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
340 .withDescription("specify the file name to be created or read.")
341 .create('f');
342
343 Option seekCount =
344 OptionBuilder
345 .withLongOpt("seek")
346 .withArgName("count")
347 .hasArg()
348 .withDescription(
349 "specify how many seek operations we perform (requires -x r or -x rw.")
350 .create('n');
351
352 Option help =
353 OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
354 "show this screen").create("h");
355
356 return new Options().addOption(compress).addOption(fileSize).addOption(
357 fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
358 .addOption(blockSz).addOption(rootDir).addOption(valueLen).addOption(
359 operation).addOption(seekCount).addOption(file).addOption(help);
360
361 }
362
363 private void processOptions(CommandLine line, Options opts)
364 throws ParseException {
365
366 if (line.hasOption('h')) {
367 HelpFormatter formatter = new HelpFormatter();
368 System.out.println("TFile and SeqFile benchmark.");
369 System.out.println();
370 formatter.printHelp(100,
371 "java ... TestTFileSeqFileComparison [options]",
372 "\nSupported options:", opts, "");
373 return;
374 }
375
376 if (line.hasOption('c')) {
377 compress = line.getOptionValue('c');
378 }
379
380 if (line.hasOption('d')) {
381 dictSize = Integer.parseInt(line.getOptionValue('d'));
382 }
383
384 if (line.hasOption('s')) {
385 fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
386 }
387
388 if (line.hasOption('i')) {
389 fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
390 }
391
392 if (line.hasOption('o')) {
393 fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
394 }
395
396 if (line.hasOption('n')) {
397 seekCount = Integer.parseInt(line.getOptionValue('n'));
398 }
399
400 if (line.hasOption('k')) {
401 IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
402 minKeyLen = ir.from();
403 maxKeyLen = ir.to();
404 }
405
406 if (line.hasOption('v')) {
407 IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
408 minValLength = ir.from();
409 maxValLength = ir.to();
410 }
411
412 if (line.hasOption('b')) {
413 minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
414 }
415
416 if (line.hasOption('r')) {
417 rootDir = line.getOptionValue('r');
418 }
419
420 if (line.hasOption('f')) {
421 file = line.getOptionValue('f');
422 }
423
424 if (line.hasOption('S')) {
425 seed = Long.parseLong(line.getOptionValue('S'));
426 }
427
428 if (line.hasOption('x')) {
429 String strOp = line.getOptionValue('x');
430 if (strOp.equals("r")) {
431 op = OP_READ;
432 }
433 else if (strOp.equals("w")) {
434 op = OP_CREATE;
435 }
436 else if (strOp.equals("rw")) {
437 op = OP_CREATE | OP_READ;
438 }
439 else {
440 throw new ParseException("Unknown action specifier: " + strOp);
441 }
442 }
443
444 proceed = true;
445 }
446
447 private void validateOptions() throws ParseException {
448 if (!compress.equals("none") && !compress.equals("lzo")
449 && !compress.equals("gz")) {
450 throw new ParseException("Unknown compression scheme: " + compress);
451 }
452
453 if (minKeyLen >= maxKeyLen) {
454 throw new ParseException(
455 "Max key length must be greater than min key length.");
456 }
457
458 if (minValLength >= maxValLength) {
459 throw new ParseException(
460 "Max value length must be greater than min value length.");
461 }
462
463 if (minWordLen >= maxWordLen) {
464 throw new ParseException(
465 "Max word length must be greater than min word length.");
466 }
467 return;
468 }
469
470 private void setStopProceed() {
471 proceed = false;
472 }
473
474 public boolean doCreate() {
475 return (op & OP_CREATE) != 0;
476 }
477
478 public boolean doRead() {
479 return (op & OP_READ) != 0;
480 }
481 }
482
483 public static void main(String[] argv) throws IOException {
484 TestHFileSeek testCase = new TestHFileSeek();
485 MyOptions options = new MyOptions(argv);
486
487 if (options.proceed == false) {
488 return;
489 }
490
491 testCase.options = options;
492 testCase.setUp();
493 testCase.testSeeks();
494 testCase.tearDown();
495 }
496 }