1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.security.SecureRandom;
23 import java.text.DateFormat;
24 import java.text.SimpleDateFormat;
25 import java.util.Random;
26
27 import org.apache.commons.cli.CommandLine;
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.fs.FSDataInputStream;
30 import org.apache.hadoop.fs.FSDataOutputStream;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.hbase.HBaseConfiguration;
34 import org.apache.hadoop.hbase.HBaseTestingUtility;
35 import org.apache.hadoop.hbase.HConstants;
36 import org.apache.hadoop.hbase.KeyValue;
37 import org.apache.hadoop.hbase.io.crypto.Encryption;
38 import org.apache.hadoop.hbase.io.crypto.KeyProviderForTesting;
39 import org.apache.hadoop.hbase.io.crypto.aes.AES;
40 import org.apache.hadoop.hbase.util.AbstractHBaseTool;
41 import org.apache.hadoop.io.BytesWritable;
42 import org.apache.hadoop.io.SequenceFile;
43 import org.apache.hadoop.io.compress.CompressionCodec;
44 import org.apache.hadoop.io.compress.GzipCodec;
45 import org.apache.hadoop.util.ToolRunner;
46
47
48
49
50
51
52
53
54
55 public class TestHFilePerformance extends AbstractHBaseTool {
56 private HBaseTestingUtility TEST_UTIL;
57 private static String ROOT_DIR;
58 private FileSystem fs;
59 private long startTimeEpoch;
60 private long finishTimeEpoch;
61 private DateFormat formatter;
62
63 @Override
64 public void setConf(Configuration conf) {
65 super.setConf(conf);
66 try {
67 fs = FileSystem.get(conf);
68 } catch (IOException e) {
69 throw new RuntimeException(e);
70 }
71 conf.set(HConstants.CRYPTO_KEYPROVIDER_CONF_KEY, KeyProviderForTesting.class.getName());
72 conf.set(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, "hbase");
73 formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
74 TEST_UTIL = new HBaseTestingUtility(conf);
75 ROOT_DIR = TEST_UTIL.getDataTestDir("TestHFilePerformance").toString();
76 }
77
78 public void startTime() {
79 startTimeEpoch = System.currentTimeMillis();
80 System.out.println(formatTime() + " Started timing.");
81 }
82
83 public void stopTime() {
84 finishTimeEpoch = System.currentTimeMillis();
85 System.out.println(formatTime() + " Stopped timing.");
86 }
87
88 public long getIntervalMillis() {
89 return finishTimeEpoch - startTimeEpoch;
90 }
91
92 public void printlnWithTimestamp(String message) {
93 System.out.println(formatTime() + " " + message);
94 }
95
96
97
98
99 public String formatTime(long milis){
100 return formatter.format(milis);
101 }
102
103 public String formatTime(){
104 return formatTime(System.currentTimeMillis());
105 }
106
107 private FSDataOutputStream createFSOutput(Path name) throws IOException {
108 if (fs.exists(name))
109 fs.delete(name, true);
110 FSDataOutputStream fout = fs.create(name);
111 return fout;
112 }
113
114
115
116
117
118 private static class KeyValueGenerator {
119 Random keyRandomizer;
120 Random valueRandomizer;
121 long randomValueRatio = 3;
122 long valueSequence = 0 ;
123
124
125 KeyValueGenerator() {
126 keyRandomizer = new Random(0L);
127 valueRandomizer = new Random(1L);
128 }
129
130
131 void getKey(byte[] key) {
132 keyRandomizer.nextBytes(key);
133 }
134
135 void getValue(byte[] value) {
136 if (valueSequence % randomValueRatio == 0)
137 valueRandomizer.nextBytes(value);
138 valueSequence++;
139 }
140 }
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155 public void timeWrite(String fileType, int keyLength, int valueLength,
156 String codecName, String cipherName, long rows, String writeMethod, int minBlockSize)
157 throws IOException {
158 System.out.println("File Type: " + fileType);
159 System.out.println("Writing " + fileType + " with codecName: " + codecName +
160 " cipherName: " + cipherName);
161 long totalBytesWritten = 0;
162
163
164
165 byte[] key = new byte[keyLength];
166 byte[] value = new byte[valueLength];
167 KeyValueGenerator generator = new KeyValueGenerator();
168
169 startTime();
170
171 Path path = new Path(ROOT_DIR, fileType + ".Performance");
172 System.out.println(ROOT_DIR + Path.SEPARATOR + path.getName());
173 FSDataOutputStream fout = createFSOutput(path);
174
175 if ("HFile".equals(fileType)){
176 HFileContextBuilder builder = new HFileContextBuilder()
177 .withCompression(AbstractHFileWriter.compressionByName(codecName))
178 .withBlockSize(minBlockSize);
179 if (cipherName != "none") {
180 byte[] cipherKey = new byte[AES.KEY_LENGTH];
181 new SecureRandom().nextBytes(cipherKey);
182 builder.withEncryptionContext(
183 Encryption.newContext(conf)
184 .setCipher(Encryption.getCipher(conf, cipherName))
185 .setKey(cipherKey));
186 }
187 HFileContext context = builder.build();
188 System.out.println("HFile write method: ");
189 HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
190 .withOutputStream(fout)
191 .withFileContext(context)
192 .withComparator(new KeyValue.RawBytesComparator())
193 .create();
194
195
196 for (long l=0; l<rows; l++ ) {
197 generator.getKey(key);
198 generator.getValue(value);
199 writer.append(key, value);
200 totalBytesWritten += key.length;
201 totalBytesWritten += value.length;
202 }
203 writer.close();
204 } else if ("SequenceFile".equals(fileType)){
205 CompressionCodec codec = null;
206 if ("gz".equals(codecName))
207 codec = new GzipCodec();
208 else if (!"none".equals(codecName))
209 throw new IOException("Codec not supported.");
210
211 SequenceFile.Writer writer;
212
213
214
215
216 if (!"none".equals(codecName))
217 writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
218 BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
219 else
220 writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
221 BytesWritable.class, SequenceFile.CompressionType.NONE, null);
222
223 BytesWritable keyBsw;
224 BytesWritable valBsw;
225 for (long l=0; l<rows; l++ ) {
226
227 generator.getKey(key);
228 keyBsw = new BytesWritable(key);
229 totalBytesWritten += keyBsw.getSize();
230
231 generator.getValue(value);
232 valBsw = new BytesWritable(value);
233 writer.append(keyBsw, valBsw);
234 totalBytesWritten += valBsw.getSize();
235 }
236
237 writer.close();
238 } else
239 throw new IOException("File Type is not supported");
240
241 fout.close();
242 stopTime();
243
244 printlnWithTimestamp("Data written: ");
245 printlnWithTimestamp(" rate = " +
246 totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
247 printlnWithTimestamp(" total = " + totalBytesWritten + "B");
248
249 printlnWithTimestamp("File written: ");
250 printlnWithTimestamp(" rate = " +
251 fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
252 printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
253 }
254
255 public void timeReading(String fileType, int keyLength, int valueLength,
256 long rows, int method) throws IOException {
257 System.out.println("Reading file of type: " + fileType);
258 Path path = new Path(ROOT_DIR, fileType + ".Performance");
259 System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
260 long totalBytesRead = 0;
261
262
263 ByteBuffer val;
264
265 ByteBuffer key;
266
267 startTime();
268 FSDataInputStream fin = fs.open(path);
269
270 if ("HFile".equals(fileType)){
271 HFile.Reader reader = HFile.createReaderFromStream(path, fs.open(path),
272 fs.getFileStatus(path).getLen(), new CacheConfig(conf), conf);
273 reader.loadFileInfo();
274 switch (method) {
275
276 case 0:
277 case 1:
278 default:
279 {
280 HFileScanner scanner = reader.getScanner(false, false);
281 scanner.seekTo();
282 for (long l=0; l<rows; l++ ) {
283 key = scanner.getKey();
284 val = scanner.getValue();
285 totalBytesRead += key.limit() + val.limit();
286 scanner.next();
287 }
288 }
289 break;
290 }
291 reader.close();
292 } else if("SequenceFile".equals(fileType)){
293
294 SequenceFile.Reader reader;
295 reader = new SequenceFile.Reader(fs, path, new Configuration());
296
297 if (reader.getCompressionCodec() != null) {
298 printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
299 } else
300 printlnWithTimestamp("Compression codec class: " + "none");
301
302 BytesWritable keyBsw = new BytesWritable();
303 BytesWritable valBsw = new BytesWritable();
304
305 for (long l=0; l<rows; l++ ) {
306 reader.next(keyBsw, valBsw);
307 totalBytesRead += keyBsw.getSize() + valBsw.getSize();
308 }
309 reader.close();
310
311
312
313 } else {
314 throw new IOException("File Type not supported.");
315 }
316
317
318
319 fin.close();
320 stopTime();
321
322
323 printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
324 printlnWithTimestamp("Data read: ");
325 printlnWithTimestamp(" rate = " +
326 totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
327 printlnWithTimestamp(" total = " + totalBytesRead + "B");
328
329 printlnWithTimestamp("File read: ");
330 printlnWithTimestamp(" rate = " +
331 fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
332 printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
333
334
335
336 }
337
338 public void testRunComparisons() throws IOException {
339
340 int keyLength = 100;
341 int valueLength = 5*1024;
342 int minBlockSize = 10*1024*1024;
343 int rows = 10000;
344
345 System.out.println("****************************** Sequence File *****************************");
346
347 timeWrite("SequenceFile", keyLength, valueLength, "none", "none", rows, null, minBlockSize);
348 System.out.println("\n+++++++\n");
349 timeReading("SequenceFile", keyLength, valueLength, rows, -1);
350
351 System.out.println("");
352 System.out.println("----------------------");
353 System.out.println("");
354
355
356
357
358
359
360
361
362
363
364
365
366 try {
367 timeWrite("SequenceFile", keyLength, valueLength, "gz", "none", rows, null,
368 minBlockSize);
369 System.out.println("\n+++++++\n");
370 timeReading("SequenceFile", keyLength, valueLength, rows, -1);
371 } catch (IllegalArgumentException e) {
372 System.out.println("Skipping sequencefile gz: " + e.getMessage());
373 }
374
375
376 System.out.println("\n\n\n");
377 System.out.println("****************************** HFile *****************************");
378
379 timeWrite("HFile", keyLength, valueLength, "none", "none", rows, null, minBlockSize);
380 System.out.println("\n+++++++\n");
381 timeReading("HFile", keyLength, valueLength, rows, 0 );
382
383 System.out.println("");
384 System.out.println("----------------------");
385 System.out.println("");
386
387 timeWrite("HFile", keyLength, valueLength, "none", "aes", rows, null, minBlockSize);
388 System.out.println("\n+++++++\n");
389 timeReading("HFile", keyLength, valueLength, rows, 0 );
390
391 System.out.println("");
392 System.out.println("----------------------");
393 System.out.println("");
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409 timeWrite("HFile", keyLength, valueLength, "gz", "none", rows, null, minBlockSize);
410 System.out.println("\n+++++++\n");
411 timeReading("HFile", keyLength, valueLength, rows, 0 );
412
413 System.out.println("");
414 System.out.println("----------------------");
415 System.out.println("");
416
417 timeWrite("HFile", keyLength, valueLength, "gz", "aes", rows, null, minBlockSize);
418 System.out.println("\n+++++++\n");
419 timeReading("HFile", keyLength, valueLength, rows, 0 );
420
421 System.out.println("\n\n\n\nNotes: ");
422 System.out.println(" * Timing includes open/closing of files.");
423 System.out.println(" * Timing includes reading both Key and Value");
424 System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
425 "dictionary with care for distributation of words is under development.");
426 System.out.println(" * Timing of write currently, includes random value/key generations. " +
427 "Which is the same for Sequence File and HFile. Another possibility is to generate " +
428 "test data beforehand");
429 System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
430 "ideas, for next step we do a large dummy read between benchmark read to dismantle " +
431 "caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
432 " the same method several times and flood cache every time and average it to get a" +
433 " better number.");
434 }
435
436 @Override
437 protected void addOptions() {
438 }
439
440 @Override
441 protected void processOptions(CommandLine cmd) {
442 }
443
444 @Override
445 protected int doWork() throws Exception {
446 testRunComparisons();
447 return 0;
448 }
449
450 public static void main(String[] args) throws Exception {
451 int ret = ToolRunner.run(HBaseConfiguration.create(), new TestHFilePerformance(), args);
452 System.exit(ret);
453 }
454 }