1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.text.DateFormat;
23 import java.text.SimpleDateFormat;
24 import java.util.Random;
25
26 import org.apache.commons.cli.CommandLine;
27 import org.apache.hadoop.conf.Configuration;
28 import org.apache.hadoop.fs.FSDataInputStream;
29 import org.apache.hadoop.fs.FSDataOutputStream;
30 import org.apache.hadoop.fs.FileSystem;
31 import org.apache.hadoop.fs.Path;
32 import org.apache.hadoop.hbase.HBaseConfiguration;
33 import org.apache.hadoop.hbase.HBaseTestingUtility;
34 import org.apache.hadoop.hbase.KeyValue;
35 import org.apache.hadoop.hbase.util.AbstractHBaseTool;
36 import org.apache.hadoop.io.BytesWritable;
37 import org.apache.hadoop.io.SequenceFile;
38 import org.apache.hadoop.io.compress.CompressionCodec;
39 import org.apache.hadoop.io.compress.GzipCodec;
40 import org.apache.hadoop.util.ToolRunner;
41
42
43
44
45
46
47
48
49
50 public class TestHFilePerformance extends AbstractHBaseTool {
51 private HBaseTestingUtility TEST_UTIL;
52 private static String ROOT_DIR;
53 private FileSystem fs;
54 private long startTimeEpoch;
55 private long finishTimeEpoch;
56 private DateFormat formatter;
57
58 @Override
59 public void setConf(Configuration conf) {
60 super.setConf(conf);
61 try {
62 fs = FileSystem.get(conf);
63 } catch (IOException e) {
64 throw new RuntimeException(e);
65 }
66 formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
67 TEST_UTIL = new HBaseTestingUtility(conf);
68 ROOT_DIR = TEST_UTIL.getDataTestDir("TestHFilePerformance").toString();
69 }
70
71 public void startTime() {
72 startTimeEpoch = System.currentTimeMillis();
73 System.out.println(formatTime() + " Started timing.");
74 }
75
76 public void stopTime() {
77 finishTimeEpoch = System.currentTimeMillis();
78 System.out.println(formatTime() + " Stopped timing.");
79 }
80
81 public long getIntervalMillis() {
82 return finishTimeEpoch - startTimeEpoch;
83 }
84
85 public void printlnWithTimestamp(String message) {
86 System.out.println(formatTime() + " " + message);
87 }
88
89
90
91
92 public String formatTime(long milis){
93 return formatter.format(milis);
94 }
95
96 public String formatTime(){
97 return formatTime(System.currentTimeMillis());
98 }
99
100 private FSDataOutputStream createFSOutput(Path name) throws IOException {
101 if (fs.exists(name))
102 fs.delete(name, true);
103 FSDataOutputStream fout = fs.create(name);
104 return fout;
105 }
106
107
108
109
110
111 private static class KeyValueGenerator {
112 Random keyRandomizer;
113 Random valueRandomizer;
114 long randomValueRatio = 3;
115 long valueSequence = 0 ;
116
117
118 KeyValueGenerator() {
119 keyRandomizer = new Random(0L);
120 valueRandomizer = new Random(1L);
121 }
122
123
124 void getKey(byte[] key) {
125 keyRandomizer.nextBytes(key);
126 }
127
128 void getValue(byte[] value) {
129 if (valueSequence % randomValueRatio == 0)
130 valueRandomizer.nextBytes(value);
131 valueSequence++;
132 }
133 }
134
135
136
137
138
139
140
141
142
143
144
145
146
147 public void timeWrite(String fileType, int keyLength, int valueLength,
148 String codecName, long rows, String writeMethod, int minBlockSize)
149 throws IOException {
150 System.out.println("File Type: " + fileType);
151 System.out.println("Writing " + fileType + " with codecName: " + codecName);
152 long totalBytesWritten = 0;
153
154
155
156 byte[] key = new byte[keyLength];
157 byte[] value = new byte[valueLength];
158 KeyValueGenerator generator = new KeyValueGenerator();
159
160 startTime();
161
162 Path path = new Path(ROOT_DIR, fileType + ".Performance");
163 System.out.println(ROOT_DIR + path.getName());
164 FSDataOutputStream fout = createFSOutput(path);
165
166 if ("HFile".equals(fileType)){
167 System.out.println("HFile write method: ");
168 HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
169 .withOutputStream(fout)
170 .withBlockSize(minBlockSize)
171 .withCompression(codecName)
172 .withComparator(new KeyValue.RawBytesComparator())
173 .create();
174
175
176 for (long l=0; l<rows; l++ ) {
177 generator.getKey(key);
178 generator.getValue(value);
179 writer.append(key, value);
180 totalBytesWritten += key.length;
181 totalBytesWritten += value.length;
182 }
183 writer.close();
184 } else if ("SequenceFile".equals(fileType)){
185 CompressionCodec codec = null;
186 if ("gz".equals(codecName))
187 codec = new GzipCodec();
188 else if (!"none".equals(codecName))
189 throw new IOException("Codec not supported.");
190
191 SequenceFile.Writer writer;
192
193
194
195
196 if (!"none".equals(codecName))
197 writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
198 BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
199 else
200 writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
201 BytesWritable.class, SequenceFile.CompressionType.NONE, null);
202
203 BytesWritable keyBsw;
204 BytesWritable valBsw;
205 for (long l=0; l<rows; l++ ) {
206
207 generator.getKey(key);
208 keyBsw = new BytesWritable(key);
209 totalBytesWritten += keyBsw.getSize();
210
211 generator.getValue(value);
212 valBsw = new BytesWritable(value);
213 writer.append(keyBsw, valBsw);
214 totalBytesWritten += valBsw.getSize();
215 }
216
217 writer.close();
218 } else
219 throw new IOException("File Type is not supported");
220
221 fout.close();
222 stopTime();
223
224 printlnWithTimestamp("Data written: ");
225 printlnWithTimestamp(" rate = " +
226 totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
227 printlnWithTimestamp(" total = " + totalBytesWritten + "B");
228
229 printlnWithTimestamp("File written: ");
230 printlnWithTimestamp(" rate = " +
231 fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
232 printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
233 }
234
235 public void timeReading(String fileType, int keyLength, int valueLength,
236 long rows, int method) throws IOException {
237 System.out.println("Reading file of type: " + fileType);
238 Path path = new Path(ROOT_DIR, fileType + ".Performance");
239 System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
240 long totalBytesRead = 0;
241
242
243 ByteBuffer val;
244
245 ByteBuffer key;
246
247 startTime();
248 FSDataInputStream fin = fs.open(path);
249
250 if ("HFile".equals(fileType)){
251 HFile.Reader reader = HFile.createReaderFromStream(path, fs.open(path),
252 fs.getFileStatus(path).getLen(), new CacheConfig(conf));
253 reader.loadFileInfo();
254 switch (method) {
255
256 case 0:
257 case 1:
258 default:
259 {
260 HFileScanner scanner = reader.getScanner(false, false);
261 scanner.seekTo();
262 for (long l=0; l<rows; l++ ) {
263 key = scanner.getKey();
264 val = scanner.getValue();
265 totalBytesRead += key.limit() + val.limit();
266 scanner.next();
267 }
268 }
269 break;
270 }
271 reader.close();
272 } else if("SequenceFile".equals(fileType)){
273
274 SequenceFile.Reader reader;
275 reader = new SequenceFile.Reader(fs, path, new Configuration());
276
277 if (reader.getCompressionCodec() != null) {
278 printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
279 } else
280 printlnWithTimestamp("Compression codec class: " + "none");
281
282 BytesWritable keyBsw = new BytesWritable();
283 BytesWritable valBsw = new BytesWritable();
284
285 for (long l=0; l<rows; l++ ) {
286 reader.next(keyBsw, valBsw);
287 totalBytesRead += keyBsw.getSize() + valBsw.getSize();
288 }
289 reader.close();
290
291
292
293 } else {
294 throw new IOException("File Type not supported.");
295 }
296
297
298
299 fin.close();
300 stopTime();
301
302
303 printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
304 printlnWithTimestamp("Data read: ");
305 printlnWithTimestamp(" rate = " +
306 totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
307 printlnWithTimestamp(" total = " + totalBytesRead + "B");
308
309 printlnWithTimestamp("File read: ");
310 printlnWithTimestamp(" rate = " +
311 fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
312 printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
313
314
315
316 }
317
318 public void testRunComparisons() throws IOException {
319
320 int keyLength = 100;
321 int valueLength = 5*1024;
322 int minBlockSize = 10*1024*1024;
323 int rows = 10000;
324
325 System.out.println("****************************** Sequence File *****************************");
326
327 timeWrite("SequenceFile", keyLength, valueLength, "none", rows, null, minBlockSize);
328 System.out.println("\n+++++++\n");
329 timeReading("SequenceFile", keyLength, valueLength, rows, -1);
330
331 System.out.println("");
332 System.out.println("----------------------");
333 System.out.println("");
334
335
336
337
338
339
340
341
342
343
344
345
346 try {
347 timeWrite("SequenceFile", keyLength, valueLength, "gz", rows, null,
348 minBlockSize);
349 System.out.println("\n+++++++\n");
350 timeReading("SequenceFile", keyLength, valueLength, rows, -1);
351 } catch (IllegalArgumentException e) {
352 System.out.println("Skipping sequencefile gz: " + e.getMessage());
353 }
354
355
356 System.out.println("\n\n\n");
357 System.out.println("****************************** HFile *****************************");
358
359 timeWrite("HFile", keyLength, valueLength, "none", rows, null, minBlockSize);
360 System.out.println("\n+++++++\n");
361 timeReading("HFile", keyLength, valueLength, rows, 0 );
362
363 System.out.println("");
364 System.out.println("----------------------");
365 System.out.println("");
366
367
368
369
370
371
372
373
374
375
376
377
378
379 timeWrite("HFile", keyLength, valueLength, "gz", rows, null, minBlockSize);
380 System.out.println("\n+++++++\n");
381 timeReading("HFile", keyLength, valueLength, rows, 0 );
382
383 System.out.println("\n\n\n\nNotes: ");
384 System.out.println(" * Timing includes open/closing of files.");
385 System.out.println(" * Timing includes reading both Key and Value");
386 System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
387 "dictionary with care for distributation of words is under development.");
388 System.out.println(" * Timing of write currently, includes random value/key generations. " +
389 "Which is the same for Sequence File and HFile. Another possibility is to generate " +
390 "test data beforehand");
391 System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
392 "ideas, for next step we do a large dummy read between benchmark read to dismantle " +
393 "caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
394 " the same method several times and flood cache every time and average it to get a" +
395 " better number.");
396 }
397
398 @Override
399 protected void addOptions() {
400 }
401
402 @Override
403 protected void processOptions(CommandLine cmd) {
404 }
405
406 @Override
407 protected int doWork() throws Exception {
408 testRunComparisons();
409 return 0;
410 }
411
412 public static void main(String[] args) throws Exception {
413 int ret = ToolRunner.run(HBaseConfiguration.create(), new TestHFilePerformance(), args);
414 System.exit(ret);
415 }
416 }