1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.text.DateFormat;
23 import java.text.SimpleDateFormat;
24 import java.util.Random;
25
26 import junit.framework.TestCase;
27
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.fs.FSDataInputStream;
30 import org.apache.hadoop.fs.FSDataOutputStream;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.hbase.HBaseTestingUtility;
34 import org.apache.hadoop.hbase.MediumTests;
35 import org.apache.hadoop.io.BytesWritable;
36 import org.apache.hadoop.io.SequenceFile;
37 import org.apache.hadoop.io.compress.CompressionCodec;
38 import org.apache.hadoop.io.compress.GzipCodec;
39 import org.junit.experimental.categories.Category;
40
41
42
43
44
45
46
47
48
49 @Category(MediumTests.class)
50 public class TestHFilePerformance extends TestCase {
51 private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
52 private static String ROOT_DIR =
53 TEST_UTIL.getDataTestDir("TestHFilePerformance").toString();
54 private FileSystem fs;
55 private Configuration conf;
56 private long startTimeEpoch;
57 private long finishTimeEpoch;
58 private DateFormat formatter;
59
60 @Override
61 public void setUp() throws IOException {
62 conf = new Configuration();
63 fs = FileSystem.get(conf);
64 formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
65 }
66
67 public void startTime() {
68 startTimeEpoch = System.currentTimeMillis();
69 System.out.println(formatTime() + " Started timing.");
70 }
71
72 public void stopTime() {
73 finishTimeEpoch = System.currentTimeMillis();
74 System.out.println(formatTime() + " Stopped timing.");
75 }
76
77 public long getIntervalMillis() {
78 return finishTimeEpoch - startTimeEpoch;
79 }
80
81 public void printlnWithTimestamp(String message) {
82 System.out.println(formatTime() + " " + message);
83 }
84
85
86
87
88 public String formatTime(long milis){
89 return formatter.format(milis);
90 }
91
92 public String formatTime(){
93 return formatTime(System.currentTimeMillis());
94 }
95
96 private FSDataOutputStream createFSOutput(Path name) throws IOException {
97 if (fs.exists(name))
98 fs.delete(name, true);
99 FSDataOutputStream fout = fs.create(name);
100 return fout;
101 }
102
103
104
105
106
107 private static class KeyValueGenerator {
108 Random keyRandomizer;
109 Random valueRandomizer;
110 long randomValueRatio = 3;
111 long valueSequence = 0 ;
112
113
114 KeyValueGenerator() {
115 keyRandomizer = new Random(0L);
116 valueRandomizer = new Random(1L);
117 }
118
119
120 void getKey(byte[] key) {
121 keyRandomizer.nextBytes(key);
122 }
123
124 void getValue(byte[] value) {
125 if (valueSequence % randomValueRatio == 0)
126 valueRandomizer.nextBytes(value);
127 valueSequence++;
128 }
129 }
130
131
132
133
134
135
136
137
138
139
140
141
142
143 public void timeWrite(String fileType, int keyLength, int valueLength,
144 String codecName, long rows, String writeMethod, int minBlockSize)
145 throws IOException {
146 System.out.println("File Type: " + fileType);
147 System.out.println("Writing " + fileType + " with codecName: " + codecName);
148 long totalBytesWritten = 0;
149
150
151
152 byte[] key = new byte[keyLength];
153 byte[] value = new byte[valueLength];
154 KeyValueGenerator generator = new KeyValueGenerator();
155
156 startTime();
157
158 Path path = new Path(ROOT_DIR, fileType + ".Performance");
159 System.out.println(ROOT_DIR + path.getName());
160 FSDataOutputStream fout = createFSOutput(path);
161
162 if ("HFile".equals(fileType)){
163 System.out.println("HFile write method: ");
164 HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
165 .withOutputStream(fout)
166 .withBlockSize(minBlockSize)
167 .withCompression(codecName)
168 .create();
169
170
171 for (long l=0; l<rows; l++ ) {
172 generator.getKey(key);
173 generator.getValue(value);
174 writer.append(key, value);
175 totalBytesWritten += key.length;
176 totalBytesWritten += value.length;
177 }
178 writer.close();
179 } else if ("SequenceFile".equals(fileType)){
180 CompressionCodec codec = null;
181 if ("gz".equals(codecName))
182 codec = new GzipCodec();
183 else if (!"none".equals(codecName))
184 throw new IOException("Codec not supported.");
185
186 SequenceFile.Writer writer;
187
188
189
190
191 if (!"none".equals(codecName))
192 writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
193 BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
194 else
195 writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
196 BytesWritable.class, SequenceFile.CompressionType.NONE, null);
197
198 BytesWritable keyBsw;
199 BytesWritable valBsw;
200 for (long l=0; l<rows; l++ ) {
201
202 generator.getKey(key);
203 keyBsw = new BytesWritable(key);
204 totalBytesWritten += keyBsw.getSize();
205
206 generator.getValue(value);
207 valBsw = new BytesWritable(value);
208 writer.append(keyBsw, valBsw);
209 totalBytesWritten += valBsw.getSize();
210 }
211
212 writer.close();
213 } else
214 throw new IOException("File Type is not supported");
215
216 fout.close();
217 stopTime();
218
219 printlnWithTimestamp("Data written: ");
220 printlnWithTimestamp(" rate = " +
221 totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
222 printlnWithTimestamp(" total = " + totalBytesWritten + "B");
223
224 printlnWithTimestamp("File written: ");
225 printlnWithTimestamp(" rate = " +
226 fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
227 printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
228 }
229
230 public void timeReading(String fileType, int keyLength, int valueLength,
231 long rows, int method) throws IOException {
232 System.out.println("Reading file of type: " + fileType);
233 Path path = new Path(ROOT_DIR, fileType + ".Performance");
234 System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
235 long totalBytesRead = 0;
236
237
238 ByteBuffer val;
239
240 ByteBuffer key;
241
242 startTime();
243 FSDataInputStream fin = fs.open(path);
244
245 if ("HFile".equals(fileType)){
246 HFile.Reader reader = HFile.createReaderFromStream(path, fs.open(path),
247 fs.getFileStatus(path).getLen(), new CacheConfig(conf));
248 reader.loadFileInfo();
249 switch (method) {
250
251 case 0:
252 case 1:
253 default:
254 {
255 HFileScanner scanner = reader.getScanner(false, false);
256 scanner.seekTo();
257 for (long l=0; l<rows; l++ ) {
258 key = scanner.getKey();
259 val = scanner.getValue();
260 totalBytesRead += key.limit() + val.limit();
261 scanner.next();
262 }
263 }
264 break;
265 }
266 reader.close();
267 } else if("SequenceFile".equals(fileType)){
268
269 SequenceFile.Reader reader;
270 reader = new SequenceFile.Reader(fs, path, new Configuration());
271
272 if (reader.getCompressionCodec() != null) {
273 printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
274 } else
275 printlnWithTimestamp("Compression codec class: " + "none");
276
277 BytesWritable keyBsw = new BytesWritable();
278 BytesWritable valBsw = new BytesWritable();
279
280 for (long l=0; l<rows; l++ ) {
281 reader.next(keyBsw, valBsw);
282 totalBytesRead += keyBsw.getSize() + valBsw.getSize();
283 }
284 reader.close();
285
286
287
288 } else {
289 throw new IOException("File Type not supported.");
290 }
291
292
293
294 fin.close();
295 stopTime();
296
297
298 printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
299 printlnWithTimestamp("Data read: ");
300 printlnWithTimestamp(" rate = " +
301 totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
302 printlnWithTimestamp(" total = " + totalBytesRead + "B");
303
304 printlnWithTimestamp("File read: ");
305 printlnWithTimestamp(" rate = " +
306 fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
307 printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
308
309
310
311 }
312
313 public void testRunComparisons() throws IOException {
314
315 int keyLength = 100;
316 int valueLength = 5*1024;
317 int minBlockSize = 10*1024*1024;
318 int rows = 10000;
319
320 System.out.println("****************************** Sequence File *****************************");
321
322 timeWrite("SequenceFile", keyLength, valueLength, "none", rows, null, minBlockSize);
323 System.out.println("\n+++++++\n");
324 timeReading("SequenceFile", keyLength, valueLength, rows, -1);
325
326 System.out.println("");
327 System.out.println("----------------------");
328 System.out.println("");
329
330
331
332
333
334
335
336
337
338
339
340
341 try {
342 timeWrite("SequenceFile", keyLength, valueLength, "gz", rows, null,
343 minBlockSize);
344 System.out.println("\n+++++++\n");
345 timeReading("SequenceFile", keyLength, valueLength, rows, -1);
346 } catch (IllegalArgumentException e) {
347 System.out.println("Skipping sequencefile gz: " + e.getMessage());
348 }
349
350
351 System.out.println("\n\n\n");
352 System.out.println("****************************** HFile *****************************");
353
354 timeWrite("HFile", keyLength, valueLength, "none", rows, null, minBlockSize);
355 System.out.println("\n+++++++\n");
356 timeReading("HFile", keyLength, valueLength, rows, 0 );
357
358 System.out.println("");
359 System.out.println("----------------------");
360 System.out.println("");
361
362
363
364
365
366
367
368
369
370
371
372
373
374 timeWrite("HFile", keyLength, valueLength, "gz", rows, null, minBlockSize);
375 System.out.println("\n+++++++\n");
376 timeReading("HFile", keyLength, valueLength, rows, 0 );
377
378 System.out.println("\n\n\n\nNotes: ");
379 System.out.println(" * Timing includes open/closing of files.");
380 System.out.println(" * Timing includes reading both Key and Value");
381 System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
382 "dictionary with care for distributation of words is under development.");
383 System.out.println(" * Timing of write currently, includes random value/key generations. " +
384 "Which is the same for Sequence File and HFile. Another possibility is to generate " +
385 "test data beforehand");
386 System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
387 "ideas, for next step we do a large dummy read between benchmark read to dismantle " +
388 "caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
389 " the same method several times and flood cache every time and average it to get a" +
390 " better number.");
391 }
392
393 }
394