/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.text.DateFormat;
23 import java.text.SimpleDateFormat;
24 import java.util.Random;
25
26 import junit.framework.TestCase;
27
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.fs.FSDataInputStream;
30 import org.apache.hadoop.fs.FSDataOutputStream;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.hbase.HBaseTestingUtility;
34 import org.apache.hadoop.io.BytesWritable;
35 import org.apache.hadoop.io.SequenceFile;
36 import org.apache.hadoop.io.compress.CompressionCodec;
37 import org.apache.hadoop.io.compress.GzipCodec;
38
39
40
41
42
43
44
45
46
47 public class TestHFilePerformance extends TestCase {
48 private static String ROOT_DIR =
49 HBaseTestingUtility.getTestDir("TestHFilePerformance").toString();
50 private FileSystem fs;
51 private Configuration conf;
52 private long startTimeEpoch;
53 private long finishTimeEpoch;
54 private DateFormat formatter;
55
56 @Override
57 public void setUp() throws IOException {
58 conf = new Configuration();
59 fs = FileSystem.get(conf);
60 formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
61 }
62
63 public void startTime() {
64 startTimeEpoch = System.currentTimeMillis();
65 System.out.println(formatTime() + " Started timing.");
66 }
67
68 public void stopTime() {
69 finishTimeEpoch = System.currentTimeMillis();
70 System.out.println(formatTime() + " Stopped timing.");
71 }
72
73 public long getIntervalMillis() {
74 return finishTimeEpoch - startTimeEpoch;
75 }
76
77 public void printlnWithTimestamp(String message) {
78 System.out.println(formatTime() + " " + message);
79 }
80
81
82
83
84 public String formatTime(long milis){
85 return formatter.format(milis);
86 }
87
88 public String formatTime(){
89 return formatTime(System.currentTimeMillis());
90 }
91
92 private FSDataOutputStream createFSOutput(Path name) throws IOException {
93 if (fs.exists(name))
94 fs.delete(name, true);
95 FSDataOutputStream fout = fs.create(name);
96 return fout;
97 }
98
99
100
101
102
103 private static class KeyValueGenerator {
104 Random keyRandomizer;
105 Random valueRandomizer;
106 long randomValueRatio = 3;
107 long valueSequence = 0 ;
108
109
110 KeyValueGenerator() {
111 keyRandomizer = new Random(0L);
112 valueRandomizer = new Random(1L);
113 }
114
115
116 void getKey(byte[] key) {
117 keyRandomizer.nextBytes(key);
118 }
119
120 void getValue(byte[] value) {
121 if (valueSequence % randomValueRatio == 0)
122 valueRandomizer.nextBytes(value);
123 valueSequence++;
124 }
125 }
126
127
128
129
130
131
132
133
134
135
136
137
138
139 public void timeWrite(String fileType, int keyLength, int valueLength,
140 String codecName, long rows, String writeMethod, int minBlockSize)
141 throws IOException {
142 System.out.println("File Type: " + fileType);
143 System.out.println("Writing " + fileType + " with codecName: " + codecName);
144 long totalBytesWritten = 0;
145
146
147
148 byte[] key = new byte[keyLength];
149 byte[] value = new byte[valueLength];
150 KeyValueGenerator generator = new KeyValueGenerator();
151
152 startTime();
153
154 Path path = new Path(ROOT_DIR, fileType + ".Performance");
155 System.out.println(ROOT_DIR + path.getName());
156 FSDataOutputStream fout = createFSOutput(path);
157
158 if ("HFile".equals(fileType)){
159 System.out.println("HFile write method: ");
160 HFile.Writer writer =
161 new HFile.Writer(fout, minBlockSize, codecName, null);
162
163
164 for (long l=0 ; l<rows ; l++ ) {
165 generator.getKey(key);
166 generator.getValue(value);
167 writer.append(key, value);
168 totalBytesWritten += key.length;
169 totalBytesWritten += value.length;
170 }
171 writer.close();
172 } else if ("SequenceFile".equals(fileType)){
173 CompressionCodec codec = null;
174 if ("gz".equals(codecName))
175 codec = new GzipCodec();
176 else if (!"none".equals(codecName))
177 throw new IOException("Codec not supported.");
178
179 SequenceFile.Writer writer;
180
181
182
183
184 if (!"none".equals(codecName))
185 writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
186 BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
187 else
188 writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
189 BytesWritable.class, SequenceFile.CompressionType.NONE, null);
190
191 BytesWritable keyBsw;
192 BytesWritable valBsw;
193 for (long l=0 ; l<rows ; l++ ) {
194
195 generator.getKey(key);
196 keyBsw = new BytesWritable(key);
197 totalBytesWritten += keyBsw.getSize();
198
199 generator.getValue(value);
200 valBsw = new BytesWritable(value);
201 writer.append(keyBsw, valBsw);
202 totalBytesWritten += valBsw.getSize();
203 }
204
205 writer.close();
206 } else
207 throw new IOException("File Type is not supported");
208
209 fout.close();
210 stopTime();
211
212 printlnWithTimestamp("Data written: ");
213 printlnWithTimestamp(" rate = " +
214 totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
215 printlnWithTimestamp(" total = " + totalBytesWritten + "B");
216
217 printlnWithTimestamp("File written: ");
218 printlnWithTimestamp(" rate = " +
219 fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
220 printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
221 }
222
223 public void timeReading(String fileType, int keyLength, int valueLength,
224 long rows, int method) throws IOException {
225 System.out.println("Reading file of type: " + fileType);
226 Path path = new Path(ROOT_DIR, fileType + ".Performance");
227 System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
228 long totalBytesRead = 0;
229
230
231 ByteBuffer val;
232
233 ByteBuffer key;
234
235 startTime();
236 FSDataInputStream fin = fs.open(path);
237
238 if ("HFile".equals(fileType)){
239 HFile.Reader reader = new HFile.Reader(fs.open(path),
240 fs.getFileStatus(path).getLen(), null, false);
241 reader.loadFileInfo();
242 switch (method) {
243
244 case 0:
245 case 1:
246 default:
247 {
248 HFileScanner scanner = reader.getScanner(false, false);
249 scanner.seekTo();
250 for (long l=0 ; l<rows ; l++ ) {
251 key = scanner.getKey();
252 val = scanner.getValue();
253 totalBytesRead += key.limit() + val.limit();
254 scanner.next();
255 }
256 }
257 break;
258 }
259 } else if("SequenceFile".equals(fileType)){
260
261 SequenceFile.Reader reader;
262 reader = new SequenceFile.Reader(fs, path, new Configuration());
263
264 if (reader.getCompressionCodec() != null) {
265 printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
266 } else
267 printlnWithTimestamp("Compression codec class: " + "none");
268
269 BytesWritable keyBsw = new BytesWritable();
270 BytesWritable valBsw = new BytesWritable();
271
272 for (long l=0 ; l<rows ; l++ ) {
273 reader.next(keyBsw, valBsw);
274 totalBytesRead += keyBsw.getSize() + valBsw.getSize();
275 }
276 reader.close();
277
278
279
280 } else {
281 throw new IOException("File Type not supported.");
282 }
283
284
285
286 fin.close();
287 stopTime();
288
289
290 printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
291 printlnWithTimestamp("Data read: ");
292 printlnWithTimestamp(" rate = " +
293 totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
294 printlnWithTimestamp(" total = " + totalBytesRead + "B");
295
296 printlnWithTimestamp("File read: ");
297 printlnWithTimestamp(" rate = " +
298 fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
299 printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
300
301
302
303 }
304
305 public void testRunComparisons() throws IOException {
306
307 int keyLength = 100;
308 int valueLength = 5*1024;
309 int minBlockSize = 10*1024*1024;
310 int rows = 10000;
311
312 System.out.println("****************************** Sequence File *****************************");
313
314 timeWrite("SequenceFile", keyLength, valueLength, "none", rows, null, minBlockSize);
315 System.out.println("\n+++++++\n");
316 timeReading("SequenceFile", keyLength, valueLength, rows, -1);
317
318 System.out.println("");
319 System.out.println("----------------------");
320 System.out.println("");
321
322
323
324
325
326
327
328
329
330
331
332
333 try {
334 timeWrite("SequenceFile", keyLength, valueLength, "gz", rows, null,
335 minBlockSize);
336 System.out.println("\n+++++++\n");
337 timeReading("SequenceFile", keyLength, valueLength, rows, -1);
338 } catch (IllegalArgumentException e) {
339 System.out.println("Skipping sequencefile gz: " + e.getMessage());
340 }
341
342
343 System.out.println("\n\n\n");
344 System.out.println("****************************** HFile *****************************");
345
346 timeWrite("HFile", keyLength, valueLength, "none", rows, null, minBlockSize);
347 System.out.println("\n+++++++\n");
348 timeReading("HFile", keyLength, valueLength, rows, 0 );
349
350 System.out.println("");
351 System.out.println("----------------------");
352 System.out.println("");
353
354
355
356
357
358
359
360
361
362
363
364
365
366 timeWrite("HFile", keyLength, valueLength, "gz", rows, null, minBlockSize);
367 System.out.println("\n+++++++\n");
368 timeReading("HFile", keyLength, valueLength, rows, 0 );
369
370 System.out.println("\n\n\n\nNotes: ");
371 System.out.println(" * Timing includes open/closing of files.");
372 System.out.println(" * Timing includes reading both Key and Value");
373 System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
374 "dictionary with care for distributation of words is under development.");
375 System.out.println(" * Timing of write currently, includes random value/key generations. " +
376 "Which is the same for Sequence File and HFile. Another possibility is to generate " +
377 "test data beforehand");
378 System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
379 "ideas, for next step we do a large dummy read between benchmark read to dismantle " +
380 "caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
381 " the same method several times and flood cache every time and average it to get a" +
382 " better number.");
383 }
384 }