View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.text.DateFormat;
23  import java.text.SimpleDateFormat;
24  import java.util.Random;
25  
26  import org.apache.commons.cli.CommandLine;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.fs.FSDataInputStream;
29  import org.apache.hadoop.fs.FSDataOutputStream;
30  import org.apache.hadoop.fs.FileSystem;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.HBaseConfiguration;
33  import org.apache.hadoop.hbase.HBaseTestingUtility;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.util.AbstractHBaseTool;
36  import org.apache.hadoop.io.BytesWritable;
37  import org.apache.hadoop.io.SequenceFile;
38  import org.apache.hadoop.io.compress.CompressionCodec;
39  import org.apache.hadoop.io.compress.GzipCodec;
40  import org.apache.hadoop.util.ToolRunner;
41  
42  /**
43   *  Set of long-running tests to measure performance of HFile.
44   * <p>
45   * Copied from
46   * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
47   * Remove after tfile is committed and use the tfile version of this class
48   * instead.</p>
49   */
50  public class TestHFilePerformance extends AbstractHBaseTool {
51    private HBaseTestingUtility TEST_UTIL;
52    private static String ROOT_DIR;
53    private FileSystem fs;
54    private long startTimeEpoch;
55    private long finishTimeEpoch;
56    private DateFormat formatter;
57  
58    @Override
59    public void setConf(Configuration conf) {
60      super.setConf(conf);
61      try {
62        fs = FileSystem.get(conf);
63      } catch (IOException e) {
64        throw new RuntimeException(e);
65      }
66      formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
67      TEST_UTIL = new HBaseTestingUtility(conf);
68      ROOT_DIR = TEST_UTIL.getDataTestDir("TestHFilePerformance").toString();
69    }
70  
71    public void startTime() {
72      startTimeEpoch = System.currentTimeMillis();
73      System.out.println(formatTime() + " Started timing.");
74    }
75  
76    public void stopTime() {
77      finishTimeEpoch = System.currentTimeMillis();
78      System.out.println(formatTime() + " Stopped timing.");
79    }
80  
81    public long getIntervalMillis() {
82      return finishTimeEpoch - startTimeEpoch;
83    }
84  
85    public void printlnWithTimestamp(String message) {
86      System.out.println(formatTime() + "  " +  message);
87    }
88  
89    /*
90     * Format millis into minutes and seconds.
91     */
92    public String formatTime(long milis){
93      return formatter.format(milis);
94    }
95  
96    public String formatTime(){
97      return formatTime(System.currentTimeMillis());
98    }
99  
100   private FSDataOutputStream createFSOutput(Path name) throws IOException {
101     if (fs.exists(name))
102       fs.delete(name, true);
103     FSDataOutputStream fout = fs.create(name);
104     return fout;
105   }
106 
107   //TODO have multiple ways of generating key/value e.g. dictionary words
108   //TODO to have a sample compressable data, for now, made 1 out of 3 values random
109   //     keys are all random.
110 
111   private static class KeyValueGenerator {
112     Random keyRandomizer;
113     Random valueRandomizer;
114     long randomValueRatio = 3; // 1 out of randomValueRatio generated values will be random.
115     long valueSequence = 0 ;
116 
117 
118     KeyValueGenerator() {
119       keyRandomizer = new Random(0L); //TODO with seed zero
120       valueRandomizer = new Random(1L); //TODO with seed one
121     }
122 
123     // Key is always random now.
124     void getKey(byte[] key) {
125       keyRandomizer.nextBytes(key);
126     }
127 
128     void getValue(byte[] value) {
129       if (valueSequence % randomValueRatio == 0)
130           valueRandomizer.nextBytes(value);
131       valueSequence++;
132     }
133   }
134 
135   /**
136    *
137    * @param fileType "HFile" or "SequenceFile"
138    * @param keyLength
139    * @param valueLength
140    * @param codecName "none", "lzo", "gz", "snappy"
141    * @param rows number of rows to be written.
142    * @param writeMethod used for HFile only.
143    * @param minBlockSize used for HFile only.
144    * @throws IOException
145    */
146    //TODO writeMethod: implement multiple ways of writing e.g. A) known length (no chunk) B) using a buffer and streaming (for many chunks).
147   public void timeWrite(String fileType, int keyLength, int valueLength,
148     String codecName, long rows, String writeMethod, int minBlockSize)
149   throws IOException {
150     System.out.println("File Type: " + fileType);
151     System.out.println("Writing " + fileType + " with codecName: " + codecName);
152     long totalBytesWritten = 0;
153 
154 
155     //Using separate randomizer for key/value with seeds matching Sequence File.
156     byte[] key = new byte[keyLength];
157     byte[] value = new byte[valueLength];
158     KeyValueGenerator generator = new KeyValueGenerator();
159 
160     startTime();
161 
162     Path path = new Path(ROOT_DIR, fileType + ".Performance");
163     System.out.println(ROOT_DIR + path.getName());
164     FSDataOutputStream fout =  createFSOutput(path);
165 
166     if ("HFile".equals(fileType)){
167         System.out.println("HFile write method: ");
168         HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
169             .withOutputStream(fout)
170             .withBlockSize(minBlockSize)
171             .withCompression(codecName)
172             .withComparator(new KeyValue.RawBytesComparator())
173             .create();
174 
175         // Writing value in one shot.
176         for (long l=0; l<rows; l++ ) {
177           generator.getKey(key);
178           generator.getValue(value);
179           writer.append(key, value);
180           totalBytesWritten += key.length;
181           totalBytesWritten += value.length;
182          }
183         writer.close();
184     } else if ("SequenceFile".equals(fileType)){
185         CompressionCodec codec = null;
186         if ("gz".equals(codecName))
187           codec = new GzipCodec();
188         else if (!"none".equals(codecName))
189           throw new IOException("Codec not supported.");
190 
191         SequenceFile.Writer writer;
192 
193         //TODO
194         //JobConf conf = new JobConf();
195 
196         if (!"none".equals(codecName))
197           writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
198             BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
199         else
200           writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
201             BytesWritable.class, SequenceFile.CompressionType.NONE, null);
202 
203         BytesWritable keyBsw;
204         BytesWritable valBsw;
205         for (long l=0; l<rows; l++ ) {
206 
207            generator.getKey(key);
208            keyBsw = new BytesWritable(key);
209            totalBytesWritten += keyBsw.getSize();
210 
211            generator.getValue(value);
212            valBsw = new BytesWritable(value);
213            writer.append(keyBsw, valBsw);
214            totalBytesWritten += valBsw.getSize();
215         }
216 
217         writer.close();
218     } else
219        throw new IOException("File Type is not supported");
220 
221     fout.close();
222     stopTime();
223 
224     printlnWithTimestamp("Data written: ");
225     printlnWithTimestamp("  rate  = " +
226       totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
227     printlnWithTimestamp("  total = " + totalBytesWritten + "B");
228 
229     printlnWithTimestamp("File written: ");
230     printlnWithTimestamp("  rate  = " +
231       fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
232     printlnWithTimestamp("  total = " + fs.getFileStatus(path).getLen() + "B");
233   }
234 
235   public void timeReading(String fileType, int keyLength, int valueLength,
236       long rows, int method) throws IOException {
237     System.out.println("Reading file of type: " + fileType);
238     Path path = new Path(ROOT_DIR, fileType + ".Performance");
239     System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
240     long totalBytesRead = 0;
241 
242 
243     ByteBuffer val;
244 
245     ByteBuffer key;
246 
247     startTime();
248     FSDataInputStream fin = fs.open(path);
249 
250     if ("HFile".equals(fileType)){
251         HFile.Reader reader = HFile.createReaderFromStream(path, fs.open(path),
252           fs.getFileStatus(path).getLen(), new CacheConfig(conf));
253         reader.loadFileInfo();
254         switch (method) {
255 
256           case 0:
257           case 1:
258           default:
259             {
260               HFileScanner scanner = reader.getScanner(false, false);
261               scanner.seekTo();
262               for (long l=0; l<rows; l++ ) {
263                 key = scanner.getKey();
264                 val = scanner.getValue();
265                 totalBytesRead += key.limit() + val.limit();
266                 scanner.next();
267               }
268             }
269             break;
270         }
271       reader.close();
272     } else if("SequenceFile".equals(fileType)){
273 
274         SequenceFile.Reader reader;
275         reader = new SequenceFile.Reader(fs, path, new Configuration());
276 
277         if (reader.getCompressionCodec() != null) {
278             printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
279         } else
280             printlnWithTimestamp("Compression codec class: " + "none");
281 
282         BytesWritable keyBsw = new BytesWritable();
283         BytesWritable valBsw = new BytesWritable();
284 
285         for (long l=0; l<rows; l++ ) {
286           reader.next(keyBsw, valBsw);
287           totalBytesRead += keyBsw.getSize() + valBsw.getSize();
288         }
289         reader.close();
290 
291         //TODO make a tests for other types of SequenceFile reading scenarios
292 
293     } else {
294         throw new IOException("File Type not supported.");
295     }
296 
297 
298     //printlnWithTimestamp("Closing reader");
299     fin.close();
300     stopTime();
301     //printlnWithTimestamp("Finished close");
302 
303     printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
304     printlnWithTimestamp("Data read: ");
305     printlnWithTimestamp("  rate  = " +
306       totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
307     printlnWithTimestamp("  total = " + totalBytesRead + "B");
308 
309     printlnWithTimestamp("File read: ");
310     printlnWithTimestamp("  rate  = " +
311       fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
312     printlnWithTimestamp("  total = " + fs.getFileStatus(path).getLen() + "B");
313 
314     //TODO uncomment this for final committing so test files is removed.
315     //fs.delete(path, true);
316   }
317 
318   public void testRunComparisons() throws IOException {
319 
320     int keyLength = 100; // 100B
321     int valueLength = 5*1024; // 5KB
322     int minBlockSize = 10*1024*1024; // 10MB
323     int rows = 10000;
324 
325     System.out.println("****************************** Sequence File *****************************");
326 
327     timeWrite("SequenceFile", keyLength, valueLength, "none", rows, null, minBlockSize);
328     System.out.println("\n+++++++\n");
329     timeReading("SequenceFile", keyLength, valueLength, rows, -1);
330 
331     System.out.println("");
332     System.out.println("----------------------");
333     System.out.println("");
334 
335     /* DISABLED LZO
336     timeWrite("SequenceFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
337     System.out.println("\n+++++++\n");
338     timeReading("SequenceFile", keyLength, valueLength, rows, -1);
339 
340     System.out.println("");
341     System.out.println("----------------------");
342     System.out.println("");
343 
344     /* Sequence file can only use native hadoop libs gzipping so commenting out.
345      */
346     try {
347       timeWrite("SequenceFile", keyLength, valueLength, "gz", rows, null,
348         minBlockSize);
349       System.out.println("\n+++++++\n");
350       timeReading("SequenceFile", keyLength, valueLength, rows, -1);
351     } catch (IllegalArgumentException e) {
352       System.out.println("Skipping sequencefile gz: " + e.getMessage());
353     }
354 
355 
356     System.out.println("\n\n\n");
357     System.out.println("****************************** HFile *****************************");
358 
359     timeWrite("HFile", keyLength, valueLength, "none", rows, null, minBlockSize);
360     System.out.println("\n+++++++\n");
361     timeReading("HFile", keyLength, valueLength, rows, 0 );
362 
363     System.out.println("");
364     System.out.println("----------------------");
365     System.out.println("");
366 /* DISABLED LZO
367     timeWrite("HFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
368     System.out.println("\n+++++++\n");
369     timeReading("HFile", keyLength, valueLength, rows, 0 );
370     System.out.println("\n+++++++\n");
371     timeReading("HFile", keyLength, valueLength, rows, 1 );
372     System.out.println("\n+++++++\n");
373     timeReading("HFile", keyLength, valueLength, rows, 2 );
374 
375     System.out.println("");
376     System.out.println("----------------------");
377     System.out.println("");
378 */
379     timeWrite("HFile", keyLength, valueLength, "gz", rows, null, minBlockSize);
380     System.out.println("\n+++++++\n");
381     timeReading("HFile", keyLength, valueLength, rows, 0 );
382 
383     System.out.println("\n\n\n\nNotes: ");
384     System.out.println(" * Timing includes open/closing of files.");
385     System.out.println(" * Timing includes reading both Key and Value");
386     System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
387             "dictionary with care for distributation of words is under development.");
388     System.out.println(" * Timing of write currently, includes random value/key generations. " +
389             "Which is the same for Sequence File and HFile. Another possibility is to generate " +
390             "test data beforehand");
391     System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
392             "ideas, for next step we do a large dummy read between benchmark read to dismantle " +
393             "caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
394             " the same method several times and flood cache every time and average it to get a" +
395             " better number.");
396   }
397 
398   @Override
399   protected void addOptions() {
400   }
401 
402   @Override
403   protected void processOptions(CommandLine cmd) {
404   }
405 
406   @Override
407   protected int doWork() throws Exception {
408     testRunComparisons();
409     return 0;
410   }
411 
412   public static void main(String[] args) throws Exception {
413     int ret = ToolRunner.run(HBaseConfiguration.create(), new TestHFilePerformance(), args);
414     System.exit(ret);
415   }
416 }