View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.security.SecureRandom;
23  import java.text.DateFormat;
24  import java.text.SimpleDateFormat;
25  import java.util.Random;
26  
27  import org.apache.commons.cli.CommandLine;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FSDataInputStream;
30  import org.apache.hadoop.fs.FSDataOutputStream;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HBaseConfiguration;
34  import org.apache.hadoop.hbase.HBaseTestingUtility;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.KeyValue;
37  import org.apache.hadoop.hbase.io.crypto.Encryption;
38  import org.apache.hadoop.hbase.io.crypto.KeyProviderForTesting;
39  import org.apache.hadoop.hbase.io.crypto.aes.AES;
40  import org.apache.hadoop.hbase.util.AbstractHBaseTool;
41  import org.apache.hadoop.io.BytesWritable;
42  import org.apache.hadoop.io.SequenceFile;
43  import org.apache.hadoop.io.compress.CompressionCodec;
44  import org.apache.hadoop.io.compress.GzipCodec;
45  import org.apache.hadoop.util.ToolRunner;
46  
47  /**
48   *  Set of long-running tests to measure performance of HFile.
49   * <p>
50   * Copied from
51   * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
52   * Remove after tfile is committed and use the tfile version of this class
53   * instead.</p>
54   */
55  public class TestHFilePerformance extends AbstractHBaseTool {
  // Utility used only to resolve the local data test directory (ROOT_DIR).
  private HBaseTestingUtility TEST_UTIL;
  // Base directory under which benchmark files are written; assigned in setConf().
  private static String ROOT_DIR;
  // FileSystem resolved from the tool configuration in setConf().
  private FileSystem fs;
  // Wall-clock bounds (epoch millis) of the current timed section; see startTime()/stopTime().
  private long startTimeEpoch;
  private long finishTimeEpoch;
  // Renders timestamps as "yyyy-MM-dd HH:mm:ss"; created in setConf().
  private DateFormat formatter;
62  
63    @Override
64    public void setConf(Configuration conf) {
65      super.setConf(conf);
66      try {
67        fs = FileSystem.get(conf);
68      } catch (IOException e) {
69        throw new RuntimeException(e);
70      }
71      conf.set(HConstants.CRYPTO_KEYPROVIDER_CONF_KEY, KeyProviderForTesting.class.getName());
72      conf.set(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, "hbase");
73      formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
74      TEST_UTIL = new HBaseTestingUtility(conf);
75      ROOT_DIR = TEST_UTIL.getDataTestDir("TestHFilePerformance").toString();
76    }
77  
78    public void startTime() {
79      startTimeEpoch = System.currentTimeMillis();
80      System.out.println(formatTime() + " Started timing.");
81    }
82  
83    public void stopTime() {
84      finishTimeEpoch = System.currentTimeMillis();
85      System.out.println(formatTime() + " Stopped timing.");
86    }
87  
88    public long getIntervalMillis() {
89      return finishTimeEpoch - startTimeEpoch;
90    }
91  
92    public void printlnWithTimestamp(String message) {
93      System.out.println(formatTime() + "  " +  message);
94    }
95  
96    /*
97     * Format millis into minutes and seconds.
98     */
99    public String formatTime(long milis){
100     return formatter.format(milis);
101   }
102 
103   public String formatTime(){
104     return formatTime(System.currentTimeMillis());
105   }
106 
107   private FSDataOutputStream createFSOutput(Path name) throws IOException {
108     if (fs.exists(name))
109       fs.delete(name, true);
110     FSDataOutputStream fout = fs.create(name);
111     return fout;
112   }
113 
114   //TODO have multiple ways of generating key/value e.g. dictionary words
115   //TODO to have a sample compressable data, for now, made 1 out of 3 values random
116   //     keys are all random.
117 
118   private static class KeyValueGenerator {
119     Random keyRandomizer;
120     Random valueRandomizer;
121     long randomValueRatio = 3; // 1 out of randomValueRatio generated values will be random.
122     long valueSequence = 0 ;
123 
124 
125     KeyValueGenerator() {
126       keyRandomizer = new Random(0L); //TODO with seed zero
127       valueRandomizer = new Random(1L); //TODO with seed one
128     }
129 
130     // Key is always random now.
131     void getKey(byte[] key) {
132       keyRandomizer.nextBytes(key);
133     }
134 
135     void getValue(byte[] value) {
136       if (valueSequence % randomValueRatio == 0)
137           valueRandomizer.nextBytes(value);
138       valueSequence++;
139     }
140   }
141 
142   /**
143    *
144    * @param fileType "HFile" or "SequenceFile"
145    * @param keyLength
146    * @param valueLength
147    * @param codecName "none", "lzo", "gz", "snappy"
148    * @param cipherName "none", "aes"
149    * @param rows number of rows to be written.
150    * @param writeMethod used for HFile only.
151    * @param minBlockSize used for HFile only.
152    * @throws IOException
153    */
154    //TODO writeMethod: implement multiple ways of writing e.g. A) known length (no chunk) B) using a buffer and streaming (for many chunks).
155   public void timeWrite(String fileType, int keyLength, int valueLength,
156     String codecName, String cipherName, long rows, String writeMethod, int minBlockSize)
157   throws IOException {
158     System.out.println("File Type: " + fileType);
159     System.out.println("Writing " + fileType + " with codecName: " + codecName +
160       " cipherName: " + cipherName);
161     long totalBytesWritten = 0;
162 
163 
164     //Using separate randomizer for key/value with seeds matching Sequence File.
165     byte[] key = new byte[keyLength];
166     byte[] value = new byte[valueLength];
167     KeyValueGenerator generator = new KeyValueGenerator();
168 
169     startTime();
170 
171     Path path = new Path(ROOT_DIR, fileType + ".Performance");
172     System.out.println(ROOT_DIR + path.getName());
173     FSDataOutputStream fout =  createFSOutput(path);
174 
175     if ("HFile".equals(fileType)){
176         HFileContextBuilder builder = new HFileContextBuilder()
177 	    .withCompression(AbstractHFileWriter.compressionByName(codecName))
178 	    .withBlockSize(minBlockSize);
179         if (cipherName != "none") {
180           byte[] cipherKey = new byte[AES.KEY_LENGTH];
181           new SecureRandom().nextBytes(cipherKey);
182           builder.withEncryptionContext(
183             Encryption.newContext(conf)
184               .setCipher(Encryption.getCipher(conf, cipherName))
185               .setKey(cipherKey));
186         }
187         HFileContext context = builder.build();
188         System.out.println("HFile write method: ");
189         HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
190             .withOutputStream(fout)
191             .withFileContext(context)
192             .withComparator(new KeyValue.RawBytesComparator())
193             .create();
194 
195         // Writing value in one shot.
196         for (long l=0; l<rows; l++ ) {
197           generator.getKey(key);
198           generator.getValue(value);
199           writer.append(key, value);
200           totalBytesWritten += key.length;
201           totalBytesWritten += value.length;
202          }
203         writer.close();
204     } else if ("SequenceFile".equals(fileType)){
205         CompressionCodec codec = null;
206         if ("gz".equals(codecName))
207           codec = new GzipCodec();
208         else if (!"none".equals(codecName))
209           throw new IOException("Codec not supported.");
210 
211         SequenceFile.Writer writer;
212 
213         //TODO
214         //JobConf conf = new JobConf();
215 
216         if (!"none".equals(codecName))
217           writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
218             BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
219         else
220           writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
221             BytesWritable.class, SequenceFile.CompressionType.NONE, null);
222 
223         BytesWritable keyBsw;
224         BytesWritable valBsw;
225         for (long l=0; l<rows; l++ ) {
226 
227            generator.getKey(key);
228            keyBsw = new BytesWritable(key);
229            totalBytesWritten += keyBsw.getSize();
230 
231            generator.getValue(value);
232            valBsw = new BytesWritable(value);
233            writer.append(keyBsw, valBsw);
234            totalBytesWritten += valBsw.getSize();
235         }
236 
237         writer.close();
238     } else
239        throw new IOException("File Type is not supported");
240 
241     fout.close();
242     stopTime();
243 
244     printlnWithTimestamp("Data written: ");
245     printlnWithTimestamp("  rate  = " +
246       totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
247     printlnWithTimestamp("  total = " + totalBytesWritten + "B");
248 
249     printlnWithTimestamp("File written: ");
250     printlnWithTimestamp("  rate  = " +
251       fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
252     printlnWithTimestamp("  total = " + fs.getFileStatus(path).getLen() + "B");
253   }
254 
255   public void timeReading(String fileType, int keyLength, int valueLength,
256       long rows, int method) throws IOException {
257     System.out.println("Reading file of type: " + fileType);
258     Path path = new Path(ROOT_DIR, fileType + ".Performance");
259     System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
260     long totalBytesRead = 0;
261 
262 
263     ByteBuffer val;
264 
265     ByteBuffer key;
266 
267     startTime();
268     FSDataInputStream fin = fs.open(path);
269 
270     if ("HFile".equals(fileType)){
271         HFile.Reader reader = HFile.createReaderFromStream(path, fs.open(path),
272           fs.getFileStatus(path).getLen(), new CacheConfig(conf), conf);
273         reader.loadFileInfo();
274         switch (method) {
275 
276           case 0:
277           case 1:
278           default:
279             {
280               HFileScanner scanner = reader.getScanner(false, false);
281               scanner.seekTo();
282               for (long l=0; l<rows; l++ ) {
283                 key = scanner.getKey();
284                 val = scanner.getValue();
285                 totalBytesRead += key.limit() + val.limit();
286                 scanner.next();
287               }
288             }
289             break;
290         }
291       reader.close();
292     } else if("SequenceFile".equals(fileType)){
293 
294         SequenceFile.Reader reader;
295         reader = new SequenceFile.Reader(fs, path, new Configuration());
296 
297         if (reader.getCompressionCodec() != null) {
298             printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
299         } else
300             printlnWithTimestamp("Compression codec class: " + "none");
301 
302         BytesWritable keyBsw = new BytesWritable();
303         BytesWritable valBsw = new BytesWritable();
304 
305         for (long l=0; l<rows; l++ ) {
306           reader.next(keyBsw, valBsw);
307           totalBytesRead += keyBsw.getSize() + valBsw.getSize();
308         }
309         reader.close();
310 
311         //TODO make a tests for other types of SequenceFile reading scenarios
312 
313     } else {
314         throw new IOException("File Type not supported.");
315     }
316 
317 
318     //printlnWithTimestamp("Closing reader");
319     fin.close();
320     stopTime();
321     //printlnWithTimestamp("Finished close");
322 
323     printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
324     printlnWithTimestamp("Data read: ");
325     printlnWithTimestamp("  rate  = " +
326       totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
327     printlnWithTimestamp("  total = " + totalBytesRead + "B");
328 
329     printlnWithTimestamp("File read: ");
330     printlnWithTimestamp("  rate  = " +
331       fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
332     printlnWithTimestamp("  total = " + fs.getFileStatus(path).getLen() + "B");
333 
334     //TODO uncomment this for final committing so test files is removed.
335     //fs.delete(path, true);
336   }
337 
  /**
   * Runs the full write/read benchmark matrix — SequenceFile (none, gz) and
   * HFile (none, gz) crossed with (no cipher, aes) — printing throughput for
   * each combination. LZO runs are disabled (commented out below).
   *
   * @throws IOException if any underlying write or read fails
   */
  public void testRunComparisons() throws IOException {

    // Fixed benchmark parameters; see the notes printed at the end.
    int keyLength = 100; // 100B
    int valueLength = 5*1024; // 5KB
    int minBlockSize = 10*1024*1024; // 10MB
    int rows = 10000;

    System.out.println("****************************** Sequence File *****************************");

    // SequenceFile, no compression.
    timeWrite("SequenceFile", keyLength, valueLength, "none", "none", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("SequenceFile", keyLength, valueLength, rows, -1);

    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");

    /* DISABLED LZO (codec unavailable in this environment):
    timeWrite("SequenceFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("SequenceFile", keyLength, valueLength, rows, -1);

    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");
    */

    // SequenceFile gzip needs the native hadoop libs; skipped gracefully below
    // via the IllegalArgumentException catch when they are not present.
    try {
      timeWrite("SequenceFile", keyLength, valueLength, "gz", "none", rows, null,
        minBlockSize);
      System.out.println("\n+++++++\n");
      timeReading("SequenceFile", keyLength, valueLength, rows, -1);
    } catch (IllegalArgumentException e) {
      System.out.println("Skipping sequencefile gz: " + e.getMessage());
    }


    System.out.println("\n\n\n");
    System.out.println("****************************** HFile *****************************");

    // HFile, no compression, no cipher.
    timeWrite("HFile", keyLength, valueLength, "none", "none", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 0 );

    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");

    // HFile, no compression, AES encryption.
    timeWrite("HFile", keyLength, valueLength, "none", "aes", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 0 );

    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");

/* DISABLED LZO
    timeWrite("HFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 0 );
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 1 );
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 2 );

    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");
*/

    // HFile, gzip compression, no cipher.
    timeWrite("HFile", keyLength, valueLength, "gz", "none", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 0 );

    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");

    // HFile, gzip compression, AES encryption.
    timeWrite("HFile", keyLength, valueLength, "gz", "aes", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 0 );

    System.out.println("\n\n\n\nNotes: ");
    System.out.println(" * Timing includes open/closing of files.");
    System.out.println(" * Timing includes reading both Key and Value");
    System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
            "dictionary with care for distributation of words is under development.");
    System.out.println(" * Timing of write currently, includes random value/key generations. " +
            "Which is the same for Sequence File and HFile. Another possibility is to generate " +
            "test data beforehand");
    System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
            "ideas, for next step we do a large dummy read between benchmark read to dismantle " +
            "caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
            " the same method several times and flood cache every time and average it to get a" +
            " better number.");
  }
435 
  @Override
  protected void addOptions() {
    // No tool-specific command-line options; benchmark parameters are hard-coded
    // in testRunComparisons().
  }
439 
  @Override
  protected void processOptions(CommandLine cmd) {
    // Nothing to process: no options are registered in addOptions().
  }
443 
444   @Override
445   protected int doWork() throws Exception {
446     testRunComparisons();
447     return 0;
448   }
449 
450   public static void main(String[] args) throws Exception {
451     int ret = ToolRunner.run(HBaseConfiguration.create(), new TestHFilePerformance(), args);
452     System.exit(ret);
453   }
454 }