/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Random;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.junit.experimental.categories.Category;

/**
 * Set of long-running tests to measure the performance of HFile.
 * <p>
 * Copied from
 * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
 * Remove after tfile is committed and use the tfile version of this class
 * instead.</p>
 */
@Category(MediumTests.class)
public class TestHFilePerformance extends TestCase {
  private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private static String ROOT_DIR =
    TEST_UTIL.getDataTestDir("TestHFilePerformance").toString();
  private FileSystem fs;
  private Configuration conf;
  private long startTimeEpoch;
  private long finishTimeEpoch;
  private DateFormat formatter;

  @Override
  public void setUp() throws IOException {
    conf = new Configuration();
    fs = FileSystem.get(conf);
    formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  }

  public void startTime() {
    startTimeEpoch = System.currentTimeMillis();
    System.out.println(formatTime() + " Started timing.");
  }

  public void stopTime() {
    finishTimeEpoch = System.currentTimeMillis();
    System.out.println(formatTime() + " Stopped timing.");
  }

  public long getIntervalMillis() {
    return finishTimeEpoch - startTimeEpoch;
  }

  public void printlnWithTimestamp(String message) {
    System.out.println(formatTime() + "  " + message);
  }

  /*
   * Format epoch millis as a date/time string.
   */
  public String formatTime(long millis) {
    return formatter.format(millis);
  }

  public String formatTime() {
    return formatTime(System.currentTimeMillis());
  }

  private FSDataOutputStream createFSOutput(Path name) throws IOException {
    if (fs.exists(name))
      fs.delete(name, true);
    return fs.create(name);
  }

  //TODO have multiple ways of generating key/value data, e.g. dictionary words
  //     (see the DictionaryValueGenerator sketch below), to get compressible
  //     sample data. For now, 1 out of 3 values is random; keys are all random.

  private static class KeyValueGenerator {
    Random keyRandomizer;
    Random valueRandomizer;
    long randomValueRatio = 3; // 1 out of randomValueRatio generated values will be random.
    long valueSequence = 0;

    KeyValueGenerator() {
      keyRandomizer = new Random(0L);   // fixed seed zero, so runs are repeatable
      valueRandomizer = new Random(1L); // fixed seed one, so runs are repeatable
    }

    // Key is always random now.
    void getKey(byte[] key) {
      keyRandomizer.nextBytes(key);
    }

    // Regenerates the buffer on every randomValueRatio-th call; otherwise the
    // previous contents repeat, which keeps the data partly compressible.
    void getValue(byte[] value) {
      if (valueSequence % randomValueRatio == 0)
        valueRandomizer.nextBytes(value);
      valueSequence++;
    }
  }
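
  // A minimal sketch of the dictionary-based generation mentioned in the TODO
  // above. This class is hypothetical and not yet used by the tests: values are
  // built from a small fixed vocabulary, which yields far more compressible
  // data than random bytes. The word list and seed are arbitrary assumptions.
  private static class DictionaryValueGenerator {
    private static final byte[][] WORDS = {
      "alpha ".getBytes(), "beta ".getBytes(), "gamma ".getBytes(), "delta ".getBytes()
    };
    private final Random wordPicker = new Random(2L);

    // Fill the buffer with whole words from the dictionary, truncating the
    // last word if it does not fit.
    void getValue(byte[] value) {
      int offset = 0;
      while (offset < value.length) {
        byte[] word = WORDS[wordPicker.nextInt(WORDS.length)];
        int len = Math.min(word.length, value.length - offset);
        System.arraycopy(word, 0, value, offset, len);
        offset += len;
      }
    }
  }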

  /**
   * @param fileType "HFile" or "SequenceFile"
   * @param keyLength length of each key in bytes
   * @param valueLength length of each value in bytes
   * @param codecName "none", "lzo", "gz", "snappy"
   * @param rows number of rows to be written
   * @param writeMethod used for HFile only
   * @param minBlockSize used for HFile only
   * @throws IOException if writing the file fails
   */
  //TODO writeMethod: implement multiple ways of writing, e.g. A) known length
  //     (no chunk), B) using a buffer and streaming (for many chunks).
  public void timeWrite(String fileType, int keyLength, int valueLength,
    String codecName, long rows, String writeMethod, int minBlockSize)
  throws IOException {
    System.out.println("File Type: " + fileType);
    System.out.println("Writing " + fileType + " with codecName: " + codecName);
    long totalBytesWritten = 0;

    // Using separate randomizers for key/value with seeds matching SequenceFile.
    byte[] key = new byte[keyLength];
    byte[] value = new byte[valueLength];
    KeyValueGenerator generator = new KeyValueGenerator();

    startTime();

    Path path = new Path(ROOT_DIR, fileType + ".Performance");
    System.out.println("Writing to: " + path);
    FSDataOutputStream fout = createFSOutput(path);

    if ("HFile".equals(fileType)) {
      System.out.println("HFile write method: ");
      HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
          .withOutputStream(fout)
          .withBlockSize(minBlockSize)
          .withCompression(codecName)
          .create();

      // Writing the value in one shot.
      for (long l = 0; l < rows; l++) {
        generator.getKey(key);
        generator.getValue(value);
        writer.append(key, value);
        totalBytesWritten += key.length;
        totalBytesWritten += value.length;
      }
      writer.close();
    } else if ("SequenceFile".equals(fileType)) {
      CompressionCodec codec = null;
      if ("gz".equals(codecName))
        codec = new GzipCodec();
      else if (!"none".equals(codecName))
        throw new IOException("Codec not supported.");

      SequenceFile.Writer writer;
      if (!"none".equals(codecName))
        writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
          BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
      else
        writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
          BytesWritable.class, SequenceFile.CompressionType.NONE, null);

      BytesWritable keyBsw;
      BytesWritable valBsw;
      for (long l = 0; l < rows; l++) {
        generator.getKey(key);
        keyBsw = new BytesWritable(key);
        totalBytesWritten += keyBsw.getSize();

        generator.getValue(value);
        valBsw = new BytesWritable(value);
        writer.append(keyBsw, valBsw);
        totalBytesWritten += valBsw.getSize();
      }
      writer.close();
    } else {
      throw new IOException("File Type is not supported");
    }

    fout.close();
    stopTime();

    printlnWithTimestamp("Data written: ");
    printlnWithTimestamp("  rate  = " +
      totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
    printlnWithTimestamp("  total = " + totalBytesWritten + "B");

    printlnWithTimestamp("File written: ");
    printlnWithTimestamp("  rate  = " +
      fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
    printlnWithTimestamp("  total = " + fs.getFileStatus(path).getLen() + "B");
  }
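
  // The write timing above includes key/value generation, as the Notes printed
  // by testRunComparisons point out. A hypothetical alternative, sketched here
  // and not yet wired into the tests, is to pre-generate all values up front so
  // that only the append calls fall inside the timed interval. Memory cost is
  // roughly rows * valueLength bytes.
  private byte[][] pregenerateValues(int valueLength, int rows) {
    KeyValueGenerator generator = new KeyValueGenerator();
    byte[] scratch = new byte[valueLength]; // reused so repeated values match timeWrite's behavior
    byte[][] values = new byte[rows][];
    for (int i = 0; i < rows; i++) {
      generator.getValue(scratch);
      values[i] = scratch.clone();
    }
    return values;
  }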

  public void timeReading(String fileType, int keyLength, int valueLength,
      long rows, int method) throws IOException {
    System.out.println("Reading file of type: " + fileType);
    Path path = new Path(ROOT_DIR, fileType + ".Performance");
    System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
    long totalBytesRead = 0;

    ByteBuffer val;
    ByteBuffer key;

    startTime();
    FSDataInputStream fin = fs.open(path);

    if ("HFile".equals(fileType)) {
      // Reuse the stream opened above rather than opening the file a second time.
      HFile.Reader reader = HFile.createReaderFromStream(path, fin,
        fs.getFileStatus(path).getLen(), new CacheConfig(conf));
      reader.loadFileInfo();
      switch (method) {
        case 0:
        case 1:
        default: {
          // All read methods currently share the same scanner path; the cases
          // are placeholders for future variants.
          HFileScanner scanner = reader.getScanner(false, false);
          scanner.seekTo();
          for (long l = 0; l < rows; l++) {
            key = scanner.getKey();
            val = scanner.getValue();
            totalBytesRead += key.limit() + val.limit();
            scanner.next();
          }
          break;
        }
      }
      reader.close();
    } else if ("SequenceFile".equals(fileType)) {
      // Use the test's Configuration rather than constructing a fresh one.
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

      if (reader.getCompressionCodec() != null) {
        printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
      } else {
        printlnWithTimestamp("Compression codec class: none");
      }

      BytesWritable keyBsw = new BytesWritable();
      BytesWritable valBsw = new BytesWritable();

      // Stop early if the file has fewer rows than requested.
      for (long l = 0; l < rows && reader.next(keyBsw, valBsw); l++) {
        totalBytesRead += keyBsw.getSize() + valBsw.getSize();
      }
      reader.close();

      //TODO add tests for other SequenceFile reading scenarios (see the
      //     keys-only sketch below).
    } else {
      throw new IOException("File Type not supported.");
    }

    fin.close();
    stopTime();

    printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
    printlnWithTimestamp("Data read: ");
    printlnWithTimestamp("  rate  = " +
      totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
    printlnWithTimestamp("  total = " + totalBytesRead + "B");

    printlnWithTimestamp("File read: ");
    printlnWithTimestamp("  rate  = " +
      fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
    printlnWithTimestamp("  total = " + fs.getFileStatus(path).getLen() + "B");

    //TODO uncomment this before final commit so the test file is removed.
    //fs.delete(path, true);
  }
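
  // One of the "other SequenceFile reading scenarios" from the TODO above,
  // sketched as an assumption and not yet called from testRunComparisons:
  // iterate keys only via next(key), skipping value deserialization, to
  // isolate key-scan cost. The method name is hypothetical.
  private long timeSequenceFileKeysOnly(Path path, long rows) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    BytesWritable keyBsw = new BytesWritable();
    long totalKeyBytes = 0;
    try {
      for (long l = 0; l < rows && reader.next(keyBsw); l++) {
        totalKeyBytes += keyBsw.getSize();
      }
    } finally {
      reader.close();
    }
    return totalKeyBytes;
  }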

  public void testRunComparisons() throws IOException {
    int keyLength = 100; // 100B
    int valueLength = 5 * 1024; // 5KB
    int minBlockSize = 10 * 1024 * 1024; // 10MB
    int rows = 10000;

    System.out.println("****************************** Sequence File *****************************");

    timeWrite("SequenceFile", keyLength, valueLength, "none", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("SequenceFile", keyLength, valueLength, rows, -1);

    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");

    /* DISABLED LZO
    timeWrite("SequenceFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("SequenceFile", keyLength, valueLength, rows, -1);

    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");
    */

    // SequenceFile can only gzip through the native hadoop libs, so guard it.
    try {
      timeWrite("SequenceFile", keyLength, valueLength, "gz", rows, null,
        minBlockSize);
      System.out.println("\n+++++++\n");
      timeReading("SequenceFile", keyLength, valueLength, rows, -1);
    } catch (IllegalArgumentException e) {
      System.out.println("Skipping sequencefile gz: " + e.getMessage());
    }

    System.out.println("\n\n\n");
    System.out.println("****************************** HFile *****************************");

    timeWrite("HFile", keyLength, valueLength, "none", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 0);

    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");
    /* DISABLED LZO
    timeWrite("HFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 0);
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 1);
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 2);

    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");
    */
    timeWrite("HFile", keyLength, valueLength, "gz", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("HFile", keyLength, valueLength, rows, 0);

    System.out.println("\n\n\n\nNotes: ");
    System.out.println(" * Timing includes open/closing of files.");
    System.out.println(" * Timing includes reading both Key and Value.");
    System.out.println(" * Data is generated as random bytes. Other methods, e.g. using a " +
            "dictionary with care for the distribution of words, are under development.");
    System.out.println(" * Timing of writes currently includes random value/key generation, " +
            "which is the same for SequenceFile and HFile. Another possibility is to generate " +
            "the test data beforehand.");
    System.out.println(" * We need to mitigate the cache effect on the benchmark. As a next " +
            "step we could do a large dummy read between benchmark reads to flush cached data. " +
            "Renaming the file may also help. We could loop the same read method several times, " +
            "flooding the cache each time, and average the results for a better number.");
  }
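
  // A sketch of the cache-flooding idea from the Notes above: between timed
  // reads, stream through a large unrelated file so the page cache no longer
  // holds the benchmark file. The helper name and the choice of a 64KB buffer
  // are assumptions; nothing calls this yet.
  private void floodCacheWithDummyRead(Path dummyFile) throws IOException {
    byte[] buffer = new byte[64 * 1024];
    FSDataInputStream in = fs.open(dummyFile);
    try {
      while (in.read(buffer) != -1) {
        // Discard the bytes; the read traffic itself displaces cached pages.
      }
    } finally {
      in.close();
    }
  }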

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}