1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.text.DateFormat;
23  import java.text.SimpleDateFormat;
24  import java.util.Random;
25  
26  import junit.framework.TestCase;
27  
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FSDataInputStream;
30  import org.apache.hadoop.fs.FSDataOutputStream;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HBaseTestingUtility;
34  import org.apache.hadoop.io.BytesWritable;
35  import org.apache.hadoop.io.SequenceFile;
36  import org.apache.hadoop.io.compress.CompressionCodec;
37  import org.apache.hadoop.io.compress.GzipCodec;
38  
39  /**
40   *  Set of long-running tests to measure performance of HFile.
41   * <p>
42   * Copied from
43   * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
44   * Remove after tfile is committed and use the tfile version of this class
45   * instead.</p>
46   */
47  public class TestHFilePerformance extends TestCase {
48    private static String ROOT_DIR =
49      HBaseTestingUtility.getTestDir("TestHFilePerformance").toString();
50    private FileSystem fs;
51    private Configuration conf;
52    private long startTimeEpoch;
53    private long finishTimeEpoch;
54    private DateFormat formatter;
55  
56    @Override
57    public void setUp() throws IOException {
58      conf = new Configuration();
59      fs = FileSystem.get(conf);
60      formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
61    }
62  
63    public void startTime() {
64      startTimeEpoch = System.currentTimeMillis();
65      System.out.println(formatTime() + " Started timing.");
66    }
67  
68    public void stopTime() {
69      finishTimeEpoch = System.currentTimeMillis();
70      System.out.println(formatTime() + " Stopped timing.");
71    }
72  
73    public long getIntervalMillis() {
74      return finishTimeEpoch - startTimeEpoch;
75    }
76  
77    public void printlnWithTimestamp(String message) {
78      System.out.println(formatTime() + "  " +  message);
79    }
80  
81    /*
82     * Format millis into minutes and seconds.
83     */
84    public String formatTime(long milis){
85      return formatter.format(milis);
86    }
87  
88    public String formatTime(){
89      return formatTime(System.currentTimeMillis());
90    }
91  
92    private FSDataOutputStream createFSOutput(Path name) throws IOException {
93      if (fs.exists(name))
94        fs.delete(name, true);
95      FSDataOutputStream fout = fs.create(name);
96      return fout;
97    }
98  
99    //TODO have multiple ways of generating key/value e.g. dictionary words
100   //TODO to have a sample compressable data, for now, made 1 out of 3 values random
101   //     keys are all random.
102 
103   private static class KeyValueGenerator {
104     Random keyRandomizer;
105     Random valueRandomizer;
106     long randomValueRatio = 3; // 1 out of randomValueRatio generated values will be random.
107     long valueSequence = 0 ;
108 
109 
110     KeyValueGenerator() {
111       keyRandomizer = new Random(0L); //TODO with seed zero
112       valueRandomizer = new Random(1L); //TODO with seed one
113     }
114 
115     // Key is always random now.
116     void getKey(byte[] key) {
117       keyRandomizer.nextBytes(key);
118     }
119 
120     void getValue(byte[] value) {
121       if (valueSequence % randomValueRatio == 0)
122           valueRandomizer.nextBytes(value);
123       valueSequence++;
124     }
125   }
126 
127   /**
128    *
129    * @param fileType "HFile" or "SequenceFile"
130    * @param keyLength
131    * @param valueLength
132    * @param codecName "none", "lzo", "gz"
133    * @param rows number of rows to be written.
134    * @param writeMethod used for HFile only.
135    * @param minBlockSize used for HFile only.
136    * @throws IOException
137    */
138    //TODO writeMethod: implement multiple ways of writing e.g. A) known length (no chunk) B) using a buffer and streaming (for many chunks).
139   public void timeWrite(String fileType, int keyLength, int valueLength,
140     String codecName, long rows, String writeMethod, int minBlockSize)
141   throws IOException {
142     System.out.println("File Type: " + fileType);
143     System.out.println("Writing " + fileType + " with codecName: " + codecName);
144     long totalBytesWritten = 0;
145 
146 
147     //Using separate randomizer for key/value with seeds matching Sequence File.
148     byte[] key = new byte[keyLength];
149     byte[] value = new byte[valueLength];
150     KeyValueGenerator generator = new KeyValueGenerator();
151 
152     startTime();
153 
154     Path path = new Path(ROOT_DIR, fileType + ".Performance");
155     System.out.println(ROOT_DIR + path.getName());
156     FSDataOutputStream fout =  createFSOutput(path);
157 
158     if ("HFile".equals(fileType)){
159         System.out.println("HFile write method: ");
160         HFile.Writer writer =
161           new HFile.Writer(fout, minBlockSize, codecName, null);
162 
163         // Writing value in one shot.
164         for (long l=0 ; l<rows ; l++ ) {
165           generator.getKey(key);
166           generator.getValue(value);
167           writer.append(key, value);
168           totalBytesWritten += key.length;
169           totalBytesWritten += value.length;
170          }
171         writer.close();
172     } else if ("SequenceFile".equals(fileType)){
173         CompressionCodec codec = null;
174         if ("gz".equals(codecName))
175           codec = new GzipCodec();
176         else if (!"none".equals(codecName))
177           throw new IOException("Codec not supported.");
178 
179         SequenceFile.Writer writer;
180 
181         //TODO
182         //JobConf conf = new JobConf();
183 
184         if (!"none".equals(codecName))
185           writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
186             BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
187         else
188           writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
189             BytesWritable.class, SequenceFile.CompressionType.NONE, null);
190 
191         BytesWritable keyBsw;
192         BytesWritable valBsw;
193         for (long l=0 ; l<rows ; l++ ) {
194 
195            generator.getKey(key);
196            keyBsw = new BytesWritable(key);
197            totalBytesWritten += keyBsw.getSize();
198 
199            generator.getValue(value);
200            valBsw = new BytesWritable(value);
201            writer.append(keyBsw, valBsw);
202            totalBytesWritten += valBsw.getSize();
203         }
204 
205         writer.close();
206     } else
207        throw new IOException("File Type is not supported");
208 
209     fout.close();
210     stopTime();
211 
212     printlnWithTimestamp("Data written: ");
213     printlnWithTimestamp("  rate  = " +
214       totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
215     printlnWithTimestamp("  total = " + totalBytesWritten + "B");
216 
217     printlnWithTimestamp("File written: ");
218     printlnWithTimestamp("  rate  = " +
219       fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
220     printlnWithTimestamp("  total = " + fs.getFileStatus(path).getLen() + "B");
221   }
222 
223   public void timeReading(String fileType, int keyLength, int valueLength,
224       long rows, int method) throws IOException {
225     System.out.println("Reading file of type: " + fileType);
226     Path path = new Path(ROOT_DIR, fileType + ".Performance");
227     System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
228     long totalBytesRead = 0;
229 
230 
231     ByteBuffer val;
232 
233     ByteBuffer key;
234 
235     startTime();
236     FSDataInputStream fin = fs.open(path);
237 
238     if ("HFile".equals(fileType)){
239         HFile.Reader reader = new HFile.Reader(fs.open(path),
240           fs.getFileStatus(path).getLen(), null, false);
241         reader.loadFileInfo();
242         switch (method) {
243 
244           case 0:
245           case 1:
246           default:
247             {
248               HFileScanner scanner = reader.getScanner(false, false);
249               scanner.seekTo();
250               for (long l=0 ; l<rows ; l++ ) {
251                 key = scanner.getKey();
252                 val = scanner.getValue();
253                 totalBytesRead += key.limit() + val.limit();
254                 scanner.next();
255               }
256             }
257             break;
258         }
259     } else if("SequenceFile".equals(fileType)){
260 
261         SequenceFile.Reader reader;
262         reader = new SequenceFile.Reader(fs, path, new Configuration());
263 
264         if (reader.getCompressionCodec() != null) {
265             printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
266         } else
267             printlnWithTimestamp("Compression codec class: " + "none");
268 
269         BytesWritable keyBsw = new BytesWritable();
270         BytesWritable valBsw = new BytesWritable();
271 
272         for (long l=0 ; l<rows ; l++ ) {
273           reader.next(keyBsw, valBsw);
274           totalBytesRead += keyBsw.getSize() + valBsw.getSize();
275         }
276         reader.close();
277 
278         //TODO make a tests for other types of SequenceFile reading scenarios
279 
280     } else {
281         throw new IOException("File Type not supported.");
282     }
283 
284 
285     //printlnWithTimestamp("Closing reader");
286     fin.close();
287     stopTime();
288     //printlnWithTimestamp("Finished close");
289 
290     printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
291     printlnWithTimestamp("Data read: ");
292     printlnWithTimestamp("  rate  = " +
293       totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
294     printlnWithTimestamp("  total = " + totalBytesRead + "B");
295 
296     printlnWithTimestamp("File read: ");
297     printlnWithTimestamp("  rate  = " +
298       fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
299     printlnWithTimestamp("  total = " + fs.getFileStatus(path).getLen() + "B");
300 
301     //TODO uncomment this for final committing so test files is removed.
302     //fs.delete(path, true);
303   }
304 
305   public void testRunComparisons() throws IOException {
306 
307     int keyLength = 100; // 100B
308     int valueLength = 5*1024; // 5KB
309     int minBlockSize = 10*1024*1024; // 10MB
310     int rows = 10000;
311 
312     System.out.println("****************************** Sequence File *****************************");
313 
314     timeWrite("SequenceFile", keyLength, valueLength, "none", rows, null, minBlockSize);
315     System.out.println("\n+++++++\n");
316     timeReading("SequenceFile", keyLength, valueLength, rows, -1);
317 
318     System.out.println("");
319     System.out.println("----------------------");
320     System.out.println("");
321 
322     /* DISABLED LZO
323     timeWrite("SequenceFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
324     System.out.println("\n+++++++\n");
325     timeReading("SequenceFile", keyLength, valueLength, rows, -1);
326 
327     System.out.println("");
328     System.out.println("----------------------");
329     System.out.println("");
330 
331     /* Sequence file can only use native hadoop libs gzipping so commenting out.
332      */
333     try {
334       timeWrite("SequenceFile", keyLength, valueLength, "gz", rows, null,
335         minBlockSize);
336       System.out.println("\n+++++++\n");
337       timeReading("SequenceFile", keyLength, valueLength, rows, -1);
338     } catch (IllegalArgumentException e) {
339       System.out.println("Skipping sequencefile gz: " + e.getMessage());
340     }
341 
342 
343     System.out.println("\n\n\n");
344     System.out.println("****************************** HFile *****************************");
345 
346     timeWrite("HFile", keyLength, valueLength, "none", rows, null, minBlockSize);
347     System.out.println("\n+++++++\n");
348     timeReading("HFile", keyLength, valueLength, rows, 0 );
349 
350     System.out.println("");
351     System.out.println("----------------------");
352     System.out.println("");
353 /* DISABLED LZO
354     timeWrite("HFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
355     System.out.println("\n+++++++\n");
356     timeReading("HFile", keyLength, valueLength, rows, 0 );
357     System.out.println("\n+++++++\n");
358     timeReading("HFile", keyLength, valueLength, rows, 1 );
359     System.out.println("\n+++++++\n");
360     timeReading("HFile", keyLength, valueLength, rows, 2 );
361 
362     System.out.println("");
363     System.out.println("----------------------");
364     System.out.println("");
365 */
366     timeWrite("HFile", keyLength, valueLength, "gz", rows, null, minBlockSize);
367     System.out.println("\n+++++++\n");
368     timeReading("HFile", keyLength, valueLength, rows, 0 );
369 
370     System.out.println("\n\n\n\nNotes: ");
371     System.out.println(" * Timing includes open/closing of files.");
372     System.out.println(" * Timing includes reading both Key and Value");
373     System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
374             "dictionary with care for distributation of words is under development.");
375     System.out.println(" * Timing of write currently, includes random value/key generations. " +
376             "Which is the same for Sequence File and HFile. Another possibility is to generate " +
377             "test data beforehand");
378     System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
379             "ideas, for next step we do a large dummy read between benchmark read to dismantle " +
380             "caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
381             " the same method several times and flood cache every time and average it to get a" +
382             " better number.");
383   }
384 }