View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
21  import java.io.IOException;
22  import java.io.PrintStream;
23  import java.lang.reflect.Constructor;
24  import java.math.BigDecimal;
25  import java.math.MathContext;
26  import java.text.DecimalFormat;
27  import java.text.SimpleDateFormat;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Date;
31  import java.util.Map;
32  import java.util.Random;
33  import java.util.TreeMap;
34  import java.util.concurrent.Callable;
35  import java.util.concurrent.ExecutionException;
36  import java.util.concurrent.ExecutorService;
37  import java.util.concurrent.Executors;
38  import java.util.concurrent.Future;
39  
40  import com.google.common.util.concurrent.ThreadFactoryBuilder;
41  import org.apache.commons.logging.Log;
42  import org.apache.commons.logging.LogFactory;
43  import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
44  import org.apache.hadoop.conf.Configuration;
45  import org.apache.hadoop.conf.Configured;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.hbase.client.Get;
49  import org.apache.hadoop.hbase.client.HBaseAdmin;
50  import org.apache.hadoop.hbase.client.HConnection;
51  import org.apache.hadoop.hbase.client.HConnectionManager;
52  import org.apache.hadoop.hbase.client.HTableInterface;
53  import org.apache.hadoop.hbase.client.Put;
54  import org.apache.hadoop.hbase.client.Result;
55  import org.apache.hadoop.hbase.client.ResultScanner;
56  import org.apache.hadoop.hbase.client.Scan;
57  import org.apache.hadoop.hbase.client.Durability;
58  import org.apache.hadoop.hbase.filter.PageFilter;
59  import org.apache.hadoop.hbase.filter.WhileMatchFilter;
60  import org.apache.hadoop.hbase.filter.Filter;
61  import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
62  import org.apache.hadoop.hbase.filter.CompareFilter;
63  import org.apache.hadoop.hbase.filter.BinaryComparator;
64  import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
65  import org.apache.hadoop.hbase.io.compress.Compression;
66  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
67  import org.apache.hadoop.hbase.util.Bytes;
68  import org.apache.hadoop.hbase.util.Hash;
69  import org.apache.hadoop.hbase.util.MurmurHash;
70  import org.apache.hadoop.hbase.util.Pair;
71  import org.apache.hadoop.io.LongWritable;
72  import org.apache.hadoop.io.Text;
73  import org.apache.hadoop.mapreduce.Job;
74  import org.apache.hadoop.mapreduce.Mapper;
75  import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
76  import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
77  import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
78  import org.apache.hadoop.util.Tool;
79  import org.apache.hadoop.util.ToolRunner;
80  import org.codehaus.jackson.map.ObjectMapper;
81  
82  import static org.codehaus.jackson.map.SerializationConfig.Feature.SORT_PROPERTIES_ALPHABETICALLY;
83  
84  /**
 * Script used for evaluating HBase performance and scalability.  Runs an HBase
 * client that steps through one of a set of hardcoded tests or 'experiments'
 * (e.g. a random reads test, a random writes test, etc.). Pass on the
 * command line which test to run and how many clients are participating in
 * this experiment. Run <code>java PerformanceEvaluation --help</code> to
 * obtain usage.
91   *
92   * <p>This class sets up and runs the evaluation programs described in
93   * Section 7, <i>Performance Evaluation</i>, of the <a
94   * href="http://labs.google.com/papers/bigtable.html">Bigtable</a>
95   * paper, pages 8-10.
96   *
 * <p>If the number of clients is greater than 1, we start up a MapReduce job.
 * Each map task runs an individual client. Each client does about 1GB of data.
99   */
100 public class PerformanceEvaluation extends Configured implements Tool {
101   protected static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName());
102 
103   public static final String TABLE_NAME = "TestTable";
104   public static final byte[] FAMILY_NAME = Bytes.toBytes("info");
105   public static final byte[] QUALIFIER_NAME = Bytes.toBytes("data");
106   public static final int VALUE_LENGTH = 1000;
107   public static final int ROW_LENGTH = 26;
108 
109   private static final int ONE_GB = 1024 * 1024 * 1000;
110   private static final int ROWS_PER_GB = ONE_GB / VALUE_LENGTH;
111   private static final DecimalFormat FMT = new DecimalFormat("0.##");
112   private static final MathContext CXT = MathContext.DECIMAL64;
113   private static final BigDecimal MS_PER_SEC = BigDecimal.valueOf(1000);
114   private static final BigDecimal BYTES_PER_MB = BigDecimal.valueOf(1024 * 1024);
115   private static final TestOptions DEFAULT_OPTS = new TestOptions();
116 
117   protected Map<String, CmdDescriptor> commands = new TreeMap<String, CmdDescriptor>();
118 
119   private static final Path PERF_EVAL_DIR = new Path("performance_evaluation");
120 
121   /**
122    * Enum for map metrics.  Keep it out here rather than inside in the Map
123    * inner-class so we can find associated properties.
124    */
125   protected static enum Counter {
126     /** elapsed time */
127     ELAPSED_TIME,
128     /** number of rows */
129     ROWS
130   }
131 
132   /**
133    * Constructor
134    * @param conf Configuration object
135    */
136   public PerformanceEvaluation(final Configuration conf) {
137     super(conf);
138 
139     addCommandDescriptor(RandomReadTest.class, "randomRead",
140         "Run random read test");
141     addCommandDescriptor(RandomSeekScanTest.class, "randomSeekScan",
142         "Run random seek and scan 100 test");
143     addCommandDescriptor(RandomScanWithRange10Test.class, "scanRange10",
144         "Run random seek scan with both start and stop row (max 10 rows)");
145     addCommandDescriptor(RandomScanWithRange100Test.class, "scanRange100",
146         "Run random seek scan with both start and stop row (max 100 rows)");
147     addCommandDescriptor(RandomScanWithRange1000Test.class, "scanRange1000",
148         "Run random seek scan with both start and stop row (max 1000 rows)");
149     addCommandDescriptor(RandomScanWithRange10000Test.class, "scanRange10000",
150         "Run random seek scan with both start and stop row (max 10000 rows)");
151     addCommandDescriptor(RandomWriteTest.class, "randomWrite",
152         "Run random write test");
153     addCommandDescriptor(SequentialReadTest.class, "sequentialRead",
154         "Run sequential read test");
155     addCommandDescriptor(SequentialWriteTest.class, "sequentialWrite",
156         "Run sequential write test");
157     addCommandDescriptor(ScanTest.class, "scan",
158         "Run scan test (read every row)");
159     addCommandDescriptor(FilteredScanTest.class, "filterScan",
160         "Run scan test using a filter to find a specific row based on it's value (make sure to use --rows=20)");
161   }
162 
163   protected void addCommandDescriptor(Class<? extends Test> cmdClass,
164       String name, String description) {
165     CmdDescriptor cmdDescriptor =
166       new CmdDescriptor(cmdClass, name, description);
167     commands.put(name, cmdDescriptor);
168   }
169 
170   /**
171    * Implementations can have their status set.
172    */
173   interface Status {
174     /**
175      * Sets status
176      * @param msg status message
177      * @throws IOException
178      */
179     void setStatus(final String msg) throws IOException;
180   }
181 
182   /**
183    * MapReduce job that runs a performance evaluation client in each map task.
184    */
185   public static class EvaluationMapTask
186       extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
187 
188     /** configuration parameter name that contains the command */
189     public final static String CMD_KEY = "EvaluationMapTask.command";
190     /** configuration parameter name that contains the PE impl */
191     public static final String PE_KEY = "EvaluationMapTask.performanceEvalImpl";
192 
193     private Class<? extends Test> cmd;
194     private PerformanceEvaluation pe;
195 
196     @Override
197     protected void setup(Context context) throws IOException, InterruptedException {
198       this.cmd = forName(context.getConfiguration().get(CMD_KEY), Test.class);
199 
200       // this is required so that extensions of PE are instantiated within the
201       // map reduce task...
202       Class<? extends PerformanceEvaluation> peClass =
203           forName(context.getConfiguration().get(PE_KEY), PerformanceEvaluation.class);
204       try {
205         this.pe = peClass.getConstructor(Configuration.class)
206             .newInstance(context.getConfiguration());
207       } catch (Exception e) {
208         throw new IllegalStateException("Could not instantiate PE instance", e);
209       }
210     }
211 
212     private <Type> Class<? extends Type> forName(String className, Class<Type> type) {
213       try {
214         return Class.forName(className).asSubclass(type);
215       } catch (ClassNotFoundException e) {
216         throw new IllegalStateException("Could not find class for name: " + className, e);
217       }
218     }
219 
220     protected void map(LongWritable key, Text value, final Context context)
221            throws IOException, InterruptedException {
222 
223       Status status = new Status() {
224         public void setStatus(String msg) {
225            context.setStatus(msg);
226         }
227       };
228 
229       ObjectMapper mapper = new ObjectMapper();
230       TestOptions opts = mapper.readValue(value.toString(), TestOptions.class);
231       Configuration conf = HBaseConfiguration.create(context.getConfiguration());
232 
233       // Evaluation task
234       long elapsedTime = this.pe.runOneClient(this.cmd, conf, opts, status);
235       // Collect how much time the thing took. Report as map output and
236       // to the ELAPSED_TIME counter.
237       context.getCounter(Counter.ELAPSED_TIME).increment(elapsedTime);
238       context.getCounter(Counter.ROWS).increment(opts.perClientRunRows);
239       context.write(new LongWritable(opts.startRow), new LongWritable(elapsedTime));
240       context.progress();
241     }
242   }
243 
244   /*
245    * If table does not already exist, create.
246    * @param c Client to use checking.
247    * @return True if we created the table.
248    * @throws IOException
249    */
250   private static boolean checkTable(HBaseAdmin admin, TestOptions opts) throws IOException {
251     HTableDescriptor tableDescriptor = getTableDescriptor(opts);
252     if (opts.presplitRegions > 0) {
253       // presplit requested
254       if (admin.tableExists(tableDescriptor.getTableName())) {
255         admin.disableTable(tableDescriptor.getTableName());
256         admin.deleteTable(tableDescriptor.getTableName());
257       }
258 
259       byte[][] splits = getSplits(opts);
260       for (int i=0; i < splits.length; i++) {
261         LOG.debug(" split " + i + ": " + Bytes.toStringBinary(splits[i]));
262       }
263       admin.createTable(tableDescriptor, splits);
264       LOG.info ("Table created with " + opts.presplitRegions + " splits");
265     }
266     else {
267       boolean tableExists = admin.tableExists(tableDescriptor.getTableName());
268       if (!tableExists) {
269         admin.createTable(tableDescriptor);
270         LOG.info("Table " + tableDescriptor + " created");
271       }
272     }
273     return admin.tableExists(tableDescriptor.getTableName());
274   }
275 
276   /**
277    * Create an HTableDescriptor from provided TestOptions.
278    */
279   protected static HTableDescriptor getTableDescriptor(TestOptions opts) {
280     HTableDescriptor desc = new HTableDescriptor(opts.tableName);
281     HColumnDescriptor family = new HColumnDescriptor(FAMILY_NAME);
282     family.setDataBlockEncoding(opts.blockEncoding);
283     family.setCompressionType(opts.compression);
284     if (opts.inMemoryCF) {
285       family.setInMemory(true);
286     }
287     desc.addFamily(family);
288     return desc;
289   }
290 
291   /**
292    * generates splits based on total number of rows and specified split regions
293    */
294   protected static byte[][] getSplits(TestOptions opts) {
295     if (opts.presplitRegions == 0)
296       return new byte [0][];
297 
298     int numSplitPoints = opts.presplitRegions - 1;
299     byte[][] splits = new byte[numSplitPoints][];
300     int jump = opts.totalRows / opts.presplitRegions;
301     for (int i = 0; i < numSplitPoints; i++) {
302       int rowkey = jump * (1 + i);
303       splits[i] = format(rowkey);
304     }
305     return splits;
306   }
307 
308   /*
309    * Run all clients in this vm each to its own thread.
310    * @param cmd Command to run.
311    * @throws IOException
312    */
313   private void doLocalClients(final Class<? extends Test> cmd, final TestOptions opts)
314       throws IOException, InterruptedException {
315     Future<Long>[] threads = new Future[opts.numClientThreads];
316     long[] timings = new long[opts.numClientThreads];
317     ExecutorService pool = Executors.newFixedThreadPool(opts.numClientThreads,
318       new ThreadFactoryBuilder().setNameFormat("TestClient-%s").build());
319     for (int i = 0; i < threads.length; i++) {
320       final int index = i;
321       threads[i] = pool.submit(new Callable<Long>() {
322         @Override
323         public Long call() throws Exception {
324           TestOptions threadOpts = new TestOptions(opts);
325           threadOpts.startRow = index * threadOpts.perClientRunRows;
326           long elapsedTime = runOneClient(cmd, getConf(), threadOpts, new Status() {
327             public void setStatus(final String msg) throws IOException {
328               LOG.info("client-" + Thread.currentThread().getName() + " " + msg);
329             }
330           });
331           LOG.info("Finished " + Thread.currentThread().getName() + " in " + elapsedTime +
332             "ms over " + threadOpts.perClientRunRows + " rows");
333           return elapsedTime;
334         }
335       });
336     }
337     pool.shutdown();
338     for (int i = 0; i < threads.length; i++) {
339       try {
340         timings[i] = threads[i].get();
341       } catch (ExecutionException e) {
342         throw new IOException(e.getCause());
343       }
344     }
345     final String test = cmd.getSimpleName();
346     LOG.info("[" + test + "] Summary of timings (ms): "
347              + Arrays.toString(timings));
348     Arrays.sort(timings);
349     long total = 0;
350     for (int i = 0; i < timings.length; i++) {
351       total += timings[i];
352     }
353     LOG.info("[" + test + "]"
354              + "\tMin: " + timings[0] + "ms"
355              + "\tMax: " + timings[timings.length - 1] + "ms"
356              + "\tAvg: " + (total / timings.length) + "ms");
357   }
358 
359   /*
360    * Run a mapreduce job.  Run as many maps as asked-for clients.
361    * Before we start up the job, write out an input file with instruction
362    * per client regards which row they are to start on.
363    * @param cmd Command to run.
364    * @throws IOException
365    */
366   private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
367         InterruptedException, ClassNotFoundException {
368     Configuration conf = getConf();
369     Path inputDir = writeInputFile(conf, opts);
370     conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
371     conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
372     Job job = new Job(conf);
373     job.setJarByClass(PerformanceEvaluation.class);
374     job.setJobName("HBase Performance Evaluation");
375 
376     job.setInputFormatClass(NLineInputFormat.class);
377     NLineInputFormat.setInputPaths(job, inputDir);
378     // this is default, but be explicit about it just in case.
379     NLineInputFormat.setNumLinesPerSplit(job, 1);
380 
381     job.setOutputKeyClass(LongWritable.class);
382     job.setOutputValueClass(LongWritable.class);
383 
384     job.setMapperClass(EvaluationMapTask.class);
385     job.setReducerClass(LongSumReducer.class);
386 
387     job.setNumReduceTasks(1);
388 
389     job.setOutputFormatClass(TextOutputFormat.class);
390     TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
391 
392     TableMapReduceUtil.addDependencyJars(job);
393     TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
394       DescriptiveStatistics.class, // commons-math
395       ObjectMapper.class);         // jackson-mapper-asl
396 
397     TableMapReduceUtil.initCredentials(job);
398 
399     job.waitForCompletion(true);
400   }
401 
402   /*
403    * Write input file of offsets-per-client for the mapreduce job.
404    * @param c Configuration
405    * @return Directory that contains file written.
406    * @throws IOException
407    */
408   private Path writeInputFile(final Configuration c, final TestOptions opts) throws IOException {
409     SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
410     Path jobdir = new Path(PERF_EVAL_DIR, formatter.format(new Date()));
411     Path inputDir = new Path(jobdir, "inputs");
412 
413     FileSystem fs = FileSystem.get(c);
414     fs.mkdirs(inputDir);
415 
416     Path inputFile = new Path(inputDir, "input.txt");
417     PrintStream out = new PrintStream(fs.create(inputFile));
418     // Make input random.
419     Map<Integer, String> m = new TreeMap<Integer, String>();
420     Hash h = MurmurHash.getInstance();
421     int perClientRows = (opts.totalRows / opts.numClientThreads);
422     ObjectMapper mapper = new ObjectMapper();
423     mapper.configure(SORT_PROPERTIES_ALPHABETICALLY, true);
424     try {
425       for (int i = 0; i < 10; i++) {
426         for (int j = 0; j < opts.numClientThreads; j++) {
427           TestOptions next = new TestOptions(opts);
428           next.startRow = (j * perClientRows) + (i * (perClientRows/10));
429           next.perClientRunRows = perClientRows / 10;
430           String s = mapper.writeValueAsString(next);
431           int hash = h.hash(Bytes.toBytes(s));
432           m.put(hash, s);
433         }
434       }
435       for (Map.Entry<Integer, String> e: m.entrySet()) {
436         out.println(e.getValue());
437       }
438     } finally {
439       out.close();
440     }
441     return inputDir;
442   }
443 
444   /**
445    * Describes a command.
446    */
447   static class CmdDescriptor {
448     private Class<? extends Test> cmdClass;
449     private String name;
450     private String description;
451 
452     CmdDescriptor(Class<? extends Test> cmdClass, String name, String description) {
453       this.cmdClass = cmdClass;
454       this.name = name;
455       this.description = description;
456     }
457 
458     public Class<? extends Test> getCmdClass() {
459       return cmdClass;
460     }
461 
462     public String getName() {
463       return name;
464     }
465 
466     public String getDescription() {
467       return description;
468     }
469   }
470 
471   /**
472    * Wraps up options passed to {@link org.apache.hadoop.hbase.PerformanceEvaluation}.
473    * This makes tracking all these arguments a little easier.
474    */
475   static class TestOptions {
476 
477     public TestOptions() {}
478 
479     public TestOptions(TestOptions that) {
480       this.nomapred = that.nomapred;
481       this.startRow = that.startRow;
482       this.perClientRunRows = that.perClientRunRows;
483       this.numClientThreads = that.numClientThreads;
484       this.totalRows = that.totalRows;
485       this.sampleRate = that.sampleRate;
486       this.tableName = that.tableName;
487       this.flushCommits = that.flushCommits;
488       this.writeToWAL = that.writeToWAL;
489       this.useTags = that.useTags;
490       this.noOfTags = that.noOfTags;
491       this.reportLatency = that.reportLatency;
492       this.multiGet = that.multiGet;
493       this.inMemoryCF = that.inMemoryCF;
494       this.presplitRegions = that.presplitRegions;
495       this.compression = that.compression;
496       this.blockEncoding = that.blockEncoding;
497     }
498 
499     public boolean nomapred = false;
500     public int startRow = 0;
501     public int perClientRunRows = ROWS_PER_GB;
502     public int numClientThreads = 1;
503     public int totalRows = ROWS_PER_GB;
504     public float sampleRate = 1.0f;
505     public String tableName = TABLE_NAME;
506     public boolean flushCommits = true;
507     public boolean writeToWAL = true;
508     public boolean useTags = false;
509     public int noOfTags = 1;
510     public boolean reportLatency = false;
511     public int multiGet = 0;
512     boolean inMemoryCF = false;
513     int presplitRegions = 0;
514     public Compression.Algorithm compression = Compression.Algorithm.NONE;
515     public DataBlockEncoding blockEncoding = DataBlockEncoding.NONE;
516   }
517 
518   /*
519    * A test.
520    * Subclass to particularize what happens per row.
521    */
522   static abstract class Test {
523     // Below is make it so when Tests are all running in the one
524     // jvm, that they each have a differently seeded Random.
525     private static final Random randomSeed = new Random(System.currentTimeMillis());
526     private static long nextRandomSeed() {
527       return randomSeed.nextLong();
528     }
529     protected final Random rand = new Random(nextRandomSeed());
530     protected final Configuration conf;
531     protected final TestOptions opts;
532 
533     private final Status status;
534     protected HConnection connection;
535     protected HTableInterface table;
536 
537     /**
538      * Note that all subclasses of this class must provide a public contructor
539      * that has the exact same list of arguments.
540      */
541     Test(final Configuration conf, final TestOptions options, final Status status) {
542       this.conf = conf;
543       this.opts = options;
544       this.status = status;
545     }
546 
547     private String generateStatus(final int sr, final int i, final int lr) {
548       return sr + "/" + i + "/" + lr;
549     }
550 
551     protected int getReportingPeriod() {
552       int period = opts.perClientRunRows / 10;
553       return period == 0 ? opts.perClientRunRows : period;
554     }
555 
556     void testSetup() throws IOException {
557       this.connection = HConnectionManager.createConnection(conf);
558       this.table = connection.getTable(opts.tableName);
559       this.table.setAutoFlush(false, true);
560     }
561 
562     void testTakedown() throws IOException {
563       if (opts.flushCommits) {
564         this.table.flushCommits();
565       }
566       table.close();
567       connection.close();
568     }
569 
570     /*
571      * Run test
572      * @return Elapsed time.
573      * @throws IOException
574      */
575     long test() throws IOException {
576       testSetup();
577       LOG.info("Timed test starting in thread " + Thread.currentThread().getName());
578       final long startTime = System.nanoTime();
579       try {
580         testTimed();
581       } finally {
582         testTakedown();
583       }
584       return (System.nanoTime() - startTime) / 1000000;
585     }
586 
587     /**
588      * Provides an extension point for tests that don't want a per row invocation.
589      */
590     void testTimed() throws IOException {
591       int lastRow = opts.startRow + opts.perClientRunRows;
592       // Report on completion of 1/10th of total.
593       for (int i = opts.startRow; i < lastRow; i++) {
594         testRow(i);
595         if (status != null && i > 0 && (i % getReportingPeriod()) == 0) {
596           status.setStatus(generateStatus(opts.startRow, i, lastRow));
597         }
598       }
599     }
600 
601     /*
602     * Test for individual row.
603     * @param i Row index.
604     */
605     abstract void testRow(final int i) throws IOException;
606   }
607 
608 
609   @SuppressWarnings("unused")
610   static class RandomSeekScanTest extends Test {
611     RandomSeekScanTest(Configuration conf, TestOptions options, Status status) {
612       super(conf, options, status);
613     }
614 
615     @Override
616     void testRow(final int i) throws IOException {
617       Scan scan = new Scan(getRandomRow(this.rand, opts.totalRows));
618       scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
619       scan.setFilter(new WhileMatchFilter(new PageFilter(120)));
620       ResultScanner s = this.table.getScanner(scan);
621       for (Result rr; (rr = s.next()) != null;) ;
622       s.close();
623     }
624 
625     @Override
626     protected int getReportingPeriod() {
627       int period = opts.perClientRunRows / 100;
628       return period == 0 ? opts.perClientRunRows : period;
629     }
630 
631   }
632 
633   @SuppressWarnings("unused")
634   static abstract class RandomScanWithRangeTest extends Test {
635     RandomScanWithRangeTest(Configuration conf, TestOptions options, Status status) {
636       super(conf, options, status);
637     }
638 
639     @Override
640     void testRow(final int i) throws IOException {
641       Pair<byte[], byte[]> startAndStopRow = getStartAndStopRow();
642       Scan scan = new Scan(startAndStopRow.getFirst(), startAndStopRow.getSecond());
643       scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
644       ResultScanner s = this.table.getScanner(scan);
645       int count = 0;
646       for (Result rr; (rr = s.next()) != null;) {
647         count++;
648       }
649 
650       if (i % 100 == 0) {
651         LOG.info(String.format("Scan for key range %s - %s returned %s rows",
652             Bytes.toString(startAndStopRow.getFirst()),
653             Bytes.toString(startAndStopRow.getSecond()), count));
654       }
655 
656       s.close();
657     }
658 
659     protected abstract Pair<byte[],byte[]> getStartAndStopRow();
660 
661     protected Pair<byte[], byte[]> generateStartAndStopRows(int maxRange) {
662       int start = this.rand.nextInt(Integer.MAX_VALUE) % opts.totalRows;
663       int stop = start + maxRange;
664       return new Pair<byte[],byte[]>(format(start), format(stop));
665     }
666 
667     @Override
668     protected int getReportingPeriod() {
669       int period = opts.perClientRunRows / 100;
670       return period == 0? opts.perClientRunRows: period;
671     }
672   }
673 
674   static class RandomScanWithRange10Test extends RandomScanWithRangeTest {
675     RandomScanWithRange10Test(Configuration conf, TestOptions options, Status status) {
676       super(conf, options, status);
677     }
678 
679     @Override
680     protected Pair<byte[], byte[]> getStartAndStopRow() {
681       return generateStartAndStopRows(10);
682     }
683   }
684 
685   static class RandomScanWithRange100Test extends RandomScanWithRangeTest {
686     RandomScanWithRange100Test(Configuration conf, TestOptions options, Status status) {
687       super(conf, options, status);
688     }
689 
690     @Override
691     protected Pair<byte[], byte[]> getStartAndStopRow() {
692       return generateStartAndStopRows(100);
693     }
694   }
695 
696   static class RandomScanWithRange1000Test extends RandomScanWithRangeTest {
697     RandomScanWithRange1000Test(Configuration conf, TestOptions options, Status status) {
698       super(conf, options, status);
699     }
700 
701     @Override
702     protected Pair<byte[], byte[]> getStartAndStopRow() {
703       return generateStartAndStopRows(1000);
704     }
705   }
706 
707   static class RandomScanWithRange10000Test extends RandomScanWithRangeTest {
708     RandomScanWithRange10000Test(Configuration conf, TestOptions options, Status status) {
709       super(conf, options, status);
710     }
711 
712     @Override
713     protected Pair<byte[], byte[]> getStartAndStopRow() {
714       return generateStartAndStopRows(10000);
715     }
716   }
717 
718   static class RandomReadTest extends Test {
719     private final int everyN;
720     private final double[] times;
721     private ArrayList<Get> gets;
722     int idx = 0;
723 
724     RandomReadTest(Configuration conf, TestOptions options, Status status) {
725       super(conf, options, status);
726       everyN = (int) (opts.totalRows / (opts.totalRows * opts.sampleRate));
727       LOG.info("Sampling 1 every " + everyN + " out of " + opts.perClientRunRows + " total rows.");
728       if (opts.multiGet > 0) {
729         LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
730         this.gets = new ArrayList<Get>(opts.multiGet);
731       }
732       if (opts.reportLatency) {
733         this.times = new double[(int) Math.ceil(opts.perClientRunRows * opts.sampleRate / Math.max(1, opts.multiGet))];
734       } else {
735         this.times = null;
736       }
737     }
738 
739     @Override
740     void testRow(final int i) throws IOException {
741       if (i % everyN == 0) {
742         Get get = new Get(getRandomRow(this.rand, opts.totalRows));
743         get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
744         if (opts.multiGet > 0) {
745           this.gets.add(get);
746           if (this.gets.size() == opts.multiGet) {
747             long start = System.nanoTime();
748             this.table.get(this.gets);
749             if (opts.reportLatency) {
750               times[idx++] = (System.nanoTime() - start) / 1e6;
751             }
752             this.gets.clear();
753           }
754         } else {
755           long start = System.nanoTime();
756           this.table.get(get);
757           if (opts.reportLatency) {
758             times[idx++] = (System.nanoTime() - start) / 1e6;
759           }
760         }
761       }
762     }
763 
764     @Override
765     protected int getReportingPeriod() {
766       int period = opts.perClientRunRows / 100;
767       return period == 0 ? opts.perClientRunRows : period;
768     }
769 
770     @Override
771     protected void testTakedown() throws IOException {
772       if (this.gets != null && this.gets.size() > 0) {
773         this.table.get(gets);
774         this.gets.clear();
775       }
776       super.testTakedown();
777       if (opts.reportLatency) {
778         Arrays.sort(times);
779         DescriptiveStatistics ds = new DescriptiveStatistics();
780         for (double t : times) {
781           ds.addValue(t);
782         }
783         LOG.info("randomRead latency log (ms), on " + times.length + " measures");
784         LOG.info("99.9999% = " + ds.getPercentile(99.9999d));
785         LOG.info(" 99.999% = " + ds.getPercentile(99.999d));
786         LOG.info("  99.99% = " + ds.getPercentile(99.99d));
787         LOG.info("   99.9% = " + ds.getPercentile(99.9d));
788         LOG.info("     99% = " + ds.getPercentile(99d));
789         LOG.info("     95% = " + ds.getPercentile(95d));
790         LOG.info("     90% = " + ds.getPercentile(90d));
791         LOG.info("     80% = " + ds.getPercentile(80d));
792         LOG.info("Standard Deviation = " + ds.getStandardDeviation());
793         LOG.info("Mean = " + ds.getMean());
794       }
795     }
796   }
797 
798   static class RandomWriteTest extends Test {
799     RandomWriteTest(Configuration conf, TestOptions options, Status status) {
800       super(conf, options, status);
801     }
802 
803     @Override
804     void testRow(final int i) throws IOException {
805       byte[] row = getRandomRow(this.rand, opts.totalRows);
806       Put put = new Put(row);
807       byte[] value = generateData(this.rand, VALUE_LENGTH);
808       put.add(FAMILY_NAME, QUALIFIER_NAME, value);
809       put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
810       table.put(put);
811     }
812   }
813 
814 
815   static class ScanTest extends Test {
816     private ResultScanner testScanner;
817 
818     ScanTest(Configuration conf, TestOptions options, Status status) {
819       super(conf, options, status);
820     }
821 
822     @Override
823     void testTakedown() throws IOException {
824       if (this.testScanner != null) {
825         this.testScanner.close();
826       }
827       super.testTakedown();
828     }
829 
830 
831     @Override
832     void testRow(final int i) throws IOException {
833       if (this.testScanner == null) {
834         Scan scan = new Scan(format(opts.startRow));
835         scan.setCaching(30);
836         scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
837         this.testScanner = table.getScanner(scan);
838       }
839       testScanner.next();
840     }
841 
842   }
843 
844   static class SequentialReadTest extends Test {
845     SequentialReadTest(Configuration conf, TestOptions options, Status status) {
846       super(conf, options, status);
847     }
848 
849     @Override
850     void testRow(final int i) throws IOException {
851       Get get = new Get(format(i));
852       get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
853       table.get(get);
854     }
855   }
856 
857   static class SequentialWriteTest extends Test {
858     SequentialWriteTest(Configuration conf, TestOptions options, Status status) {
859       super(conf, options, status);
860     }
861 
862     @Override
863     void testRow(final int i) throws IOException {
864       Put put = new Put(format(i));
865       byte[] value = generateData(this.rand, VALUE_LENGTH);
866       put.add(FAMILY_NAME, QUALIFIER_NAME, value);
867       put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
868       table.put(put);
869     }
870   }
871 
872   static class FilteredScanTest extends Test {
873     protected static final Log LOG = LogFactory.getLog(FilteredScanTest.class.getName());
874 
875     FilteredScanTest(Configuration conf, TestOptions options, Status status) {
876       super(conf, options, status);
877     }
878 
879     @Override
880     void testRow(int i) throws IOException {
881       byte[] value = generateData(this.rand, VALUE_LENGTH);
882       Scan scan = constructScan(value);
883       ResultScanner scanner = null;
884       try {
885         scanner = this.table.getScanner(scan);
886         while (scanner.next() != null) {
887         }
888       } finally {
889         if (scanner != null) scanner.close();
890       }
891     }
892 
893     protected Scan constructScan(byte[] valuePrefix) throws IOException {
894       Filter filter = new SingleColumnValueFilter(
895           FAMILY_NAME, QUALIFIER_NAME, CompareFilter.CompareOp.EQUAL,
896           new BinaryComparator(valuePrefix)
897       );
898       Scan scan = new Scan();
899       scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
900       scan.setFilter(filter);
901       return scan;
902     }
903   }
904 
905   /**
906    * Compute a throughput rate in MB/s.
907    * @param rows Number of records consumed.
908    * @param timeMs Time taken in milliseconds.
909    * @return String value with label, ie '123.76 MB/s'
910    */
911   private static String calculateMbps(int rows, long timeMs) {
912     // MB/s = ((totalRows * ROW_SIZE_BYTES) / totalTimeMS)
913     //        * 1000 MS_PER_SEC / (1024 * 1024) BYTES_PER_MB
914     BigDecimal rowSize =
915       BigDecimal.valueOf(ROW_LENGTH + VALUE_LENGTH + FAMILY_NAME.length + QUALIFIER_NAME.length);
916     BigDecimal mbps = BigDecimal.valueOf(rows).multiply(rowSize, CXT)
917       .divide(BigDecimal.valueOf(timeMs), CXT).multiply(MS_PER_SEC, CXT)
918       .divide(BYTES_PER_MB, CXT);
919     return FMT.format(mbps) + " MB/s";
920   }
921 
922   /*
923    * Format passed integer.
924    * @param number
925    * @return Returns zero-prefixed ROW_LENGTH-byte wide decimal version of passed
926    * number (Does absolute in case number is negative).
927    */
928   public static byte [] format(final int number) {
929     byte [] b = new byte[ROW_LENGTH];
930     int d = Math.abs(number);
931     for (int i = b.length - 1; i >= 0; i--) {
932       b[i] = (byte)((d % 10) + '0');
933       d /= 10;
934     }
935     return b;
936   }
937 
938   /*
939    * This method takes some time and is done inline uploading data.  For
940    * example, doing the mapfile test, generation of the key and value
941    * consumes about 30% of CPU time.
942    * @return Generated random value to insert into a table cell.
943    */
944   public static byte[] generateData(final Random r, int length) {
945     byte [] b = new byte [length];
946     int i = 0;
947 
948     for(i = 0; i < (length-8); i += 8) {
949       b[i] = (byte) (65 + r.nextInt(26));
950       b[i+1] = b[i];
951       b[i+2] = b[i];
952       b[i+3] = b[i];
953       b[i+4] = b[i];
954       b[i+5] = b[i];
955       b[i+6] = b[i];
956       b[i+7] = b[i];
957     }
958 
959     byte a = (byte) (65 + r.nextInt(26));
960     for(; i < length; i++) {
961       b[i] = a;
962     }
963     return b;
964   }
965 
966   static byte [] getRandomRow(final Random random, final int totalRows) {
967     return format(random.nextInt(Integer.MAX_VALUE) % totalRows);
968   }
969 
970   static long runOneClient(final Class<? extends Test> cmd, Configuration conf, TestOptions opts,
971     final Status status)
972       throws IOException {
973     status.setStatus("Start " + cmd + " at offset " + opts.startRow + " for " +
974       opts.perClientRunRows + " rows");
975     long totalElapsedTime = 0;
976 
977     final Test t;
978     try {
979       Constructor<? extends Test> constructor =
980         cmd.getDeclaredConstructor(Configuration.class, TestOptions.class, Status.class);
981       t = constructor.newInstance(conf, opts, status);
982     } catch (NoSuchMethodException e) {
983       throw new IllegalArgumentException("Invalid command class: " +
984           cmd.getName() + ".  It does not provide a constructor as described by " +
985           "the javadoc comment.  Available constructors are: " +
986           Arrays.toString(cmd.getConstructors()));
987     } catch (Exception e) {
988       throw new IllegalStateException("Failed to construct command class", e);
989     }
990     totalElapsedTime = t.test();
991 
992     status.setStatus("Finished " + cmd + " in " + totalElapsedTime +
993       "ms at offset " + opts.startRow + " for " + opts.perClientRunRows + " rows" +
994       " (" + calculateMbps((int)(opts.perClientRunRows * opts.sampleRate), totalElapsedTime) + ")");
995     return totalElapsedTime;
996   }
997 
998   private void runTest(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
999       InterruptedException, ClassNotFoundException {
1000     HBaseAdmin admin = null;
1001     try {
1002       admin = new HBaseAdmin(getConf());
1003       checkTable(admin, opts);
1004     } finally {
1005       if (admin != null) admin.close();
1006     }
1007     if (opts.nomapred) {
1008       doLocalClients(cmd, opts);
1009     } else {
1010       doMapReduce(cmd, opts);
1011     }
1012   }
1013 
1014   protected void printUsage() {
1015     printUsage(null);
1016   }
1017 
1018   protected void printUsage(final String message) {
1019     if (message != null && message.length() > 0) {
1020       System.err.println(message);
1021     }
1022     System.err.println("Usage: java " + this.getClass().getName() + " \\");
1023     System.err.println("  [--nomapred] [--rows=ROWS] [--table=NAME] \\");
1024     System.err.println("  [--compress=TYPE] [--blockEncoding=TYPE] " +
1025       "[-D<property=value>]* <command> <nclients>");
1026     System.err.println();
1027     System.err.println("Options:");
1028     System.err.println(" nomapred        Run multiple clients using threads " +
1029       "(rather than use mapreduce)");
1030     System.err.println(" rows            Rows each client runs. Default: One million");
1031     System.err.println(" sampleRate      Execute test on a sample of total " +
1032       "rows. Only supported by randomRead. Default: 1.0");
1033     System.err.println(" table           Alternate table name. Default: 'TestTable'");
1034     System.err.println(" compress        Compression type to use (GZ, LZO, ...). Default: 'NONE'");
1035     System.err.println(" flushCommits    Used to determine if the test should flush the table. " +
1036       "Default: false");
1037     System.err.println(" writeToWAL      Set writeToWAL on puts. Default: True");
1038     System.err.println(" presplit        Create presplit table. Recommended for accurate perf " +
1039       "analysis (see guide).  Default: disabled");
1040     System.err.println(" inmemory        Tries to keep the HFiles of the CF " +
1041       "inmemory as far as possible. Not guaranteed that reads are always served " +
1042       "from memory.  Default: false");
1043     System.err.println(" latency         Set to report operation latencies. " +
1044       "Currently only supported by randomRead test. Default: False");
1045     System.err.println();
1046     System.err.println(" Note: -D properties will be applied to the conf used. ");
1047     System.err.println("  For example: ");
1048     System.err.println("   -Dmapred.output.compress=true");
1049     System.err.println("   -Dmapreduce.task.timeout=60000");
1050     System.err.println();
1051     System.err.println("Command:");
1052     for (CmdDescriptor command : commands.values()) {
1053       System.err.println(String.format(" %-15s %s", command.getName(), command.getDescription()));
1054     }
1055     System.err.println();
1056     System.err.println("Args:");
1057     System.err.println(" nclients        Integer. Required. Total number of " +
1058       "clients (and HRegionServers)");
1059     System.err.println("                 running: 1 <= value <= 500");
1060     System.err.println("Examples:");
1061     System.err.println(" To run a single evaluation client:");
1062     System.err.println(" $ bin/hbase " + this.getClass().getName()
1063         + " sequentialWrite 1");
1064   }
1065 
1066   private static int getNumClients(final int start, final String[] args) {
1067     if(start + 1 > args.length) {
1068       throw new IllegalArgumentException("must supply the number of clients");
1069     }
1070     int N = Integer.parseInt(args[start]);
1071     if (N < 1) {
1072       throw new IllegalArgumentException("Number of clients must be > 1");
1073     }
1074     return N;
1075   }
1076 
1077   public int run(String[] args) throws Exception {
1078     // Process command-line args. TODO: Better cmd-line processing
1079     // (but hopefully something not as painful as cli options).
1080     int errCode = -1;
1081     if (args.length < 1) {
1082       printUsage();
1083       return errCode;
1084     }
1085 
1086     try {
1087       // MR-NOTE: if you are adding a property that is used to control an operation
1088       // like put(), get(), scan(), ... you must also add it as part of the MR
1089       // input, take a look at writeInputFile().
1090       // Then you must adapt the LINE_PATTERN input regex,
1091       // and parse the argument, take a look at PEInputFormat.getSplits().
1092 
1093       TestOptions opts = new TestOptions();
1094 
1095       for (int i = 0; i < args.length; i++) {
1096         String cmd = args[i];
1097         if (cmd.equals("-h") || cmd.startsWith("--h")) {
1098           printUsage();
1099           errCode = 0;
1100           break;
1101         }
1102 
1103         final String nmr = "--nomapred";
1104         if (cmd.startsWith(nmr)) {
1105           opts.nomapred = true;
1106           continue;
1107         }
1108 
1109         final String rows = "--rows=";
1110         if (cmd.startsWith(rows)) {
1111           opts.perClientRunRows = Integer.parseInt(cmd.substring(rows.length()));
1112           continue;
1113         }
1114 
1115         final String sampleRate = "--sampleRate=";
1116         if (cmd.startsWith(sampleRate)) {
1117           opts.sampleRate = Float.parseFloat(cmd.substring(sampleRate.length()));
1118           continue;
1119         }
1120 
1121         final String table = "--table=";
1122         if (cmd.startsWith(table)) {
1123           opts.tableName = cmd.substring(table.length());
1124           continue;
1125         }
1126 
1127         final String compress = "--compress=";
1128         if (cmd.startsWith(compress)) {
1129           opts.compression = Compression.Algorithm.valueOf(cmd.substring(compress.length()));
1130           continue;
1131         }
1132 
1133         final String blockEncoding = "--blockEncoding=";
1134         if (cmd.startsWith(blockEncoding)) {
1135           opts.blockEncoding = DataBlockEncoding.valueOf(cmd.substring(blockEncoding.length()));
1136           continue;
1137         }
1138 
1139         final String flushCommits = "--flushCommits=";
1140         if (cmd.startsWith(flushCommits)) {
1141           opts.flushCommits = Boolean.parseBoolean(cmd.substring(flushCommits.length()));
1142           continue;
1143         }
1144 
1145         final String writeToWAL = "--writeToWAL=";
1146         if (cmd.startsWith(writeToWAL)) {
1147           opts.writeToWAL = Boolean.parseBoolean(cmd.substring(writeToWAL.length()));
1148           continue;
1149         }
1150 
1151         final String presplit = "--presplit=";
1152         if (cmd.startsWith(presplit)) {
1153           opts.presplitRegions = Integer.parseInt(cmd.substring(presplit.length()));
1154           continue;
1155         }
1156         
1157         final String inMemory = "--inmemory=";
1158         if (cmd.startsWith(inMemory)) {
1159           opts.inMemoryCF = Boolean.parseBoolean(cmd.substring(inMemory.length()));
1160           continue;
1161         }
1162 
1163         final String latency = "--latency";
1164         if (cmd.startsWith(latency)) {
1165           opts.reportLatency = true;
1166           continue;
1167         }
1168 
1169         final String multiGet = "--multiGet=";
1170         if (cmd.startsWith(multiGet)) {
1171           opts.multiGet = Integer.parseInt(cmd.substring(multiGet.length()));
1172           continue;
1173         }
1174         
1175         Class<? extends Test> cmdClass = determineCommandClass(cmd);
1176         if (cmdClass != null) {
1177           opts.numClientThreads = getNumClients(i + 1, args);
1178           // number of rows specified
1179           opts.totalRows = opts.perClientRunRows * opts.numClientThreads;
1180           runTest(cmdClass, opts);
1181           errCode = 0;
1182           break;
1183         }
1184 
1185         printUsage();
1186         break;
1187       }
1188     } catch (Exception e) {
1189       e.printStackTrace();
1190     }
1191 
1192     return errCode;
1193   }
1194 
1195   private Class<? extends Test> determineCommandClass(String cmd) {
1196     CmdDescriptor descriptor = commands.get(cmd);
1197     return descriptor != null ? descriptor.getCmdClass() : null;
1198   }
1199 
1200   public static void main(final String[] args) throws Exception {
1201     int res = ToolRunner.run(new PerformanceEvaluation(HBaseConfiguration.create()), args);
1202     System.exit(res);
1203   }
1204 }