View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
21  import java.io.IOException;
22  import java.io.PrintStream;
23  import java.lang.reflect.Constructor;
24  import java.math.BigDecimal;
25  import java.math.MathContext;
26  import java.text.DecimalFormat;
27  import java.text.SimpleDateFormat;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Date;
31  import java.util.Map;
32  import java.util.Random;
33  import java.util.TreeMap;
34  import java.util.concurrent.Callable;
35  import java.util.concurrent.ExecutionException;
36  import java.util.concurrent.ExecutorService;
37  import java.util.concurrent.Executors;
38  import java.util.concurrent.Future;
39  
40  import com.google.common.util.concurrent.ThreadFactoryBuilder;
41  import org.apache.commons.logging.Log;
42  import org.apache.commons.logging.LogFactory;
43  import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
44  import org.apache.hadoop.conf.Configuration;
45  import org.apache.hadoop.conf.Configured;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.hbase.client.Durability;
49  import org.apache.hadoop.hbase.client.Get;
50  import org.apache.hadoop.hbase.client.HBaseAdmin;
51  import org.apache.hadoop.hbase.client.HConnection;
52  import org.apache.hadoop.hbase.client.HConnectionManager;
53  import org.apache.hadoop.hbase.client.HTableInterface;
54  import org.apache.hadoop.hbase.client.Put;
55  import org.apache.hadoop.hbase.client.Result;
56  import org.apache.hadoop.hbase.client.ResultScanner;
57  import org.apache.hadoop.hbase.client.Scan;
58  import org.apache.hadoop.hbase.filter.BinaryComparator;
59  import org.apache.hadoop.hbase.filter.CompareFilter;
60  import org.apache.hadoop.hbase.filter.Filter;
61  import org.apache.hadoop.hbase.filter.PageFilter;
62  import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
63  import org.apache.hadoop.hbase.filter.WhileMatchFilter;
64  import org.apache.hadoop.hbase.io.compress.Compression;
65  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
66  import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
67  import org.apache.hadoop.hbase.util.Bytes;
68  import org.apache.hadoop.hbase.util.Hash;
69  import org.apache.hadoop.hbase.util.MurmurHash;
70  import org.apache.hadoop.hbase.util.Pair;
71  import org.apache.hadoop.io.LongWritable;
72  import org.apache.hadoop.io.Text;
73  import org.apache.hadoop.mapreduce.Job;
74  import org.apache.hadoop.mapreduce.Mapper;
75  import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
76  import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
77  import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
78  import org.apache.hadoop.util.Tool;
79  import org.apache.hadoop.util.ToolRunner;
80  import org.codehaus.jackson.map.ObjectMapper;
81  
82  import static org.codehaus.jackson.map.SerializationConfig.Feature.SORT_PROPERTIES_ALPHABETICALLY;
83  
84  /**
85   * Script used evaluating HBase performance and scalability.  Runs a HBase
86   * client that steps through one of a set of hardcoded tests or 'experiments'
87   * (e.g. a random reads test, a random writes test, etc.). Pass on the
88   * command-line which test to run and how many clients are participating in
89   * this experiment. Run <code>java PerformanceEvaluation --help</code> to
90   * obtain usage.
91   *
92   * <p>This class sets up and runs the evaluation programs described in
93   * Section 7, <i>Performance Evaluation</i>, of the <a
94   * href="http://labs.google.com/papers/bigtable.html">Bigtable</a>
95   * paper, pages 8-10.
96   *
97   * <p>If number of clients > 1, we start up a MapReduce job. Each map task
98   * runs an individual client. Each client does about 1GB of data.
99   */
100 public class PerformanceEvaluation extends Configured implements Tool {
101   protected static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName());
102 
103   public static final String TABLE_NAME = "TestTable";
104   public static final byte[] FAMILY_NAME = Bytes.toBytes("info");
105   public static final byte[] QUALIFIER_NAME = Bytes.toBytes("data");
106   public static final int VALUE_LENGTH = 1000;
107   public static final int ROW_LENGTH = 26;
108 
109   private static final int ONE_GB = 1024 * 1024 * 1000;
110   private static final int ROWS_PER_GB = ONE_GB / VALUE_LENGTH;
111   // TODO : should we make this configurable
112   private static final int TAG_LENGTH = 256;
113   private static final DecimalFormat FMT = new DecimalFormat("0.##");
114   private static final MathContext CXT = MathContext.DECIMAL64;
115   private static final BigDecimal MS_PER_SEC = BigDecimal.valueOf(1000);
116   private static final BigDecimal BYTES_PER_MB = BigDecimal.valueOf(1024 * 1024);
117   private static final TestOptions DEFAULT_OPTS = new TestOptions();
118 
119   protected Map<String, CmdDescriptor> commands = new TreeMap<String, CmdDescriptor>();
120 
121   private static final Path PERF_EVAL_DIR = new Path("performance_evaluation");
122 
123   /**
124    * Enum for map metrics.  Keep it out here rather than inside in the Map
125    * inner-class so we can find associated properties.
126    */
127   protected static enum Counter {
128     /** elapsed time */
129     ELAPSED_TIME,
130     /** number of rows */
131     ROWS
132   }
133 
134   /**
135    * Constructor
136    * @param conf Configuration object
137    */
138   public PerformanceEvaluation(final Configuration conf) {
139     super(conf);
140 
141     addCommandDescriptor(RandomReadTest.class, "randomRead",
142         "Run random read test");
143     addCommandDescriptor(RandomSeekScanTest.class, "randomSeekScan",
144         "Run random seek and scan 100 test");
145     addCommandDescriptor(RandomScanWithRange10Test.class, "scanRange10",
146         "Run random seek scan with both start and stop row (max 10 rows)");
147     addCommandDescriptor(RandomScanWithRange100Test.class, "scanRange100",
148         "Run random seek scan with both start and stop row (max 100 rows)");
149     addCommandDescriptor(RandomScanWithRange1000Test.class, "scanRange1000",
150         "Run random seek scan with both start and stop row (max 1000 rows)");
151     addCommandDescriptor(RandomScanWithRange10000Test.class, "scanRange10000",
152         "Run random seek scan with both start and stop row (max 10000 rows)");
153     addCommandDescriptor(RandomWriteTest.class, "randomWrite",
154         "Run random write test");
155     addCommandDescriptor(SequentialReadTest.class, "sequentialRead",
156         "Run sequential read test");
157     addCommandDescriptor(SequentialWriteTest.class, "sequentialWrite",
158         "Run sequential write test");
159     addCommandDescriptor(ScanTest.class, "scan",
160         "Run scan test (read every row)");
161     addCommandDescriptor(FilteredScanTest.class, "filterScan",
162         "Run scan test using a filter to find a specific row based on it's value (make sure to use --rows=20)");
163   }
164 
165   protected void addCommandDescriptor(Class<? extends Test> cmdClass,
166       String name, String description) {
167     CmdDescriptor cmdDescriptor =
168       new CmdDescriptor(cmdClass, name, description);
169     commands.put(name, cmdDescriptor);
170   }
171 
172   /**
173    * Implementations can have their status set.
174    */
175   interface Status {
176     /**
177      * Sets status
178      * @param msg status message
179      * @throws IOException
180      */
181     void setStatus(final String msg) throws IOException;
182   }
183 
184   /**
185    * MapReduce job that runs a performance evaluation client in each map task.
186    */
187   public static class EvaluationMapTask
188       extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
189 
190     /** configuration parameter name that contains the command */
191     public final static String CMD_KEY = "EvaluationMapTask.command";
192     /** configuration parameter name that contains the PE impl */
193     public static final String PE_KEY = "EvaluationMapTask.performanceEvalImpl";
194 
195     private Class<? extends Test> cmd;
196     private PerformanceEvaluation pe;
197 
198     @Override
199     protected void setup(Context context) throws IOException, InterruptedException {
200       this.cmd = forName(context.getConfiguration().get(CMD_KEY), Test.class);
201 
202       // this is required so that extensions of PE are instantiated within the
203       // map reduce task...
204       Class<? extends PerformanceEvaluation> peClass =
205           forName(context.getConfiguration().get(PE_KEY), PerformanceEvaluation.class);
206       try {
207         this.pe = peClass.getConstructor(Configuration.class)
208             .newInstance(context.getConfiguration());
209       } catch (Exception e) {
210         throw new IllegalStateException("Could not instantiate PE instance", e);
211       }
212     }
213 
214     private <Type> Class<? extends Type> forName(String className, Class<Type> type) {
215       try {
216         return Class.forName(className).asSubclass(type);
217       } catch (ClassNotFoundException e) {
218         throw new IllegalStateException("Could not find class for name: " + className, e);
219       }
220     }
221 
222     protected void map(LongWritable key, Text value, final Context context)
223            throws IOException, InterruptedException {
224 
225       Status status = new Status() {
226         public void setStatus(String msg) {
227            context.setStatus(msg);
228         }
229       };
230 
231       ObjectMapper mapper = new ObjectMapper();
232       TestOptions opts = mapper.readValue(value.toString(), TestOptions.class);
233       Configuration conf = HBaseConfiguration.create(context.getConfiguration());
234 
235       // Evaluation task
236       long elapsedTime = this.pe.runOneClient(this.cmd, conf, opts, status);
237       // Collect how much time the thing took. Report as map output and
238       // to the ELAPSED_TIME counter.
239       context.getCounter(Counter.ELAPSED_TIME).increment(elapsedTime);
240       context.getCounter(Counter.ROWS).increment(opts.perClientRunRows);
241       context.write(new LongWritable(opts.startRow), new LongWritable(elapsedTime));
242       context.progress();
243     }
244   }
245 
246   /*
247    * If table does not already exist, create.
248    * @param c Client to use checking.
249    * @return True if we created the table.
250    * @throws IOException
251    */
252   private static boolean checkTable(HBaseAdmin admin, TestOptions opts) throws IOException {
253     HTableDescriptor tableDescriptor = getTableDescriptor(opts);
254     if (opts.presplitRegions > 0) {
255       // presplit requested
256       if (admin.tableExists(tableDescriptor.getTableName())) {
257         admin.disableTable(tableDescriptor.getTableName());
258         admin.deleteTable(tableDescriptor.getTableName());
259       }
260 
261       byte[][] splits = getSplits(opts);
262       for (int i=0; i < splits.length; i++) {
263         LOG.debug(" split " + i + ": " + Bytes.toStringBinary(splits[i]));
264       }
265       admin.createTable(tableDescriptor, splits);
266       LOG.info ("Table created with " + opts.presplitRegions + " splits");
267     }
268     else {
269       boolean tableExists = admin.tableExists(tableDescriptor.getTableName());
270       if (!tableExists) {
271         admin.createTable(tableDescriptor);
272         LOG.info("Table " + tableDescriptor + " created");
273       }
274     }
275     return admin.tableExists(tableDescriptor.getTableName());
276   }
277 
278   /**
279    * Create an HTableDescriptor from provided TestOptions.
280    */
281   protected static HTableDescriptor getTableDescriptor(TestOptions opts) {
282     HTableDescriptor desc = new HTableDescriptor(opts.tableName);
283     HColumnDescriptor family = new HColumnDescriptor(FAMILY_NAME);
284     family.setDataBlockEncoding(opts.blockEncoding);
285     family.setCompressionType(opts.compression);
286     if (opts.inMemoryCF) {
287       family.setInMemory(true);
288     }
289     desc.addFamily(family);
290     return desc;
291   }
292 
293   /**
294    * generates splits based on total number of rows and specified split regions
295    */
296   protected static byte[][] getSplits(TestOptions opts) {
297     if (opts.presplitRegions == 0)
298       return new byte [0][];
299 
300     int numSplitPoints = opts.presplitRegions - 1;
301     byte[][] splits = new byte[numSplitPoints][];
302     int jump = opts.totalRows / opts.presplitRegions;
303     for (int i = 0; i < numSplitPoints; i++) {
304       int rowkey = jump * (1 + i);
305       splits[i] = format(rowkey);
306     }
307     return splits;
308   }
309 
310   /*
311    * Run all clients in this vm each to its own thread.
312    * @param cmd Command to run.
313    * @throws IOException
314    */
315   private void doLocalClients(final Class<? extends Test> cmd, final TestOptions opts)
316       throws IOException, InterruptedException {
317     Future<Long>[] threads = new Future[opts.numClientThreads];
318     long[] timings = new long[opts.numClientThreads];
319     ExecutorService pool = Executors.newFixedThreadPool(opts.numClientThreads,
320       new ThreadFactoryBuilder().setNameFormat("TestClient-%s").build());
321     for (int i = 0; i < threads.length; i++) {
322       final int index = i;
323       threads[i] = pool.submit(new Callable<Long>() {
324         @Override
325         public Long call() throws Exception {
326           TestOptions threadOpts = new TestOptions(opts);
327           threadOpts.startRow = index * threadOpts.perClientRunRows;
328           long elapsedTime = runOneClient(cmd, getConf(), threadOpts, new Status() {
329             public void setStatus(final String msg) throws IOException {
330               LOG.info("client-" + Thread.currentThread().getName() + " " + msg);
331             }
332           });
333           LOG.info("Finished " + Thread.currentThread().getName() + " in " + elapsedTime +
334             "ms over " + threadOpts.perClientRunRows + " rows");
335           return elapsedTime;
336         }
337       });
338     }
339     pool.shutdown();
340     for (int i = 0; i < threads.length; i++) {
341       try {
342         timings[i] = threads[i].get();
343       } catch (ExecutionException e) {
344         throw new IOException(e.getCause());
345       }
346     }
347     final String test = cmd.getSimpleName();
348     LOG.info("[" + test + "] Summary of timings (ms): "
349              + Arrays.toString(timings));
350     Arrays.sort(timings);
351     long total = 0;
352     for (int i = 0; i < timings.length; i++) {
353       total += timings[i];
354     }
355     LOG.info("[" + test + "]"
356              + "\tMin: " + timings[0] + "ms"
357              + "\tMax: " + timings[timings.length - 1] + "ms"
358              + "\tAvg: " + (total / timings.length) + "ms");
359   }
360 
361   /*
362    * Run a mapreduce job.  Run as many maps as asked-for clients.
363    * Before we start up the job, write out an input file with instruction
364    * per client regards which row they are to start on.
365    * @param cmd Command to run.
366    * @throws IOException
367    */
368   private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
369         InterruptedException, ClassNotFoundException {
370     Configuration conf = getConf();
371     Path inputDir = writeInputFile(conf, opts);
372     conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
373     conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
374     Job job = new Job(conf);
375     job.setJarByClass(PerformanceEvaluation.class);
376     job.setJobName("HBase Performance Evaluation");
377 
378     job.setInputFormatClass(NLineInputFormat.class);
379     NLineInputFormat.setInputPaths(job, inputDir);
380     // this is default, but be explicit about it just in case.
381     NLineInputFormat.setNumLinesPerSplit(job, 1);
382 
383     job.setOutputKeyClass(LongWritable.class);
384     job.setOutputValueClass(LongWritable.class);
385 
386     job.setMapperClass(EvaluationMapTask.class);
387     job.setReducerClass(LongSumReducer.class);
388 
389     job.setNumReduceTasks(1);
390 
391     job.setOutputFormatClass(TextOutputFormat.class);
392     TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
393 
394     TableMapReduceUtil.addDependencyJars(job);
395     TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
396       DescriptiveStatistics.class, // commons-math
397       ObjectMapper.class);         // jackson-mapper-asl
398 
399     TableMapReduceUtil.initCredentials(job);
400 
401     job.waitForCompletion(true);
402   }
403 
404   /*
405    * Write input file of offsets-per-client for the mapreduce job.
406    * @param c Configuration
407    * @return Directory that contains file written.
408    * @throws IOException
409    */
410   private Path writeInputFile(final Configuration c, final TestOptions opts) throws IOException {
411     SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
412     Path jobdir = new Path(PERF_EVAL_DIR, formatter.format(new Date()));
413     Path inputDir = new Path(jobdir, "inputs");
414 
415     FileSystem fs = FileSystem.get(c);
416     fs.mkdirs(inputDir);
417 
418     Path inputFile = new Path(inputDir, "input.txt");
419     PrintStream out = new PrintStream(fs.create(inputFile));
420     // Make input random.
421     Map<Integer, String> m = new TreeMap<Integer, String>();
422     Hash h = MurmurHash.getInstance();
423     int perClientRows = (opts.totalRows / opts.numClientThreads);
424     ObjectMapper mapper = new ObjectMapper();
425     mapper.configure(SORT_PROPERTIES_ALPHABETICALLY, true);
426     try {
427       for (int i = 0; i < 10; i++) {
428         for (int j = 0; j < opts.numClientThreads; j++) {
429           TestOptions next = new TestOptions(opts);
430           next.startRow = (j * perClientRows) + (i * (perClientRows/10));
431           next.perClientRunRows = perClientRows / 10;
432           String s = mapper.writeValueAsString(next);
433           int hash = h.hash(Bytes.toBytes(s));
434           m.put(hash, s);
435         }
436       }
437       for (Map.Entry<Integer, String> e: m.entrySet()) {
438         out.println(e.getValue());
439       }
440     } finally {
441       out.close();
442     }
443     return inputDir;
444   }
445 
446   /**
447    * Describes a command.
448    */
449   static class CmdDescriptor {
450     private Class<? extends Test> cmdClass;
451     private String name;
452     private String description;
453 
454     CmdDescriptor(Class<? extends Test> cmdClass, String name, String description) {
455       this.cmdClass = cmdClass;
456       this.name = name;
457       this.description = description;
458     }
459 
460     public Class<? extends Test> getCmdClass() {
461       return cmdClass;
462     }
463 
464     public String getName() {
465       return name;
466     }
467 
468     public String getDescription() {
469       return description;
470     }
471   }
472 
473   /**
474    * Wraps up options passed to {@link org.apache.hadoop.hbase.PerformanceEvaluation}.
475    * This makes tracking all these arguments a little easier.
476    */
477   static class TestOptions {
478 
479     public TestOptions() {}
480 
481     public TestOptions(TestOptions that) {
482       this.nomapred = that.nomapred;
483       this.startRow = that.startRow;
484       this.perClientRunRows = that.perClientRunRows;
485       this.numClientThreads = that.numClientThreads;
486       this.totalRows = that.totalRows;
487       this.sampleRate = that.sampleRate;
488       this.tableName = that.tableName;
489       this.flushCommits = that.flushCommits;
490       this.writeToWAL = that.writeToWAL;
491       this.useTags = that.useTags;
492       this.noOfTags = that.noOfTags;
493       this.reportLatency = that.reportLatency;
494       this.multiGet = that.multiGet;
495       this.inMemoryCF = that.inMemoryCF;
496       this.presplitRegions = that.presplitRegions;
497       this.compression = that.compression;
498       this.blockEncoding = that.blockEncoding;
499     }
500 
501     public boolean nomapred = false;
502     public int startRow = 0;
503     public int perClientRunRows = ROWS_PER_GB;
504     public int numClientThreads = 1;
505     public int totalRows = ROWS_PER_GB;
506     public float sampleRate = 1.0f;
507     public String tableName = TABLE_NAME;
508     public boolean flushCommits = true;
509     public boolean writeToWAL = true;
510     public boolean useTags = false;
511     public int noOfTags = 1;
512     public boolean reportLatency = false;
513     public int multiGet = 0;
514     boolean inMemoryCF = false;
515     int presplitRegions = 0;
516     public Compression.Algorithm compression = Compression.Algorithm.NONE;
517     public DataBlockEncoding blockEncoding = DataBlockEncoding.NONE;
518   }
519 
520   /*
521    * A test.
522    * Subclass to particularize what happens per row.
523    */
524   static abstract class Test {
525     // Below is make it so when Tests are all running in the one
526     // jvm, that they each have a differently seeded Random.
527     private static final Random randomSeed = new Random(System.currentTimeMillis());
528     private static long nextRandomSeed() {
529       return randomSeed.nextLong();
530     }
531     protected final Random rand = new Random(nextRandomSeed());
532     protected final Configuration conf;
533     protected final TestOptions opts;
534 
535     private final Status status;
536     protected HConnection connection;
537     protected HTableInterface table;
538 
539     /**
540      * Note that all subclasses of this class must provide a public contructor
541      * that has the exact same list of arguments.
542      */
543     Test(final Configuration conf, final TestOptions options, final Status status) {
544       this.conf = conf;
545       this.opts = options;
546       this.status = status;
547     }
548 
549     private String generateStatus(final int sr, final int i, final int lr) {
550       return sr + "/" + i + "/" + lr;
551     }
552 
553     protected int getReportingPeriod() {
554       int period = opts.perClientRunRows / 10;
555       return period == 0 ? opts.perClientRunRows : period;
556     }
557 
558     void testSetup() throws IOException {
559       this.connection = HConnectionManager.createConnection(conf);
560       this.table = connection.getTable(opts.tableName);
561       this.table.setAutoFlush(false, true);
562     }
563 
564     void testTakedown() throws IOException {
565       if (opts.flushCommits) {
566         this.table.flushCommits();
567       }
568       table.close();
569       connection.close();
570     }
571 
572     /*
573      * Run test
574      * @return Elapsed time.
575      * @throws IOException
576      */
577     long test() throws IOException {
578       testSetup();
579       LOG.info("Timed test starting in thread " + Thread.currentThread().getName());
580       final long startTime = System.nanoTime();
581       try {
582         testTimed();
583       } finally {
584         testTakedown();
585       }
586       return (System.nanoTime() - startTime) / 1000000;
587     }
588 
589     /**
590      * Provides an extension point for tests that don't want a per row invocation.
591      */
592     void testTimed() throws IOException {
593       int lastRow = opts.startRow + opts.perClientRunRows;
594       // Report on completion of 1/10th of total.
595       for (int i = opts.startRow; i < lastRow; i++) {
596         testRow(i);
597         if (status != null && i > 0 && (i % getReportingPeriod()) == 0) {
598           status.setStatus(generateStatus(opts.startRow, i, lastRow));
599         }
600       }
601     }
602 
603     /*
604     * Test for individual row.
605     * @param i Row index.
606     */
607     abstract void testRow(final int i) throws IOException;
608   }
609 
610 
611   @SuppressWarnings("unused")
612   static class RandomSeekScanTest extends Test {
613     RandomSeekScanTest(Configuration conf, TestOptions options, Status status) {
614       super(conf, options, status);
615     }
616 
617     @Override
618     void testRow(final int i) throws IOException {
619       Scan scan = new Scan(getRandomRow(this.rand, opts.totalRows));
620       scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
621       scan.setFilter(new WhileMatchFilter(new PageFilter(120)));
622       ResultScanner s = this.table.getScanner(scan);
623       for (Result rr; (rr = s.next()) != null;) ;
624       s.close();
625     }
626 
627     @Override
628     protected int getReportingPeriod() {
629       int period = opts.perClientRunRows / 100;
630       return period == 0 ? opts.perClientRunRows : period;
631     }
632 
633   }
634 
635   @SuppressWarnings("unused")
636   static abstract class RandomScanWithRangeTest extends Test {
637     RandomScanWithRangeTest(Configuration conf, TestOptions options, Status status) {
638       super(conf, options, status);
639     }
640 
641     @Override
642     void testRow(final int i) throws IOException {
643       Pair<byte[], byte[]> startAndStopRow = getStartAndStopRow();
644       Scan scan = new Scan(startAndStopRow.getFirst(), startAndStopRow.getSecond());
645       scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
646       ResultScanner s = this.table.getScanner(scan);
647       int count = 0;
648       for (Result rr; (rr = s.next()) != null;) {
649         count++;
650       }
651 
652       if (i % 100 == 0) {
653         LOG.info(String.format("Scan for key range %s - %s returned %s rows",
654             Bytes.toString(startAndStopRow.getFirst()),
655             Bytes.toString(startAndStopRow.getSecond()), count));
656       }
657 
658       s.close();
659     }
660 
661     protected abstract Pair<byte[],byte[]> getStartAndStopRow();
662 
663     protected Pair<byte[], byte[]> generateStartAndStopRows(int maxRange) {
664       int start = this.rand.nextInt(Integer.MAX_VALUE) % opts.totalRows;
665       int stop = start + maxRange;
666       return new Pair<byte[],byte[]>(format(start), format(stop));
667     }
668 
669     @Override
670     protected int getReportingPeriod() {
671       int period = opts.perClientRunRows / 100;
672       return period == 0? opts.perClientRunRows: period;
673     }
674   }
675 
676   static class RandomScanWithRange10Test extends RandomScanWithRangeTest {
677     RandomScanWithRange10Test(Configuration conf, TestOptions options, Status status) {
678       super(conf, options, status);
679     }
680 
681     @Override
682     protected Pair<byte[], byte[]> getStartAndStopRow() {
683       return generateStartAndStopRows(10);
684     }
685   }
686 
687   static class RandomScanWithRange100Test extends RandomScanWithRangeTest {
688     RandomScanWithRange100Test(Configuration conf, TestOptions options, Status status) {
689       super(conf, options, status);
690     }
691 
692     @Override
693     protected Pair<byte[], byte[]> getStartAndStopRow() {
694       return generateStartAndStopRows(100);
695     }
696   }
697 
698   static class RandomScanWithRange1000Test extends RandomScanWithRangeTest {
699     RandomScanWithRange1000Test(Configuration conf, TestOptions options, Status status) {
700       super(conf, options, status);
701     }
702 
703     @Override
704     protected Pair<byte[], byte[]> getStartAndStopRow() {
705       return generateStartAndStopRows(1000);
706     }
707   }
708 
709   static class RandomScanWithRange10000Test extends RandomScanWithRangeTest {
710     RandomScanWithRange10000Test(Configuration conf, TestOptions options, Status status) {
711       super(conf, options, status);
712     }
713 
714     @Override
715     protected Pair<byte[], byte[]> getStartAndStopRow() {
716       return generateStartAndStopRows(10000);
717     }
718   }
719 
720   static class RandomReadTest extends Test {
721     private final int everyN;
722     private final double[] times;
723     private ArrayList<Get> gets;
724     int idx = 0;
725 
726     RandomReadTest(Configuration conf, TestOptions options, Status status) {
727       super(conf, options, status);
728       everyN = (int) (opts.totalRows / (opts.totalRows * opts.sampleRate));
729       LOG.info("Sampling 1 every " + everyN + " out of " + opts.perClientRunRows + " total rows.");
730       if (opts.multiGet > 0) {
731         LOG.info("MultiGet enabled. Sending GETs in batches of " + opts.multiGet + ".");
732         this.gets = new ArrayList<Get>(opts.multiGet);
733       }
734       if (opts.reportLatency) {
735         this.times = new double[(int) Math.ceil(opts.perClientRunRows * opts.sampleRate / Math.max(1, opts.multiGet))];
736       } else {
737         this.times = null;
738       }
739     }
740 
741     @Override
742     void testRow(final int i) throws IOException {
743       if (i % everyN == 0) {
744         Get get = new Get(getRandomRow(this.rand, opts.totalRows));
745         get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
746         if (opts.multiGet > 0) {
747           this.gets.add(get);
748           if (this.gets.size() == opts.multiGet) {
749             long start = System.nanoTime();
750             this.table.get(this.gets);
751             if (opts.reportLatency) {
752               times[idx++] = (System.nanoTime() - start) / 1e6;
753             }
754             this.gets.clear();
755           }
756         } else {
757           long start = System.nanoTime();
758           this.table.get(get);
759           if (opts.reportLatency) {
760             times[idx++] = (System.nanoTime() - start) / 1e6;
761           }
762         }
763       }
764     }
765 
766     @Override
767     protected int getReportingPeriod() {
768       int period = opts.perClientRunRows / 100;
769       return period == 0 ? opts.perClientRunRows : period;
770     }
771 
772     @Override
773     protected void testTakedown() throws IOException {
774       if (this.gets != null && this.gets.size() > 0) {
775         this.table.get(gets);
776         this.gets.clear();
777       }
778       super.testTakedown();
779       if (opts.reportLatency) {
780         Arrays.sort(times);
781         DescriptiveStatistics ds = new DescriptiveStatistics();
782         for (double t : times) {
783           ds.addValue(t);
784         }
785         LOG.info("randomRead latency log (ms), on " + times.length + " measures");
786         LOG.info("99.9999% = " + ds.getPercentile(99.9999d));
787         LOG.info(" 99.999% = " + ds.getPercentile(99.999d));
788         LOG.info("  99.99% = " + ds.getPercentile(99.99d));
789         LOG.info("   99.9% = " + ds.getPercentile(99.9d));
790         LOG.info("     99% = " + ds.getPercentile(99d));
791         LOG.info("     95% = " + ds.getPercentile(95d));
792         LOG.info("     90% = " + ds.getPercentile(90d));
793         LOG.info("     80% = " + ds.getPercentile(80d));
794         LOG.info("Standard Deviation = " + ds.getStandardDeviation());
795         LOG.info("Mean = " + ds.getMean());
796       }
797     }
798   }
799 
800   static class RandomWriteTest extends Test {
801     RandomWriteTest(Configuration conf, TestOptions options, Status status) {
802       super(conf, options, status);
803     }
804 
805     @Override
806     void testRow(final int i) throws IOException {
807       byte[] row = getRandomRow(this.rand, opts.totalRows);
808       Put put = new Put(row);
809       byte[] value = generateData(this.rand, VALUE_LENGTH);
810       if (opts.useTags) {
811         byte[] tag = generateData(this.rand, TAG_LENGTH);
812         Tag[] tags = new Tag[opts.noOfTags];
813         for (int n = 0; n < opts.noOfTags; n++) {
814           Tag t = new Tag((byte) n, tag);
815           tags[n] = t;
816         }
817         KeyValue kv = new KeyValue(row, FAMILY_NAME, QUALIFIER_NAME, HConstants.LATEST_TIMESTAMP,
818             value, tags);
819         put.add(kv);
820       } else {
821         put.add(FAMILY_NAME, QUALIFIER_NAME, value);
822       }
823       put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
824       table.put(put);
825     }
826   }
827 
828 
829   static class ScanTest extends Test {
830     private ResultScanner testScanner;
831 
832     ScanTest(Configuration conf, TestOptions options, Status status) {
833       super(conf, options, status);
834     }
835 
836     @Override
837     void testTakedown() throws IOException {
838       if (this.testScanner != null) {
839         this.testScanner.close();
840       }
841       super.testTakedown();
842     }
843 
844 
845     @Override
846     void testRow(final int i) throws IOException {
847       if (this.testScanner == null) {
848         Scan scan = new Scan(format(opts.startRow));
849         scan.setCaching(30);
850         scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
851         this.testScanner = table.getScanner(scan);
852       }
853       testScanner.next();
854     }
855 
856   }
857 
858   static class SequentialReadTest extends Test {
859     SequentialReadTest(Configuration conf, TestOptions options, Status status) {
860       super(conf, options, status);
861     }
862 
863     @Override
864     void testRow(final int i) throws IOException {
865       Get get = new Get(format(i));
866       get.addColumn(FAMILY_NAME, QUALIFIER_NAME);
867       table.get(get);
868     }
869   }
870 
871   static class SequentialWriteTest extends Test {
872     SequentialWriteTest(Configuration conf, TestOptions options, Status status) {
873       super(conf, options, status);
874     }
875 
876     @Override
877     void testRow(final int i) throws IOException {
878       byte[] row = format(i);
879       Put put = new Put(row);
880       byte[] value = generateData(this.rand, VALUE_LENGTH);
881       if (opts.useTags) {
882         byte[] tag = generateData(this.rand, TAG_LENGTH);
883         Tag[] tags = new Tag[opts.noOfTags];
884         for (int n = 0; n < opts.noOfTags; n++) {
885           Tag t = new Tag((byte) n, tag);
886           tags[n] = t;
887         }
888         KeyValue kv = new KeyValue(row, FAMILY_NAME, QUALIFIER_NAME, HConstants.LATEST_TIMESTAMP,
889             value, tags);
890         put.add(kv);
891       } else {
892         put.add(FAMILY_NAME, QUALIFIER_NAME, value);
893       }
894       put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
895       table.put(put);
896     }
897   }
898 
899   static class FilteredScanTest extends Test {
900     protected static final Log LOG = LogFactory.getLog(FilteredScanTest.class.getName());
901 
902     FilteredScanTest(Configuration conf, TestOptions options, Status status) {
903       super(conf, options, status);
904     }
905 
906     @Override
907     void testRow(int i) throws IOException {
908       byte[] value = generateData(this.rand, VALUE_LENGTH);
909       Scan scan = constructScan(value);
910       ResultScanner scanner = null;
911       try {
912         scanner = this.table.getScanner(scan);
913         while (scanner.next() != null) {
914         }
915       } finally {
916         if (scanner != null) scanner.close();
917       }
918     }
919 
920     protected Scan constructScan(byte[] valuePrefix) throws IOException {
921       Filter filter = new SingleColumnValueFilter(
922           FAMILY_NAME, QUALIFIER_NAME, CompareFilter.CompareOp.EQUAL,
923           new BinaryComparator(valuePrefix)
924       );
925       Scan scan = new Scan();
926       scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
927       scan.setFilter(filter);
928       return scan;
929     }
930   }
931 
932   /**
933    * Compute a throughput rate in MB/s.
934    * @param rows Number of records consumed.
935    * @param timeMs Time taken in milliseconds.
936    * @return String value with label, ie '123.76 MB/s'
937    */
938   private static String calculateMbps(int rows, long timeMs) {
939     // MB/s = ((totalRows * ROW_SIZE_BYTES) / totalTimeMS)
940     //        * 1000 MS_PER_SEC / (1024 * 1024) BYTES_PER_MB
941     BigDecimal rowSize =
942       BigDecimal.valueOf(ROW_LENGTH + VALUE_LENGTH + FAMILY_NAME.length + QUALIFIER_NAME.length);
943     BigDecimal mbps = BigDecimal.valueOf(rows).multiply(rowSize, CXT)
944       .divide(BigDecimal.valueOf(timeMs), CXT).multiply(MS_PER_SEC, CXT)
945       .divide(BYTES_PER_MB, CXT);
946     return FMT.format(mbps) + " MB/s";
947   }
948 
949   /*
950    * Format passed integer.
951    * @param number
952    * @return Returns zero-prefixed ROW_LENGTH-byte wide decimal version of passed
953    * number (Does absolute in case number is negative).
954    */
955   public static byte [] format(final int number) {
956     byte [] b = new byte[ROW_LENGTH];
957     int d = Math.abs(number);
958     for (int i = b.length - 1; i >= 0; i--) {
959       b[i] = (byte)((d % 10) + '0');
960       d /= 10;
961     }
962     return b;
963   }
964 
965   /*
966    * This method takes some time and is done inline uploading data.  For
967    * example, doing the mapfile test, generation of the key and value
968    * consumes about 30% of CPU time.
969    * @return Generated random value to insert into a table cell.
970    */
971   public static byte[] generateData(final Random r, int length) {
972     byte [] b = new byte [length];
973     int i = 0;
974 
975     for(i = 0; i < (length-8); i += 8) {
976       b[i] = (byte) (65 + r.nextInt(26));
977       b[i+1] = b[i];
978       b[i+2] = b[i];
979       b[i+3] = b[i];
980       b[i+4] = b[i];
981       b[i+5] = b[i];
982       b[i+6] = b[i];
983       b[i+7] = b[i];
984     }
985 
986     byte a = (byte) (65 + r.nextInt(26));
987     for(; i < length; i++) {
988       b[i] = a;
989     }
990     return b;
991   }
992 
993   static byte [] getRandomRow(final Random random, final int totalRows) {
994     return format(random.nextInt(Integer.MAX_VALUE) % totalRows);
995   }
996 
997   static long runOneClient(final Class<? extends Test> cmd, Configuration conf, TestOptions opts,
998     final Status status)
999       throws IOException {
1000     status.setStatus("Start " + cmd + " at offset " + opts.startRow + " for " +
1001       opts.perClientRunRows + " rows");
1002     long totalElapsedTime = 0;
1003 
1004     final Test t;
1005     try {
1006       Constructor<? extends Test> constructor =
1007         cmd.getDeclaredConstructor(Configuration.class, TestOptions.class, Status.class);
1008       t = constructor.newInstance(conf, opts, status);
1009     } catch (NoSuchMethodException e) {
1010       throw new IllegalArgumentException("Invalid command class: " +
1011           cmd.getName() + ".  It does not provide a constructor as described by " +
1012           "the javadoc comment.  Available constructors are: " +
1013           Arrays.toString(cmd.getConstructors()));
1014     } catch (Exception e) {
1015       throw new IllegalStateException("Failed to construct command class", e);
1016     }
1017     totalElapsedTime = t.test();
1018 
1019     status.setStatus("Finished " + cmd + " in " + totalElapsedTime +
1020       "ms at offset " + opts.startRow + " for " + opts.perClientRunRows + " rows" +
1021       " (" + calculateMbps((int)(opts.perClientRunRows * opts.sampleRate), totalElapsedTime) + ")");
1022     return totalElapsedTime;
1023   }
1024 
1025   private void runTest(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
1026       InterruptedException, ClassNotFoundException {
1027     HBaseAdmin admin = null;
1028     try {
1029       admin = new HBaseAdmin(getConf());
1030       checkTable(admin, opts);
1031     } finally {
1032       if (admin != null) admin.close();
1033     }
1034     if (opts.nomapred) {
1035       doLocalClients(cmd, opts);
1036     } else {
1037       doMapReduce(cmd, opts);
1038     }
1039   }
1040 
1041   protected void printUsage() {
1042     printUsage(null);
1043   }
1044 
1045   protected void printUsage(final String message) {
1046     if (message != null && message.length() > 0) {
1047       System.err.println(message);
1048     }
1049     System.err.println("Usage: java " + this.getClass().getName() + " \\");
1050     System.err.println("  [--nomapred] [--rows=ROWS] [--table=NAME] \\");
1051     System.err.println("  [--compress=TYPE] [--blockEncoding=TYPE] " +
1052       "[-D<property=value>]* <command> <nclients>");
1053     System.err.println();
1054     System.err.println("Options:");
1055     System.err.println(" nomapred        Run multiple clients using threads " +
1056       "(rather than use mapreduce)");
1057     System.err.println(" rows            Rows each client runs. Default: One million");
1058     System.err.println(" sampleRate      Execute test on a sample of total " +
1059       "rows. Only supported by randomRead. Default: 1.0");
1060     System.err.println(" table           Alternate table name. Default: 'TestTable'");
1061     System.err.println(" compress        Compression type to use (GZ, LZO, ...). Default: 'NONE'");
1062     System.err.println(" flushCommits    Used to determine if the test should flush the table. " +
1063       "Default: false");
1064     System.err.println(" writeToWAL      Set writeToWAL on puts. Default: True");
1065     System.err.println(" presplit        Create presplit table. Recommended for accurate perf " +
1066       "analysis (see guide).  Default: disabled");
1067     System.err.println(" inmemory        Tries to keep the HFiles of the CF " +
1068       "inmemory as far as possible. Not guaranteed that reads are always served " +
1069       "from memory.  Default: false");
1070     System.err.println(" usetags         Writes tags along with KVs. Use with HFile V3. " +
1071       "Default: false");
1072     System.err.println(" numoftags       Specify the no of tags that would be needed. " +
1073        "This works only if usetags is true.");
1074     System.err.println(" latency         Set to report operation latencies. " +
1075       "Currently only supported by randomRead test. Default: False");
1076     System.err.println();
1077     System.err.println(" Note: -D properties will be applied to the conf used. ");
1078     System.err.println("  For example: ");
1079     System.err.println("   -Dmapred.output.compress=true");
1080     System.err.println("   -Dmapreduce.task.timeout=60000");
1081     System.err.println();
1082     System.err.println("Command:");
1083     for (CmdDescriptor command : commands.values()) {
1084       System.err.println(String.format(" %-15s %s", command.getName(), command.getDescription()));
1085     }
1086     System.err.println();
1087     System.err.println("Args:");
1088     System.err.println(" nclients        Integer. Required. Total number of " +
1089       "clients (and HRegionServers)");
1090     System.err.println("                 running: 1 <= value <= 500");
1091     System.err.println("Examples:");
1092     System.err.println(" To run a single evaluation client:");
1093     System.err.println(" $ bin/hbase " + this.getClass().getName()
1094         + " sequentialWrite 1");
1095   }
1096 
1097   private static int getNumClients(final int start, final String[] args) {
1098     if(start + 1 > args.length) {
1099       throw new IllegalArgumentException("must supply the number of clients");
1100     }
1101     int N = Integer.parseInt(args[start]);
1102     if (N < 1) {
1103       throw new IllegalArgumentException("Number of clients must be > 1");
1104     }
1105     return N;
1106   }
1107 
1108   public int run(String[] args) throws Exception {
1109     // Process command-line args. TODO: Better cmd-line processing
1110     // (but hopefully something not as painful as cli options).
1111     int errCode = -1;
1112     if (args.length < 1) {
1113       printUsage();
1114       return errCode;
1115     }
1116 
1117     try {
1118       // MR-NOTE: if you are adding a property that is used to control an operation
1119       // like put(), get(), scan(), ... you must also add it as part of the MR 
1120       // input, take a look at writeInputFile().
1121       // Then you must adapt the LINE_PATTERN input regex,
1122       // and parse the argument, take a look at PEInputFormat.getSplits().
1123 
1124       TestOptions opts = new TestOptions();
1125 
1126       for (int i = 0; i < args.length; i++) {
1127         String cmd = args[i];
1128         if (cmd.equals("-h") || cmd.startsWith("--h")) {
1129           printUsage();
1130           errCode = 0;
1131           break;
1132         }
1133 
1134         final String nmr = "--nomapred";
1135         if (cmd.startsWith(nmr)) {
1136           opts.nomapred = true;
1137           continue;
1138         }
1139 
1140         final String rows = "--rows=";
1141         if (cmd.startsWith(rows)) {
1142           opts.perClientRunRows = Integer.parseInt(cmd.substring(rows.length()));
1143           continue;
1144         }
1145 
1146         final String sampleRate = "--sampleRate=";
1147         if (cmd.startsWith(sampleRate)) {
1148           opts.sampleRate = Float.parseFloat(cmd.substring(sampleRate.length()));
1149           continue;
1150         }
1151 
1152         final String table = "--table=";
1153         if (cmd.startsWith(table)) {
1154           opts.tableName = cmd.substring(table.length());
1155           continue;
1156         }
1157 
1158         final String compress = "--compress=";
1159         if (cmd.startsWith(compress)) {
1160           opts.compression = Compression.Algorithm.valueOf(cmd.substring(compress.length()));
1161           continue;
1162         }
1163 
1164         final String blockEncoding = "--blockEncoding=";
1165         if (cmd.startsWith(blockEncoding)) {
1166           opts.blockEncoding = DataBlockEncoding.valueOf(cmd.substring(blockEncoding.length()));
1167           continue;
1168         }
1169 
1170         final String flushCommits = "--flushCommits=";
1171         if (cmd.startsWith(flushCommits)) {
1172           opts.flushCommits = Boolean.parseBoolean(cmd.substring(flushCommits.length()));
1173           continue;
1174         }
1175 
1176         final String writeToWAL = "--writeToWAL=";
1177         if (cmd.startsWith(writeToWAL)) {
1178           opts.writeToWAL = Boolean.parseBoolean(cmd.substring(writeToWAL.length()));
1179           continue;
1180         }
1181 
1182         final String presplit = "--presplit=";
1183         if (cmd.startsWith(presplit)) {
1184           opts.presplitRegions = Integer.parseInt(cmd.substring(presplit.length()));
1185           continue;
1186         }
1187         
1188         final String inMemory = "--inmemory=";
1189         if (cmd.startsWith(inMemory)) {
1190           opts.inMemoryCF = Boolean.parseBoolean(cmd.substring(inMemory.length()));
1191           continue;
1192         }
1193 
1194         final String latency = "--latency";
1195         if (cmd.startsWith(latency)) {
1196           opts.reportLatency = true;
1197           continue;
1198         }
1199 
1200         final String multiGet = "--multiGet=";
1201         if (cmd.startsWith(multiGet)) {
1202           opts.multiGet = Integer.parseInt(cmd.substring(multiGet.length()));
1203           continue;
1204         }
1205 
1206         final String useTags = "--usetags=";
1207         if (cmd.startsWith(useTags)) {
1208           opts.useTags = Boolean.parseBoolean(cmd.substring(useTags.length()));
1209           continue;
1210         }
1211         
1212         final String noOfTags = "--nooftags=";
1213         if (cmd.startsWith(noOfTags)) {
1214           opts.noOfTags = Integer.parseInt(cmd.substring(noOfTags.length()));
1215           continue;
1216         }
1217         
1218         Class<? extends Test> cmdClass = determineCommandClass(cmd);
1219         if (cmdClass != null) {
1220           opts.numClientThreads = getNumClients(i + 1, args);
1221           // number of rows specified
1222           opts.totalRows = opts.perClientRunRows * opts.numClientThreads;
1223           runTest(cmdClass, opts);
1224           errCode = 0;
1225           break;
1226         }
1227 
1228         printUsage();
1229         break;
1230       }
1231     } catch (Exception e) {
1232       e.printStackTrace();
1233     }
1234 
1235     return errCode;
1236   }
1237 
1238   private Class<? extends Test> determineCommandClass(String cmd) {
1239     CmdDescriptor descriptor = commands.get(cmd);
1240     return descriptor != null ? descriptor.getCmdClass() : null;
1241   }
1242 
1243   public static void main(final String[] args) throws Exception {
1244     int res = ToolRunner.run(new PerformanceEvaluation(HBaseConfiguration.create()), args);
1245     System.exit(res);
1246   }
1247 }