/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.util.ToolRunner;

/**
 * A command-line utility that reads, writes, and verifies data. Unlike
 * {@link PerformanceEvaluation}, this tool validates the data written,
 * and supports simultaneously writing and reading the same set of keys.
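 *
 * <p>A minimal example invocation (hypothetical parameter values; see
 * {@link #addOptions()} for the supported options):
 * <pre>
 * hbase org.apache.hadoop.hbase.util.LoadTestTool -tn cluster_test \
 *     -write 5:1024:20 -read 100:20 -num_keys 1000000
 * </pre>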
 */
public class LoadTestTool extends AbstractHBaseTool {

  private static final Log LOG = LogFactory.getLog(LoadTestTool.class);

  /** Table name for the test */
  protected byte[] tableName;

  /** Table name to use if not overridden on the command line */
  protected static final String DEFAULT_TABLE_NAME = "cluster_test";

  /** Column family used by the test */
  protected static byte[] COLUMN_FAMILY = Bytes.toBytes("test_cf");

  /** Column families used by the test */
  protected static final byte[][] COLUMN_FAMILIES = { COLUMN_FAMILY };

  /** The number of reader/writer threads if not specified */
  protected static final int DEFAULT_NUM_THREADS = 20;

  /** Usage string for the load option */
  protected static final String OPT_USAGE_LOAD =
      "<avg_cols_per_key>:<avg_data_size>" +
      "[:<#threads=" + DEFAULT_NUM_THREADS + ">]";

  /** Usage string for the read option */
  protected static final String OPT_USAGE_READ =
      "<verify_percent>[:<#threads=" + DEFAULT_NUM_THREADS + ">]";

  protected static final String OPT_USAGE_BLOOM = "Bloom filter type, one of " +
      Arrays.toString(StoreFile.BloomType.values());

  protected static final String OPT_USAGE_COMPRESSION = "Compression type, " +
      "one of " + Arrays.toString(Compression.Algorithm.values());

  public static final String OPT_DATA_BLOCK_ENCODING_USAGE =
      "Encoding algorithm (e.g. prefix "
      + "compression) to use for data blocks in the test column family, "
      + "one of " + Arrays.toString(DataBlockEncoding.values()) + ".";

  private static final String OPT_BLOOM = "bloom";
  private static final String OPT_COMPRESSION = "compression";
  public static final String OPT_DATA_BLOCK_ENCODING =
      HColumnDescriptor.DATA_BLOCK_ENCODING.toLowerCase();
  public static final String OPT_ENCODE_IN_CACHE_ONLY =
      "encode_in_cache_only";
  public static final String OPT_ENCODE_IN_CACHE_ONLY_USAGE =
      "If this is specified, data blocks will only be encoded in block " +
      "cache but not on disk";

  protected static final String OPT_KEY_WINDOW = "key_window";
  protected static final String OPT_WRITE = "write";
  protected static final String OPT_MAX_READ_ERRORS = "max_read_errors";
  protected static final String OPT_MULTIPUT = "multiput";
  protected static final String OPT_NUM_KEYS = "num_keys";
  protected static final String OPT_READ = "read";
  protected static final String OPT_START_KEY = "start_key";
  protected static final String OPT_TABLE_NAME = "tn";
  protected static final String OPT_ZK_QUORUM = "zk";
  protected static final String OPT_SKIP_INIT = "skip_init";
  protected static final String OPT_INIT_ONLY = "init_only";
  private static final String NUM_TABLES = "num_tables";

  protected static final long DEFAULT_START_KEY = 0;

  /** This will be removed as we factor out the dependency on command line */
  protected CommandLine cmd;

  protected MultiThreadedWriter writerThreads = null;
  protected MultiThreadedReader readerThreads = null;

  protected long startKey, endKey;

  protected boolean isWrite, isRead;

  // Column family options
  protected DataBlockEncoding dataBlockEncodingAlgo;
  protected boolean encodeInCacheOnly;
  protected Compression.Algorithm compressAlgo;
  protected StoreFile.BloomType bloomType;

  // Writer options
  protected int numWriterThreads = DEFAULT_NUM_THREADS;
  protected int minColsPerKey, maxColsPerKey;
  protected int minColDataSize, maxColDataSize;
  protected boolean isMultiPut;

  // Reader options
  private int numReaderThreads = DEFAULT_NUM_THREADS;
  private int keyWindow = MultiThreadedReader.DEFAULT_KEY_WINDOW;
  private int maxReadErrors = MultiThreadedReader.DEFAULT_MAX_ERRORS;
  private int verifyPercent;

  private int numTables = 1;

  // TODO: Refactor this into a LoadTestToolImpl so that tests can use it more
  // cleanly; the console tool itself should only be used from the console.
  protected boolean isSkipInit = false;
  protected boolean isInitOnly = false;

  protected String[] splitColonSeparated(String option,
      int minNumCols, int maxNumCols) {
    String optVal = cmd.getOptionValue(option);
    String[] cols = optVal.split(":");
    if (cols.length < minNumCols || cols.length > maxNumCols) {
      throw new IllegalArgumentException("Expected at least "
          + minNumCols + " columns but no more than " + maxNumCols +
          " in the colon-separated value '" + optVal + "' of the " +
          "-" + option + " option");
    }
    return cols;
  }

  protected int getNumThreads(String numThreadsStr) {
    return parseInt(numThreadsStr, 1, Short.MAX_VALUE);
  }

  /**
   * Apply column family options such as Bloom filters, compression, and data
   * block encoding.
   */
  protected void applyColumnFamilyOptions(byte[] tableName,
      byte[][] columnFamilies) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    HTableDescriptor tableDesc = admin.getTableDescriptor(tableName);
    LOG.info("Disabling table " + Bytes.toString(tableName));
    admin.disableTable(tableName);
    for (byte[] cf : columnFamilies) {
      HColumnDescriptor columnDesc = tableDesc.getFamily(cf);
      boolean isNewCf = columnDesc == null;
      if (isNewCf) {
        columnDesc = new HColumnDescriptor(cf);
      }
      if (bloomType != null) {
        columnDesc.setBloomFilterType(bloomType);
      }
      if (compressAlgo != null) {
        columnDesc.setCompressionType(compressAlgo);
      }
      if (dataBlockEncodingAlgo != null) {
        columnDesc.setDataBlockEncoding(dataBlockEncodingAlgo);
        columnDesc.setEncodeOnDisk(!encodeInCacheOnly);
      }
      if (isNewCf) {
        admin.addColumn(tableName, columnDesc);
      } else {
        admin.modifyColumn(tableName, columnDesc);
      }
    }
    LOG.info("Enabling table " + Bytes.toString(tableName));
    admin.enableTable(tableName);
  }

  @Override
  protected void addOptions() {
    addOptWithArg(OPT_ZK_QUORUM, "ZK quorum as comma-separated host names " +
        "without port numbers");
    addOptWithArg(OPT_TABLE_NAME, "The name of the table to read or write");
    addOptWithArg(OPT_WRITE, OPT_USAGE_LOAD);
    addOptWithArg(OPT_READ, OPT_USAGE_READ);
    addOptNoArg(OPT_INIT_ONLY, "Initialize the test table only, don't do any loading");
    addOptWithArg(OPT_BLOOM, OPT_USAGE_BLOOM);
    addOptWithArg(OPT_COMPRESSION, OPT_USAGE_COMPRESSION);
    addOptWithArg(OPT_DATA_BLOCK_ENCODING, OPT_DATA_BLOCK_ENCODING_USAGE);
    addOptWithArg(OPT_MAX_READ_ERRORS, "The maximum number of read errors " +
        "to tolerate before terminating all reader threads. The default is " +
        MultiThreadedReader.DEFAULT_MAX_ERRORS + ".");
    addOptWithArg(OPT_KEY_WINDOW, "The 'key window' to maintain between " +
        "reads and writes for concurrent write/read workload. The default " +
        "is " + MultiThreadedReader.DEFAULT_KEY_WINDOW + ".");

    addOptNoArg(OPT_MULTIPUT, "Whether to use multi-puts as opposed to " +
        "separate puts for every column in a row");
    addOptNoArg(OPT_ENCODE_IN_CACHE_ONLY, OPT_ENCODE_IN_CACHE_ONLY_USAGE);

    addOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write");
    addOptWithArg(OPT_START_KEY, "The first key to read/write " +
        "(a 0-based index). The default value is " +
        DEFAULT_START_KEY + ".");
    addOptNoArg(OPT_SKIP_INIT, "Skip the initialization; assume test table "
        + "already exists");

    addOptWithArg(NUM_TABLES,
      "A positive integer number. When a number n is specified, the load test "
          + "tool will load n tables in parallel. The -tn parameter value becomes "
          + "the table name prefix. Each table name is of the form <tn>_1...<tn>_n");
  }

  @Override
  protected void processOptions(CommandLine cmd) {
    this.cmd = cmd;

    tableName = Bytes.toBytes(cmd.getOptionValue(OPT_TABLE_NAME,
        DEFAULT_TABLE_NAME));

    isWrite = cmd.hasOption(OPT_WRITE);
    isRead = cmd.hasOption(OPT_READ);
    isInitOnly = cmd.hasOption(OPT_INIT_ONLY);

    if (!isWrite && !isRead && !isInitOnly) {
      throw new IllegalArgumentException("Either -" + OPT_WRITE + ", -" +
          OPT_READ + ", or -" + OPT_INIT_ONLY + " has to be specified");
    }

    if (isInitOnly && (isRead || isWrite)) {
      throw new IllegalArgumentException(OPT_INIT_ONLY + " cannot be specified with"
          + " either -" + OPT_WRITE + " or -" + OPT_READ);
    }

    if (!isInitOnly) {
      if (!cmd.hasOption(OPT_NUM_KEYS)) {
        throw new IllegalArgumentException(OPT_NUM_KEYS + " must be specified in "
            + "read or write mode");
      }
      startKey = parseLong(cmd.getOptionValue(OPT_START_KEY,
          String.valueOf(DEFAULT_START_KEY)), 0, Long.MAX_VALUE);
      long numKeys = parseLong(cmd.getOptionValue(OPT_NUM_KEYS), 1,
          Long.MAX_VALUE - startKey);
      endKey = startKey + numKeys;
      isSkipInit = cmd.hasOption(OPT_SKIP_INIT);
      System.out.println("Key range: [" + startKey + ".." + (endKey - 1) + "]");
    }

    encodeInCacheOnly = cmd.hasOption(OPT_ENCODE_IN_CACHE_ONLY);
    parseColumnFamilyOptions(cmd);

    if (isWrite) {
      String[] writeOpts = splitColonSeparated(OPT_WRITE, 2, 3);

      int colIndex = 0;
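      // Interpret <avg_cols_per_key> and <avg_data_size> as averages: columns
      // per key are drawn from [1, 2 * avg], and per-column value sizes from
      // [avg / 2, 3 * avg / 2].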
      minColsPerKey = 1;
      maxColsPerKey = 2 * Integer.parseInt(writeOpts[colIndex++]);
      int avgColDataSize =
          parseInt(writeOpts[colIndex++], 1, Integer.MAX_VALUE);
      minColDataSize = avgColDataSize / 2;
      maxColDataSize = avgColDataSize * 3 / 2;

      if (colIndex < writeOpts.length) {
        numWriterThreads = getNumThreads(writeOpts[colIndex++]);
      }

      isMultiPut = cmd.hasOption(OPT_MULTIPUT);

      System.out.println("Multi-puts: " + isMultiPut);
      System.out.println("Columns per key: " + minColsPerKey + ".."
          + maxColsPerKey);
      System.out.println("Data size per column: " + minColDataSize + ".."
          + maxColDataSize);
    }

    if (isRead) {
      String[] readOpts = splitColonSeparated(OPT_READ, 1, 2);
      int colIndex = 0;
      verifyPercent = parseInt(readOpts[colIndex++], 0, 100);
      if (colIndex < readOpts.length) {
        numReaderThreads = getNumThreads(readOpts[colIndex++]);
      }

      if (cmd.hasOption(OPT_MAX_READ_ERRORS)) {
        maxReadErrors = parseInt(cmd.getOptionValue(OPT_MAX_READ_ERRORS),
            0, Integer.MAX_VALUE);
      }

      if (cmd.hasOption(OPT_KEY_WINDOW)) {
        keyWindow = parseInt(cmd.getOptionValue(OPT_KEY_WINDOW),
            0, Integer.MAX_VALUE);
      }

      System.out.println("Percent of keys to verify: " + verifyPercent);
      System.out.println("Reader threads: " + numReaderThreads);
    }

    numTables = 1;
    if (cmd.hasOption(NUM_TABLES)) {
      numTables = parseInt(cmd.getOptionValue(NUM_TABLES), 1, Short.MAX_VALUE);
    }
  }

  protected void parseColumnFamilyOptions(CommandLine cmd) {
    String dataBlockEncodingStr = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING);
    dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null :
        DataBlockEncoding.valueOf(dataBlockEncodingStr);
    if (dataBlockEncodingAlgo == DataBlockEncoding.NONE && encodeInCacheOnly) {
      throw new IllegalArgumentException("-" + OPT_ENCODE_IN_CACHE_ONLY + " " +
          "does not make sense when data block encoding is not used");
    }

    String compressStr = cmd.getOptionValue(OPT_COMPRESSION);
    compressAlgo = compressStr == null ? Compression.Algorithm.NONE :
        Compression.Algorithm.valueOf(compressStr);

    String bloomStr = cmd.getOptionValue(OPT_BLOOM);
    bloomType = bloomStr == null ? null :
        StoreFile.BloomType.valueOf(bloomStr);
  }

  public void initTestTable() throws IOException {
    HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName,
        COLUMN_FAMILY, compressAlgo, dataBlockEncodingAlgo);
    applyColumnFamilyOptions(tableName, COLUMN_FAMILIES);
  }

  @Override
  protected int doWork() throws IOException {
    if (numTables > 1) {
      return parallelLoadTables();
    } else {
      return loadTable();
    }
  }

  protected int loadTable() throws IOException {
    if (cmd.hasOption(OPT_ZK_QUORUM)) {
      conf.set(HConstants.ZOOKEEPER_QUORUM, cmd.getOptionValue(OPT_ZK_QUORUM));
    }

    if (isInitOnly) {
      LOG.info("Initializing only; no reads or writes");
      initTestTable();
      return 0;
    }

    if (!isSkipInit) {
      initTestTable();
    }

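    // Shared data generator: defines the column layout and values that the
    // writer threads insert and the reader threads verify.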
    LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
      minColDataSize, maxColDataSize, minColsPerKey, maxColsPerKey, COLUMN_FAMILY);

    if (isWrite) {
      writerThreads = new MultiThreadedWriter(dataGen, conf, tableName);
      writerThreads.setMultiPut(isMultiPut);
    }

    if (isRead) {
      readerThreads = new MultiThreadedReader(dataGen, conf, tableName, verifyPercent);
      readerThreads.setMaxErrors(maxReadErrors);
      readerThreads.setKeyWindow(keyWindow);
    }

    if (isRead && isWrite) {
      LOG.info("Concurrent read/write workload: making readers aware of the " +
          "write point");
      readerThreads.linkToWriter(writerThreads);
    }

    if (isWrite) {
      System.out.println("Starting to write data...");
      writerThreads.start(startKey, endKey, numWriterThreads);
    }

    if (isRead) {
      System.out.println("Starting to read data...");
      readerThreads.start(startKey, endKey, numReaderThreads);
    }

    if (isWrite) {
      writerThreads.waitForFinish();
    }

    if (isRead) {
      readerThreads.waitForFinish();
    }

    boolean success = true;
    if (isWrite) {
      success = success && writerThreads.getNumWriteFailures() == 0;
    }
    if (isRead) {
      success = success && readerThreads.getNumReadErrors() == 0
          && readerThreads.getNumReadFailures() == 0;
    }
    return success ? EXIT_SUCCESS : EXIT_FAILURE;
  }

  public static void main(String[] args) {
    new LoadTestTool().doStaticMain(args);
  }

  /**
   * When NUM_TABLES is specified, starts multiple worker threads, each of
   * which runs its own LoadTestTool instance to load one table. Table names
   * take the form {@code <tn>_<index>}. For example, with
   * "-tn test -num_tables 2", the table names will be "test_1" and "test_2".
   *
   * @throws IOException if a worker fails or the wait for workers is interrupted
   */
  private int parallelLoadTables() throws IOException {
    // create new command args
    String tableName = cmd.getOptionValue(OPT_TABLE_NAME, DEFAULT_TABLE_NAME);
    String[] newArgs = null;
    if (!cmd.hasOption(LoadTestTool.OPT_TABLE_NAME)) {
      newArgs = new String[cmdLineArgs.length + 2];
      newArgs[0] = "-" + LoadTestTool.OPT_TABLE_NAME;
      // Placeholder value; each worker substitutes its own table name below.
      // Leaving this slot null would NPE in the endsWith() scan that follows.
      newArgs[1] = tableName;
      for (int i = 0; i < cmdLineArgs.length; i++) {
        newArgs[i + 2] = cmdLineArgs[i];
      }
    } else {
      newArgs = cmdLineArgs;
    }

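    // Locate the value slot of the table-name option so that each worker can
    // substitute its own table name, and force -num_tables to 1 so that
    // worker instances do not themselves fan out into more workers.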
    int tableNameValueIndex = -1;
    for (int j = 0; j < newArgs.length; j++) {
      if (newArgs[j].endsWith(OPT_TABLE_NAME)) {
        tableNameValueIndex = j + 1;
      } else if (newArgs[j].endsWith(NUM_TABLES)) {
        // change NUM_TABLES to 1 so that each worker loads one table
        newArgs[j + 1] = "1";
      }
    }

    // starting to load multiple tables
    List<WorkerThread> workers = new ArrayList<WorkerThread>();
    for (int i = 0; i < numTables; i++) {
      String[] workerArgs = newArgs.clone();
      workerArgs[tableNameValueIndex] = tableName + "_" + (i + 1);
      WorkerThread worker = new WorkerThread(i, workerArgs);
      workers.add(worker);
      LOG.info(worker + " starting");
      worker.start();
    }

    // wait for all workers to finish
    LOG.info("Waiting for worker threads to finish");
    for (WorkerThread t : workers) {
      try {
        t.join();
      } catch (InterruptedException ie) {
        IOException iie = new InterruptedIOException();
        iie.initCause(ie);
        throw iie;
      }
      checkForErrors();
    }

    return EXIT_SUCCESS;
  }

  // If an exception is thrown by one of the worker threads, it will be
  // stored here.
  protected AtomicReference<Throwable> thrown = new AtomicReference<Throwable>();

  private void workerThreadError(Throwable t) {
    thrown.compareAndSet(null, t);
  }

  /**
   * Check for errors in the worker threads. If any is found, rethrow it.
   */
  private void checkForErrors() throws IOException {
    Throwable thrown = this.thrown.get();
    if (thrown == null) return;
    if (thrown instanceof IOException) {
      throw (IOException) thrown;
    } else {
      throw new RuntimeException(thrown);
    }
  }

  class WorkerThread extends Thread {
    private String[] workerArgs;

    WorkerThread(int i, String[] args) {
      super("WorkerThread-" + i);
      workerArgs = args;
    }

    @Override
    public void run() {
      try {
        int ret = ToolRunner.run(HBaseConfiguration.create(), new LoadTestTool(), workerArgs);
        if (ret != 0) {
          throw new RuntimeException("LoadTestTool exited with a non-zero return code.");
        }
      } catch (Exception ex) {
        LOG.error("Error in worker thread", ex);
        workerThreadError(ex);
      }
    }
  }
}