/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.util.ToolRunner;

/**
 * A command-line utility that reads, writes, and verifies data. Unlike
 * {@link PerformanceEvaluation}, this tool validates the data written,
 * and supports simultaneously writing and reading the same set of keys.
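 * <p>
 * Example invocation (the option values are illustrative; see the usage
 * strings below for the exact formats):
 * <pre>
 * hbase org.apache.hadoop.hbase.util.LoadTestTool -tn cluster_test \
 *     -write 5:1024:10 -read 100:20 -num_keys 1000000
 * </pre>
 * This writes an average of 5 columns of roughly 1024 bytes each per key
 * using 10 writer threads, while 20 reader threads verify 100% of the keys
 * they read.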
 */
public class LoadTestTool extends AbstractHBaseTool {

  private static final Log LOG = LogFactory.getLog(LoadTestTool.class);

  /** Table name for the test */
  protected byte[] tableName;

  /** Table name to use if not overridden on the command line */
  protected static final String DEFAULT_TABLE_NAME = "cluster_test";

  /** Column family used by the test */
  protected static byte[] COLUMN_FAMILY = Bytes.toBytes("test_cf");

  /** Column families used by the test */
  protected static final byte[][] COLUMN_FAMILIES = { COLUMN_FAMILY };

  /** The default data size if not specified */
  protected static final int DEFAULT_DATA_SIZE = 64;

  /** The number of reader/writer threads if not specified */
  protected static final int DEFAULT_NUM_THREADS = 20;

  /** Usage string for the load option */
  protected static final String OPT_USAGE_LOAD =
      "<avg_cols_per_key>:<avg_data_size>" +
      "[:<#threads=" + DEFAULT_NUM_THREADS + ">]";

  /** Usage string for the read option */
  protected static final String OPT_USAGE_READ =
      "<verify_percent>[:<#threads=" + DEFAULT_NUM_THREADS + ">]";

  protected static final String OPT_USAGE_BLOOM = "Bloom filter type, one of " +
      Arrays.toString(StoreFile.BloomType.values());

  protected static final String OPT_USAGE_COMPRESSION = "Compression type, " +
      "one of " + Arrays.toString(Compression.Algorithm.values());

  public static final String OPT_DATA_BLOCK_ENCODING_USAGE =
    "Encoding algorithm (e.g. prefix "
        + "compression) to use for data blocks in the test column family, "
        + "one of " + Arrays.toString(DataBlockEncoding.values()) + ".";

  public static final String OPT_INMEMORY = "in_memory";
  public static final String OPT_USAGE_IN_MEMORY = "Tries to keep the HFiles of the CF " +
      "in memory as far as possible. Not guaranteed that reads are always served from memory";

  private static final String OPT_BLOOM = "bloom";
  private static final String OPT_COMPRESSION = "compression";
  public static final String OPT_DATA_BLOCK_ENCODING =
      HColumnDescriptor.DATA_BLOCK_ENCODING.toLowerCase();
  public static final String OPT_ENCODE_IN_CACHE_ONLY =
      "encode_in_cache_only";
  public static final String OPT_ENCODE_IN_CACHE_ONLY_USAGE =
      "If this is specified, data blocks will only be encoded in block " +
      "cache but not on disk";

  protected static final String OPT_KEY_WINDOW = "key_window";
  protected static final String OPT_WRITE = "write";
  protected static final String OPT_MAX_READ_ERRORS = "max_read_errors";
  protected static final String OPT_MULTIPUT = "multiput";
  protected static final String OPT_NUM_KEYS = "num_keys";
  protected static final String OPT_READ = "read";
  protected static final String OPT_START_KEY = "start_key";
  protected static final String OPT_TABLE_NAME = "tn";
  protected static final String OPT_ZK_QUORUM = "zk";
  protected static final String OPT_SKIP_INIT = "skip_init";
  protected static final String OPT_INIT_ONLY = "init_only";
  private static final String NUM_TABLES = "num_tables";

  protected static final long DEFAULT_START_KEY = 0;

  /** This will be removed as we factor out the dependency on the command line */
  protected CommandLine cmd;

  protected MultiThreadedWriter writerThreads = null;
  protected MultiThreadedReader readerThreads = null;

  protected long startKey, endKey;

  protected boolean isWrite, isRead;

  // Column family options
  protected DataBlockEncoding dataBlockEncodingAlgo;
  protected boolean encodeInCacheOnly;
  protected Compression.Algorithm compressAlgo;
  protected StoreFile.BloomType bloomType;
  private boolean inMemoryCF;

  // Writer options
  protected int numWriterThreads = DEFAULT_NUM_THREADS;
  protected int minColsPerKey, maxColsPerKey;
  protected int minColDataSize = DEFAULT_DATA_SIZE, maxColDataSize = DEFAULT_DATA_SIZE;
  protected boolean isMultiPut;

  // Reader options
  private int numReaderThreads = DEFAULT_NUM_THREADS;
  private int keyWindow = MultiThreadedReader.DEFAULT_KEY_WINDOW;
  private int maxReadErrors = MultiThreadedReader.DEFAULT_MAX_ERRORS;
  private int verifyPercent;

  private int numTables = 1;

  // TODO: refactor LoadTestToolImpl somewhere to make the usage from tests less bad;
  // the console tool itself should only be used from the console.
  protected boolean isSkipInit = false;
  protected boolean isInitOnly = false;

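  /**
   * Splits a colon-separated option value (e.g. the value of {@code -write}
   * or {@code -read}) and validates the number of parts.
   */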
  protected String[] splitColonSeparated(String option,
      int minNumCols, int maxNumCols) {
    String optVal = cmd.getOptionValue(option);
    String[] cols = optVal.split(":");
    if (cols.length < minNumCols || cols.length > maxNumCols) {
      throw new IllegalArgumentException("Expected at least "
          + minNumCols + " columns but no more than " + maxNumCols +
          " in the colon-separated value '" + optVal + "' of the " +
          "-" + option + " option");
    }
    return cols;
  }

  protected int getNumThreads(String numThreadsStr) {
    return parseInt(numThreadsStr, 1, Short.MAX_VALUE);
  }

  /**
   * Apply column family options such as Bloom filters, compression, and data
   * block encoding.
   */
  protected void applyColumnFamilyOptions(byte[] tableName,
      byte[][] columnFamilies) throws IOException {
    HBaseAdmin admin = new HBaseAdmin(conf);
    HTableDescriptor tableDesc = admin.getTableDescriptor(tableName);
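    // The table has to be offline while its column families are added or
    // modified, so disable it first and re-enable it once all changes apply.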
    LOG.info("Disabling table " + Bytes.toString(tableName));
    admin.disableTable(tableName);
    for (byte[] cf : columnFamilies) {
      HColumnDescriptor columnDesc = tableDesc.getFamily(cf);
      boolean isNewCf = columnDesc == null;
      if (isNewCf) {
        columnDesc = new HColumnDescriptor(cf);
      }
      if (bloomType != null) {
        columnDesc.setBloomFilterType(bloomType);
      }
      if (compressAlgo != null) {
        columnDesc.setCompressionType(compressAlgo);
      }
      if (dataBlockEncodingAlgo != null) {
        columnDesc.setDataBlockEncoding(dataBlockEncodingAlgo);
        columnDesc.setEncodeOnDisk(!encodeInCacheOnly);
      }
      if (inMemoryCF) {
        columnDesc.setInMemory(inMemoryCF);
      }
      if (isNewCf) {
        admin.addColumn(tableName, columnDesc);
      } else {
        admin.modifyColumn(tableName, columnDesc);
      }
    }
    LOG.info("Enabling table " + Bytes.toString(tableName));
    admin.enableTable(tableName);
  }

  @Override
  protected void addOptions() {
    addOptWithArg(OPT_ZK_QUORUM, "ZK quorum as comma-separated host names " +
        "without port numbers");
    addOptWithArg(OPT_TABLE_NAME, "The name of the table to read or write");
    addOptWithArg(OPT_WRITE, OPT_USAGE_LOAD);
    addOptWithArg(OPT_READ, OPT_USAGE_READ);
    addOptNoArg(OPT_INIT_ONLY, "Initialize the test table only, don't do any loading");
    addOptWithArg(OPT_BLOOM, OPT_USAGE_BLOOM);
    addOptWithArg(OPT_COMPRESSION, OPT_USAGE_COMPRESSION);
    addOptWithArg(OPT_DATA_BLOCK_ENCODING, OPT_DATA_BLOCK_ENCODING_USAGE);
    addOptWithArg(OPT_MAX_READ_ERRORS, "The maximum number of read errors " +
        "to tolerate before terminating all reader threads. The default is " +
        MultiThreadedReader.DEFAULT_MAX_ERRORS + ".");
    addOptWithArg(OPT_KEY_WINDOW, "The 'key window' to maintain between " +
        "reads and writes for concurrent write/read workload. The default " +
        "is " + MultiThreadedReader.DEFAULT_KEY_WINDOW + ".");

    addOptNoArg(OPT_MULTIPUT, "Whether to use multi-puts as opposed to " +
        "separate puts for every column in a row");
    addOptNoArg(OPT_ENCODE_IN_CACHE_ONLY, OPT_ENCODE_IN_CACHE_ONLY_USAGE);
    addOptNoArg(OPT_INMEMORY, OPT_USAGE_IN_MEMORY);

    addOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write");
    addOptWithArg(OPT_START_KEY, "The first key to read/write " +
        "(a 0-based index). The default value is " +
        DEFAULT_START_KEY + ".");
    addOptNoArg(OPT_SKIP_INIT, "Skip the initialization; assume test table "
        + "already exists");

    addOptWithArg(NUM_TABLES,
      "A positive integer number. When a number n is specified, the load test "
          + "tool will load n tables in parallel. The -tn parameter value becomes "
          + "the table name prefix. Each table name is in the format <tn>_1...<tn>_n");
  }

  @Override
  protected void processOptions(CommandLine cmd) {
    this.cmd = cmd;

    tableName = Bytes.toBytes(cmd.getOptionValue(OPT_TABLE_NAME,
        DEFAULT_TABLE_NAME));

    isWrite = cmd.hasOption(OPT_WRITE);
    isRead = cmd.hasOption(OPT_READ);
    isInitOnly = cmd.hasOption(OPT_INIT_ONLY);

    if (!isWrite && !isRead && !isInitOnly) {
      throw new IllegalArgumentException("Either -" + OPT_WRITE + ", -" +
          OPT_READ + " or -" + OPT_INIT_ONLY + " has to be specified");
    }

    if (isInitOnly && (isRead || isWrite)) {
      throw new IllegalArgumentException(OPT_INIT_ONLY + " cannot be specified with"
          + " either -" + OPT_WRITE + " or -" + OPT_READ);
    }

    if (!isInitOnly) {
      if (!cmd.hasOption(OPT_NUM_KEYS)) {
        throw new IllegalArgumentException(OPT_NUM_KEYS + " must be specified in "
            + "read or write mode");
      }
      startKey = parseLong(cmd.getOptionValue(OPT_START_KEY,
          String.valueOf(DEFAULT_START_KEY)), 0, Long.MAX_VALUE);
      long numKeys = parseLong(cmd.getOptionValue(OPT_NUM_KEYS), 1,
          Long.MAX_VALUE - startKey);
      endKey = startKey + numKeys;
      isSkipInit = cmd.hasOption(OPT_SKIP_INIT);
      System.out.println("Key range: [" + startKey + ".." + (endKey - 1) + "]");
    }

    encodeInCacheOnly = cmd.hasOption(OPT_ENCODE_IN_CACHE_ONLY);
    parseColumnFamilyOptions(cmd);

    if (isWrite) {
      String[] writeOpts = splitColonSeparated(OPT_WRITE, 2, 3);

      int colIndex = 0;
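      // Derive bounds from the averages given on the command line: columns
      // per key range over [1, 2 * avg] and per-column data size over
      // [avg / 2, 3 * avg / 2].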
      minColsPerKey = 1;
      maxColsPerKey = 2 * Integer.parseInt(writeOpts[colIndex++]);
      int avgColDataSize =
          parseInt(writeOpts[colIndex++], 1, Integer.MAX_VALUE);
      minColDataSize = avgColDataSize / 2;
      maxColDataSize = avgColDataSize * 3 / 2;

      if (colIndex < writeOpts.length) {
        numWriterThreads = getNumThreads(writeOpts[colIndex++]);
      }

      isMultiPut = cmd.hasOption(OPT_MULTIPUT);

      System.out.println("Multi-puts: " + isMultiPut);
      System.out.println("Columns per key: " + minColsPerKey + ".."
          + maxColsPerKey);
      System.out.println("Data size per column: " + minColDataSize + ".."
          + maxColDataSize);
    }

    if (isRead) {
      String[] readOpts = splitColonSeparated(OPT_READ, 1, 2);
      int colIndex = 0;
      verifyPercent = parseInt(readOpts[colIndex++], 0, 100);
      if (colIndex < readOpts.length) {
        numReaderThreads = getNumThreads(readOpts[colIndex++]);
      }

      if (cmd.hasOption(OPT_MAX_READ_ERRORS)) {
        maxReadErrors = parseInt(cmd.getOptionValue(OPT_MAX_READ_ERRORS),
            0, Integer.MAX_VALUE);
      }

      if (cmd.hasOption(OPT_KEY_WINDOW)) {
        keyWindow = parseInt(cmd.getOptionValue(OPT_KEY_WINDOW),
            0, Integer.MAX_VALUE);
      }

      System.out.println("Percent of keys to verify: " + verifyPercent);
      System.out.println("Reader threads: " + numReaderThreads);
    }

    numTables = 1;
    if (cmd.hasOption(NUM_TABLES)) {
      numTables = parseInt(cmd.getOptionValue(NUM_TABLES), 1, Short.MAX_VALUE);
    }
  }

  protected void parseColumnFamilyOptions(CommandLine cmd) {
    String dataBlockEncodingStr = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING);
    dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null :
        DataBlockEncoding.valueOf(dataBlockEncodingStr);
    if (dataBlockEncodingAlgo == DataBlockEncoding.NONE && encodeInCacheOnly) {
      throw new IllegalArgumentException("-" + OPT_ENCODE_IN_CACHE_ONLY + " " +
          "does not make sense when data block encoding is not used");
    }

    String compressStr = cmd.getOptionValue(OPT_COMPRESSION);
    compressAlgo = compressStr == null ? Compression.Algorithm.NONE :
        Compression.Algorithm.valueOf(compressStr);

    String bloomStr = cmd.getOptionValue(OPT_BLOOM);
    bloomType = bloomStr == null ? null :
        StoreFile.BloomType.valueOf(bloomStr);

    inMemoryCF = cmd.hasOption(OPT_INMEMORY);
  }

  public void initTestTable() throws IOException {
    HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName,
        COLUMN_FAMILY, compressAlgo, dataBlockEncodingAlgo);
    applyColumnFamilyOptions(tableName, COLUMN_FAMILIES);
  }

  @Override
  protected int doWork() throws IOException {
    if (numTables > 1) {
      return parallelLoadTables();
    } else {
      return loadTable();
    }
  }

  protected int loadTable() throws IOException {
    if (cmd.hasOption(OPT_ZK_QUORUM)) {
      conf.set(HConstants.ZOOKEEPER_QUORUM, cmd.getOptionValue(OPT_ZK_QUORUM));
    }

    if (isInitOnly) {
      LOG.info("Initializing only; no reads or writes");
      initTestTable();
      return 0;
    }

    if (!isSkipInit) {
      initTestTable();
    }

    LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
      minColDataSize, maxColDataSize, minColsPerKey, maxColsPerKey, COLUMN_FAMILY);

    if (isWrite) {
      writerThreads = new MultiThreadedWriter(dataGen, conf, tableName);
      writerThreads.setMultiPut(isMultiPut);
    }

    if (isRead) {
      readerThreads = new MultiThreadedReader(dataGen, conf, tableName, verifyPercent);
      readerThreads.setMaxErrors(maxReadErrors);
      readerThreads.setKeyWindow(keyWindow);
    }

    if (isRead && isWrite) {
      LOG.info("Concurrent read/write workload: making readers aware of the " +
          "write point");
      readerThreads.linkToWriter(writerThreads);
    }

    if (isWrite) {
      System.out.println("Starting to write data...");
      writerThreads.start(startKey, endKey, numWriterThreads);
    }

    if (isRead) {
      System.out.println("Starting to read data...");
      readerThreads.start(startKey, endKey, numReaderThreads);
    }

    if (isWrite) {
      writerThreads.waitForFinish();
    }

    if (isRead) {
      readerThreads.waitForFinish();
    }

    boolean success = true;
    if (isWrite) {
      success = success && writerThreads.getNumWriteFailures() == 0;
    }
    if (isRead) {
      success = success && readerThreads.getNumReadErrors() == 0
          && readerThreads.getNumReadFailures() == 0;
    }
    return success ? EXIT_SUCCESS : EXIT_FAILURE;
  }

  public static void main(String[] args) {
    new LoadTestTool().doStaticMain(args);
  }

  /**
   * When NUM_TABLES is specified, this method starts multiple worker threads,
   * each of which runs its own LoadTestTool instance to load one table. Each
   * table name is in the format {@code <tn>_<index>}. For example, with
   * "-tn test -num_tables 2" the table names will be "test_1" and "test_2".
   *
   * @throws IOException
   */
  private int parallelLoadTables()
      throws IOException {
    // create new command args
    String tableName = cmd.getOptionValue(OPT_TABLE_NAME, DEFAULT_TABLE_NAME);
    String[] newArgs = null;
    if (!cmd.hasOption(LoadTestTool.OPT_TABLE_NAME)) {
      newArgs = new String[cmdLineArgs.length + 2];
      newArgs[0] = "-" + LoadTestTool.OPT_TABLE_NAME;
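      // newArgs[1] is intentionally left null here; it is the value slot for
      // -tn and is filled in with a per-worker table name below.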
      for (int i = 0; i < cmdLineArgs.length; i++) {
        newArgs[i + 2] = cmdLineArgs[i];
      }
    } else {
      newArgs = cmdLineArgs;
    }

    int tableNameValueIndex = -1;
    for (int j = 0; j < newArgs.length; j++) {
      if (newArgs[j].endsWith(OPT_TABLE_NAME)) {
        tableNameValueIndex = j + 1;
      } else if (newArgs[j].endsWith(NUM_TABLES)) {
        // change NUM_TABLES to 1 so that each worker loads one table
        newArgs[j + 1] = "1";
      }
    }

    // starting to load multiple tables
    List<WorkerThread> workers = new ArrayList<WorkerThread>();
    for (int i = 0; i < numTables; i++) {
      String[] workerArgs = newArgs.clone();
      workerArgs[tableNameValueIndex] = tableName + "_" + (i + 1);
      WorkerThread worker = new WorkerThread(i, workerArgs);
      workers.add(worker);
      LOG.info(worker + " starting");
      worker.start();
    }

    // wait for all workers to finish
    LOG.info("Waiting for worker threads to finish");
    for (WorkerThread t : workers) {
      try {
        t.join();
      } catch (InterruptedException ie) {
        IOException iie = new InterruptedIOException();
        iie.initCause(ie);
        throw iie;
      }
      checkForErrors();
    }

    return EXIT_SUCCESS;
  }

  // If an exception is thrown by one of the worker threads, it will be
  // stored here.
  protected AtomicReference<Throwable> thrown = new AtomicReference<Throwable>();

  private void workerThreadError(Throwable t) {
    thrown.compareAndSet(null, t);
  }

  /**
   * Check for errors in the worker threads. If one is found, rethrow it.
   */
  private void checkForErrors() throws IOException {
    Throwable thrown = this.thrown.get();
    if (thrown == null) return;
    if (thrown instanceof IOException) {
      throw (IOException) thrown;
    } else {
      throw new RuntimeException(thrown);
    }
  }

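  /** Runs one LoadTestTool instance against a single table in its own thread. */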
  class WorkerThread extends Thread {
    private String[] workerArgs;

    WorkerThread(int i, String[] args) {
      super("WorkerThread-" + i);
      workerArgs = args;
    }

    @Override
    public void run() {
      try {
        int ret = ToolRunner.run(HBaseConfiguration.create(), new LoadTestTool(), workerArgs);
        if (ret != 0) {
          throw new RuntimeException("LoadTestTool exited with a non-zero return code.");
        }
      } catch (Exception ex) {
        LOG.error("Error in worker thread", ex);
        workerThreadError(ex);
      }
    }
  }
}