View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.util;
18  
19  import java.io.IOException;
20  import java.io.InterruptedIOException;
21  import java.util.ArrayList;
22  import java.util.Arrays;
23  import java.util.List;
24  import java.util.concurrent.atomic.AtomicReference;
25  
26  import org.apache.commons.cli.CommandLine;
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.TableName;
30  import org.apache.hadoop.hbase.HBaseConfiguration;
31  import org.apache.hadoop.hbase.HBaseTestingUtility;
32  import org.apache.hadoop.hbase.HColumnDescriptor;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.HTableDescriptor;
35  import org.apache.hadoop.hbase.PerformanceEvaluation;
36  import org.apache.hadoop.hbase.client.HBaseAdmin;
37  import org.apache.hadoop.hbase.io.compress.Compression;
38  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
39  import org.apache.hadoop.hbase.regionserver.BloomType;
40  import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
41  import org.apache.hadoop.util.ToolRunner;
42  
43  /**
44   * A command-line utility that reads, writes, and verifies data. Unlike
45   * {@link PerformanceEvaluation}, this tool validates the data written,
46   * and supports simultaneously writing and reading the same set of keys.
47   */
48  public class LoadTestTool extends AbstractHBaseTool {
49  
  /** Logger for this tool. */
  private static final Log LOG = LogFactory.getLog(LoadTestTool.class);

  /** Table name for the test */
  private TableName tableName;

  /** Table name to use if not overridden on the command line */
  protected static final String DEFAULT_TABLE_NAME = "cluster_test";

  /**
   * Column family used by the test.
   * NOTE(review): this field is public and not final, so it can be
   * reassigned from outside; {@link #COLUMN_FAMILIES} captures the value it
   * has at class initialization.
   */
  public static byte[] COLUMN_FAMILY = Bytes.toBytes("test_cf");

  /** Column families used by the test */
  protected static final byte[][] COLUMN_FAMILIES = { COLUMN_FAMILY };

  /** The default data size if not specified */
  protected static final int DEFAULT_DATA_SIZE = 64;

  /** The number of reader/writer threads if not specified */
  protected static final int DEFAULT_NUM_THREADS = 20;

  /** Usage string for the load option */
  protected static final String OPT_USAGE_LOAD =
      "<avg_cols_per_key>:<avg_data_size>" +
      "[:<#threads=" + DEFAULT_NUM_THREADS + ">]";

  /** Usage string for the read option */
  protected static final String OPT_USAGE_READ =
      "<verify_percent>[:<#threads=" + DEFAULT_NUM_THREADS + ">]";

  /** Usage string for the update option */
  protected static final String OPT_USAGE_UPDATE =
      "<update_percent>[:<#threads=" + DEFAULT_NUM_THREADS + ">]";

  /** Usage string for the Bloom filter option; lists all {@link BloomType} values */
  protected static final String OPT_USAGE_BLOOM = "Bloom filter type, one of " +
      Arrays.toString(BloomType.values());

  /** Usage string for the compression option; lists all {@link Compression.Algorithm} values */
  protected static final String OPT_USAGE_COMPRESSION = "Compression type, " +
      "one of " + Arrays.toString(Compression.Algorithm.values());

  /** Usage string for the data block encoding option; lists all {@link DataBlockEncoding} values */
  public static final String OPT_DATA_BLOCK_ENCODING_USAGE =
    "Encoding algorithm (e.g. prefix "
        + "compression) to use for data blocks in the test column family, "
        + "one of " + Arrays.toString(DataBlockEncoding.values()) + ".";

  // Names of the column family related command-line options.
  private static final String OPT_BLOOM = "bloom";
  private static final String OPT_COMPRESSION = "compression";
  /** Option name derived from the lower-cased {@link HColumnDescriptor#DATA_BLOCK_ENCODING} key */
  public static final String OPT_DATA_BLOCK_ENCODING =
      HColumnDescriptor.DATA_BLOCK_ENCODING.toLowerCase();

  /** Name of the flag that marks the test column family as in-memory */
  public static final String OPT_INMEMORY = "in_memory";
  /** Usage string for the in-memory flag */
  public static final String OPT_USAGE_IN_MEMORY = "Tries to keep the HFiles of the CF " +
      "inmemory as far as possible.  Not guaranteed that reads are always served from inmemory";

  // Names of the remaining command-line options; each is registered in addOptions().
  protected static final String OPT_KEY_WINDOW = "key_window";
  protected static final String OPT_WRITE = "write";
  protected static final String OPT_MAX_READ_ERRORS = "max_read_errors";
  protected static final String OPT_MULTIPUT = "multiput";
  protected static final String OPT_NUM_KEYS = "num_keys";
  protected static final String OPT_READ = "read";
  protected static final String OPT_START_KEY = "start_key";
  protected static final String OPT_TABLE_NAME = "tn";
  protected static final String OPT_ZK_QUORUM = "zk";
  protected static final String OPT_SKIP_INIT = "skip_init";
  protected static final String OPT_INIT_ONLY = "init_only";
  /** Name of the option giving the number of tables to load in parallel */
  private static final String NUM_TABLES = "num_tables";
  protected static final String OPT_BATCHUPDATE = "batchupdate";
  protected static final String OPT_UPDATE = "update";

  /** First key to use when the start key option is not given */
  protected static final long DEFAULT_START_KEY = 0;

  /** This will be removed as we factor out the dependency on command line */
  protected CommandLine cmd;

  // Worker pools; each is created in loadTable() only when its mode is enabled.
  protected MultiThreadedWriter writerThreads = null;
  protected MultiThreadedReader readerThreads = null;
  protected MultiThreadedUpdater updaterThreads = null;

  /** Key range to process: startKey is inclusive, endKey (startKey + num_keys) is exclusive */
  protected long startKey, endKey;

  /** Which workloads were requested on the command line */
  protected boolean isWrite, isRead, isUpdate;

  // Column family options
  protected DataBlockEncoding dataBlockEncodingAlgo;
  protected Compression.Algorithm compressAlgo;
  protected BloomType bloomType;
  private boolean inMemoryCF;
  // Writer options
  protected int numWriterThreads = DEFAULT_NUM_THREADS;
  protected int minColsPerKey, maxColsPerKey;
  protected int minColDataSize = DEFAULT_DATA_SIZE, maxColDataSize = DEFAULT_DATA_SIZE;
  protected boolean isMultiPut;

  // Updater options
  protected int numUpdaterThreads = DEFAULT_NUM_THREADS;
  protected int updatePercent;
  protected boolean isBatchUpdate;

  // Reader options
  private int numReaderThreads = DEFAULT_NUM_THREADS;
  private int keyWindow = MultiThreadedReader.DEFAULT_KEY_WINDOW;
  private int maxReadErrors = MultiThreadedReader.DEFAULT_MAX_ERRORS;
  private int verifyPercent;

  /** Number of tables to load; values above 1 make doWork() load tables in parallel */
  private int numTables = 1;

  // TODO: refactor LoadTestToolImpl somewhere to make the usage from tests less bad,
  //       console tool itself should only be used from console.
  protected boolean isSkipInit = false;
  protected boolean isInitOnly = false;
159 
160   protected String[] splitColonSeparated(String option,
161       int minNumCols, int maxNumCols) {
162     String optVal = cmd.getOptionValue(option);
163     String[] cols = optVal.split(":");
164     if (cols.length < minNumCols || cols.length > maxNumCols) {
165       throw new IllegalArgumentException("Expected at least "
166           + minNumCols + " columns but no more than " + maxNumCols +
167           " in the colon-separated value '" + optVal + "' of the " +
168           "-" + option + " option");
169     }
170     return cols;
171   }
172 
173   protected int getNumThreads(String numThreadsStr) {
174     return parseInt(numThreadsStr, 1, Short.MAX_VALUE);
175   }
176 
177   /**
178    * Apply column family options such as Bloom filters, compression, and data
179    * block encoding.
180    */
181   protected void applyColumnFamilyOptions(TableName tableName,
182       byte[][] columnFamilies) throws IOException {
183     HBaseAdmin admin = new HBaseAdmin(conf);
184     HTableDescriptor tableDesc = admin.getTableDescriptor(tableName);
185     LOG.info("Disabling table " + tableName);
186     admin.disableTable(tableName);
187     for (byte[] cf : columnFamilies) {
188       HColumnDescriptor columnDesc = tableDesc.getFamily(cf);
189       boolean isNewCf = columnDesc == null;
190       if (isNewCf) {
191         columnDesc = new HColumnDescriptor(cf);
192       }
193       if (bloomType != null) {
194         columnDesc.setBloomFilterType(bloomType);
195       }
196       if (compressAlgo != null) {
197         columnDesc.setCompressionType(compressAlgo);
198       }
199       if (dataBlockEncodingAlgo != null) {
200         columnDesc.setDataBlockEncoding(dataBlockEncodingAlgo);
201       }
202       if (inMemoryCF) {
203         columnDesc.setInMemory(inMemoryCF);
204       }
205       if (isNewCf) {
206         admin.addColumn(tableName, columnDesc);
207       } else {
208         admin.modifyColumn(tableName, columnDesc);
209       }
210     }
211     LOG.info("Enabling table " + tableName);
212     admin.enableTable(tableName);
213   }
214 
215   @Override
216   protected void addOptions() {
217     addOptWithArg(OPT_ZK_QUORUM, "ZK quorum as comma-separated host names " +
218         "without port numbers");
219     addOptWithArg(OPT_TABLE_NAME, "The name of the table to read or write");
220     addOptWithArg(OPT_WRITE, OPT_USAGE_LOAD);
221     addOptWithArg(OPT_READ, OPT_USAGE_READ);
222     addOptWithArg(OPT_UPDATE, OPT_USAGE_UPDATE);
223     addOptNoArg(OPT_INIT_ONLY, "Initialize the test table only, don't do any loading");
224     addOptWithArg(OPT_BLOOM, OPT_USAGE_BLOOM);
225     addOptWithArg(OPT_COMPRESSION, OPT_USAGE_COMPRESSION);
226     addOptWithArg(OPT_DATA_BLOCK_ENCODING, OPT_DATA_BLOCK_ENCODING_USAGE);
227     addOptWithArg(OPT_MAX_READ_ERRORS, "The maximum number of read errors " +
228         "to tolerate before terminating all reader threads. The default is " +
229         MultiThreadedReader.DEFAULT_MAX_ERRORS + ".");
230     addOptWithArg(OPT_KEY_WINDOW, "The 'key window' to maintain between " +
231         "reads and writes for concurrent write/read workload. The default " +
232         "is " + MultiThreadedReader.DEFAULT_KEY_WINDOW + ".");
233 
234     addOptNoArg(OPT_MULTIPUT, "Whether to use multi-puts as opposed to " +
235         "separate puts for every column in a row");
236     addOptNoArg(OPT_BATCHUPDATE, "Whether to use batch as opposed to " +
237         "separate updates for every column in a row");
238     addOptNoArg(OPT_INMEMORY, OPT_USAGE_IN_MEMORY);
239 
240     addOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write");
241     addOptWithArg(OPT_START_KEY, "The first key to read/write " +
242         "(a 0-based index). The default value is " +
243         DEFAULT_START_KEY + ".");
244     addOptNoArg(OPT_SKIP_INIT, "Skip the initialization; assume test table "
245         + "already exists");
246 
247     addOptWithArg(NUM_TABLES,
248       "A positive integer number. When a number n is speicfied, load test "
249           + "tool  will load n table parallely. -tn parameter value becomes "
250           + "table name prefix. Each table name is in format <tn>_1...<tn>_n");
251   }
252 
253   @Override
254   protected void processOptions(CommandLine cmd) {
255     this.cmd = cmd;
256 
257     tableName = TableName.valueOf(cmd.getOptionValue(OPT_TABLE_NAME,
258         DEFAULT_TABLE_NAME));
259 
260     isWrite = cmd.hasOption(OPT_WRITE);
261     isRead = cmd.hasOption(OPT_READ);
262     isUpdate = cmd.hasOption(OPT_UPDATE);
263     isInitOnly = cmd.hasOption(OPT_INIT_ONLY);
264 
265     if (!isWrite && !isRead && !isUpdate && !isInitOnly) {
266       throw new IllegalArgumentException("Either -" + OPT_WRITE + " or " +
267         "-" + OPT_UPDATE + "-" + OPT_READ + " has to be specified");
268     }
269 
270     if (isInitOnly && (isRead || isWrite || isUpdate)) {
271       throw new IllegalArgumentException(OPT_INIT_ONLY + " cannot be specified with"
272           + " either -" + OPT_WRITE + " or -" + OPT_UPDATE + " or -" + OPT_READ);
273     }
274 
275     if (!isInitOnly) {
276       if (!cmd.hasOption(OPT_NUM_KEYS)) {
277         throw new IllegalArgumentException(OPT_NUM_KEYS + " must be specified in "
278             + "read or write mode");
279       }
280       startKey = parseLong(cmd.getOptionValue(OPT_START_KEY,
281           String.valueOf(DEFAULT_START_KEY)), 0, Long.MAX_VALUE);
282       long numKeys = parseLong(cmd.getOptionValue(OPT_NUM_KEYS), 1,
283           Long.MAX_VALUE - startKey);
284       endKey = startKey + numKeys;
285       isSkipInit = cmd.hasOption(OPT_SKIP_INIT);
286       System.out.println("Key range: [" + startKey + ".." + (endKey - 1) + "]");
287     }
288 
289     parseColumnFamilyOptions(cmd);
290 
291     if (isWrite) {
292       String[] writeOpts = splitColonSeparated(OPT_WRITE, 2, 3);
293 
294       int colIndex = 0;
295       minColsPerKey = 1;
296       maxColsPerKey = 2 * Integer.parseInt(writeOpts[colIndex++]);
297       int avgColDataSize =
298           parseInt(writeOpts[colIndex++], 1, Integer.MAX_VALUE);
299       minColDataSize = avgColDataSize / 2;
300       maxColDataSize = avgColDataSize * 3 / 2;
301 
302       if (colIndex < writeOpts.length) {
303         numWriterThreads = getNumThreads(writeOpts[colIndex++]);
304       }
305 
306       isMultiPut = cmd.hasOption(OPT_MULTIPUT);
307 
308       System.out.println("Multi-puts: " + isMultiPut);
309       System.out.println("Columns per key: " + minColsPerKey + ".."
310           + maxColsPerKey);
311       System.out.println("Data size per column: " + minColDataSize + ".."
312           + maxColDataSize);
313     }
314 
315     if (isUpdate) {
316       String[] mutateOpts = splitColonSeparated(OPT_UPDATE, 1, 2);
317       int colIndex = 0;
318       updatePercent = parseInt(mutateOpts[colIndex++], 0, 100);
319       if (colIndex < mutateOpts.length) {
320         numUpdaterThreads = getNumThreads(mutateOpts[colIndex++]);
321       }
322 
323       isBatchUpdate = cmd.hasOption(OPT_BATCHUPDATE);
324 
325       System.out.println("Batch updates: " + isBatchUpdate);
326       System.out.println("Percent of keys to update: " + updatePercent);
327       System.out.println("Updater threads: " + numUpdaterThreads);
328     }
329 
330     if (isRead) {
331       String[] readOpts = splitColonSeparated(OPT_READ, 1, 2);
332       int colIndex = 0;
333       verifyPercent = parseInt(readOpts[colIndex++], 0, 100);
334       if (colIndex < readOpts.length) {
335         numReaderThreads = getNumThreads(readOpts[colIndex++]);
336       }
337 
338       if (cmd.hasOption(OPT_MAX_READ_ERRORS)) {
339         maxReadErrors = parseInt(cmd.getOptionValue(OPT_MAX_READ_ERRORS),
340             0, Integer.MAX_VALUE);
341       }
342 
343       if (cmd.hasOption(OPT_KEY_WINDOW)) {
344         keyWindow = parseInt(cmd.getOptionValue(OPT_KEY_WINDOW),
345             0, Integer.MAX_VALUE);
346       }
347 
348       System.out.println("Percent of keys to verify: " + verifyPercent);
349       System.out.println("Reader threads: " + numReaderThreads);
350     }
351 
352     numTables = 1;
353     if(cmd.hasOption(NUM_TABLES)) {
354       numTables = parseInt(cmd.getOptionValue(NUM_TABLES), 1, Short.MAX_VALUE);
355     }
356   }
357 
358   private void parseColumnFamilyOptions(CommandLine cmd) {
359     String dataBlockEncodingStr = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING);
360     dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null :
361         DataBlockEncoding.valueOf(dataBlockEncodingStr);
362 
363     String compressStr = cmd.getOptionValue(OPT_COMPRESSION);
364     compressAlgo = compressStr == null ? Compression.Algorithm.NONE :
365         Compression.Algorithm.valueOf(compressStr);
366 
367     String bloomStr = cmd.getOptionValue(OPT_BLOOM);
368     bloomType = bloomStr == null ? null :
369         BloomType.valueOf(bloomStr);
370 
371     inMemoryCF = cmd.hasOption(OPT_INMEMORY);
372     
373   }
374 
375   public void initTestTable() throws IOException {
376     HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName,
377         COLUMN_FAMILY, compressAlgo, dataBlockEncodingAlgo);
378     applyColumnFamilyOptions(tableName, COLUMN_FAMILIES);
379   }
380 
381   @Override
382   protected int doWork() throws IOException {
383     if (numTables > 1) {
384       return parallelLoadTables();
385     } else {
386       return loadTable();
387     }
388   }
389 
390   protected int loadTable() throws IOException {
391     if (cmd.hasOption(OPT_ZK_QUORUM)) {
392       conf.set(HConstants.ZOOKEEPER_QUORUM, cmd.getOptionValue(OPT_ZK_QUORUM));
393     }
394 
395     if (isInitOnly) {
396       LOG.info("Initializing only; no reads or writes");
397       initTestTable();
398       return 0;
399     }
400 
401     if (!isSkipInit) {
402       initTestTable();
403     }
404 
405     LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
406         minColDataSize, maxColDataSize, minColsPerKey, maxColsPerKey, COLUMN_FAMILY);
407 
408     if (isWrite) {
409       writerThreads = new MultiThreadedWriter(dataGen, conf, tableName);
410       writerThreads.setMultiPut(isMultiPut);
411     }
412 
413     if (isUpdate) {
414       updaterThreads = new MultiThreadedUpdater(dataGen, conf, tableName, updatePercent);
415       updaterThreads.setBatchUpdate(isBatchUpdate);
416     }
417 
418     if (isRead) {
419       readerThreads = new MultiThreadedReader(dataGen, conf, tableName, verifyPercent);
420       readerThreads.setMaxErrors(maxReadErrors);
421       readerThreads.setKeyWindow(keyWindow);
422     }
423 
424     if (isUpdate && isWrite) {
425       LOG.info("Concurrent write/update workload: making updaters aware of the " +
426         "write point");
427       updaterThreads.linkToWriter(writerThreads);
428     }
429 
430     if (isRead && (isUpdate || isWrite)) {
431       LOG.info("Concurrent write/read workload: making readers aware of the " +
432         "write point");
433       readerThreads.linkToWriter(isUpdate ? updaterThreads : writerThreads);
434     }
435 
436     if (isWrite) {
437       System.out.println("Starting to write data...");
438       writerThreads.start(startKey, endKey, numWriterThreads);
439     }
440 
441     if (isUpdate) {
442       System.out.println("Starting to mutate data...");
443       updaterThreads.start(startKey, endKey, numUpdaterThreads);
444     }
445 
446     if (isRead) {
447       System.out.println("Starting to read data...");
448       readerThreads.start(startKey, endKey, numReaderThreads);
449     }
450 
451     if (isWrite) {
452       writerThreads.waitForFinish();
453     }
454 
455     if (isUpdate) {
456       updaterThreads.waitForFinish();
457     }
458 
459     if (isRead) {
460       readerThreads.waitForFinish();
461     }
462 
463     boolean success = true;
464     if (isWrite) {
465       success = success && writerThreads.getNumWriteFailures() == 0;
466     }
467     if (isUpdate) {
468       success = success && updaterThreads.getNumWriteFailures() == 0;
469     }
470     if (isRead) {
471       success = success && readerThreads.getNumReadErrors() == 0
472           && readerThreads.getNumReadFailures() == 0;
473     }
474     return success ? EXIT_SUCCESS : EXIT_FAILURE;
475   }
476 
477   public static void main(String[] args) {
478     new LoadTestTool().doStaticMain(args);
479   }
480 
481   /**
482    * When NUM_TABLES is specified, the function starts multiple worker threads
483    * which individually start a LoadTestTool instance to load a table. Each
484    * table name is in format <tn>_<index>. For example, "-tn test -num_tables 2"
485    * , table names will be "test_1", "test_2"
486    *
487    * @throws IOException
488    */
489   private int parallelLoadTables()
490       throws IOException {
491     // create new command args
492     String tableName = cmd.getOptionValue(OPT_TABLE_NAME, DEFAULT_TABLE_NAME);
493     String[] newArgs = null;
494     if (!cmd.hasOption(LoadTestTool.OPT_TABLE_NAME)) {
495       newArgs = new String[cmdLineArgs.length + 2];
496       newArgs[0] = "-" + LoadTestTool.OPT_TABLE_NAME;
497       newArgs[1] = LoadTestTool.DEFAULT_TABLE_NAME;
498       for (int i = 0; i < cmdLineArgs.length; i++) {
499         newArgs[i + 2] = cmdLineArgs[i];
500       }
501     } else {
502       newArgs = cmdLineArgs;
503     }
504 
505     int tableNameValueIndex = -1;
506     for (int j = 0; j < newArgs.length; j++) {
507       if (newArgs[j].endsWith(OPT_TABLE_NAME)) {
508         tableNameValueIndex = j + 1;
509       } else if (newArgs[j].endsWith(NUM_TABLES)) {
510         // change NUM_TABLES to 1 so that each worker loads one table
511         newArgs[j + 1] = "1";
512       }
513     }
514 
515     // starting to load multiple tables
516     List<WorkerThread> workers = new ArrayList<WorkerThread>();
517     for (int i = 0; i < numTables; i++) {
518       String[] workerArgs = newArgs.clone();
519       workerArgs[tableNameValueIndex] = tableName + "_" + (i+1);
520       WorkerThread worker = new WorkerThread(i, workerArgs);
521       workers.add(worker);
522       LOG.info(worker + " starting");
523       worker.start();
524     }
525 
526     // wait for all workers finish
527     LOG.info("Waiting for worker threads to finish");
528     for (WorkerThread t : workers) {
529       try {
530         t.join();
531       } catch (InterruptedException ie) {
532         IOException iie = new InterruptedIOException();
533         iie.initCause(ie);
534         throw iie;
535       }
536       checkForErrors();
537     }
538 
539     return EXIT_SUCCESS;
540   }
541 
  // If an exception is thrown by one of worker threads, it will be
  // stored here. Starts out empty (null) and is read by checkForErrors().
  protected AtomicReference<Throwable> thrown = new AtomicReference<Throwable>();

  /**
   * Records a failure reported by a worker thread. The value is only stored
   * while the reference is still null, so the first recorded failure is kept
   * and later ones are dropped.
   *
   * @param t the failure to record
   */
  private void workerThreadError(Throwable t) {
    thrown.compareAndSet(null, t);
  }
549 
550   /**
551    * Check for errors in the writer threads. If any is found, rethrow it.
552    */
553   private void checkForErrors() throws IOException {
554     Throwable thrown = this.thrown.get();
555     if (thrown == null) return;
556     if (thrown instanceof IOException) {
557       throw (IOException) thrown;
558     } else {
559       throw new RuntimeException(thrown);
560     }
561   }
562 
563   class WorkerThread extends Thread {
564     private String[] workerArgs;
565 
566     WorkerThread(int i, String[] args) {
567       super("WorkerThread-" + i);
568       workerArgs = args;
569     }
570 
571     @Override
572     public void run() {
573       try {
574         int ret = ToolRunner.run(HBaseConfiguration.create(), new LoadTestTool(), workerArgs);
575         if (ret != 0) {
576           throw new RuntimeException("LoadTestTool exit with non-zero return code.");
577         }
578       } catch (Exception ex) {
579         LOG.error("Error in worker thread", ex);
580         workerThreadError(ex);
581       }
582     }
583   }
584 }