1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.util;
18  
19  import java.io.IOException;
20  import java.util.Arrays;
21  
22  import org.apache.commons.cli.CommandLine;
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.HBaseTestingUtility;
26  import org.apache.hadoop.hbase.HColumnDescriptor;
27  import org.apache.hadoop.hbase.HConstants;
28  import org.apache.hadoop.hbase.HTableDescriptor;
29  import org.apache.hadoop.hbase.PerformanceEvaluation;
30  import org.apache.hadoop.hbase.client.HBaseAdmin;
31  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
32  import org.apache.hadoop.hbase.io.hfile.Compression;
33  import org.apache.hadoop.hbase.regionserver.StoreFile;
34  
35  /**
36   * A command-line utility that reads, writes, and verifies data. Unlike
37   * {@link PerformanceEvaluation}, this tool validates the data written,
38   * and supports simultaneously writing and reading the same set of keys.
39   */
40  public class LoadTestTool extends AbstractHBaseTool {
41  
  /** Logger for this class. */
  private static final Log LOG = LogFactory.getLog(LoadTestTool.class);

  /** Table name for the test */
  protected byte[] tableName;

  /** Table name to use if not overridden on the command line */
  protected static final String DEFAULT_TABLE_NAME = "cluster_test";

  /**
   * Column family used by the test.
   * NOTE(review): this field is static but not final, so it can be
   * reassigned; confirm whether that is intentional.
   */
  protected static byte[] COLUMN_FAMILY = Bytes.toBytes("test_cf");

  /**
   * Column families used by the test. The array captures the value that
   * {@link #COLUMN_FAMILY} holds when this class is initialized.
   */
  protected static final byte[][] COLUMN_FAMILIES = { COLUMN_FAMILY };

  /** The number of reader/writer threads if not specified */
  protected static final int DEFAULT_NUM_THREADS = 20;

  /** Usage string for the load option */
  protected static final String OPT_USAGE_LOAD =
      "<avg_cols_per_key>:<avg_data_size>" +
      "[:<#threads=" + DEFAULT_NUM_THREADS + ">]";

  /** Usage string for the read option */
  protected static final String OPT_USAGE_READ =
      "<verify_percent>[:<#threads=" + DEFAULT_NUM_THREADS + ">]";

  /** Usage string for the Bloom filter option; lists every Bloom type value */
  protected static final String OPT_USAGE_BLOOM = "Bloom filter type, one of " +
      Arrays.toString(StoreFile.BloomType.values());

  /** Usage string for the compression option; lists every algorithm value */
  protected static final String OPT_USAGE_COMPRESSION = "Compression type, " +
      "one of " + Arrays.toString(Compression.Algorithm.values());

  /**
   * Usage string for the data block encoding option; lists every
   * {@link DataBlockEncoding} value.
   */
  public static final String OPT_DATA_BLOCK_ENCODING_USAGE =
    "Encoding algorithm (e.g. prefix "
        + "compression) to use for data blocks in the test column family, "
        + "one of " + Arrays.toString(DataBlockEncoding.values()) + ".";

  // Names of the column family related command-line options.
  private static final String OPT_BLOOM = "bloom";
  private static final String OPT_COMPRESSION = "compression";
  /** Option name: the lower-cased column descriptor attribute name */
  public static final String OPT_DATA_BLOCK_ENCODING =
      HColumnDescriptor.DATA_BLOCK_ENCODING.toLowerCase();
  /** Option name for encoding data blocks in the block cache only */
  public static final String OPT_ENCODE_IN_CACHE_ONLY =
      "encode_in_cache_only";
  /** Usage string for the {@link #OPT_ENCODE_IN_CACHE_ONLY} option */
  public static final String OPT_ENCODE_IN_CACHE_ONLY_USAGE =
      "If this is specified, data blocks will only be encoded in block " +
      "cache but not on disk";

  // Names of the remaining command-line options; each one is registered in
  // addOptions() and read back in processOptions() or doWork().
  protected static final String OPT_KEY_WINDOW = "key_window";
  protected static final String OPT_WRITE = "write";
  protected static final String OPT_MAX_READ_ERRORS = "max_read_errors";
  protected static final String OPT_MULTIPUT = "multiput";
  protected static final String OPT_NUM_KEYS = "num_keys";
  protected static final String OPT_READ = "read";
  protected static final String OPT_START_KEY = "start_key";
  protected static final String OPT_TABLE_NAME = "tn";
  protected static final String OPT_ZK_QUORUM = "zk";
  protected static final String OPT_SKIP_INIT = "skip_init";
  protected static final String OPT_INIT_ONLY = "init_only";
100 
  /** First key to use when the start key option is not given */
  protected static final long DEFAULT_START_KEY = 0;

  /** This will be removed as we factor out the dependency on command line */
  protected CommandLine cmd;

  /** Writer threads; created in doWork() only when the write option is set */
  protected MultiThreadedWriter writerThreads = null;
  /** Reader threads; created in doWork() only when the read option is set */
  protected MultiThreadedReader readerThreads = null;

  /**
   * Key range to process. processOptions() sets endKey to startKey plus the
   * number of keys and reports the range as [startKey..endKey - 1].
   */
  protected long startKey, endKey;

  /** Whether the write and the read option, respectively, were specified */
  protected boolean isWrite, isRead;

  // Column family options
  /** Data block encoding to apply, or null if the option was not given */
  protected DataBlockEncoding dataBlockEncodingAlgo;
  /** Whether the encode-in-cache-only option was specified */
  protected boolean encodeInCacheOnly;
  /** Compression to apply; parseColumnFamilyOptions() defaults it to NONE */
  protected Compression.Algorithm compressAlgo;
  /** Bloom filter type to apply, or null if the option was not given */
  protected StoreFile.BloomType bloomType;

  // Writer options
  /** Number of writer threads; may be overridden by the write option */
  protected int numWriterThreads = DEFAULT_NUM_THREADS;
  /** Bounds on columns per key, derived from the write option */
  protected int minColsPerKey, maxColsPerKey;
  /** Bounds on the data size per column, derived from the write option */
  protected int minColDataSize, maxColDataSize;
  /** Whether the multi-put option was specified */
  protected boolean isMultiPut;

  // Reader options
  /** Number of reader threads; may be overridden by the read option */
  protected int numReaderThreads = DEFAULT_NUM_THREADS;
  /** Key window handed to the reader; may be overridden on the command line */
  protected int keyWindow = MultiThreadedReader.DEFAULT_KEY_WINDOW;
  /** Maximum read errors handed to the reader; may be overridden */
  protected int maxReadErrors = MultiThreadedReader.DEFAULT_MAX_ERRORS;
  /** Verification percentage, parsed from the first field of the read option */
  protected int verifyPercent;

  // TODO: refactor LoadTestToolImpl somewhere to make the usage from tests less bad,
  // console tool itself should only be used from console.
  /** Whether table initialization is skipped in doWork() */
  protected boolean isSkipInit = false;
  /** Whether doWork() only initializes the table and then returns */
  protected boolean isInitOnly = false;
135 
136   protected String[] splitColonSeparated(String option,
137       int minNumCols, int maxNumCols) {
138     String optVal = cmd.getOptionValue(option);
139     String[] cols = optVal.split(":");
140     if (cols.length < minNumCols || cols.length > maxNumCols) {
141       throw new IllegalArgumentException("Expected at least "
142           + minNumCols + " columns but no more than " + maxNumCols +
143           " in the colon-separated value '" + optVal + "' of the " +
144           "-" + option + " option");
145     }
146     return cols;
147   }
148 
149   protected int getNumThreads(String numThreadsStr) {
150     return parseInt(numThreadsStr, 1, Short.MAX_VALUE);
151   }
152 
153   /**
154    * Apply column family options such as Bloom filters, compression, and data
155    * block encoding.
156    */
157   protected void applyColumnFamilyOptions(byte[] tableName,
158       byte[][] columnFamilies) throws IOException {
159     HBaseAdmin admin = new HBaseAdmin(conf);
160     HTableDescriptor tableDesc = admin.getTableDescriptor(tableName);
161     LOG.info("Disabling table " + Bytes.toString(tableName));
162     admin.disableTable(tableName);
163     for (byte[] cf : columnFamilies) {
164       HColumnDescriptor columnDesc = tableDesc.getFamily(cf);
165       boolean isNewCf = columnDesc == null;
166       if (isNewCf) {
167         columnDesc = new HColumnDescriptor(cf);
168       }
169       if (bloomType != null) {
170         columnDesc.setBloomFilterType(bloomType);
171       }
172       if (compressAlgo != null) {
173         columnDesc.setCompressionType(compressAlgo);
174       }
175       if (dataBlockEncodingAlgo != null) {
176         columnDesc.setDataBlockEncoding(dataBlockEncodingAlgo);
177         columnDesc.setEncodeOnDisk(!encodeInCacheOnly);
178       }
179       if (isNewCf) {
180         admin.addColumn(tableName, columnDesc);
181       } else {
182         admin.modifyColumn(tableName, columnDesc);
183       }
184     }
185     LOG.info("Enabling table " + Bytes.toString(tableName));
186     admin.enableTable(tableName);
187   }
188 
189   @Override
190   protected void addOptions() {
191     addOptWithArg(OPT_ZK_QUORUM, "ZK quorum as comma-separated host names " +
192         "without port numbers");
193     addOptWithArg(OPT_TABLE_NAME, "The name of the table to read or write");
194     addOptWithArg(OPT_WRITE, OPT_USAGE_LOAD);
195     addOptWithArg(OPT_READ, OPT_USAGE_READ);
196     addOptNoArg(OPT_INIT_ONLY, "Initialize the test table only, don't do any loading");
197     addOptWithArg(OPT_BLOOM, OPT_USAGE_BLOOM);
198     addOptWithArg(OPT_COMPRESSION, OPT_USAGE_COMPRESSION);
199     addOptWithArg(OPT_DATA_BLOCK_ENCODING, OPT_DATA_BLOCK_ENCODING_USAGE);
200     addOptWithArg(OPT_MAX_READ_ERRORS, "The maximum number of read errors " +
201         "to tolerate before terminating all reader threads. The default is " +
202         MultiThreadedReader.DEFAULT_MAX_ERRORS + ".");
203     addOptWithArg(OPT_KEY_WINDOW, "The 'key window' to maintain between " +
204         "reads and writes for concurrent write/read workload. The default " +
205         "is " + MultiThreadedReader.DEFAULT_KEY_WINDOW + ".");
206 
207     addOptNoArg(OPT_MULTIPUT, "Whether to use multi-puts as opposed to " +
208         "separate puts for every column in a row");
209     addOptNoArg(OPT_ENCODE_IN_CACHE_ONLY, OPT_ENCODE_IN_CACHE_ONLY_USAGE);
210 
211     addOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write");
212     addOptWithArg(OPT_START_KEY, "The first key to read/write " +
213         "(a 0-based index). The default value is " +
214         DEFAULT_START_KEY + ".");
215     addOptNoArg(OPT_SKIP_INIT, "Skip the initialization; assume test table "
216         + "already exists");
217   }
218 
219   @Override
220   protected void processOptions(CommandLine cmd) {
221     this.cmd = cmd;
222 
223     tableName = Bytes.toBytes(cmd.getOptionValue(OPT_TABLE_NAME,
224         DEFAULT_TABLE_NAME));
225 
226     isWrite = cmd.hasOption(OPT_WRITE);
227     isRead = cmd.hasOption(OPT_READ);
228     isInitOnly = cmd.hasOption(OPT_INIT_ONLY);
229 
230     if (!isWrite && !isRead && !isInitOnly) {
231       throw new IllegalArgumentException("Either -" + OPT_WRITE + " or " +
232           "-" + OPT_READ + " has to be specified");
233     }
234 
235     if (isInitOnly && (isRead || isWrite)) {
236       throw new IllegalArgumentException(OPT_INIT_ONLY + " cannot be specified with"
237           + " either -" + OPT_WRITE + " or -" + OPT_READ);
238     }
239 
240     if (!isInitOnly) {
241       if (!cmd.hasOption(OPT_NUM_KEYS)) {
242         throw new IllegalArgumentException(OPT_NUM_KEYS + " must be specified in "
243             + "read or write mode");
244       }
245       startKey = parseLong(cmd.getOptionValue(OPT_START_KEY,
246           String.valueOf(DEFAULT_START_KEY)), 0, Long.MAX_VALUE);
247       long numKeys = parseLong(cmd.getOptionValue(OPT_NUM_KEYS), 1,
248           Long.MAX_VALUE - startKey);
249       endKey = startKey + numKeys;
250       isSkipInit = cmd.hasOption(OPT_SKIP_INIT);
251       System.out.println("Key range: [" + startKey + ".." + (endKey - 1) + "]");
252     }
253 
254     encodeInCacheOnly = cmd.hasOption(OPT_ENCODE_IN_CACHE_ONLY);
255     parseColumnFamilyOptions(cmd);
256 
257     if (isWrite) {
258       String[] writeOpts = splitColonSeparated(OPT_WRITE, 2, 3);
259 
260       int colIndex = 0;
261       minColsPerKey = 1;
262       maxColsPerKey = 2 * Integer.parseInt(writeOpts[colIndex++]);
263       int avgColDataSize =
264           parseInt(writeOpts[colIndex++], 1, Integer.MAX_VALUE);
265       minColDataSize = avgColDataSize / 2;
266       maxColDataSize = avgColDataSize * 3 / 2;
267 
268       if (colIndex < writeOpts.length) {
269         numWriterThreads = getNumThreads(writeOpts[colIndex++]);
270       }
271 
272       isMultiPut = cmd.hasOption(OPT_MULTIPUT);
273 
274       System.out.println("Multi-puts: " + isMultiPut);
275       System.out.println("Columns per key: " + minColsPerKey + ".."
276           + maxColsPerKey);
277       System.out.println("Data size per column: " + minColDataSize + ".."
278           + maxColDataSize);
279     }
280 
281     if (isRead) {
282       String[] readOpts = splitColonSeparated(OPT_READ, 1, 2);
283       int colIndex = 0;
284       verifyPercent = parseInt(readOpts[colIndex++], 0, 100);
285       if (colIndex < readOpts.length) {
286         numReaderThreads = getNumThreads(readOpts[colIndex++]);
287       }
288 
289       if (cmd.hasOption(OPT_MAX_READ_ERRORS)) {
290         maxReadErrors = parseInt(cmd.getOptionValue(OPT_MAX_READ_ERRORS),
291             0, Integer.MAX_VALUE);
292       }
293 
294       if (cmd.hasOption(OPT_KEY_WINDOW)) {
295         keyWindow = parseInt(cmd.getOptionValue(OPT_KEY_WINDOW),
296             0, Integer.MAX_VALUE);
297       }
298 
299       System.out.println("Percent of keys to verify: " + verifyPercent);
300       System.out.println("Reader threads: " + numReaderThreads);
301     }
302   }
303 
304   protected void parseColumnFamilyOptions(CommandLine cmd) {
305     String dataBlockEncodingStr = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING);
306     dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null :
307         DataBlockEncoding.valueOf(dataBlockEncodingStr);
308     if (dataBlockEncodingAlgo == DataBlockEncoding.NONE && encodeInCacheOnly) {
309       throw new IllegalArgumentException("-" + OPT_ENCODE_IN_CACHE_ONLY + " " +
310           "does not make sense when data block encoding is not used");
311     }
312 
313     String compressStr = cmd.getOptionValue(OPT_COMPRESSION);
314     compressAlgo = compressStr == null ? Compression.Algorithm.NONE :
315         Compression.Algorithm.valueOf(compressStr);
316 
317     String bloomStr = cmd.getOptionValue(OPT_BLOOM);
318     bloomType = bloomStr == null ? null :
319         StoreFile.BloomType.valueOf(bloomStr);
320   }
321 
322   public void initTestTable() throws IOException {
323     HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName,
324         COLUMN_FAMILY, compressAlgo, dataBlockEncodingAlgo);
325     applyColumnFamilyOptions(tableName, COLUMN_FAMILIES);
326   }
327 
328   @Override
329   protected int doWork() throws IOException {
330     if (cmd.hasOption(OPT_ZK_QUORUM)) {
331       conf.set(HConstants.ZOOKEEPER_QUORUM, cmd.getOptionValue(OPT_ZK_QUORUM));
332     }
333 
334     if (isInitOnly) {
335       LOG.info("Initializing only; no reads or writes");
336       initTestTable();
337       return 0;
338     }
339 
340     if (!isSkipInit) {
341       initTestTable();
342     }
343 
344     LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
345       minColDataSize, maxColDataSize, minColsPerKey, maxColsPerKey, COLUMN_FAMILY);
346 
347     if (isWrite) {
348       writerThreads = new MultiThreadedWriter(dataGen, conf, tableName);
349       writerThreads.setMultiPut(isMultiPut);
350     }
351 
352     if (isRead) {
353       readerThreads = new MultiThreadedReader(dataGen, conf, tableName, verifyPercent);
354       readerThreads.setMaxErrors(maxReadErrors);
355       readerThreads.setKeyWindow(keyWindow);
356     }
357 
358     if (isRead && isWrite) {
359       LOG.info("Concurrent read/write workload: making readers aware of the " +
360           "write point");
361       readerThreads.linkToWriter(writerThreads);
362     }
363 
364     if (isWrite) {
365       System.out.println("Starting to write data...");
366       writerThreads.start(startKey, endKey, numWriterThreads);
367     }
368 
369     if (isRead) {
370       System.out.println("Starting to read data...");
371       readerThreads.start(startKey, endKey, numReaderThreads);
372     }
373 
374     if (isWrite) {
375       writerThreads.waitForFinish();
376     }
377 
378     if (isRead) {
379       readerThreads.waitForFinish();
380     }
381 
382     boolean success = true;
383     if (isWrite) {
384       success = success && writerThreads.getNumWriteFailures() == 0;
385     }
386     if (isRead) {
387       success = success && readerThreads.getNumReadErrors() == 0
388           && readerThreads.getNumReadFailures() == 0;
389     }
390     return success ? 0 : 1;
391   }
392 
393   public static void main(String[] args) {
394     new LoadTestTool().doStaticMain(args);
395   }
396 
397 }