1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.util;
18  
19  import java.util.Random;
20  
21  import org.apache.hadoop.hbase.util.Bytes;
22  import org.apache.hadoop.hbase.util.MD5Hash;
23  
24  /**
25   * A generator of random keys and values for load testing. Keys are generated
26   * by converting numeric indexes to strings and prefixing them with an MD5
27   * hash. Values are generated by selecting value size in the configured range
28   * and generating a pseudo-random sequence of bytes seeded by key, column
29   * qualifier, and value size.
30   */
31  public class LoadTestKVGenerator {
32  
33    /** A random number generator for determining value size */
34    private Random randomForValueSize = new Random();
35  
36    private final int minValueSize;
37    private final int maxValueSize;
38  
39    public LoadTestKVGenerator(int minValueSize, int maxValueSize) {
40      if (minValueSize <= 0 || maxValueSize <= 0) {
41        throw new IllegalArgumentException("Invalid min/max value sizes: " +
42            minValueSize + ", " + maxValueSize);
43      }
44      this.minValueSize = minValueSize;
45      this.maxValueSize = maxValueSize;
46    }
47  
48    /**
49     * Verifies that the given byte array is the same as what would be generated
50     * for the given seed strings (row/cf/column/...). We are assuming that the
51     * value size is correct, and only verify the actual bytes. However, if the
52     * min/max value sizes are set sufficiently high, an accidental match should be
53     * extremely improbable.
54     */
55    public static boolean verify(byte[] value, byte[]... seedStrings) {
56      byte[] expectedData = getValueForRowColumn(value.length, seedStrings);
57      return Bytes.equals(expectedData, value);
58    }
59  
60    /**
61     * Converts the given key to string, and prefixes it with the MD5 hash of
62     * the index's string representation.
63     */
64    public static String md5PrefixedKey(long key) {
65      String stringKey = Long.toString(key);
66      String md5hash = MD5Hash.getMD5AsHex(Bytes.toBytes(stringKey));
67  
68      // flip the key to randomize
69      return md5hash + "-" + stringKey;
70    }
71  
72    /**
73     * Generates a value for the given key index and column qualifier. Size is
74     * selected randomly in the configured range. The generated value depends
75     * only on the combination of the strings passed (key/cf/column/...) and the selected
76     * value size. This allows to verify the actual value bytes when reading, as done
77     * in {#verify(byte[], byte[]...)}
78     * This method is as thread-safe as Random class. It appears that the worst bug ever
79     * found with the latter is that multiple threads will get some duplicate values, which
80     * we don't care about.
81     */
82    public byte[] generateRandomSizeValue(byte[]... seedStrings) {
83      int dataSize = minValueSize;
84      if (minValueSize != maxValueSize) {
85        dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize));
86      }
87      return getValueForRowColumn(dataSize, seedStrings);
88   }
89  
90    /**
91     * Generates random bytes of the given size for the given row and column
92     * qualifier. The random seed is fully determined by these parameters.
93     */
94    private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) {
95      long seed = dataSize;
96      for (byte[] str : seedStrings) {
97        seed += Bytes.toString(str).hashCode();
98      }
99      Random seededRandom = new Random(seed);
100     byte[] randomBytes = new byte[dataSize];
101     seededRandom.nextBytes(randomBytes);
102     return randomBytes;
103   }
104 
105 }