1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with this 4 * work for additional information regarding copyright ownership. The ASF 5 * licenses this file to you under the Apache License, Version 2.0 (the 6 * "License"); you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 * License for the specific language governing permissions and limitations 15 * under the License. 16 */ 17 package org.apache.hadoop.hbase.util.test; 18 19 import java.util.Random; 20 21 import org.apache.hadoop.hbase.util.Bytes; 22 import org.apache.hadoop.hbase.util.MD5Hash; 23 24 /** 25 * A generator of random keys and values for load testing. Keys are generated 26 * by converting numeric indexes to strings and prefixing them with an MD5 27 * hash. Values are generated by selecting value size in the configured range 28 * and generating a pseudo-random sequence of bytes seeded by key, column 29 * qualifier, and value size. 30 */ 31 public class LoadTestKVGenerator { 32 33 /** A random number generator for determining value size */ 34 private Random randomForValueSize = new Random(); 35 36 private final int minValueSize; 37 private final int maxValueSize; 38 39 public LoadTestKVGenerator(int minValueSize, int maxValueSize) { 40 if (minValueSize <= 0 || maxValueSize <= 0) { 41 throw new IllegalArgumentException("Invalid min/max value sizes: " + 42 minValueSize + ", " + maxValueSize); 43 } 44 this.minValueSize = minValueSize; 45 this.maxValueSize = maxValueSize; 46 } 47 48 /** 49 * Verifies that the given byte array is the same as what would be generated 50 * for the given seed strings (row/cf/column/...). We are assuming that the 51 * value size is correct, and only verify the actual bytes. However, if the 52 * min/max value sizes are set sufficiently high, an accidental match should be 53 * extremely improbable. 54 */ 55 public static boolean verify(byte[] value, byte[]... seedStrings) { 56 byte[] expectedData = getValueForRowColumn(value.length, seedStrings); 57 return Bytes.equals(expectedData, value); 58 } 59 60 /** 61 * Converts the given key to string, and prefixes it with the MD5 hash of 62 * the index's string representation. 63 */ 64 public static String md5PrefixedKey(long key) { 65 String stringKey = Long.toString(key); 66 String md5hash = MD5Hash.getMD5AsHex(Bytes.toBytes(stringKey)); 67 68 // flip the key to randomize 69 return md5hash + "-" + stringKey; 70 } 71 72 /** 73 * Generates a value for the given key index and column qualifier. Size is 74 * selected randomly in the configured range. The generated value depends 75 * only on the combination of the strings passed (key/cf/column/...) and the selected 76 * value size. This allows to verify the actual value bytes when reading, as done 77 * in {#verify(byte[], byte[]...)} 78 * This method is as thread-safe as Random class. It appears that the worst bug ever 79 * found with the latter is that multiple threads will get some duplicate values, which 80 * we don't care about. 81 */ 82 public byte[] generateRandomSizeValue(byte[]... seedStrings) { 83 int dataSize = minValueSize; 84 if(minValueSize != maxValueSize) { 85 dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize)); 86 } 87 return getValueForRowColumn(dataSize, seedStrings); 88 } 89 90 /** 91 * Generates random bytes of the given size for the given row and column 92 * qualifier. The random seed is fully determined by these parameters. 93 */ 94 private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) { 95 long seed = dataSize; 96 for (byte[] str : seedStrings) { 97 seed += Bytes.toString(str).hashCode(); 98 } 99 Random seededRandom = new Random(seed); 100 byte[] randomBytes = new byte[dataSize]; 101 seededRandom.nextBytes(randomBytes); 102 return randomBytes; 103 } 104 105 }