/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.io.encoding;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.io.WritableUtils;

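/**
 * Generates lists of KeyValues whose rows, qualifiers, and values contain a
 * lot of redundancy, which makes them useful for testing data block encoders
 * and compression.
 */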
public class RedundantKVGenerator {

  static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
  static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
  static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
  static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
  static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
  static int DEFAULT_NUMBER_OF_ROW = 500;

  static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
  static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
  static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
  static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;

  static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
  static int DEFAULT_VALUE_LENGTH = 8;
  static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;

  static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
  static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;

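  /**
   * Default constructor: uses a fixed random seed and the DEFAULT_* settings
   * defined above.
   */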
  public RedundantKVGenerator() {
    this(new Random(42L),
        DEFAULT_NUMBER_OF_ROW_PREFIXES,
        DEFAULT_AVERAGE_PREFIX_LENGTH,
        DEFAULT_PREFIX_LENGTH_VARIANCE,
        DEFAULT_AVERAGE_SUFFIX_LENGTH,
        DEFAULT_SUFFIX_LENGTH_VARIANCE,
        DEFAULT_NUMBER_OF_ROW,

        DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
        DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
        DEFAULT_AVERAGE_QUALIFIER_LENGTH,
        DEFAULT_QUALIFIER_LENGTH_VARIANCE,

        DEFAULT_COLUMN_FAMILY_LENGTH,
        DEFAULT_VALUE_LENGTH,
        DEFAULT_CHANCE_FOR_ZERO_VALUE,

        DEFAULT_BASE_TIMESTAMP_DIVIDE,
        DEFAULT_TIMESTAMP_DIFF_SIZE
    );
  }

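  /**
   * Fully parameterized constructor; the no-argument constructor supplies the
   * DEFAULT_* constants for every setting.
   *
   * @param randomizer source of randomness used for all generated data
   */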
  public RedundantKVGenerator(Random randomizer,
      int numberOfRowPrefixes,
      int averagePrefixLength,
      int prefixLengthVariance,
      int averageSuffixLength,
      int suffixLengthVariance,
      int numberOfRows,

      float chanceForSameQualifier,
      float chanceForSimiliarQualifier,
      int averageQualifierLength,
      int qualifierLengthVariance,

      int columnFamilyLength,
      int valueLength,
      float chanceForZeroValue,

      int baseTimestampDivide,
      int timestampDiffSize
      ) {
    this.randomizer = randomizer;

    this.numberOfRowPrefixes = numberOfRowPrefixes;
    this.averagePrefixLength = averagePrefixLength;
    this.prefixLengthVariance = prefixLengthVariance;
    this.averageSuffixLength = averageSuffixLength;
    this.suffixLengthVariance = suffixLengthVariance;
    this.numberOfRows = numberOfRows;

    this.chanceForSameQualifier = chanceForSameQualifier;
    this.chanceForSimiliarQualifier = chanceForSimiliarQualifier;
    this.averageQualifierLength = averageQualifierLength;
    this.qualifierLengthVariance = qualifierLengthVariance;

    this.columnFamilyLength = columnFamilyLength;
    this.valueLength = valueLength;
    this.chanceForZeroValue = chanceForZeroValue;

    this.baseTimestampDivide = baseTimestampDivide;
    this.timestampDiffSize = timestampDiffSize;
  }

  private Random randomizer;

  private int numberOfRowPrefixes;
  private int averagePrefixLength = 6;
  private int prefixLengthVariance = 3;
  private int averageSuffixLength = 3;
  private int suffixLengthVariance = 3;
  private int numberOfRows = 500;

  private float chanceForSameQualifier = 0.5f;
  private float chanceForSimiliarQualifier = 0.4f;
  private int averageQualifierLength = 9;
  private int qualifierLengthVariance = 3;

  private int columnFamilyLength = 9;
  private int valueLength = 8;
  private float chanceForZeroValue = 0.5f;

  private int baseTimestampDivide = 1000000;
  private int timestampDiffSize = 100000000;

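  /**
   * Generates the pool of row keys. Each row key consists of one of a small
   * set of shared prefixes followed by a random suffix, so many rows share
   * their leading bytes.
   */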
  private List<byte[]> generateRows() {
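    // build a pool of random row key prefixes of varying length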
    List<byte[]> prefixes = new ArrayList<byte[]>();
    prefixes.add(new byte[0]);
    for (int i = 1; i < numberOfRowPrefixes; ++i) {
      int prefixLength = averagePrefixLength;
      prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
          prefixLengthVariance;
      byte[] newPrefix = new byte[prefixLength];
      randomizer.nextBytes(newPrefix);
      prefixes.add(newPrefix);
    }

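    // build each row from a randomly chosen prefix plus a random suffix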
    List<byte[]> rows = new ArrayList<byte[]>();
    for (int i = 0; i < numberOfRows; ++i) {
      int suffixLength = averageSuffixLength;
      suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
          suffixLengthVariance;
      int randomPrefix = randomizer.nextInt(prefixes.size());
      byte[] prefix = prefixes.get(randomPrefix);
      byte[] row = new byte[prefix.length + suffixLength];
      // fill the row randomly, then overwrite its front with the shared
      // prefix so that rows actually have redundant leading bytes
      randomizer.nextBytes(row);
      System.arraycopy(prefix, 0, row, 0, prefix.length);
      rows.add(row);
    }

    return rows;
  }

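  /**
   * Generates a sorted list of KeyValues with redundant rows and qualifiers,
   * timestamps close to a common base, and values that are often all zeros.
   *
   * @param howMany how many KeyValues to generate
   * @return the generated KeyValues, sorted with {@link KeyValue#COMPARATOR}
   */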
  public List<KeyValue> generateTestKeyValues(int howMany) {
    List<KeyValue> result = new ArrayList<KeyValue>();

    List<byte[]> rows = generateRows();
    Map<Integer, List<byte[]>> rowsToQualifier =
        new HashMap<Integer, List<byte[]>>();

    byte[] family = new byte[columnFamilyLength];
    randomizer.nextBytes(family);

    long baseTimestamp = Math.abs(randomizer.nextLong()) /
        baseTimestampDivide;

    byte[] value = new byte[valueLength];

    for (int i = 0; i < howMany; ++i) {
      long timestamp = baseTimestamp + randomizer.nextInt(
          timestampDiffSize);
      Integer rowId = randomizer.nextInt(rows.size());
      byte[] row = rows.get(rowId);

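      // pick a qualifier: completely new, similar to a previous one, or reused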
      byte[] qualifier;
      float qualifierChance = randomizer.nextFloat();
      if (!rowsToQualifier.containsKey(rowId) ||
          qualifierChance > chanceForSameQualifier +
              chanceForSimiliarQualifier) {
        int qualifierLength = averageQualifierLength;
        qualifierLength +=
            randomizer.nextInt(2 * qualifierLengthVariance + 1) -
                qualifierLengthVariance;
        qualifier = new byte[qualifierLength];
        randomizer.nextBytes(qualifier);

        if (!rowsToQualifier.containsKey(rowId)) {
          rowsToQualifier.put(rowId, new ArrayList<byte[]>());
        }
        rowsToQualifier.get(rowId).add(qualifier);
      } else if (qualifierChance > chanceForSameQualifier) {
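        // build a qualifier that shares a random-length prefix with a previous one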
        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
        byte[] originalQualifier = previousQualifiers.get(
            randomizer.nextInt(previousQualifiers.size()));

        qualifier = new byte[originalQualifier.length];
        int commonPrefix = randomizer.nextInt(qualifier.length);
        System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
        for (int j = commonPrefix; j < qualifier.length; ++j) {
          qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
        }

        rowsToQualifier.get(rowId).add(qualifier);
      } else {
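        // reuse one of the qualifiers already generated for this row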
        List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
        qualifier = previousQualifiers.get(
            randomizer.nextInt(previousQualifiers.size()));
      }

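      // with probability chanceForZeroValue the value is all zeros, otherwise random bytes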
      if (randomizer.nextFloat() < chanceForZeroValue) {
        for (int j = 0; j < value.length; ++j) {
          value[j] = (byte) 0;
        }
      } else {
        randomizer.nextBytes(value);
      }

      result.add(new KeyValue(row, family, qualifier, timestamp, value));
    }

    Collections.sort(result, KeyValue.COMPARATOR);

    return result;
  }

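  /**
   * Serializes a list of KeyValues into a single ByteBuffer, writing each
   * KeyValue's backing bytes and, optionally, its memstore timestamp as a
   * variable-length long.
   *
   * @param keyValues the KeyValues to serialize
   * @param includesMemstoreTS whether to append each KeyValue's memstore TS
   * @return a buffer containing the serialized KeyValues
   */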
  public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
      boolean includesMemstoreTS) {
    int totalSize = 0;
    for (KeyValue kv : keyValues) {
      totalSize += kv.getLength();
      if (includesMemstoreTS) {
        totalSize += WritableUtils.getVIntSize(kv.getMemstoreTS());
      }
    }

    ByteBuffer result = ByteBuffer.allocate(totalSize);
    for (KeyValue kv : keyValues) {
      result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
      if (includesMemstoreTS) {
        ByteBufferUtils.writeVLong(result, kv.getMemstoreTS());
      }
    }

    return result;
  }
}