1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.hadoop.hbase.util.test;
18
19 import java.nio.ByteBuffer;
20 import java.util.ArrayList;
21 import java.util.Collections;
22 import java.util.HashMap;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Random;
26
27 import org.apache.hadoop.classification.InterfaceAudience;
28 import org.apache.hadoop.hbase.KeyValue;
29 import org.apache.hadoop.hbase.util.ByteBufferUtils;
30 import org.apache.hadoop.io.WritableUtils;
31
32 import com.google.common.primitives.Bytes;
33
34
35
36
37
38 @edu.umd.cs.findbugs.annotations.SuppressWarnings(
39 value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
40 justification="Should probably fix")
41 @InterfaceAudience.Private
42 public class RedundantKVGenerator {
43
44 static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
45 static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
46 static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
47 static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
48 static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
49 static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
50 static int DEFAULT_NUMBER_OF_ROW = 500;
51
52
53 static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
54 static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
55 static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
56 static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
57
58 static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
59 static int DEFAULT_VALUE_LENGTH = 8;
60 static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
61
62 static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
63 static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
64
65
66
67
68 public RedundantKVGenerator() {
69 this(new Random(42L),
70 DEFAULT_NUMBER_OF_ROW_PREFIXES,
71 DEFAULT_AVERAGE_PREFIX_LENGTH,
72 DEFAULT_PREFIX_LENGTH_VARIANCE,
73 DEFAULT_AVERAGE_SUFFIX_LENGTH,
74 DEFAULT_SUFFIX_LENGTH_VARIANCE,
75 DEFAULT_NUMBER_OF_ROW,
76
77 DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
78 DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
79 DEFAULT_AVERAGE_QUALIFIER_LENGTH,
80 DEFAULT_QUALIFIER_LENGTH_VARIANCE,
81
82 DEFAULT_COLUMN_FAMILY_LENGTH,
83 DEFAULT_VALUE_LENGTH,
84 DEFAULT_CHANCE_FOR_ZERO_VALUE,
85
86 DEFAULT_BASE_TIMESTAMP_DIVIDE,
87 DEFAULT_TIMESTAMP_DIFF_SIZE
88 );
89 }
90
91
92
93
94
95
96 public RedundantKVGenerator(Random randomizer,
97 int numberOfRowPrefixes,
98 int averagePrefixLength,
99 int prefixLengthVariance,
100 int averageSuffixLength,
101 int suffixLengthVariance,
102 int numberOfRows,
103
104 float chanceForSameQualifier,
105 float chanceForSimiliarQualifier,
106 int averageQualifierLength,
107 int qualifierLengthVariance,
108
109 int columnFamilyLength,
110 int valueLength,
111 float chanceForZeroValue,
112
113 int baseTimestampDivide,
114 int timestampDiffSize
115 ) {
116 this.randomizer = randomizer;
117
118 this.commonPrefix = DEFAULT_COMMON_PREFIX;
119 this.numberOfRowPrefixes = numberOfRowPrefixes;
120 this.averagePrefixLength = averagePrefixLength;
121 this.prefixLengthVariance = prefixLengthVariance;
122 this.averageSuffixLength = averageSuffixLength;
123 this.suffixLengthVariance = suffixLengthVariance;
124 this.numberOfRows = numberOfRows;
125
126 this.chanceForSameQualifier = chanceForSameQualifier;
127 this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
128 this.averageQualifierLength = averageQualifierLength;
129 this.qualifierLengthVariance = qualifierLengthVariance;
130
131 this.columnFamilyLength = columnFamilyLength;
132 this.valueLength = valueLength;
133 this.chanceForZeroValue = chanceForZeroValue;
134
135 this.baseTimestampDivide = baseTimestampDivide;
136 this.timestampDiffSize = timestampDiffSize;
137 }
138
139
140 private Random randomizer;
141
142
143 private byte[] commonPrefix;
144 private int numberOfRowPrefixes;
145 private int averagePrefixLength = 6;
146 private int prefixLengthVariance = 3;
147 private int averageSuffixLength = 3;
148 private int suffixLengthVariance = 3;
149 private int numberOfRows = 500;
150
151
152 private byte[] family;
153
154
155 private float chanceForSameQualifier = 0.5f;
156 private float chanceForSimilarQualifier = 0.4f;
157 private int averageQualifierLength = 9;
158 private int qualifierLengthVariance = 3;
159
160 private int columnFamilyLength = 9;
161 private int valueLength = 8;
162 private float chanceForZeroValue = 0.5f;
163
164 private int baseTimestampDivide = 1000000;
165 private int timestampDiffSize = 100000000;
166
167 private List<byte[]> generateRows() {
168
169 List<byte[]> prefixes = new ArrayList<byte[]>();
170 prefixes.add(new byte[0]);
171 for (int i = 1; i < numberOfRowPrefixes; ++i) {
172 int prefixLength = averagePrefixLength;
173 prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
174 prefixLengthVariance;
175 byte[] newPrefix = new byte[prefixLength];
176 randomizer.nextBytes(newPrefix);
177 byte[] newPrefixWithCommon = newPrefix;
178 prefixes.add(newPrefixWithCommon);
179 }
180
181
182 List<byte[]> rows = new ArrayList<byte[]>();
183 for (int i = 0; i < numberOfRows; ++i) {
184 int suffixLength = averageSuffixLength;
185 suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
186 suffixLengthVariance;
187 int randomPrefix = randomizer.nextInt(prefixes.size());
188 byte[] row = new byte[prefixes.get(randomPrefix).length +
189 suffixLength];
190 byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
191 rows.add(rowWithCommonPrefix);
192 }
193
194 return rows;
195 }
196
197
198
199
200
201
202 public List<KeyValue> generateTestKeyValues(int howMany) {
203 List<KeyValue> result = new ArrayList<KeyValue>();
204
205 List<byte[]> rows = generateRows();
206 Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<Integer, List<byte[]>>();
207
208 if(family==null){
209 family = new byte[columnFamilyLength];
210 randomizer.nextBytes(family);
211 }
212
213 long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
214
215 byte[] value = new byte[valueLength];
216
217 for (int i = 0; i < howMany; ++i) {
218 long timestamp = baseTimestamp;
219 if(timestampDiffSize > 0){
220 timestamp += randomizer.nextInt(timestampDiffSize);
221 }
222 Integer rowId = randomizer.nextInt(rows.size());
223 byte[] row = rows.get(rowId);
224
225
226
227 byte[] qualifier;
228 float qualifierChance = randomizer.nextFloat();
229 if (!rowsToQualifier.containsKey(rowId)
230 || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
231 int qualifierLength = averageQualifierLength;
232 qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
233 - qualifierLengthVariance;
234 qualifier = new byte[qualifierLength];
235 randomizer.nextBytes(qualifier);
236
237
238 if (!rowsToQualifier.containsKey(rowId)) {
239 rowsToQualifier.put(rowId, new ArrayList<byte[]>());
240 }
241 rowsToQualifier.get(rowId).add(qualifier);
242 } else if (qualifierChance > chanceForSameQualifier) {
243
244 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
245 byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
246 .size()));
247
248 qualifier = new byte[originalQualifier.length];
249 int commonPrefix = randomizer.nextInt(qualifier.length);
250 System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
251 for (int j = commonPrefix; j < qualifier.length; ++j) {
252 qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
253 }
254
255 rowsToQualifier.get(rowId).add(qualifier);
256 } else {
257
258 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
259 qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
260 }
261
262 if (randomizer.nextFloat() < chanceForZeroValue) {
263 for (int j = 0; j < value.length; ++j) {
264 value[j] = (byte) 0;
265 }
266 } else {
267 randomizer.nextBytes(value);
268 }
269
270 result.add(new KeyValue(row, family, qualifier, timestamp, value));
271 }
272
273 Collections.sort(result, KeyValue.COMPARATOR);
274
275 return result;
276 }
277
278
279
280
281
282
283 public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
284 boolean includesMemstoreTS) {
285 int totalSize = 0;
286 for (KeyValue kv : keyValues) {
287 totalSize += kv.getLength();
288 if (includesMemstoreTS) {
289 totalSize += WritableUtils.getVIntSize(kv.getMvccVersion());
290 }
291 }
292
293 ByteBuffer result = ByteBuffer.allocate(totalSize);
294 for (KeyValue kv : keyValues) {
295 result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
296 if (includesMemstoreTS) {
297 ByteBufferUtils.writeVLong(result, kv.getMvccVersion());
298 }
299 }
300
301 return result;
302 }
303
304
305
306
307 public RedundantKVGenerator setCommonPrefix(byte[] prefix){
308 this.commonPrefix = prefix;
309 return this;
310 }
311
312 public RedundantKVGenerator setRandomizer(Random randomizer) {
313 this.randomizer = randomizer;
314 return this;
315 }
316
317 public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) {
318 this.numberOfRowPrefixes = numberOfRowPrefixes;
319 return this;
320 }
321
322 public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) {
323 this.averagePrefixLength = averagePrefixLength;
324 return this;
325 }
326
327 public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) {
328 this.prefixLengthVariance = prefixLengthVariance;
329 return this;
330 }
331
332 public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) {
333 this.averageSuffixLength = averageSuffixLength;
334 return this;
335 }
336
337 public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) {
338 this.suffixLengthVariance = suffixLengthVariance;
339 return this;
340 }
341
342 public RedundantKVGenerator setNumberOfRows(int numberOfRows) {
343 this.numberOfRows = numberOfRows;
344 return this;
345 }
346
347 public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) {
348 this.chanceForSameQualifier = chanceForSameQualifier;
349 return this;
350 }
351
352 public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimiliarQualifier) {
353 this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
354 return this;
355 }
356
357 public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) {
358 this.averageQualifierLength = averageQualifierLength;
359 return this;
360 }
361
362 public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) {
363 this.qualifierLengthVariance = qualifierLengthVariance;
364 return this;
365 }
366
367 public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) {
368 this.columnFamilyLength = columnFamilyLength;
369 return this;
370 }
371
372 public RedundantKVGenerator setFamily(byte[] family) {
373 this.family = family;
374 this.columnFamilyLength = family.length;
375 return this;
376 }
377
378 public RedundantKVGenerator setValueLength(int valueLength) {
379 this.valueLength = valueLength;
380 return this;
381 }
382
383 public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) {
384 this.chanceForZeroValue = chanceForZeroValue;
385 return this;
386 }
387
388 public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) {
389 this.baseTimestampDivide = baseTimestampDivide;
390 return this;
391 }
392
393 public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) {
394 this.timestampDiffSize = timestampDiffSize;
395 return this;
396 }
397
398 }