1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.hadoop.hbase.util.test;
18
19 import java.nio.ByteBuffer;
20 import java.util.ArrayList;
21 import java.util.Collections;
22 import java.util.HashMap;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Random;
26
27 import org.apache.hadoop.hbase.KeyValue;
28 import org.apache.hadoop.hbase.util.ByteBufferUtils;
29 import org.apache.hadoop.io.WritableUtils;
30
31 import com.google.common.primitives.Bytes;
32
33
34
35
36
37 @edu.umd.cs.findbugs.annotations.SuppressWarnings(
38 value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
39 justification="Should probably fix")
40 public class RedundantKVGenerator {
41
42 static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
43 static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
44 static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
45 static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
46 static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
47 static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
48 static int DEFAULT_NUMBER_OF_ROW = 500;
49
50
51 static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
52 static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
53 static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
54 static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
55
56 static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
57 static int DEFAULT_VALUE_LENGTH = 8;
58 static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
59
60 static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
61 static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
62
63
64
65
66 public RedundantKVGenerator() {
67 this(new Random(42L),
68 DEFAULT_NUMBER_OF_ROW_PREFIXES,
69 DEFAULT_AVERAGE_PREFIX_LENGTH,
70 DEFAULT_PREFIX_LENGTH_VARIANCE,
71 DEFAULT_AVERAGE_SUFFIX_LENGTH,
72 DEFAULT_SUFFIX_LENGTH_VARIANCE,
73 DEFAULT_NUMBER_OF_ROW,
74
75 DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
76 DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
77 DEFAULT_AVERAGE_QUALIFIER_LENGTH,
78 DEFAULT_QUALIFIER_LENGTH_VARIANCE,
79
80 DEFAULT_COLUMN_FAMILY_LENGTH,
81 DEFAULT_VALUE_LENGTH,
82 DEFAULT_CHANCE_FOR_ZERO_VALUE,
83
84 DEFAULT_BASE_TIMESTAMP_DIVIDE,
85 DEFAULT_TIMESTAMP_DIFF_SIZE
86 );
87 }
88
89
90
91
92
93
94 public RedundantKVGenerator(Random randomizer,
95 int numberOfRowPrefixes,
96 int averagePrefixLength,
97 int prefixLengthVariance,
98 int averageSuffixLength,
99 int suffixLengthVariance,
100 int numberOfRows,
101
102 float chanceForSameQualifier,
103 float chanceForSimiliarQualifier,
104 int averageQualifierLength,
105 int qualifierLengthVariance,
106
107 int columnFamilyLength,
108 int valueLength,
109 float chanceForZeroValue,
110
111 int baseTimestampDivide,
112 int timestampDiffSize
113 ) {
114 this.randomizer = randomizer;
115
116 this.commonPrefix = DEFAULT_COMMON_PREFIX;
117 this.numberOfRowPrefixes = numberOfRowPrefixes;
118 this.averagePrefixLength = averagePrefixLength;
119 this.prefixLengthVariance = prefixLengthVariance;
120 this.averageSuffixLength = averageSuffixLength;
121 this.suffixLengthVariance = suffixLengthVariance;
122 this.numberOfRows = numberOfRows;
123
124 this.chanceForSameQualifier = chanceForSameQualifier;
125 this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
126 this.averageQualifierLength = averageQualifierLength;
127 this.qualifierLengthVariance = qualifierLengthVariance;
128
129 this.columnFamilyLength = columnFamilyLength;
130 this.valueLength = valueLength;
131 this.chanceForZeroValue = chanceForZeroValue;
132
133 this.baseTimestampDivide = baseTimestampDivide;
134 this.timestampDiffSize = timestampDiffSize;
135 }
136
137
138 private Random randomizer;
139
140
141 private byte[] commonPrefix;
142 private int numberOfRowPrefixes;
143 private int averagePrefixLength = 6;
144 private int prefixLengthVariance = 3;
145 private int averageSuffixLength = 3;
146 private int suffixLengthVariance = 3;
147 private int numberOfRows = 500;
148
149
150 private byte[] family;
151
152
153 private float chanceForSameQualifier = 0.5f;
154 private float chanceForSimilarQualifier = 0.4f;
155 private int averageQualifierLength = 9;
156 private int qualifierLengthVariance = 3;
157
158 private int columnFamilyLength = 9;
159 private int valueLength = 8;
160 private float chanceForZeroValue = 0.5f;
161
162 private int baseTimestampDivide = 1000000;
163 private int timestampDiffSize = 100000000;
164
165 private List<byte[]> generateRows() {
166
167 List<byte[]> prefixes = new ArrayList<byte[]>();
168 prefixes.add(new byte[0]);
169 for (int i = 1; i < numberOfRowPrefixes; ++i) {
170 int prefixLength = averagePrefixLength;
171 prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
172 prefixLengthVariance;
173 byte[] newPrefix = new byte[prefixLength];
174 randomizer.nextBytes(newPrefix);
175 byte[] newPrefixWithCommon = newPrefix;
176 prefixes.add(newPrefixWithCommon);
177 }
178
179
180 List<byte[]> rows = new ArrayList<byte[]>();
181 for (int i = 0; i < numberOfRows; ++i) {
182 int suffixLength = averageSuffixLength;
183 suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
184 suffixLengthVariance;
185 int randomPrefix = randomizer.nextInt(prefixes.size());
186 byte[] row = new byte[prefixes.get(randomPrefix).length +
187 suffixLength];
188 byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
189 rows.add(rowWithCommonPrefix);
190 }
191
192 return rows;
193 }
194
195
196
197
198
199
200 public List<KeyValue> generateTestKeyValues(int howMany) {
201 List<KeyValue> result = new ArrayList<KeyValue>();
202
203 List<byte[]> rows = generateRows();
204 Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<Integer, List<byte[]>>();
205
206 if(family==null){
207 family = new byte[columnFamilyLength];
208 randomizer.nextBytes(family);
209 }
210
211 long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
212
213 byte[] value = new byte[valueLength];
214
215 for (int i = 0; i < howMany; ++i) {
216 long timestamp = baseTimestamp;
217 if(timestampDiffSize > 0){
218 timestamp += randomizer.nextInt(timestampDiffSize);
219 }
220 Integer rowId = randomizer.nextInt(rows.size());
221 byte[] row = rows.get(rowId);
222
223
224
225 byte[] qualifier;
226 float qualifierChance = randomizer.nextFloat();
227 if (!rowsToQualifier.containsKey(rowId)
228 || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
229 int qualifierLength = averageQualifierLength;
230 qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
231 - qualifierLengthVariance;
232 qualifier = new byte[qualifierLength];
233 randomizer.nextBytes(qualifier);
234
235
236 if (!rowsToQualifier.containsKey(rowId)) {
237 rowsToQualifier.put(rowId, new ArrayList<byte[]>());
238 }
239 rowsToQualifier.get(rowId).add(qualifier);
240 } else if (qualifierChance > chanceForSameQualifier) {
241
242 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
243 byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
244 .size()));
245
246 qualifier = new byte[originalQualifier.length];
247 int commonPrefix = randomizer.nextInt(qualifier.length);
248 System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
249 for (int j = commonPrefix; j < qualifier.length; ++j) {
250 qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
251 }
252
253 rowsToQualifier.get(rowId).add(qualifier);
254 } else {
255
256 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
257 qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
258 }
259
260 if (randomizer.nextFloat() < chanceForZeroValue) {
261 for (int j = 0; j < value.length; ++j) {
262 value[j] = (byte) 0;
263 }
264 } else {
265 randomizer.nextBytes(value);
266 }
267
268 result.add(new KeyValue(row, family, qualifier, timestamp, value));
269 }
270
271 Collections.sort(result, KeyValue.COMPARATOR);
272
273 return result;
274 }
275
276
277
278
279
280
281 public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
282 boolean includesMemstoreTS) {
283 int totalSize = 0;
284 for (KeyValue kv : keyValues) {
285 totalSize += kv.getLength();
286 if (includesMemstoreTS) {
287 totalSize += WritableUtils.getVIntSize(kv.getMemstoreTS());
288 }
289 }
290
291 ByteBuffer result = ByteBuffer.allocate(totalSize);
292 for (KeyValue kv : keyValues) {
293 result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
294 if (includesMemstoreTS) {
295 ByteBufferUtils.writeVLong(result, kv.getMemstoreTS());
296 }
297 }
298
299 return result;
300 }
301
302
303
304
305 public RedundantKVGenerator setCommonPrefix(byte[] prefix){
306 this.commonPrefix = prefix;
307 return this;
308 }
309
310 public RedundantKVGenerator setRandomizer(Random randomizer) {
311 this.randomizer = randomizer;
312 return this;
313 }
314
315 public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) {
316 this.numberOfRowPrefixes = numberOfRowPrefixes;
317 return this;
318 }
319
320 public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) {
321 this.averagePrefixLength = averagePrefixLength;
322 return this;
323 }
324
325 public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) {
326 this.prefixLengthVariance = prefixLengthVariance;
327 return this;
328 }
329
330 public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) {
331 this.averageSuffixLength = averageSuffixLength;
332 return this;
333 }
334
335 public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) {
336 this.suffixLengthVariance = suffixLengthVariance;
337 return this;
338 }
339
340 public RedundantKVGenerator setNumberOfRows(int numberOfRows) {
341 this.numberOfRows = numberOfRows;
342 return this;
343 }
344
345 public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) {
346 this.chanceForSameQualifier = chanceForSameQualifier;
347 return this;
348 }
349
350 public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimiliarQualifier) {
351 this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
352 return this;
353 }
354
355 public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) {
356 this.averageQualifierLength = averageQualifierLength;
357 return this;
358 }
359
360 public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) {
361 this.qualifierLengthVariance = qualifierLengthVariance;
362 return this;
363 }
364
365 public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) {
366 this.columnFamilyLength = columnFamilyLength;
367 return this;
368 }
369
370 public RedundantKVGenerator setFamily(byte[] family) {
371 this.family = family;
372 this.columnFamilyLength = family.length;
373 return this;
374 }
375
376 public RedundantKVGenerator setValueLength(int valueLength) {
377 this.valueLength = valueLength;
378 return this;
379 }
380
381 public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) {
382 this.chanceForZeroValue = chanceForZeroValue;
383 return this;
384 }
385
386 public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) {
387 this.baseTimestampDivide = baseTimestampDivide;
388 return this;
389 }
390
391 public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) {
392 this.timestampDiffSize = timestampDiffSize;
393 return this;
394 }
395
396 }