View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.util.test;
18  
19  import java.nio.ByteBuffer;
20  import java.util.ArrayList;
21  import java.util.Collections;
22  import java.util.HashMap;
23  import java.util.List;
24  import java.util.Map;
25  import java.util.Random;
26  
27  import org.apache.hadoop.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.KeyValue;
29  import org.apache.hadoop.hbase.util.ByteBufferUtils;
30  import org.apache.hadoop.io.WritableUtils;
31  
32  import com.google.common.primitives.Bytes;
33  
34  /**
35   * Generate list of key values which are very useful to test data block encoding
36   * and compression.
37   */
38  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
39      value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
40      justification="Should probably fix")
41  @InterfaceAudience.Private
42  public class RedundantKVGenerator {
43    // row settings
44    static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
45    static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
46    static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
47    static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
48    static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
49    static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
50    static int DEFAULT_NUMBER_OF_ROW = 500;
51  
52    // qualifier
53    static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
54    static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
55    static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
56    static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
57  
58    static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
59    static int DEFAULT_VALUE_LENGTH = 8;
60    static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
61  
62    static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
63    static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
64  
65    /**
66     * Default constructor, assumes all parameters from class constants.
67     */
68    public RedundantKVGenerator() {
69      this(new Random(42L),
70          DEFAULT_NUMBER_OF_ROW_PREFIXES,
71          DEFAULT_AVERAGE_PREFIX_LENGTH,
72          DEFAULT_PREFIX_LENGTH_VARIANCE,
73          DEFAULT_AVERAGE_SUFFIX_LENGTH,
74          DEFAULT_SUFFIX_LENGTH_VARIANCE,
75          DEFAULT_NUMBER_OF_ROW,
76  
77          DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
78          DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
79          DEFAULT_AVERAGE_QUALIFIER_LENGTH,
80          DEFAULT_QUALIFIER_LENGTH_VARIANCE,
81  
82          DEFAULT_COLUMN_FAMILY_LENGTH,
83          DEFAULT_VALUE_LENGTH,
84          DEFAULT_CHANCE_FOR_ZERO_VALUE,
85  
86          DEFAULT_BASE_TIMESTAMP_DIVIDE,
87          DEFAULT_TIMESTAMP_DIFF_SIZE
88      );
89    }
90  
91  
92    /**
93     * Various configuration options for generating key values
94     * @param randomizer pick things by random
95     */
96    public RedundantKVGenerator(Random randomizer,
97        int numberOfRowPrefixes,
98        int averagePrefixLength,
99        int prefixLengthVariance,
100       int averageSuffixLength,
101       int suffixLengthVariance,
102       int numberOfRows,
103 
104       float chanceForSameQualifier,
105       float chanceForSimiliarQualifier,
106       int averageQualifierLength,
107       int qualifierLengthVariance,
108 
109       int columnFamilyLength,
110       int valueLength,
111       float chanceForZeroValue,
112 
113       int baseTimestampDivide,
114       int timestampDiffSize
115       ) {
116     this.randomizer = randomizer;
117 
118     this.commonPrefix = DEFAULT_COMMON_PREFIX;
119     this.numberOfRowPrefixes = numberOfRowPrefixes;
120     this.averagePrefixLength = averagePrefixLength;
121     this.prefixLengthVariance = prefixLengthVariance;
122     this.averageSuffixLength = averageSuffixLength;
123     this.suffixLengthVariance = suffixLengthVariance;
124     this.numberOfRows = numberOfRows;
125 
126     this.chanceForSameQualifier = chanceForSameQualifier;
127     this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
128     this.averageQualifierLength = averageQualifierLength;
129     this.qualifierLengthVariance = qualifierLengthVariance;
130 
131     this.columnFamilyLength = columnFamilyLength;
132     this.valueLength = valueLength;
133     this.chanceForZeroValue = chanceForZeroValue;
134 
135     this.baseTimestampDivide = baseTimestampDivide;
136     this.timestampDiffSize = timestampDiffSize;
137   }
138 
139   /** Used to generate dataset */
140   private Random randomizer;
141 
142   // row settings
143   private byte[] commonPrefix;//global prefix before rowPrefixes
144   private int numberOfRowPrefixes;
145   private int averagePrefixLength = 6;
146   private int prefixLengthVariance = 3;
147   private int averageSuffixLength = 3;
148   private int suffixLengthVariance = 3;
149   private int numberOfRows = 500;
150 
151   //family
152   private byte[] family;
153 
154   // qualifier
155   private float chanceForSameQualifier = 0.5f;
156   private float chanceForSimilarQualifier = 0.4f;
157   private int averageQualifierLength = 9;
158   private int qualifierLengthVariance = 3;
159 
160   private int columnFamilyLength = 9;
161   private int valueLength = 8;
162   private float chanceForZeroValue = 0.5f;
163 
164   private int baseTimestampDivide = 1000000;
165   private int timestampDiffSize = 100000000;
166 
167   private List<byte[]> generateRows() {
168     // generate prefixes
169     List<byte[]> prefixes = new ArrayList<byte[]>();
170     prefixes.add(new byte[0]);
171     for (int i = 1; i < numberOfRowPrefixes; ++i) {
172       int prefixLength = averagePrefixLength;
173       prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
174           prefixLengthVariance;
175       byte[] newPrefix = new byte[prefixLength];
176       randomizer.nextBytes(newPrefix);
177       byte[] newPrefixWithCommon = newPrefix;
178       prefixes.add(newPrefixWithCommon);
179     }
180 
181     // generate rest of the row
182     List<byte[]> rows = new ArrayList<byte[]>();
183     for (int i = 0; i < numberOfRows; ++i) {
184       int suffixLength = averageSuffixLength;
185       suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
186           suffixLengthVariance;
187       int randomPrefix = randomizer.nextInt(prefixes.size());
188       byte[] row = new byte[prefixes.get(randomPrefix).length +
189                             suffixLength];
190       byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
191       rows.add(rowWithCommonPrefix);
192     }
193 
194     return rows;
195   }
196 
197   /**
198    * Generate test data useful to test encoders.
199    * @param howMany How many Key values should be generated.
200    * @return sorted list of key values
201    */
202   public List<KeyValue> generateTestKeyValues(int howMany) {
203     List<KeyValue> result = new ArrayList<KeyValue>();
204 
205     List<byte[]> rows = generateRows();
206     Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<Integer, List<byte[]>>();
207 
208     if(family==null){
209       family = new byte[columnFamilyLength];
210       randomizer.nextBytes(family);
211     }
212 
213     long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
214 
215     byte[] value = new byte[valueLength];
216 
217     for (int i = 0; i < howMany; ++i) {
218       long timestamp = baseTimestamp;
219       if(timestampDiffSize > 0){
220         timestamp += randomizer.nextInt(timestampDiffSize);
221       }
222       Integer rowId = randomizer.nextInt(rows.size());
223       byte[] row = rows.get(rowId);
224 
225       // generate qualifier, sometimes it is same, sometimes similar,
226       // occasionally completely different
227       byte[] qualifier;
228       float qualifierChance = randomizer.nextFloat();
229       if (!rowsToQualifier.containsKey(rowId)
230           || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
231         int qualifierLength = averageQualifierLength;
232         qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
233             - qualifierLengthVariance;
234         qualifier = new byte[qualifierLength];
235         randomizer.nextBytes(qualifier);
236 
237         // add it to map
238         if (!rowsToQualifier.containsKey(rowId)) {
239           rowsToQualifier.put(rowId, new ArrayList<byte[]>());
240         }
241         rowsToQualifier.get(rowId).add(qualifier);
242       } else if (qualifierChance > chanceForSameQualifier) {
243         // similar qualifier
244         List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
245         byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
246             .size()));
247 
248         qualifier = new byte[originalQualifier.length];
249         int commonPrefix = randomizer.nextInt(qualifier.length);
250         System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
251         for (int j = commonPrefix; j < qualifier.length; ++j) {
252           qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
253         }
254 
255         rowsToQualifier.get(rowId).add(qualifier);
256       } else {
257         // same qualifier
258         List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
259         qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
260       }
261 
262       if (randomizer.nextFloat() < chanceForZeroValue) {
263         for (int j = 0; j < value.length; ++j) {
264           value[j] = (byte) 0;
265         }
266       } else {
267         randomizer.nextBytes(value);
268       }
269 
270       result.add(new KeyValue(row, family, qualifier, timestamp, value));
271     }
272 
273     Collections.sort(result, KeyValue.COMPARATOR);
274 
275     return result;
276   }
277 
278   /**
279    * Convert list of KeyValues to byte buffer.
280    * @param keyValues list of KeyValues to be converted.
281    * @return buffer with content from key values
282    */
283   public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
284       boolean includesMemstoreTS) {
285     int totalSize = 0;
286     for (KeyValue kv : keyValues) {
287       totalSize += kv.getLength();
288       if (includesMemstoreTS) {
289         totalSize += WritableUtils.getVIntSize(kv.getMvccVersion());
290       }
291     }
292 
293     ByteBuffer result = ByteBuffer.allocate(totalSize);
294     for (KeyValue kv : keyValues) {
295       result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
296       if (includesMemstoreTS) {
297         ByteBufferUtils.writeVLong(result, kv.getMvccVersion());
298       }
299     }
300 
301     return result;
302   }
303   
304   
305   /************************ get/set ***********************************/
306   
307   public RedundantKVGenerator setCommonPrefix(byte[] prefix){
308     this.commonPrefix = prefix;
309     return this;
310   }
311 
312   public RedundantKVGenerator setRandomizer(Random randomizer) {
313     this.randomizer = randomizer;
314     return this;
315   }
316 
317   public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) {
318     this.numberOfRowPrefixes = numberOfRowPrefixes;
319     return this;
320   }
321 
322   public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) {
323     this.averagePrefixLength = averagePrefixLength;
324     return this;
325   }
326 
327   public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) {
328     this.prefixLengthVariance = prefixLengthVariance;
329     return this;
330   }
331 
332   public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) {
333     this.averageSuffixLength = averageSuffixLength;
334     return this;
335   }
336 
337   public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) {
338     this.suffixLengthVariance = suffixLengthVariance;
339     return this;
340   }
341 
342   public RedundantKVGenerator setNumberOfRows(int numberOfRows) {
343     this.numberOfRows = numberOfRows;
344     return this;
345   }
346 
347   public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) {
348     this.chanceForSameQualifier = chanceForSameQualifier;
349     return this;
350   }
351 
352   public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimiliarQualifier) {
353     this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
354     return this;
355   }
356 
357   public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) {
358     this.averageQualifierLength = averageQualifierLength;
359     return this;
360   }
361 
362   public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) {
363     this.qualifierLengthVariance = qualifierLengthVariance;
364     return this;
365   }
366 
367   public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) {
368     this.columnFamilyLength = columnFamilyLength;
369     return this;
370   }
371 
372   public RedundantKVGenerator setFamily(byte[] family) {
373     this.family = family;
374     this.columnFamilyLength = family.length;
375     return this;
376   }
377 
378   public RedundantKVGenerator setValueLength(int valueLength) {
379     this.valueLength = valueLength;
380     return this;
381   }
382 
383   public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) {
384     this.chanceForZeroValue = chanceForZeroValue;
385     return this;
386   }
387 
388   public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) {
389     this.baseTimestampDivide = baseTimestampDivide;
390     return this;
391   }
392 
393   public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) {
394     this.timestampDiffSize = timestampDiffSize;
395     return this;
396   }
397   
398 }