1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.regionserver;
21
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertFalse;
24 import static org.junit.Assert.assertTrue;
25
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import java.util.Collection;
29 import java.util.Collections;
30 import java.util.HashMap;
31 import java.util.HashSet;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.Random;
35 import java.util.Set;
36 import java.util.TreeSet;
37
38 import org.apache.commons.lang.ArrayUtils;
39 import org.apache.commons.logging.Log;
40 import org.apache.commons.logging.LogFactory;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.KeyValue;
44 import org.apache.hadoop.hbase.KeyValueTestUtil;
45 import org.apache.hadoop.hbase.MediumTests;
46 import org.apache.hadoop.hbase.client.Delete;
47 import org.apache.hadoop.hbase.client.Put;
48 import org.apache.hadoop.hbase.client.Scan;
49 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
50 import org.apache.hadoop.hbase.io.hfile.Compression;
51 import org.apache.hadoop.hbase.io.hfile.HFile;
52 import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
53 import org.apache.hadoop.hbase.util.Bytes;
54 import org.junit.Before;
55 import org.junit.Test;
56 import org.junit.experimental.categories.Category;
57 import org.junit.runner.RunWith;
58 import org.junit.runners.Parameterized;
59 import org.junit.runners.Parameterized.Parameters;
60
61
62
63
64 @RunWith(Parameterized.class)
65 @Category(MediumTests.class)
66 public class TestMultiColumnScanner {
67
68 private static final Log LOG = LogFactory.getLog(TestMultiColumnScanner.class);
69
70 private static final String TABLE_NAME =
71 TestMultiColumnScanner.class.getSimpleName();
72
73 static final int MAX_VERSIONS = 50;
74
75 private static final String FAMILY = "CF";
76 private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
77
78
79
80
81
82 private static final int NUM_COLUMNS = 8;
83
84 private static final int MAX_COLUMN_BIT_MASK = 1 << NUM_COLUMNS - 1;
85 private static final int NUM_FLUSHES = 10;
86 private static final int NUM_ROWS = 20;
87
88
89 private static final long BIG_LONG = 9111222333444555666L;
90
91
92
93
94
95 private static final long[] TIMESTAMPS = new long[] { 1, 3, 5,
96 Integer.MAX_VALUE, BIG_LONG, Long.MAX_VALUE - 1 };
97
98
99 private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;
100
101
102 private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1;
103
104
105 private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1;
106
107
108 private static final double DELETE_PROBABILITY = 0.02;
109
110 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
111
112 private final Compression.Algorithm comprAlgo;
113 private final StoreFile.BloomType bloomType;
114 private final DataBlockEncoding dataBlockEncoding;
115
116
117 static {
118 assertTrue(BIG_LONG > 0.9 * Long.MAX_VALUE);
119
120
121 for (int i = 0; i < TIMESTAMPS.length - 1; ++i)
122 assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
123 }
124
125 @Before
126 public void setUp() {
127 SchemaMetrics.configureGlobally(TEST_UTIL.getConfiguration());
128 }
129
130
131 @Parameters
132 public static final Collection<Object[]> parameters() {
133 List<Object[]> parameters = new ArrayList<Object[]>();
134 for (Object[] bloomAndCompressionParams :
135 HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) {
136 for (boolean useDataBlockEncoding : new boolean[]{false, true}) {
137 parameters.add(ArrayUtils.add(bloomAndCompressionParams,
138 useDataBlockEncoding));
139 }
140 }
141 return parameters;
142 }
143
144 public TestMultiColumnScanner(Compression.Algorithm comprAlgo,
145 StoreFile.BloomType bloomType, boolean useDataBlockEncoding) {
146 this.comprAlgo = comprAlgo;
147 this.bloomType = bloomType;
148 this.dataBlockEncoding = useDataBlockEncoding ? DataBlockEncoding.PREFIX :
149 DataBlockEncoding.NONE;
150 }
151
152 @Test
153 public void testMultiColumnScanner() throws IOException {
154 HRegion region = TEST_UTIL.createTestRegion(TABLE_NAME,
155 new HColumnDescriptor(FAMILY)
156 .setCompressionType(comprAlgo)
157 .setBloomFilterType(bloomType)
158 .setMaxVersions(MAX_VERSIONS)
159 .setDataBlockEncoding(dataBlockEncoding)
160 );
161 List<String> rows = sequentialStrings("row", NUM_ROWS);
162 List<String> qualifiers = sequentialStrings("qual", NUM_COLUMNS);
163 List<KeyValue> kvs = new ArrayList<KeyValue>();
164 Set<String> keySet = new HashSet<String>();
165
166
167
168 Map<String, Long> lastDelTimeMap = new HashMap<String, Long>();
169
170 Random rand = new Random(29372937L);
171 Set<String> rowQualSkip = new HashSet<String>();
172
173
174
175 for (String row : rows)
176 for (String qual : qualifiers)
177 if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) {
178 LOG.info("Skipping " + qual + " in row " + row);
179 rowQualSkip.add(rowQualKey(row, qual));
180 }
181
182
183 for (String qual : qualifiers)
184 if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) {
185 LOG.info("Skipping " + qual + " in all rows");
186 for (String row : rows)
187 rowQualSkip.add(rowQualKey(row, qual));
188 }
189
190 for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
191 for (String qual : qualifiers) {
192
193
194 if (rand.nextDouble() < COLUMN_SKIP_IN_STORE_FILE_PROB)
195 continue;
196
197 byte[] qualBytes = Bytes.toBytes(qual);
198 for (String row : rows) {
199 Put p = new Put(Bytes.toBytes(row));
200 for (long ts : TIMESTAMPS) {
201 String value = createValue(row, qual, ts);
202 KeyValue kv = KeyValueTestUtil.create(row, FAMILY, qual, ts,
203 value);
204 assertEquals(kv.getTimestamp(), ts);
205 p.add(kv);
206 String keyAsString = kv.toString();
207 if (!keySet.contains(keyAsString)) {
208 keySet.add(keyAsString);
209 kvs.add(kv);
210 }
211 }
212 region.put(p);
213
214 Delete d = new Delete(Bytes.toBytes(row));
215 boolean deletedSomething = false;
216 for (long ts : TIMESTAMPS)
217 if (rand.nextDouble() < DELETE_PROBABILITY) {
218 d.deleteColumns(FAMILY_BYTES, qualBytes, ts);
219 String rowAndQual = row + "_" + qual;
220 Long whenDeleted = lastDelTimeMap.get(rowAndQual);
221 lastDelTimeMap.put(rowAndQual, whenDeleted == null ? ts
222 : Math.max(ts, whenDeleted));
223 deletedSomething = true;
224 }
225 if (deletedSomething)
226 region.delete(d, null, true);
227 }
228 }
229 region.flushcache();
230 }
231
232 Collections.sort(kvs, KeyValue.COMPARATOR);
233 for (int maxVersions = 1; maxVersions <= TIMESTAMPS.length; ++maxVersions) {
234 for (int columnBitMask = 1; columnBitMask <= MAX_COLUMN_BIT_MASK; ++columnBitMask) {
235 Scan scan = new Scan();
236 scan.setMaxVersions(maxVersions);
237 Set<String> qualSet = new TreeSet<String>();
238 {
239 int columnMaskTmp = columnBitMask;
240 for (String qual : qualifiers) {
241 if ((columnMaskTmp & 1) != 0) {
242 scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qual));
243 qualSet.add(qual);
244 }
245 columnMaskTmp >>= 1;
246 }
247 assertEquals(0, columnMaskTmp);
248 }
249
250 InternalScanner scanner = region.getScanner(scan);
251 List<KeyValue> results = new ArrayList<KeyValue>();
252
253 int kvPos = 0;
254 int numResults = 0;
255 String queryInfo = "columns queried: " + qualSet + " (columnBitMask="
256 + columnBitMask + "), maxVersions=" + maxVersions;
257
258 while (scanner.next(results) || results.size() > 0) {
259 for (KeyValue kv : results) {
260 while (kvPos < kvs.size()
261 && !matchesQuery(kvs.get(kvPos), qualSet, maxVersions,
262 lastDelTimeMap)) {
263 ++kvPos;
264 }
265 String rowQual = getRowQualStr(kv);
266 String deleteInfo = "";
267 Long lastDelTS = lastDelTimeMap.get(rowQual);
268 if (lastDelTS != null) {
269 deleteInfo = "; last timestamp when row/column " + rowQual
270 + " was deleted: " + lastDelTS;
271 }
272 assertTrue("Scanner returned additional key/value: " + kv + ", "
273 + queryInfo + deleteInfo + ";", kvPos < kvs.size());
274 assertEquals("Scanner returned wrong key/value; " + queryInfo
275 + deleteInfo + ";", kvs.get(kvPos), kv);
276 ++kvPos;
277 ++numResults;
278 }
279 results.clear();
280 }
281 for (; kvPos < kvs.size(); ++kvPos) {
282 KeyValue remainingKV = kvs.get(kvPos);
283 assertFalse("Matching column not returned by scanner: "
284 + remainingKV + ", " + queryInfo + ", results returned: "
285 + numResults, matchesQuery(remainingKV, qualSet, maxVersions,
286 lastDelTimeMap));
287 }
288 }
289 }
290 assertTrue("This test is supposed to delete at least some row/column " +
291 "pairs", lastDelTimeMap.size() > 0);
292 LOG.info("Number of row/col pairs deleted at least once: " +
293 lastDelTimeMap.size());
294 region.close();
295 region.getLog().closeAndDelete();
296 }
297
298 private static String getRowQualStr(KeyValue kv) {
299 String rowStr = Bytes.toString(kv.getBuffer(), kv.getRowOffset(),
300 kv.getRowLength());
301 String qualStr = Bytes.toString(kv.getBuffer(), kv.getQualifierOffset(),
302 kv.getQualifierLength());
303 return rowStr + "_" + qualStr;
304 }
305
306 private static boolean matchesQuery(KeyValue kv, Set<String> qualSet,
307 int maxVersions, Map<String, Long> lastDelTimeMap) {
308 Long lastDelTS = lastDelTimeMap.get(getRowQualStr(kv));
309 long ts = kv.getTimestamp();
310 return qualSet.contains(qualStr(kv))
311 && ts >= TIMESTAMPS[TIMESTAMPS.length - maxVersions]
312 && (lastDelTS == null || ts > lastDelTS);
313 }
314
315 private static String qualStr(KeyValue kv) {
316 return Bytes.toString(kv.getBuffer(), kv.getQualifierOffset(),
317 kv.getQualifierLength());
318 }
319
320 private static String rowQualKey(String row, String qual) {
321 return row + "_" + qual;
322 }
323
324 static String createValue(String row, String qual, long ts) {
325 return "value_for_" + row + "_" + qual + "_" + ts;
326 }
327
328 private static List<String> sequentialStrings(String prefix, int n) {
329 List<String> lst = new ArrayList<String>();
330 for (int i = 0; i < n; ++i) {
331 StringBuilder sb = new StringBuilder();
332 sb.append(prefix + i);
333
334
335 int iBitShifted = i;
336 while (iBitShifted != 0) {
337 sb.append((iBitShifted & 1) == 0 ? 'a' : 'b');
338 iBitShifted >>= 1;
339 }
340
341 lst.add(sb.toString());
342 }
343
344 return lst;
345 }
346
347
348 @org.junit.Rule
349 public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
350 new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
351 }
352