1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertTrue;
24
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.Collection;
28 import java.util.Collections;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.Random;
34 import java.util.Set;
35 import java.util.TreeSet;
36
37 import org.apache.commons.lang.ArrayUtils;
38 import org.apache.commons.logging.Log;
39 import org.apache.commons.logging.LogFactory;
40 import org.apache.hadoop.hbase.CellComparator;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.KeyValue;
44 import org.apache.hadoop.hbase.KeyValueTestUtil;
45 import org.apache.hadoop.hbase.MediumTests;
46 import org.apache.hadoop.hbase.client.Delete;
47 import org.apache.hadoop.hbase.client.Put;
48 import org.apache.hadoop.hbase.client.Scan;
49 import org.apache.hadoop.hbase.io.compress.Compression;
50 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
51 import org.apache.hadoop.hbase.util.Bytes;
52 import org.junit.Test;
53 import org.junit.experimental.categories.Category;
54 import org.junit.runner.RunWith;
55 import org.junit.runners.Parameterized;
56 import org.junit.runners.Parameterized.Parameters;
57
58
59
60
61 @RunWith(Parameterized.class)
62 @Category(MediumTests.class)
63 public class TestMultiColumnScanner {
64
65 private static final Log LOG = LogFactory.getLog(TestMultiColumnScanner.class);
66
67 private static final String TABLE_NAME =
68 TestMultiColumnScanner.class.getSimpleName();
69
70 static final int MAX_VERSIONS = 50;
71
72 private static final String FAMILY = "CF";
73 private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
74
75
76
77
78
79 private static final int NUM_COLUMNS = 8;
80
81 private static final int MAX_COLUMN_BIT_MASK = 1 << NUM_COLUMNS - 1;
82 private static final int NUM_FLUSHES = 10;
83 private static final int NUM_ROWS = 20;
84
85
86 private static final long BIG_LONG = 9111222333444555666L;
87
88
89
90
91
92 private static final long[] TIMESTAMPS = new long[] { 1, 3, 5,
93 Integer.MAX_VALUE, BIG_LONG, Long.MAX_VALUE - 1 };
94
95
96 private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;
97
98
99 private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1;
100
101
102 private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1;
103
104
105 private static final double DELETE_PROBABILITY = 0.02;
106
107 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
108
109 private final Compression.Algorithm comprAlgo;
110 private final BloomType bloomType;
111 private final DataBlockEncoding dataBlockEncoding;
112
113
114 static {
115 assertTrue(BIG_LONG > 0.9 * Long.MAX_VALUE);
116
117
118 for (int i = 0; i < TIMESTAMPS.length - 1; ++i)
119 assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
120 }
121
122 @Parameters
123 public static final Collection<Object[]> parameters() {
124 List<Object[]> parameters = new ArrayList<Object[]>();
125 for (Object[] bloomAndCompressionParams :
126 HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) {
127 for (boolean useDataBlockEncoding : new boolean[]{false, true}) {
128 parameters.add(ArrayUtils.add(bloomAndCompressionParams,
129 useDataBlockEncoding));
130 }
131 }
132 return parameters;
133 }
134
135 public TestMultiColumnScanner(Compression.Algorithm comprAlgo,
136 BloomType bloomType, boolean useDataBlockEncoding) {
137 this.comprAlgo = comprAlgo;
138 this.bloomType = bloomType;
139 this.dataBlockEncoding = useDataBlockEncoding ? DataBlockEncoding.PREFIX :
140 DataBlockEncoding.NONE;
141 }
142
143 @Test
144 public void testMultiColumnScanner() throws IOException {
145 HRegion region = TEST_UTIL.createTestRegion(TABLE_NAME,
146 new HColumnDescriptor(FAMILY)
147 .setCompressionType(comprAlgo)
148 .setBloomFilterType(bloomType)
149 .setMaxVersions(MAX_VERSIONS)
150 .setDataBlockEncoding(dataBlockEncoding)
151 );
152 List<String> rows = sequentialStrings("row", NUM_ROWS);
153 List<String> qualifiers = sequentialStrings("qual", NUM_COLUMNS);
154 List<KeyValue> kvs = new ArrayList<KeyValue>();
155 Set<String> keySet = new HashSet<String>();
156
157
158
159 Map<String, Long> lastDelTimeMap = new HashMap<String, Long>();
160
161 Random rand = new Random(29372937L);
162 Set<String> rowQualSkip = new HashSet<String>();
163
164
165
166 for (String row : rows)
167 for (String qual : qualifiers)
168 if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) {
169 LOG.info("Skipping " + qual + " in row " + row);
170 rowQualSkip.add(rowQualKey(row, qual));
171 }
172
173
174 for (String qual : qualifiers)
175 if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) {
176 LOG.info("Skipping " + qual + " in all rows");
177 for (String row : rows)
178 rowQualSkip.add(rowQualKey(row, qual));
179 }
180
181 for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
182 for (String qual : qualifiers) {
183
184
185 if (rand.nextDouble() < COLUMN_SKIP_IN_STORE_FILE_PROB)
186 continue;
187
188 byte[] qualBytes = Bytes.toBytes(qual);
189 for (String row : rows) {
190 Put p = new Put(Bytes.toBytes(row));
191 for (long ts : TIMESTAMPS) {
192 String value = createValue(row, qual, ts);
193 KeyValue kv = KeyValueTestUtil.create(row, FAMILY, qual, ts,
194 value);
195 assertEquals(kv.getTimestamp(), ts);
196 p.add(kv);
197 String keyAsString = kv.toString();
198 if (!keySet.contains(keyAsString)) {
199 keySet.add(keyAsString);
200 kvs.add(kv);
201 }
202 }
203 region.put(p);
204
205 Delete d = new Delete(Bytes.toBytes(row));
206 boolean deletedSomething = false;
207 for (long ts : TIMESTAMPS)
208 if (rand.nextDouble() < DELETE_PROBABILITY) {
209 d.deleteColumns(FAMILY_BYTES, qualBytes, ts);
210 String rowAndQual = row + "_" + qual;
211 Long whenDeleted = lastDelTimeMap.get(rowAndQual);
212 lastDelTimeMap.put(rowAndQual, whenDeleted == null ? ts
213 : Math.max(ts, whenDeleted));
214 deletedSomething = true;
215 }
216 if (deletedSomething)
217 region.delete(d);
218 }
219 }
220 region.flushcache();
221 }
222
223 Collections.sort(kvs, KeyValue.COMPARATOR);
224 for (int maxVersions = 1; maxVersions <= TIMESTAMPS.length; ++maxVersions) {
225 for (int columnBitMask = 1; columnBitMask <= MAX_COLUMN_BIT_MASK; ++columnBitMask) {
226 Scan scan = new Scan();
227 scan.setMaxVersions(maxVersions);
228 Set<String> qualSet = new TreeSet<String>();
229 {
230 int columnMaskTmp = columnBitMask;
231 for (String qual : qualifiers) {
232 if ((columnMaskTmp & 1) != 0) {
233 scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qual));
234 qualSet.add(qual);
235 }
236 columnMaskTmp >>= 1;
237 }
238 assertEquals(0, columnMaskTmp);
239 }
240
241 InternalScanner scanner = region.getScanner(scan);
242 List<KeyValue> results = new ArrayList<KeyValue>();
243
244 int kvPos = 0;
245 int numResults = 0;
246 String queryInfo = "columns queried: " + qualSet + " (columnBitMask="
247 + columnBitMask + "), maxVersions=" + maxVersions;
248
249 while (scanner.next(results) || results.size() > 0) {
250 for (KeyValue kv : results) {
251 while (kvPos < kvs.size()
252 && !matchesQuery(kvs.get(kvPos), qualSet, maxVersions,
253 lastDelTimeMap)) {
254 ++kvPos;
255 }
256 String rowQual = getRowQualStr(kv);
257 String deleteInfo = "";
258 Long lastDelTS = lastDelTimeMap.get(rowQual);
259 if (lastDelTS != null) {
260 deleteInfo = "; last timestamp when row/column " + rowQual
261 + " was deleted: " + lastDelTS;
262 }
263 assertTrue("Scanner returned additional key/value: " + kv + ", "
264 + queryInfo + deleteInfo + ";", kvPos < kvs.size());
265 assertTrue("Scanner returned wrong key/value; " + queryInfo
266 + deleteInfo + ";", CellComparator.equalsIgnoreMvccVersion(kvs.get(kvPos), (kv)));
267 ++kvPos;
268 ++numResults;
269 }
270 results.clear();
271 }
272 for (; kvPos < kvs.size(); ++kvPos) {
273 KeyValue remainingKV = kvs.get(kvPos);
274 assertFalse("Matching column not returned by scanner: "
275 + remainingKV + ", " + queryInfo + ", results returned: "
276 + numResults, matchesQuery(remainingKV, qualSet, maxVersions,
277 lastDelTimeMap));
278 }
279 }
280 }
281 assertTrue("This test is supposed to delete at least some row/column " +
282 "pairs", lastDelTimeMap.size() > 0);
283 LOG.info("Number of row/col pairs deleted at least once: " +
284 lastDelTimeMap.size());
285 HRegion.closeHRegion(region);
286 }
287
288 private static String getRowQualStr(KeyValue kv) {
289 String rowStr = Bytes.toString(kv.getBuffer(), kv.getRowOffset(),
290 kv.getRowLength());
291 String qualStr = Bytes.toString(kv.getBuffer(), kv.getQualifierOffset(),
292 kv.getQualifierLength());
293 return rowStr + "_" + qualStr;
294 }
295
296 private static boolean matchesQuery(KeyValue kv, Set<String> qualSet,
297 int maxVersions, Map<String, Long> lastDelTimeMap) {
298 Long lastDelTS = lastDelTimeMap.get(getRowQualStr(kv));
299 long ts = kv.getTimestamp();
300 return qualSet.contains(qualStr(kv))
301 && ts >= TIMESTAMPS[TIMESTAMPS.length - maxVersions]
302 && (lastDelTS == null || ts > lastDelTS);
303 }
304
305 private static String qualStr(KeyValue kv) {
306 return Bytes.toString(kv.getBuffer(), kv.getQualifierOffset(),
307 kv.getQualifierLength());
308 }
309
310 private static String rowQualKey(String row, String qual) {
311 return row + "_" + qual;
312 }
313
314 static String createValue(String row, String qual, long ts) {
315 return "value_for_" + row + "_" + qual + "_" + ts;
316 }
317
318 private static List<String> sequentialStrings(String prefix, int n) {
319 List<String> lst = new ArrayList<String>();
320 for (int i = 0; i < n; ++i) {
321 StringBuilder sb = new StringBuilder();
322 sb.append(prefix + i);
323
324
325 int iBitShifted = i;
326 while (iBitShifted != 0) {
327 sb.append((iBitShifted & 1) == 0 ? 'a' : 'b');
328 iBitShifted >>= 1;
329 }
330
331 lst.add(sb.toString());
332 }
333
334 return lst;
335 }
336
337
338 }
339