1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.regionserver;
21
22 import static org.apache.hadoop.hbase.HBaseTestingUtility.assertKVListsEqual;
23 import static org.junit.Assert.assertTrue;
24
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.Arrays;
28 import java.util.Collection;
29 import java.util.Collections;
30 import java.util.HashMap;
31 import java.util.HashSet;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.Random;
35 import java.util.Set;
36
37 import org.apache.commons.logging.Log;
38 import org.apache.commons.logging.LogFactory;
39 import org.apache.hadoop.hbase.HBaseTestingUtility;
40 import org.apache.hadoop.hbase.HColumnDescriptor;
41 import org.apache.hadoop.hbase.HConstants;
42 import org.apache.hadoop.hbase.KeyValue;
43 import org.apache.hadoop.hbase.MediumTests;
44 import org.apache.hadoop.hbase.client.Delete;
45 import org.apache.hadoop.hbase.client.Put;
46 import org.apache.hadoop.hbase.client.Scan;
47 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
48 import org.apache.hadoop.hbase.io.hfile.Compression;
49 import org.apache.hadoop.hbase.io.hfile.HFile;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.junit.After;
52 import org.junit.Before;
53 import org.junit.Test;
54 import org.junit.experimental.categories.Category;
55 import org.junit.runner.RunWith;
56 import org.junit.runners.Parameterized;
57 import org.junit.runners.Parameterized.Parameters;
58
59
60
61
62
63 @RunWith(Parameterized.class)
64 @Category(MediumTests.class)
65 public class TestSeekOptimizations {
66
67 private static final Log LOG =
68 LogFactory.getLog(TestSeekOptimizations.class);
69
70
71 private static final String FAMILY = "myCF";
72 private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
73
74 private static final int PUTS_PER_ROW_COL = 50;
75 private static final int DELETES_PER_ROW_COL = 10;
76
77 private static final int NUM_ROWS = 3;
78 private static final int NUM_COLS = 3;
79
80 private static final boolean VERBOSE = false;
81
82
83
84
85
86 private static final boolean USE_MANY_STORE_FILES = true;
87
88 private static final int[][] COLUMN_SETS = new int[][] {
89 {},
90 {0},
91 {1},
92 {0, 2},
93 {1, 2},
94 {0, 1, 2},
95 };
96
97
98
99 private static final int[][] ROW_RANGES = new int[][] {
100 {-1, -1},
101 {0, 1},
102 {1, 1},
103 {1, 2},
104 {0, 2}
105 };
106
107 private static final int[] MAX_VERSIONS_VALUES = new int[] { 1, 2 };
108
109
110 private HRegion region;
111 private Put put;
112 private Delete del;
113 private Random rand;
114 private Set<Long> putTimestamps = new HashSet<Long>();
115 private Set<Long> delTimestamps = new HashSet<Long>();
116 private List<KeyValue> expectedKVs = new ArrayList<KeyValue>();
117
118 private Compression.Algorithm comprAlgo;
119 private StoreFile.BloomType bloomType;
120
121 private long totalSeekDiligent, totalSeekLazy;
122
123 private final static HBaseTestingUtility TEST_UTIL =
124 new HBaseTestingUtility();
125
126 @Parameters
127 public static final Collection<Object[]> parameters() {
128 return HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS;
129 }
130
/**
 * Creates one parameterized test instance.
 *
 * @param comprAlgo compression algorithm applied to the test column family
 * @param bloomType Bloom filter type applied to the test column family
 */
public TestSeekOptimizations(Compression.Algorithm comprAlgo,
    StoreFile.BloomType bloomType) {
  this.bloomType = bloomType;
  this.comprAlgo = comprAlgo;
}
136
137 @Before
138 public void setUp() {
139 rand = new Random(91238123L);
140 expectedKVs.clear();
141 }
142
143 @Test
144 public void testMultipleTimestampRanges() throws IOException {
145
146 StoreFileScanner.instrument();
147 region = TEST_UTIL.createTestRegion(TestSeekOptimizations.class.getName(),
148 new HColumnDescriptor(FAMILY)
149 .setCompressionType(comprAlgo)
150 .setBloomFilterType(bloomType)
151 );
152
153
154 final long latestDelTS = USE_MANY_STORE_FILES ? 1397 : -1;
155
156 createTimestampRange(1, 50, -1);
157 createTimestampRange(51, 100, -1);
158 if (USE_MANY_STORE_FILES) {
159 createTimestampRange(100, 500, 127);
160 createTimestampRange(900, 1300, -1);
161 createTimestampRange(1301, 2500, latestDelTS);
162 createTimestampRange(2502, 2598, -1);
163 createTimestampRange(2599, 2999, -1);
164 }
165
166 prepareExpectedKVs(latestDelTS);
167
168 for (int[] columnArr : COLUMN_SETS) {
169 for (int[] rowRange : ROW_RANGES) {
170 for (int maxVersions : MAX_VERSIONS_VALUES) {
171 for (boolean lazySeekEnabled : new boolean[] { false, true }) {
172 testScan(columnArr, lazySeekEnabled, rowRange[0], rowRange[1],
173 maxVersions);
174 }
175 }
176 }
177 }
178
179 final double seekSavings = 1 - totalSeekLazy * 1.0 / totalSeekDiligent;
180 System.err.println("For bloom=" + bloomType + ", compr=" + comprAlgo +
181 " total seeks without optimization: " + totalSeekDiligent
182 + ", with optimization: " + totalSeekLazy + " (" +
183 String.format("%.2f%%", totalSeekLazy * 100.0 / totalSeekDiligent) +
184 "), savings: " + String.format("%.2f%%",
185 100.0 * seekSavings) + "\n");
186
187
188
189 final double expectedSeekSavings = 0.0;
190 assertTrue("Lazy seek is only saving " +
191 String.format("%.2f%%", seekSavings * 100) + " seeks but should " +
192 "save at least " + String.format("%.2f%%", expectedSeekSavings * 100),
193 seekSavings >= expectedSeekSavings);
194 }
195
196 private void testScan(final int[] columnArr, final boolean lazySeekEnabled,
197 final int startRow, final int endRow, int maxVersions)
198 throws IOException {
199 StoreScanner.enableLazySeekGlobally(lazySeekEnabled);
200 final Scan scan = new Scan();
201 final Set<String> qualSet = new HashSet<String>();
202 for (int iColumn : columnArr) {
203 String qualStr = getQualStr(iColumn);
204 scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualStr));
205 qualSet.add(qualStr);
206 }
207 scan.setMaxVersions(maxVersions);
208 scan.setStartRow(rowBytes(startRow));
209
210
211 {
212 final byte[] scannerStopRow =
213 rowBytes(endRow + (startRow != endRow ? 1 : 0));
214 scan.setStopRow(scannerStopRow);
215 }
216
217 final long initialSeekCount = StoreFileScanner.getSeekCount();
218 final InternalScanner scanner = region.getScanner(scan);
219 final List<KeyValue> results = new ArrayList<KeyValue>();
220 final List<KeyValue> actualKVs = new ArrayList<KeyValue>();
221
222
223
224
225 boolean hasNext;
226 do {
227 hasNext = scanner.next(results);
228 actualKVs.addAll(results);
229 results.clear();
230 } while (hasNext);
231
232 List<KeyValue> filteredKVs = filterExpectedResults(qualSet,
233 rowBytes(startRow), rowBytes(endRow), maxVersions);
234 final String rowRestrictionStr =
235 (startRow == -1 && endRow == -1) ? "all rows" : (
236 startRow == endRow ? ("row=" + startRow) : ("startRow="
237 + startRow + ", " + "endRow=" + endRow));
238 final String columnRestrictionStr =
239 columnArr.length == 0 ? "all columns"
240 : ("columns=" + Arrays.toString(columnArr));
241 final String testDesc =
242 "Bloom=" + bloomType + ", compr=" + comprAlgo + ", "
243 + (scan.isGetScan() ? "Get" : "Scan") + ": "
244 + columnRestrictionStr + ", " + rowRestrictionStr
245 + ", maxVersions=" + maxVersions + ", lazySeek=" + lazySeekEnabled;
246 long seekCount = StoreFileScanner.getSeekCount() - initialSeekCount;
247 if (VERBOSE) {
248 System.err.println("Seek count: " + seekCount + ", KVs returned: "
249 + actualKVs.size() + ". " + testDesc +
250 (lazySeekEnabled ? "\n" : ""));
251 }
252 if (lazySeekEnabled) {
253 totalSeekLazy += seekCount;
254 } else {
255 totalSeekDiligent += seekCount;
256 }
257 assertKVListsEqual(testDesc, filteredKVs, actualKVs);
258 }
259
260 private List<KeyValue> filterExpectedResults(Set<String> qualSet,
261 byte[] startRow, byte[] endRow, int maxVersions) {
262 final List<KeyValue> filteredKVs = new ArrayList<KeyValue>();
263 final Map<String, Integer> verCount = new HashMap<String, Integer>();
264 for (KeyValue kv : expectedKVs) {
265 if (startRow.length > 0 &&
266 Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
267 startRow, 0, startRow.length) < 0) {
268 continue;
269 }
270
271
272 if (endRow.length > 0 &&
273 Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
274 endRow, 0, endRow.length) > 0) {
275 continue;
276 }
277
278 if (!qualSet.isEmpty() && (Bytes.compareTo(
279 kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
280 FAMILY_BYTES, 0, FAMILY_BYTES.length
281 ) != 0 ||
282 !qualSet.contains(Bytes.toString(kv.getQualifier())))) {
283 continue;
284 }
285
286 final String rowColStr =
287 Bytes.toStringBinary(kv.getRow()) + "/"
288 + Bytes.toStringBinary(kv.getFamily()) + ":"
289 + Bytes.toStringBinary(kv.getQualifier());
290 final Integer curNumVer = verCount.get(rowColStr);
291 final int newNumVer = curNumVer != null ? (curNumVer + 1) : 1;
292 if (newNumVer <= maxVersions) {
293 filteredKVs.add(kv);
294 verCount.put(rowColStr, newNumVer);
295 }
296 }
297
298 return filteredKVs;
299 }
300
301 private void prepareExpectedKVs(long latestDelTS) {
302 final List<KeyValue> filteredKVs = new ArrayList<KeyValue>();
303 for (KeyValue kv : expectedKVs) {
304 if (kv.getTimestamp() > latestDelTS || latestDelTS == -1) {
305 filteredKVs.add(kv);
306 }
307 }
308 expectedKVs = filteredKVs;
309 Collections.sort(expectedKVs, KeyValue.COMPARATOR);
310 }
311
312 public void put(String qual, long ts) {
313 if (!putTimestamps.contains(ts)) {
314 put.add(FAMILY_BYTES, Bytes.toBytes(qual), ts, createValue(ts));
315 putTimestamps.add(ts);
316 }
317 if (VERBOSE) {
318 LOG.info("put: row " + Bytes.toStringBinary(put.getRow())
319 + ", cf " + FAMILY + ", qualifier " + qual + ", ts " + ts);
320 }
321 }
322
323 private byte[] createValue(long ts) {
324 return Bytes.toBytes("value" + ts);
325 }
326
327 public void delAtTimestamp(String qual, long ts) {
328 del.deleteColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts);
329 logDelete(qual, ts, "at");
330 }
331
332 private void logDelete(String qual, long ts, String delType) {
333 if (VERBOSE) {
334 LOG.info("del " + delType + ": row "
335 + Bytes.toStringBinary(put.getRow()) + ", cf " + FAMILY
336 + ", qualifier " + qual + ", ts " + ts);
337 }
338 }
339
340 private void delUpToTimestamp(String qual, long upToTS) {
341 del.deleteColumns(FAMILY_BYTES, Bytes.toBytes(qual), upToTS);
342 logDelete(qual, upToTS, "up to and including");
343 }
344
345 private long randLong(long n) {
346 long l = rand.nextLong();
347 if (l == Long.MIN_VALUE)
348 l = Long.MAX_VALUE;
349 return Math.abs(l) % n;
350 }
351
352 private long randBetween(long a, long b) {
353 long x = a + randLong(b - a + 1);
354 assertTrue(a <= x && x <= b);
355 return x;
356 }
357
358 private final String rowStr(int i) {
359 return ("row" + i).intern();
360 }
361
362 private final byte[] rowBytes(int i) {
363 if (i == -1) {
364 return HConstants.EMPTY_BYTE_ARRAY;
365 }
366 return Bytes.toBytes(rowStr(i));
367 }
368
369 private final String getQualStr(int i) {
370 return ("qual" + i).intern();
371 }
372
373 public void createTimestampRange(long minTS, long maxTS,
374 long deleteUpToTS) throws IOException {
375 assertTrue(minTS < maxTS);
376 assertTrue(deleteUpToTS == -1
377 || (minTS <= deleteUpToTS && deleteUpToTS <= maxTS));
378
379 for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
380 final String row = rowStr(iRow);
381 final byte[] rowBytes = Bytes.toBytes(row);
382 for (int iCol = 0; iCol < NUM_COLS; ++iCol) {
383 final String qual = getQualStr(iCol);
384 final byte[] qualBytes = Bytes.toBytes(qual);
385 put = new Put(rowBytes);
386
387 putTimestamps.clear();
388 put(qual, minTS);
389 put(qual, maxTS);
390 for (int i = 0; i < PUTS_PER_ROW_COL; ++i) {
391 put(qual, randBetween(minTS, maxTS));
392 }
393
394 long[] putTimestampList = new long[putTimestamps.size()];
395 {
396 int i = 0;
397 for (long ts : putTimestamps) {
398 putTimestampList[i++] = ts;
399 }
400 }
401
402
403 delTimestamps.clear();
404 assertTrue(putTimestampList.length >= DELETES_PER_ROW_COL);
405 int numToDel = DELETES_PER_ROW_COL;
406 int tsRemaining = putTimestampList.length;
407 del = new Delete(rowBytes);
408 for (long ts : putTimestampList) {
409 if (rand.nextInt(tsRemaining) < numToDel) {
410 delAtTimestamp(qual, ts);
411 putTimestamps.remove(ts);
412 --numToDel;
413 }
414
415 if (--tsRemaining == 0) {
416 break;
417 }
418 }
419
420
421 if (deleteUpToTS != -1) {
422 delUpToTimestamp(qual, deleteUpToTS);
423 }
424
425 region.put(put);
426 if (!del.isEmpty()) {
427 region.delete(del, null, true);
428 }
429
430
431
432 for (long ts : putTimestamps) {
433 expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts,
434 KeyValue.Type.Put));
435 }
436 }
437 }
438
439 region.flushcache();
440 }
441
442 @After
443 public void tearDown() throws IOException {
444 if (region != null) {
445 region.close();
446 region.getLog().closeAndDelete();
447 }
448
449
450
451 StoreScanner.enableLazySeekGlobally(
452 StoreScanner.LAZY_SEEK_ENABLED_BY_DEFAULT);
453 }
454
455
/**
 * JUnit rule applied around every test method of this class.
 * NOTE(review): presumably checks for resources leaked by the test, going by
 * the rule's class name -- confirm against ResourceCheckerJUnitRule.
 */
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
459 }
460