1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.regionserver;
21
22 import static org.apache.hadoop.hbase.HBaseTestingUtility.assertKVListsEqual;
23 import static org.junit.Assert.assertTrue;
24
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.Arrays;
28 import java.util.Collection;
29 import java.util.Collections;
30 import java.util.HashMap;
31 import java.util.HashSet;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.Random;
35 import java.util.Set;
36
37 import org.apache.commons.logging.Log;
38 import org.apache.commons.logging.LogFactory;
39 import org.apache.hadoop.hbase.HBaseTestingUtility;
40 import org.apache.hadoop.hbase.HColumnDescriptor;
41 import org.apache.hadoop.hbase.HConstants;
42 import org.apache.hadoop.hbase.KeyValue;
43 import org.apache.hadoop.hbase.MediumTests;
44 import org.apache.hadoop.hbase.client.Delete;
45 import org.apache.hadoop.hbase.client.Put;
46 import org.apache.hadoop.hbase.client.Scan;
47 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
48 import org.apache.hadoop.hbase.io.hfile.Compression;
49 import org.apache.hadoop.hbase.io.hfile.HFile;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.junit.After;
52 import org.junit.Before;
53 import org.junit.Test;
54 import org.junit.experimental.categories.Category;
55 import org.junit.runner.RunWith;
56 import org.junit.runners.Parameterized;
57 import org.junit.runners.Parameterized.Parameters;
58
59
60
61
62
63 @RunWith(Parameterized.class)
64 @Category(MediumTests.class)
65 public class TestSeekOptimizations {
66
67 private static final Log LOG =
68 LogFactory.getLog(TestSeekOptimizations.class);
69
70
71 private static final String FAMILY = "myCF";
72 private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
73
74 private static final int PUTS_PER_ROW_COL = 50;
75 private static final int DELETES_PER_ROW_COL = 10;
76
77 private static final int NUM_ROWS = 3;
78 private static final int NUM_COLS = 3;
79
80 private static final boolean VERBOSE = false;
81
82
83
84
85
86 private static final boolean USE_MANY_STORE_FILES = true;
87
88 private static final int[][] COLUMN_SETS = new int[][] {
89 {},
90 {0},
91 {1},
92 {0, 2},
93 {1, 2},
94 {0, 1, 2},
95 };
96
97
98
99 private static final int[][] ROW_RANGES = new int[][] {
100 {-1, -1},
101 {0, 1},
102 {1, 1},
103 {1, 2},
104 {0, 2}
105 };
106
107 private static final int[] MAX_VERSIONS_VALUES = new int[] { 1, 2 };
108
109
110 private HRegion region;
111 private Put put;
112 private Delete del;
113 private Random rand;
114 private Set<Long> putTimestamps = new HashSet<Long>();
115 private Set<Long> delTimestamps = new HashSet<Long>();
116 private List<KeyValue> expectedKVs = new ArrayList<KeyValue>();
117
118 private Compression.Algorithm comprAlgo;
119 private StoreFile.BloomType bloomType;
120
121 private long totalSeekDiligent, totalSeekLazy;
122
123 private final static HBaseTestingUtility TEST_UTIL =
124 new HBaseTestingUtility();
125
126 @Parameters
127 public static final Collection<Object[]> parameters() {
128 return HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS;
129 }
130
131 public TestSeekOptimizations(Compression.Algorithm comprAlgo,
132 StoreFile.BloomType bloomType) {
133 this.comprAlgo = comprAlgo;
134 this.bloomType = bloomType;
135 }
136
137 @Before
138 public void setUp() {
139 rand = new Random(91238123L);
140 expectedKVs.clear();
141 }
142
143 @Test
144 public void testMultipleTimestampRanges() throws IOException {
145 region = TEST_UTIL.createTestRegion(TestSeekOptimizations.class.getName(),
146 new HColumnDescriptor(FAMILY)
147 .setCompressionType(comprAlgo)
148 .setBloomFilterType(bloomType)
149 );
150
151
152 final long latestDelTS = USE_MANY_STORE_FILES ? 1397 : -1;
153
154 createTimestampRange(1, 50, -1);
155 createTimestampRange(51, 100, -1);
156 if (USE_MANY_STORE_FILES) {
157 createTimestampRange(100, 500, 127);
158 createTimestampRange(900, 1300, -1);
159 createTimestampRange(1301, 2500, latestDelTS);
160 createTimestampRange(2502, 2598, -1);
161 createTimestampRange(2599, 2999, -1);
162 }
163
164 prepareExpectedKVs(latestDelTS);
165
166 for (int[] columnArr : COLUMN_SETS) {
167 for (int[] rowRange : ROW_RANGES) {
168 for (int maxVersions : MAX_VERSIONS_VALUES) {
169 for (boolean lazySeekEnabled : new boolean[] { false, true }) {
170 testScan(columnArr, lazySeekEnabled, rowRange[0], rowRange[1],
171 maxVersions);
172 }
173 }
174 }
175 }
176
177 final double seekSavings = 1 - totalSeekLazy * 1.0 / totalSeekDiligent;
178 System.err.println("For bloom=" + bloomType + ", compr=" + comprAlgo +
179 " total seeks without optimization: " + totalSeekDiligent
180 + ", with optimization: " + totalSeekLazy + " (" +
181 String.format("%.2f%%", totalSeekLazy * 100.0 / totalSeekDiligent) +
182 "), savings: " + String.format("%.2f%%",
183 100.0 * seekSavings) + "\n");
184
185
186
187 final double expectedSeekSavings = 0.0;
188 assertTrue("Lazy seek is only saving " +
189 String.format("%.2f%%", seekSavings * 100) + " seeks but should " +
190 "save at least " + String.format("%.2f%%", expectedSeekSavings * 100),
191 seekSavings >= expectedSeekSavings);
192 }
193
194 private void testScan(final int[] columnArr, final boolean lazySeekEnabled,
195 final int startRow, final int endRow, int maxVersions)
196 throws IOException {
197 StoreScanner.enableLazySeekGlobally(lazySeekEnabled);
198 final Scan scan = new Scan();
199 final Set<String> qualSet = new HashSet<String>();
200 for (int iColumn : columnArr) {
201 String qualStr = getQualStr(iColumn);
202 scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualStr));
203 qualSet.add(qualStr);
204 }
205 scan.setMaxVersions(maxVersions);
206 scan.setStartRow(rowBytes(startRow));
207
208
209 {
210 final byte[] scannerStopRow =
211 rowBytes(endRow + (startRow != endRow ? 1 : 0));
212 scan.setStopRow(scannerStopRow);
213 }
214
215 final long initialSeekCount = StoreFileScanner.getSeekCount();
216 final InternalScanner scanner = region.getScanner(scan);
217 final List<KeyValue> results = new ArrayList<KeyValue>();
218 final List<KeyValue> actualKVs = new ArrayList<KeyValue>();
219
220
221
222
223 boolean hasNext;
224 do {
225 hasNext = scanner.next(results);
226 actualKVs.addAll(results);
227 results.clear();
228 } while (hasNext);
229
230 List<KeyValue> filteredKVs = filterExpectedResults(qualSet,
231 rowBytes(startRow), rowBytes(endRow), maxVersions);
232 final String rowRestrictionStr =
233 (startRow == -1 && endRow == -1) ? "all rows" : (
234 startRow == endRow ? ("row=" + startRow) : ("startRow="
235 + startRow + ", " + "endRow=" + endRow));
236 final String columnRestrictionStr =
237 columnArr.length == 0 ? "all columns"
238 : ("columns=" + Arrays.toString(columnArr));
239 final String testDesc =
240 "Bloom=" + bloomType + ", compr=" + comprAlgo + ", "
241 + (scan.isGetScan() ? "Get" : "Scan") + ": "
242 + columnRestrictionStr + ", " + rowRestrictionStr
243 + ", maxVersions=" + maxVersions + ", lazySeek=" + lazySeekEnabled;
244 long seekCount = StoreFileScanner.getSeekCount() - initialSeekCount;
245 if (VERBOSE) {
246 System.err.println("Seek count: " + seekCount + ", KVs returned: "
247 + actualKVs.size() + ". " + testDesc +
248 (lazySeekEnabled ? "\n" : ""));
249 }
250 if (lazySeekEnabled) {
251 totalSeekLazy += seekCount;
252 } else {
253 totalSeekDiligent += seekCount;
254 }
255 assertKVListsEqual(testDesc, filteredKVs, actualKVs);
256 }
257
258 private List<KeyValue> filterExpectedResults(Set<String> qualSet,
259 byte[] startRow, byte[] endRow, int maxVersions) {
260 final List<KeyValue> filteredKVs = new ArrayList<KeyValue>();
261 final Map<String, Integer> verCount = new HashMap<String, Integer>();
262 for (KeyValue kv : expectedKVs) {
263 if (startRow.length > 0 &&
264 Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
265 startRow, 0, startRow.length) < 0) {
266 continue;
267 }
268
269
270 if (endRow.length > 0 &&
271 Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
272 endRow, 0, endRow.length) > 0) {
273 continue;
274 }
275
276 if (!qualSet.isEmpty() && (Bytes.compareTo(
277 kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
278 FAMILY_BYTES, 0, FAMILY_BYTES.length
279 ) != 0 ||
280 !qualSet.contains(Bytes.toString(kv.getQualifier())))) {
281 continue;
282 }
283
284 final String rowColStr =
285 Bytes.toStringBinary(kv.getRow()) + "/"
286 + Bytes.toStringBinary(kv.getFamily()) + ":"
287 + Bytes.toStringBinary(kv.getQualifier());
288 final Integer curNumVer = verCount.get(rowColStr);
289 final int newNumVer = curNumVer != null ? (curNumVer + 1) : 1;
290 if (newNumVer <= maxVersions) {
291 filteredKVs.add(kv);
292 verCount.put(rowColStr, newNumVer);
293 }
294 }
295
296 return filteredKVs;
297 }
298
299 private void prepareExpectedKVs(long latestDelTS) {
300 final List<KeyValue> filteredKVs = new ArrayList<KeyValue>();
301 for (KeyValue kv : expectedKVs) {
302 if (kv.getTimestamp() > latestDelTS || latestDelTS == -1) {
303 filteredKVs.add(kv);
304 }
305 }
306 expectedKVs = filteredKVs;
307 Collections.sort(expectedKVs, KeyValue.COMPARATOR);
308 }
309
310 public void put(String qual, long ts) {
311 if (!putTimestamps.contains(ts)) {
312 put.add(FAMILY_BYTES, Bytes.toBytes(qual), ts, createValue(ts));
313 putTimestamps.add(ts);
314 }
315 if (VERBOSE) {
316 LOG.info("put: row " + Bytes.toStringBinary(put.getRow())
317 + ", cf " + FAMILY + ", qualifier " + qual + ", ts " + ts);
318 }
319 }
320
321 private byte[] createValue(long ts) {
322 return Bytes.toBytes("value" + ts);
323 }
324
325 public void delAtTimestamp(String qual, long ts) {
326 del.deleteColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts);
327 logDelete(qual, ts, "at");
328 }
329
330 private void logDelete(String qual, long ts, String delType) {
331 if (VERBOSE) {
332 LOG.info("del " + delType + ": row "
333 + Bytes.toStringBinary(put.getRow()) + ", cf " + FAMILY
334 + ", qualifier " + qual + ", ts " + ts);
335 }
336 }
337
338 private void delUpToTimestamp(String qual, long upToTS) {
339 del.deleteColumns(FAMILY_BYTES, Bytes.toBytes(qual), upToTS);
340 logDelete(qual, upToTS, "up to and including");
341 }
342
343 private long randLong(long n) {
344 long l = rand.nextLong();
345 if (l == Long.MIN_VALUE)
346 l = Long.MAX_VALUE;
347 return Math.abs(l) % n;
348 }
349
350 private long randBetween(long a, long b) {
351 long x = a + randLong(b - a + 1);
352 assertTrue(a <= x && x <= b);
353 return x;
354 }
355
356 private final String rowStr(int i) {
357 return ("row" + i).intern();
358 }
359
360 private final byte[] rowBytes(int i) {
361 if (i == -1) {
362 return HConstants.EMPTY_BYTE_ARRAY;
363 }
364 return Bytes.toBytes(rowStr(i));
365 }
366
367 private final String getQualStr(int i) {
368 return ("qual" + i).intern();
369 }
370
371 public void createTimestampRange(long minTS, long maxTS,
372 long deleteUpToTS) throws IOException {
373 assertTrue(minTS < maxTS);
374 assertTrue(deleteUpToTS == -1
375 || (minTS <= deleteUpToTS && deleteUpToTS <= maxTS));
376
377 for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
378 final String row = rowStr(iRow);
379 final byte[] rowBytes = Bytes.toBytes(row);
380 for (int iCol = 0; iCol < NUM_COLS; ++iCol) {
381 final String qual = getQualStr(iCol);
382 final byte[] qualBytes = Bytes.toBytes(qual);
383 put = new Put(rowBytes);
384
385 putTimestamps.clear();
386 put(qual, minTS);
387 put(qual, maxTS);
388 for (int i = 0; i < PUTS_PER_ROW_COL; ++i) {
389 put(qual, randBetween(minTS, maxTS));
390 }
391
392 long[] putTimestampList = new long[putTimestamps.size()];
393 {
394 int i = 0;
395 for (long ts : putTimestamps) {
396 putTimestampList[i++] = ts;
397 }
398 }
399
400
401 delTimestamps.clear();
402 assertTrue(putTimestampList.length >= DELETES_PER_ROW_COL);
403 int numToDel = DELETES_PER_ROW_COL;
404 int tsRemaining = putTimestampList.length;
405 del = new Delete(rowBytes);
406 for (long ts : putTimestampList) {
407 if (rand.nextInt(tsRemaining) < numToDel) {
408 delAtTimestamp(qual, ts);
409 putTimestamps.remove(ts);
410 --numToDel;
411 }
412
413 if (--tsRemaining == 0) {
414 break;
415 }
416 }
417
418
419 if (deleteUpToTS != -1) {
420 delUpToTimestamp(qual, deleteUpToTS);
421 }
422
423 region.put(put);
424 if (!del.isEmpty()) {
425 region.delete(del, null, true);
426 }
427
428
429
430 for (long ts : putTimestamps) {
431 expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts,
432 KeyValue.Type.Put));
433 }
434 }
435 }
436
437 region.flushcache();
438 }
439
440 @After
441 public void tearDown() throws IOException {
442 if (region != null) {
443 region.close();
444 region.getLog().closeAndDelete();
445 }
446
447
448
449 StoreScanner.enableLazySeekGlobally(
450 StoreScanner.LAZY_SEEK_ENABLED_BY_DEFAULT);
451 }
452
453
454 @org.junit.Rule
455 public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
456 new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
457 }
458