1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.apache.hadoop.hbase.HBaseTestingUtility.assertKVListsEqual;
22 import static org.junit.Assert.assertTrue;
23
24 import java.io.IOException;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Collection;
28 import java.util.Collections;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.Random;
34 import java.util.Set;
35
36 import org.apache.commons.logging.Log;
37 import org.apache.commons.logging.LogFactory;
38 import org.apache.hadoop.hbase.HBaseTestingUtility;
39 import org.apache.hadoop.hbase.HColumnDescriptor;
40 import org.apache.hadoop.hbase.HConstants;
41 import org.apache.hadoop.hbase.KeyValue;
42 import org.apache.hadoop.hbase.MediumTests;
43 import org.apache.hadoop.hbase.client.Delete;
44 import org.apache.hadoop.hbase.client.Put;
45 import org.apache.hadoop.hbase.client.Scan;
46 import org.apache.hadoop.hbase.io.compress.Compression;
47 import org.apache.hadoop.hbase.util.Bytes;
48 import org.junit.After;
49 import org.junit.Before;
50 import org.junit.Test;
51 import org.junit.experimental.categories.Category;
52 import org.junit.runner.RunWith;
53 import org.junit.runners.Parameterized;
54 import org.junit.runners.Parameterized.Parameters;
55
56
57
58
59
60 @RunWith(Parameterized.class)
61 @Category(MediumTests.class)
62 public class TestSeekOptimizations {
63
64 private static final Log LOG =
65 LogFactory.getLog(TestSeekOptimizations.class);
66
67
68 private static final String FAMILY = "myCF";
69 private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
70
71 private static final int PUTS_PER_ROW_COL = 50;
72 private static final int DELETES_PER_ROW_COL = 10;
73
74 private static final int NUM_ROWS = 3;
75 private static final int NUM_COLS = 3;
76
77 private static final boolean VERBOSE = false;
78
79
80
81
82
83 private static final boolean USE_MANY_STORE_FILES = true;
84
85 private static final int[][] COLUMN_SETS = new int[][] {
86 {},
87 {0},
88 {1},
89 {0, 2},
90 {1, 2},
91 {0, 1, 2},
92 };
93
94
95
96 private static final int[][] ROW_RANGES = new int[][] {
97 {-1, -1},
98 {0, 1},
99 {1, 1},
100 {1, 2},
101 {0, 2}
102 };
103
104 private static final int[] MAX_VERSIONS_VALUES = new int[] { 1, 2 };
105
106
107 private HRegion region;
108 private Put put;
109 private Delete del;
110 private Random rand;
111 private Set<Long> putTimestamps = new HashSet<Long>();
112 private Set<Long> delTimestamps = new HashSet<Long>();
113 private List<KeyValue> expectedKVs = new ArrayList<KeyValue>();
114
115 private Compression.Algorithm comprAlgo;
116 private BloomType bloomType;
117
118 private long totalSeekDiligent, totalSeekLazy;
119
120 private final static HBaseTestingUtility TEST_UTIL =
121 new HBaseTestingUtility();
122
123 @Parameters
124 public static final Collection<Object[]> parameters() {
125 return HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS;
126 }
127
128 public TestSeekOptimizations(Compression.Algorithm comprAlgo,
129 BloomType bloomType) {
130 this.comprAlgo = comprAlgo;
131 this.bloomType = bloomType;
132 }
133
134 @Before
135 public void setUp() {
136 rand = new Random(91238123L);
137 expectedKVs.clear();
138 }
139
140 @Test
141 public void testMultipleTimestampRanges() throws IOException {
142 region = TEST_UTIL.createTestRegion("testMultipleTimestampRanges",
143 new HColumnDescriptor(FAMILY)
144 .setCompressionType(comprAlgo)
145 .setBloomFilterType(bloomType)
146 .setMaxVersions(3)
147 );
148
149
150 final long latestDelTS = USE_MANY_STORE_FILES ? 1397 : -1;
151
152 createTimestampRange(1, 50, -1);
153 createTimestampRange(51, 100, -1);
154 if (USE_MANY_STORE_FILES) {
155 createTimestampRange(100, 500, 127);
156 createTimestampRange(900, 1300, -1);
157 createTimestampRange(1301, 2500, latestDelTS);
158 createTimestampRange(2502, 2598, -1);
159 createTimestampRange(2599, 2999, -1);
160 }
161
162 prepareExpectedKVs(latestDelTS);
163
164 for (int[] columnArr : COLUMN_SETS) {
165 for (int[] rowRange : ROW_RANGES) {
166 for (int maxVersions : MAX_VERSIONS_VALUES) {
167 for (boolean lazySeekEnabled : new boolean[] { false, true }) {
168 testScan(columnArr, lazySeekEnabled, rowRange[0], rowRange[1],
169 maxVersions);
170 }
171 }
172 }
173 }
174
175 final double seekSavings = 1 - totalSeekLazy * 1.0 / totalSeekDiligent;
176 System.err.println("For bloom=" + bloomType + ", compr=" + comprAlgo +
177 " total seeks without optimization: " + totalSeekDiligent
178 + ", with optimization: " + totalSeekLazy + " (" +
179 String.format("%.2f%%", totalSeekLazy * 100.0 / totalSeekDiligent) +
180 "), savings: " + String.format("%.2f%%",
181 100.0 * seekSavings) + "\n");
182
183
184
185 final double expectedSeekSavings = 0.0;
186 assertTrue("Lazy seek is only saving " +
187 String.format("%.2f%%", seekSavings * 100) + " seeks but should " +
188 "save at least " + String.format("%.2f%%", expectedSeekSavings * 100),
189 seekSavings >= expectedSeekSavings);
190 }
191
192 private void testScan(final int[] columnArr, final boolean lazySeekEnabled,
193 final int startRow, final int endRow, int maxVersions)
194 throws IOException {
195 StoreScanner.enableLazySeekGlobally(lazySeekEnabled);
196 final Scan scan = new Scan();
197 final Set<String> qualSet = new HashSet<String>();
198 for (int iColumn : columnArr) {
199 String qualStr = getQualStr(iColumn);
200 scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualStr));
201 qualSet.add(qualStr);
202 }
203 scan.setMaxVersions(maxVersions);
204 scan.setStartRow(rowBytes(startRow));
205
206
207 {
208 final byte[] scannerStopRow =
209 rowBytes(endRow + (startRow != endRow ? 1 : 0));
210 scan.setStopRow(scannerStopRow);
211 }
212
213 final long initialSeekCount = StoreFileScanner.getSeekCount();
214 final InternalScanner scanner = region.getScanner(scan);
215 final List<KeyValue> results = new ArrayList<KeyValue>();
216 final List<KeyValue> actualKVs = new ArrayList<KeyValue>();
217
218
219
220
221 boolean hasNext;
222 do {
223 hasNext = scanner.next(results);
224 actualKVs.addAll(results);
225 results.clear();
226 } while (hasNext);
227
228 List<KeyValue> filteredKVs = filterExpectedResults(qualSet,
229 rowBytes(startRow), rowBytes(endRow), maxVersions);
230 final String rowRestrictionStr =
231 (startRow == -1 && endRow == -1) ? "all rows" : (
232 startRow == endRow ? ("row=" + startRow) : ("startRow="
233 + startRow + ", " + "endRow=" + endRow));
234 final String columnRestrictionStr =
235 columnArr.length == 0 ? "all columns"
236 : ("columns=" + Arrays.toString(columnArr));
237 final String testDesc =
238 "Bloom=" + bloomType + ", compr=" + comprAlgo + ", "
239 + (scan.isGetScan() ? "Get" : "Scan") + ": "
240 + columnRestrictionStr + ", " + rowRestrictionStr
241 + ", maxVersions=" + maxVersions + ", lazySeek=" + lazySeekEnabled;
242 long seekCount = StoreFileScanner.getSeekCount() - initialSeekCount;
243 if (VERBOSE) {
244 System.err.println("Seek count: " + seekCount + ", KVs returned: "
245 + actualKVs.size() + ". " + testDesc +
246 (lazySeekEnabled ? "\n" : ""));
247 }
248 if (lazySeekEnabled) {
249 totalSeekLazy += seekCount;
250 } else {
251 totalSeekDiligent += seekCount;
252 }
253 assertKVListsEqual(testDesc, filteredKVs, actualKVs);
254 }
255
256 private List<KeyValue> filterExpectedResults(Set<String> qualSet,
257 byte[] startRow, byte[] endRow, int maxVersions) {
258 final List<KeyValue> filteredKVs = new ArrayList<KeyValue>();
259 final Map<String, Integer> verCount = new HashMap<String, Integer>();
260 for (KeyValue kv : expectedKVs) {
261 if (startRow.length > 0 &&
262 Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
263 startRow, 0, startRow.length) < 0) {
264 continue;
265 }
266
267
268 if (endRow.length > 0 &&
269 Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
270 endRow, 0, endRow.length) > 0) {
271 continue;
272 }
273
274 if (!qualSet.isEmpty() && (Bytes.compareTo(
275 kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
276 FAMILY_BYTES, 0, FAMILY_BYTES.length
277 ) != 0 ||
278 !qualSet.contains(Bytes.toString(kv.getQualifier())))) {
279 continue;
280 }
281
282 final String rowColStr =
283 Bytes.toStringBinary(kv.getRow()) + "/"
284 + Bytes.toStringBinary(kv.getFamily()) + ":"
285 + Bytes.toStringBinary(kv.getQualifier());
286 final Integer curNumVer = verCount.get(rowColStr);
287 final int newNumVer = curNumVer != null ? (curNumVer + 1) : 1;
288 if (newNumVer <= maxVersions) {
289 filteredKVs.add(kv);
290 verCount.put(rowColStr, newNumVer);
291 }
292 }
293
294 return filteredKVs;
295 }
296
297 private void prepareExpectedKVs(long latestDelTS) {
298 final List<KeyValue> filteredKVs = new ArrayList<KeyValue>();
299 for (KeyValue kv : expectedKVs) {
300 if (kv.getTimestamp() > latestDelTS || latestDelTS == -1) {
301 filteredKVs.add(kv);
302 }
303 }
304 expectedKVs = filteredKVs;
305 Collections.sort(expectedKVs, KeyValue.COMPARATOR);
306 }
307
308 public void put(String qual, long ts) {
309 if (!putTimestamps.contains(ts)) {
310 put.add(FAMILY_BYTES, Bytes.toBytes(qual), ts, createValue(ts));
311 putTimestamps.add(ts);
312 }
313 if (VERBOSE) {
314 LOG.info("put: row " + Bytes.toStringBinary(put.getRow())
315 + ", cf " + FAMILY + ", qualifier " + qual + ", ts " + ts);
316 }
317 }
318
319 private byte[] createValue(long ts) {
320 return Bytes.toBytes("value" + ts);
321 }
322
323 public void delAtTimestamp(String qual, long ts) {
324 del.deleteColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts);
325 logDelete(qual, ts, "at");
326 }
327
328 private void logDelete(String qual, long ts, String delType) {
329 if (VERBOSE) {
330 LOG.info("del " + delType + ": row "
331 + Bytes.toStringBinary(put.getRow()) + ", cf " + FAMILY
332 + ", qualifier " + qual + ", ts " + ts);
333 }
334 }
335
336 private void delUpToTimestamp(String qual, long upToTS) {
337 del.deleteColumns(FAMILY_BYTES, Bytes.toBytes(qual), upToTS);
338 logDelete(qual, upToTS, "up to and including");
339 }
340
341 private long randLong(long n) {
342 long l = rand.nextLong();
343 if (l == Long.MIN_VALUE)
344 l = Long.MAX_VALUE;
345 return Math.abs(l) % n;
346 }
347
348 private long randBetween(long a, long b) {
349 long x = a + randLong(b - a + 1);
350 assertTrue(a <= x && x <= b);
351 return x;
352 }
353
354 private final String rowStr(int i) {
355 return ("row" + i).intern();
356 }
357
358 private final byte[] rowBytes(int i) {
359 if (i == -1) {
360 return HConstants.EMPTY_BYTE_ARRAY;
361 }
362 return Bytes.toBytes(rowStr(i));
363 }
364
365 private final String getQualStr(int i) {
366 return ("qual" + i).intern();
367 }
368
369 public void createTimestampRange(long minTS, long maxTS,
370 long deleteUpToTS) throws IOException {
371 assertTrue(minTS < maxTS);
372 assertTrue(deleteUpToTS == -1
373 || (minTS <= deleteUpToTS && deleteUpToTS <= maxTS));
374
375 for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
376 final String row = rowStr(iRow);
377 final byte[] rowBytes = Bytes.toBytes(row);
378 for (int iCol = 0; iCol < NUM_COLS; ++iCol) {
379 final String qual = getQualStr(iCol);
380 final byte[] qualBytes = Bytes.toBytes(qual);
381 put = new Put(rowBytes);
382
383 putTimestamps.clear();
384 put(qual, minTS);
385 put(qual, maxTS);
386 for (int i = 0; i < PUTS_PER_ROW_COL; ++i) {
387 put(qual, randBetween(minTS, maxTS));
388 }
389
390 long[] putTimestampList = new long[putTimestamps.size()];
391 {
392 int i = 0;
393 for (long ts : putTimestamps) {
394 putTimestampList[i++] = ts;
395 }
396 }
397
398
399 delTimestamps.clear();
400 assertTrue(putTimestampList.length >= DELETES_PER_ROW_COL);
401 int numToDel = DELETES_PER_ROW_COL;
402 int tsRemaining = putTimestampList.length;
403 del = new Delete(rowBytes);
404 for (long ts : putTimestampList) {
405 if (rand.nextInt(tsRemaining) < numToDel) {
406 delAtTimestamp(qual, ts);
407 putTimestamps.remove(ts);
408 --numToDel;
409 }
410
411 if (--tsRemaining == 0) {
412 break;
413 }
414 }
415
416
417 if (deleteUpToTS != -1) {
418 delUpToTimestamp(qual, deleteUpToTS);
419 }
420
421 region.put(put);
422 if (!del.isEmpty()) {
423 region.delete(del);
424 }
425
426
427
428 for (long ts : putTimestamps) {
429 expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts,
430 KeyValue.Type.Put));
431 }
432 }
433 }
434
435 region.flushcache();
436 }
437
438 @After
439 public void tearDown() throws IOException {
440 if (region != null) {
441 HRegion.closeHRegion(region);
442 }
443
444
445
446 StoreScanner.enableLazySeekGlobally(
447 StoreScanner.LAZY_SEEK_ENABLED_BY_DEFAULT);
448 }
449
450
451 }
452