View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.apache.hadoop.hbase.HBaseTestingUtility.assertKVListsEqual;
22  import static org.junit.Assert.assertTrue;
23  
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.Collection;
28  import java.util.Collections;
29  import java.util.HashMap;
30  import java.util.HashSet;
31  import java.util.List;
32  import java.util.Map;
33  import java.util.Random;
34  import java.util.Set;
35  
36  import org.apache.commons.logging.Log;
37  import org.apache.commons.logging.LogFactory;
38  import org.apache.hadoop.hbase.HBaseTestingUtility;
39  import org.apache.hadoop.hbase.HColumnDescriptor;
40  import org.apache.hadoop.hbase.HConstants;
41  import org.apache.hadoop.hbase.KeyValue;
42  import org.apache.hadoop.hbase.MediumTests;
43  import org.apache.hadoop.hbase.client.Delete;
44  import org.apache.hadoop.hbase.client.Put;
45  import org.apache.hadoop.hbase.client.Scan;
46  import org.apache.hadoop.hbase.io.compress.Compression;
47  import org.apache.hadoop.hbase.util.Bytes;
48  import org.junit.After;
49  import org.junit.Before;
50  import org.junit.Test;
51  import org.junit.experimental.categories.Category;
52  import org.junit.runner.RunWith;
53  import org.junit.runners.Parameterized;
54  import org.junit.runners.Parameterized.Parameters;
55  
56  /**
57   * Test various seek optimizations for correctness and check if they are
58   * actually saving I/O operations.
59   */
60  @RunWith(Parameterized.class)
61  @Category(MediumTests.class)
62  public class TestSeekOptimizations {
63  
64    private static final Log LOG =
65        LogFactory.getLog(TestSeekOptimizations.class);
66  
67    // Constants
68    private static final String FAMILY = "myCF";
69    private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
70  
71    private static final int PUTS_PER_ROW_COL = 50;
72    private static final int DELETES_PER_ROW_COL = 10;
73  
74    private static final int NUM_ROWS = 3;
75    private static final int NUM_COLS = 3;
76  
77    private static final boolean VERBOSE = false;
78  
79    /**
80     * Disable this when this test fails hopelessly and you need to debug a
81     * simpler case.
82     */
83    private static final boolean USE_MANY_STORE_FILES = true;
84  
85    private static final int[][] COLUMN_SETS = new int[][] {
86      {},  // All columns
87      {0},
88      {1},
89      {0, 2},
90      {1, 2},
91      {0, 1, 2},
92    };
93  
94    // Both start row and end row are inclusive here for the purposes of this
95    // test.
96    private static final int[][] ROW_RANGES = new int[][] {
97      {-1, -1},
98      {0, 1},
99      {1, 1},
100     {1, 2},
101     {0, 2}
102   };
103 
104   private static final int[] MAX_VERSIONS_VALUES = new int[] { 1, 2 };
105 
106   // Instance variables
107   private HRegion region;
108   private Put put;
109   private Delete del;
110   private Random rand;
111   private Set<Long> putTimestamps = new HashSet<Long>();
112   private Set<Long> delTimestamps = new HashSet<Long>();
113   private List<KeyValue> expectedKVs = new ArrayList<KeyValue>();
114 
115   private Compression.Algorithm comprAlgo;
116   private BloomType bloomType;
117 
118   private long totalSeekDiligent, totalSeekLazy;
119   
120   private final static HBaseTestingUtility TEST_UTIL =
121       new HBaseTestingUtility();
122 
123   @Parameters
124   public static final Collection<Object[]> parameters() {
125     return HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS;
126   }
127 
/**
 * Creates a test instance for one parameter combination.
 *
 * @param comprAlgo compression algorithm applied to the column family
 * @param bloomType Bloom filter type applied to the column family
 */
public TestSeekOptimizations(Compression.Algorithm comprAlgo,
    BloomType bloomType) {
  // The two assignments are independent of each other.
  this.bloomType = bloomType;
  this.comprAlgo = comprAlgo;
}
133 
134   @Before
135   public void setUp() {
136     rand = new Random(91238123L);
137     expectedKVs.clear();
138   }
139 
140   @Test
141   public void testMultipleTimestampRanges() throws IOException {
142     region = TEST_UTIL.createTestRegion("testMultipleTimestampRanges",
143         new HColumnDescriptor(FAMILY)
144             .setCompressionType(comprAlgo)
145             .setBloomFilterType(bloomType)
146             .setMaxVersions(3)
147     );
148 
149     // Delete the given timestamp and everything before.
150     final long latestDelTS = USE_MANY_STORE_FILES ? 1397 : -1;
151 
152     createTimestampRange(1, 50, -1);
153     createTimestampRange(51, 100, -1);
154     if (USE_MANY_STORE_FILES) {
155       createTimestampRange(100, 500, 127);
156       createTimestampRange(900, 1300, -1);
157       createTimestampRange(1301, 2500, latestDelTS);
158       createTimestampRange(2502, 2598, -1);
159       createTimestampRange(2599, 2999, -1);
160     }
161 
162     prepareExpectedKVs(latestDelTS);
163 
164     for (int[] columnArr : COLUMN_SETS) {
165       for (int[] rowRange : ROW_RANGES) {
166         for (int maxVersions : MAX_VERSIONS_VALUES) {
167           for (boolean lazySeekEnabled : new boolean[] { false, true }) {
168             testScan(columnArr, lazySeekEnabled, rowRange[0], rowRange[1],
169                 maxVersions);
170           }
171         }
172       }
173     }
174 
175     final double seekSavings = 1 - totalSeekLazy * 1.0 / totalSeekDiligent;
176     System.err.println("For bloom=" + bloomType + ", compr=" + comprAlgo +
177         " total seeks without optimization: " + totalSeekDiligent
178         + ", with optimization: " + totalSeekLazy + " (" +
179         String.format("%.2f%%", totalSeekLazy * 100.0 / totalSeekDiligent) +
180         "), savings: " + String.format("%.2f%%",
181             100.0 * seekSavings) + "\n");
182 
183     // Test that lazy seeks are buying us something. Without the actual
184     // implementation of the lazy seek optimization this will be 0.
185     final double expectedSeekSavings = 0.0;
186     assertTrue("Lazy seek is only saving " +
187         String.format("%.2f%%", seekSavings * 100) + " seeks but should " +
188         "save at least " + String.format("%.2f%%", expectedSeekSavings * 100),
189         seekSavings >= expectedSeekSavings);
190   }
191 
192   private void testScan(final int[] columnArr, final boolean lazySeekEnabled,
193       final int startRow, final int endRow, int maxVersions)
194       throws IOException {
195     StoreScanner.enableLazySeekGlobally(lazySeekEnabled);
196     final Scan scan = new Scan();
197     final Set<String> qualSet = new HashSet<String>();
198     for (int iColumn : columnArr) {
199       String qualStr = getQualStr(iColumn);
200       scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualStr));
201       qualSet.add(qualStr);
202     }
203     scan.setMaxVersions(maxVersions);
204     scan.setStartRow(rowBytes(startRow));
205 
206     // Adjust for the fact that for multi-row queries the end row is exclusive.
207     {
208       final byte[] scannerStopRow =
209           rowBytes(endRow + (startRow != endRow ? 1 : 0));
210       scan.setStopRow(scannerStopRow);
211     }
212 
213     final long initialSeekCount = StoreFileScanner.getSeekCount();
214     final InternalScanner scanner = region.getScanner(scan);
215     final List<KeyValue> results = new ArrayList<KeyValue>();
216     final List<KeyValue> actualKVs = new ArrayList<KeyValue>();
217 
218     // Such a clumsy do-while loop appears to be the official way to use an
219     // internalScanner. scanner.next() return value refers to the _next_
220     // result, not to the one already returned in results.
221     boolean hasNext;
222     do {
223       hasNext = scanner.next(results);
224       actualKVs.addAll(results);
225       results.clear();
226     } while (hasNext);
227 
228     List<KeyValue> filteredKVs = filterExpectedResults(qualSet,
229         rowBytes(startRow), rowBytes(endRow), maxVersions);
230     final String rowRestrictionStr =
231         (startRow == -1 && endRow == -1) ? "all rows" : (
232             startRow == endRow ? ("row=" + startRow) : ("startRow="
233             + startRow + ", " + "endRow=" + endRow));
234     final String columnRestrictionStr =
235         columnArr.length == 0 ? "all columns"
236             : ("columns=" + Arrays.toString(columnArr));
237     final String testDesc =
238         "Bloom=" + bloomType + ", compr=" + comprAlgo + ", "
239             + (scan.isGetScan() ? "Get" : "Scan") + ": "
240             + columnRestrictionStr + ", " + rowRestrictionStr
241             + ", maxVersions=" + maxVersions + ", lazySeek=" + lazySeekEnabled;
242     long seekCount = StoreFileScanner.getSeekCount() - initialSeekCount;
243     if (VERBOSE) {
244       System.err.println("Seek count: " + seekCount + ", KVs returned: "
245         + actualKVs.size() + ". " + testDesc +
246         (lazySeekEnabled ? "\n" : ""));
247     }
248     if (lazySeekEnabled) {
249       totalSeekLazy += seekCount;
250     } else {
251       totalSeekDiligent += seekCount;
252     }
253     assertKVListsEqual(testDesc, filteredKVs, actualKVs);
254   }
255 
256   private List<KeyValue> filterExpectedResults(Set<String> qualSet,
257       byte[] startRow, byte[] endRow, int maxVersions) {
258     final List<KeyValue> filteredKVs = new ArrayList<KeyValue>();
259     final Map<String, Integer> verCount = new HashMap<String, Integer>();
260     for (KeyValue kv : expectedKVs) {
261       if (startRow.length > 0 &&
262           Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
263               startRow, 0, startRow.length) < 0) {
264         continue;
265       }
266 
267       // In this unit test the end row is always inclusive.
268       if (endRow.length > 0 &&
269           Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
270               endRow, 0, endRow.length) > 0) {
271         continue;
272       }
273 
274       if (!qualSet.isEmpty() && (Bytes.compareTo(
275             kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
276             FAMILY_BYTES, 0, FAMILY_BYTES.length
277           ) != 0 ||
278           !qualSet.contains(Bytes.toString(kv.getQualifier())))) {
279         continue;
280       }
281 
282       final String rowColStr =
283         Bytes.toStringBinary(kv.getRow()) + "/"
284             + Bytes.toStringBinary(kv.getFamily()) + ":"
285             + Bytes.toStringBinary(kv.getQualifier());
286       final Integer curNumVer = verCount.get(rowColStr);
287       final int newNumVer = curNumVer != null ? (curNumVer + 1) : 1;
288       if (newNumVer <= maxVersions) {
289         filteredKVs.add(kv);
290         verCount.put(rowColStr, newNumVer);
291       }
292     }
293 
294     return filteredKVs;
295   }
296 
297   private void prepareExpectedKVs(long latestDelTS) {
298     final List<KeyValue> filteredKVs = new ArrayList<KeyValue>();
299     for (KeyValue kv : expectedKVs) {
300       if (kv.getTimestamp() > latestDelTS || latestDelTS == -1) {
301         filteredKVs.add(kv);
302       }
303     }
304     expectedKVs = filteredKVs;
305     Collections.sort(expectedKVs, KeyValue.COMPARATOR);
306   }
307 
308   public void put(String qual, long ts) {
309     if (!putTimestamps.contains(ts)) {
310       put.add(FAMILY_BYTES, Bytes.toBytes(qual), ts, createValue(ts));
311       putTimestamps.add(ts);
312     }
313     if (VERBOSE) {
314       LOG.info("put: row " + Bytes.toStringBinary(put.getRow())
315           + ", cf " + FAMILY + ", qualifier " + qual + ", ts " + ts);
316     }
317   }
318 
319   private byte[] createValue(long ts) {
320     return Bytes.toBytes("value" + ts);
321   }
322 
323   public void delAtTimestamp(String qual, long ts) {
324     del.deleteColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts);
325     logDelete(qual, ts, "at");
326   }
327 
328   private void logDelete(String qual, long ts, String delType) {
329     if (VERBOSE) {
330       LOG.info("del " + delType + ": row "
331           + Bytes.toStringBinary(put.getRow()) + ", cf " + FAMILY
332           + ", qualifier " + qual + ", ts " + ts);
333     }
334   }
335 
336   private void delUpToTimestamp(String qual, long upToTS) {
337     del.deleteColumns(FAMILY_BYTES, Bytes.toBytes(qual), upToTS);
338     logDelete(qual, upToTS, "up to and including");
339   }
340 
341   private long randLong(long n) {
342     long l = rand.nextLong();
343     if (l == Long.MIN_VALUE)
344       l = Long.MAX_VALUE;
345     return Math.abs(l) % n;
346   }
347 
348   private long randBetween(long a, long b) {
349     long x = a + randLong(b - a + 1);
350     assertTrue(a <= x && x <= b);
351     return x;
352   }
353 
354   private final String rowStr(int i) {
355     return ("row" + i).intern();
356   }
357 
358   private final byte[] rowBytes(int i) {
359     if (i == -1) {
360       return HConstants.EMPTY_BYTE_ARRAY;
361     }
362     return Bytes.toBytes(rowStr(i));
363   }
364 
365   private final String getQualStr(int i) {
366     return ("qual" + i).intern();
367   }
368 
369   public void createTimestampRange(long minTS, long maxTS,
370       long deleteUpToTS) throws IOException {
371     assertTrue(minTS < maxTS);
372     assertTrue(deleteUpToTS == -1
373         || (minTS <= deleteUpToTS && deleteUpToTS <= maxTS));
374 
375     for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
376       final String row = rowStr(iRow);
377       final byte[] rowBytes = Bytes.toBytes(row);
378       for (int iCol = 0; iCol < NUM_COLS; ++iCol) {
379         final String qual = getQualStr(iCol);
380         final byte[] qualBytes = Bytes.toBytes(qual);
381         put = new Put(rowBytes);
382 
383         putTimestamps.clear();
384         put(qual, minTS);
385         put(qual, maxTS);
386         for (int i = 0; i < PUTS_PER_ROW_COL; ++i) {
387           put(qual, randBetween(minTS, maxTS));
388         }
389 
390         long[] putTimestampList = new long[putTimestamps.size()];
391         {
392           int i = 0;
393           for (long ts : putTimestamps) {
394             putTimestampList[i++] = ts;
395           }
396         }
397 
398         // Delete a predetermined number of particular timestamps
399         delTimestamps.clear();
400         assertTrue(putTimestampList.length >= DELETES_PER_ROW_COL);
401         int numToDel = DELETES_PER_ROW_COL;
402         int tsRemaining = putTimestampList.length;
403         del = new Delete(rowBytes);
404         for (long ts : putTimestampList) {
405           if (rand.nextInt(tsRemaining) < numToDel) {
406             delAtTimestamp(qual, ts);
407             putTimestamps.remove(ts);
408             --numToDel;
409           }
410 
411           if (--tsRemaining == 0) {
412             break;
413           }
414         }
415 
416         // Another type of delete: everything up to the given timestamp.
417         if (deleteUpToTS != -1) {
418           delUpToTimestamp(qual, deleteUpToTS);
419         }
420 
421         region.put(put);
422         if (!del.isEmpty()) {
423           region.delete(del);
424         }
425 
426         // Add remaining timestamps (those we have not deleted) to expected
427         // results
428         for (long ts : putTimestamps) {
429           expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts,
430               KeyValue.Type.Put));
431         }
432       }
433     }
434 
435     region.flushcache();
436   }
437 
438   @After
439   public void tearDown() throws IOException {
440     if (region != null) {
441       HRegion.closeHRegion(region);
442     }
443 
444     // We have to re-set the lazy seek flag back to the default so that other
445     // unit tests are not affected.
446     StoreScanner.enableLazySeekGlobally(
447         StoreScanner.LAZY_SEEK_ENABLED_BY_DEFAULT);
448   }
449 
450 
451 }
452