1   /*
2    * Copyright 2011 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import static org.apache.hadoop.hbase.HBaseTestingUtility.assertKVListsEqual;
23  import static org.junit.Assert.assertTrue;
24  
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.Arrays;
28  import java.util.Collection;
29  import java.util.Collections;
30  import java.util.HashMap;
31  import java.util.HashSet;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.Random;
35  import java.util.Set;
36  
37  import org.apache.commons.logging.Log;
38  import org.apache.commons.logging.LogFactory;
39  import org.apache.hadoop.hbase.HBaseTestingUtility;
40  import org.apache.hadoop.hbase.HColumnDescriptor;
41  import org.apache.hadoop.hbase.HConstants;
42  import org.apache.hadoop.hbase.KeyValue;
43  import org.apache.hadoop.hbase.MediumTests;
44  import org.apache.hadoop.hbase.client.Delete;
45  import org.apache.hadoop.hbase.client.Put;
46  import org.apache.hadoop.hbase.client.Scan;
47  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
48  import org.apache.hadoop.hbase.io.hfile.Compression;
49  import org.apache.hadoop.hbase.io.hfile.HFile;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.junit.After;
52  import org.junit.Before;
53  import org.junit.Test;
54  import org.junit.experimental.categories.Category;
55  import org.junit.runner.RunWith;
56  import org.junit.runners.Parameterized;
57  import org.junit.runners.Parameterized.Parameters;
58  
59  /**
60   * Test various seek optimizations for correctness and check if they are
61   * actually saving I/O operations.
62   */
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestSeekOptimizations {

  private static final Log LOG =
      LogFactory.getLog(TestSeekOptimizations.class);

  // Constants
  private static final String FAMILY = "myCF";
  private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);

  // Number of random put timestamps / point-delete timestamps generated for
  // each row/column combination in each timestamp range.
  private static final int PUTS_PER_ROW_COL = 50;
  private static final int DELETES_PER_ROW_COL = 10;

  private static final int NUM_ROWS = 3;
  private static final int NUM_COLS = 3;

  // When true, logs every put/delete and per-scan seek statistics.
  private static final boolean VERBOSE = false;

  /**
   * Disable this when this test fails hopelessly and you need to debug a
   * simpler case.
   */
  private static final boolean USE_MANY_STORE_FILES = true;

  // Column-index subsets to scan with; an empty array selects all columns.
  private static final int[][] COLUMN_SETS = new int[][] {
    {},  // All columns
    {0},
    {1},
    {0, 2},
    {1, 2},
    {0, 1, 2},
  };

  // Both start row and end row are inclusive here for the purposes of this
  // test. A value of -1 means "unbounded" (mapped to an empty byte array by
  // rowBytes()).
  private static final int[][] ROW_RANGES = new int[][] {
    {-1, -1},
    {0, 1},
    {1, 1},
    {1, 2},
    {0, 2}
  };

  private static final int[] MAX_VERSIONS_VALUES = new int[] { 1, 2 };

  // Instance variables

  // Region under test, created fresh in testMultipleTimestampRanges().
  private HRegion region;
  // Put/Delete currently being assembled by createTimestampRange().
  private Put put;
  private Delete del;
  // Fixed-seed RNG (see setUp) so the generated data set is reproducible.
  private Random rand;
  // Timestamps written for the current row/column; entries are removed again
  // when a point delete is issued for them.
  private Set<Long> putTimestamps = new HashSet<Long>();
  // NOTE(review): cleared in createTimestampRange() but never populated —
  // appears to be vestigial; confirm before removing.
  private Set<Long> delTimestamps = new HashSet<Long>();
  // All KVs expected to survive the deletes; filtered per-scan by
  // filterExpectedResults() and globally by prepareExpectedKVs().
  private List<KeyValue> expectedKVs = new ArrayList<KeyValue>();

  private Compression.Algorithm comprAlgo;
  private StoreFile.BloomType bloomType;

  // Aggregate StoreFileScanner seek counts with lazy seek disabled
  // ("diligent") and enabled ("lazy"), summed across all scans.
  private long totalSeekDiligent, totalSeekLazy;

  private final static HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  /** Runs the test once per (bloom filter type, compression) combination. */
  @Parameters
  public static final Collection<Object[]> parameters() {
    return HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS;
  }

  public TestSeekOptimizations(Compression.Algorithm comprAlgo,
      StoreFile.BloomType bloomType) {
    this.comprAlgo = comprAlgo;
    this.bloomType = bloomType;
  }

  @Before
  public void setUp() {
    // Fixed seed: the generated puts/deletes must be identical on every run.
    rand = new Random(91238123L);
    expectedKVs.clear();
  }

  /**
   * Writes several timestamp ranges (each flushed to its own store file),
   * applies point and range deletes, then runs every combination of column
   * set, row range, max-versions and lazy-seek on/off, asserting that the
   * scan results match the independently computed expected KVs and that the
   * lazy seek optimization does not increase the number of seeks.
   */
  @Test
  public void testMultipleTimestampRanges() throws IOException {
    region = TEST_UTIL.createTestRegion(TestSeekOptimizations.class.getName(),
        new HColumnDescriptor(FAMILY)
            .setCompressionType(comprAlgo)
            .setBloomFilterType(bloomType)
    );

    // Delete the given timestamp and everything before.
    final long latestDelTS = USE_MANY_STORE_FILES ? 1397 : -1;

    createTimestampRange(1, 50, -1);
    createTimestampRange(51, 100, -1);
    if (USE_MANY_STORE_FILES) {
      // NOTE(review): ts=100 here overlaps the previous range's max —
      // presumably intentional, to exercise duplicate-timestamp handling.
      createTimestampRange(100, 500, 127);
      createTimestampRange(900, 1300, -1);
      createTimestampRange(1301, 2500, latestDelTS);
      createTimestampRange(2502, 2598, -1);
      createTimestampRange(2599, 2999, -1);
    }

    // Drop everything at or below latestDelTS from the expected set to mirror
    // the deleteColumns() issued above, then sort for comparison.
    prepareExpectedKVs(latestDelTS);

    for (int[] columnArr : COLUMN_SETS) {
      for (int[] rowRange : ROW_RANGES) {
        for (int maxVersions : MAX_VERSIONS_VALUES) {
          for (boolean lazySeekEnabled : new boolean[] { false, true }) {
            testScan(columnArr, lazySeekEnabled, rowRange[0], rowRange[1],
                maxVersions);
          }
        }
      }
    }

    final double seekSavings = 1 - totalSeekLazy * 1.0 / totalSeekDiligent;
    System.err.println("For bloom=" + bloomType + ", compr=" + comprAlgo +
        " total seeks without optimization: " + totalSeekDiligent
        + ", with optimization: " + totalSeekLazy + " (" +
        String.format("%.2f%%", totalSeekLazy * 100.0 / totalSeekDiligent) +
        "), savings: " + String.format("%.2f%%",
            100.0 * seekSavings) + "\n");

    // Test that lazy seeks are buying us something. Without the actual
    // implementation of the lazy seek optimization this will be 0.
    final double expectedSeekSavings = 0.0;
    assertTrue("Lazy seek is only saving " +
        String.format("%.2f%%", seekSavings * 100) + " seeks but should " +
        "save at least " + String.format("%.2f%%", expectedSeekSavings * 100),
        seekSavings >= expectedSeekSavings);
  }

  /**
   * Runs one scan over the region and asserts the results equal the filtered
   * expected KV list; also accumulates the seek count into the appropriate
   * lazy/diligent total.
   *
   * @param columnArr column indices to select (empty = all columns)
   * @param lazySeekEnabled whether to enable the lazy seek optimization
   * @param startRow inclusive start row index, or -1 for unbounded
   * @param endRow inclusive end row index, or -1 for unbounded
   * @param maxVersions maximum number of versions the scan should return
   */
  private void testScan(final int[] columnArr, final boolean lazySeekEnabled,
      final int startRow, final int endRow, int maxVersions)
      throws IOException {
    StoreScanner.enableLazySeekGlobally(lazySeekEnabled);
    final Scan scan = new Scan();
    final Set<String> qualSet = new HashSet<String>();
    for (int iColumn : columnArr) {
      String qualStr = getQualStr(iColumn);
      scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualStr));
      qualSet.add(qualStr);
    }
    scan.setMaxVersions(maxVersions);
    scan.setStartRow(rowBytes(startRow));

    // Adjust for the fact that for multi-row queries the end row is exclusive.
    {
      final byte[] scannerStopRow =
          rowBytes(endRow + (startRow != endRow ? 1 : 0));
      scan.setStopRow(scannerStopRow);
    }

    final long initialSeekCount = StoreFileScanner.getSeekCount();
    final InternalScanner scanner = region.getScanner(scan);
    final List<KeyValue> results = new ArrayList<KeyValue>();
    final List<KeyValue> actualKVs = new ArrayList<KeyValue>();

    // Such a clumsy do-while loop appears to be the official way to use an
    // internalScanner. scanner.next() return value refers to the _next_
    // result, not to the one already returned in results.
    boolean hasNext;
    do {
      hasNext = scanner.next(results);
      actualKVs.addAll(results);
      results.clear();
    } while (hasNext);

    List<KeyValue> filteredKVs = filterExpectedResults(qualSet,
        rowBytes(startRow), rowBytes(endRow), maxVersions);
    // Human-readable description of this scan configuration for assertion
    // messages and verbose output.
    final String rowRestrictionStr =
        (startRow == -1 && endRow == -1) ? "all rows" : (
            startRow == endRow ? ("row=" + startRow) : ("startRow="
            + startRow + ", " + "endRow=" + endRow));
    final String columnRestrictionStr =
        columnArr.length == 0 ? "all columns"
            : ("columns=" + Arrays.toString(columnArr));
    final String testDesc =
        "Bloom=" + bloomType + ", compr=" + comprAlgo + ", "
            + (scan.isGetScan() ? "Get" : "Scan") + ": "
            + columnRestrictionStr + ", " + rowRestrictionStr
            + ", maxVersions=" + maxVersions + ", lazySeek=" + lazySeekEnabled;
    long seekCount = StoreFileScanner.getSeekCount() - initialSeekCount;
    if (VERBOSE) {
      System.err.println("Seek count: " + seekCount + ", KVs returned: "
        + actualKVs.size() + ". " + testDesc +
        (lazySeekEnabled ? "\n" : ""));
    }
    if (lazySeekEnabled) {
      totalSeekLazy += seekCount;
    } else {
      totalSeekDiligent += seekCount;
    }
    assertKVListsEqual(testDesc, filteredKVs, actualKVs);
  }

  /**
   * Filters the global expected-KV list down to the KVs this particular scan
   * should return: row within [startRow, endRow] (both inclusive; an empty
   * byte array means unbounded), qualifier in qualSet (empty set = all), and
   * at most maxVersions versions per row/column. Relies on expectedKVs being
   * sorted by KeyValue.COMPARATOR (see prepareExpectedKVs), so for a given
   * row/column the newest versions are encountered first.
   */
  private List<KeyValue> filterExpectedResults(Set<String> qualSet,
      byte[] startRow, byte[] endRow, int maxVersions) {
    final List<KeyValue> filteredKVs = new ArrayList<KeyValue>();
    // Number of versions already kept per "row/family:qualifier" key.
    final Map<String, Integer> verCount = new HashMap<String, Integer>();
    for (KeyValue kv : expectedKVs) {
      if (startRow.length > 0 &&
          Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
              startRow, 0, startRow.length) < 0) {
        continue;
      }

      // In this unit test the end row is always inclusive.
      if (endRow.length > 0 &&
          Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
              endRow, 0, endRow.length) > 0) {
        continue;
      }

      if (!qualSet.isEmpty() && (Bytes.compareTo(
            kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
            FAMILY_BYTES, 0, FAMILY_BYTES.length
          ) != 0 ||
          !qualSet.contains(Bytes.toString(kv.getQualifier())))) {
        continue;
      }

      final String rowColStr =
        Bytes.toStringBinary(kv.getRow()) + "/"
            + Bytes.toStringBinary(kv.getFamily()) + ":"
            + Bytes.toStringBinary(kv.getQualifier());
      final Integer curNumVer = verCount.get(rowColStr);
      final int newNumVer = curNumVer != null ? (curNumVer + 1) : 1;
      if (newNumVer <= maxVersions) {
        filteredKVs.add(kv);
        verCount.put(rowColStr, newNumVer);
      }
    }

    return filteredKVs;
  }

  /**
   * Removes from expectedKVs everything at or below latestDelTS (mirroring
   * the inclusive semantics of Delete.deleteColumns), then sorts the list so
   * filterExpectedResults can apply max-versions correctly. A latestDelTS of
   * -1 means no range delete was issued, so nothing is removed.
   */
  private void prepareExpectedKVs(long latestDelTS) {
    final List<KeyValue> filteredKVs = new ArrayList<KeyValue>();
    for (KeyValue kv : expectedKVs) {
      if (kv.getTimestamp() > latestDelTS || latestDelTS == -1) {
        filteredKVs.add(kv);
      }
    }
    expectedKVs = filteredKVs;
    Collections.sort(expectedKVs, KeyValue.COMPARATOR);
  }

  /**
   * Adds a put at the given timestamp to the in-progress Put unless that
   * timestamp was already used for this row/column.
   * NOTE(review): the VERBOSE log fires even when the put was skipped as a
   * duplicate, so verbose output may over-report puts.
   */
  public void put(String qual, long ts) {
    if (!putTimestamps.contains(ts)) {
      put.add(FAMILY_BYTES, Bytes.toBytes(qual), ts, createValue(ts));
      putTimestamps.add(ts);
    }
    if (VERBOSE) {
      LOG.info("put: row " + Bytes.toStringBinary(put.getRow())
          + ", cf " + FAMILY + ", qualifier " + qual + ", ts " + ts);
    }
  }

  /** Deterministic value for a timestamp, e.g. "value1397". */
  private byte[] createValue(long ts) {
    return Bytes.toBytes("value" + ts);
  }

  /** Point-deletes the single version at exactly the given timestamp. */
  public void delAtTimestamp(String qual, long ts) {
    del.deleteColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts);
    logDelete(qual, ts, "at");
  }

  private void logDelete(String qual, long ts, String delType) {
    if (VERBOSE) {
      LOG.info("del " + delType + ": row "
          + Bytes.toStringBinary(put.getRow()) + ", cf " + FAMILY
          + ", qualifier " + qual + ", ts " + ts);
    }
  }

  /** Deletes all versions up to and including the given timestamp. */
  private void delUpToTimestamp(String qual, long upToTS) {
    del.deleteColumns(FAMILY_BYTES, Bytes.toBytes(qual), upToTS);
    logDelete(qual, upToTS, "up to and including");
  }

  /**
   * Uniform random long in [0, n). Maps Long.MIN_VALUE to Long.MAX_VALUE
   * first because Math.abs(Long.MIN_VALUE) would stay negative.
   */
  private long randLong(long n) {
    long l = rand.nextLong();
    if (l == Long.MIN_VALUE)
      l = Long.MAX_VALUE;
    return Math.abs(l) % n;
  }

  /** Uniform random long in the inclusive range [a, b]. */
  private long randBetween(long a, long b) {
    long x = a + randLong(b - a + 1);
    assertTrue(a <= x && x <= b);
    return x;
  }

  private final String rowStr(int i) {
    return ("row" + i).intern();
  }

  /** Row key for index i; -1 yields the empty array (unbounded scan edge). */
  private final byte[] rowBytes(int i) {
    if (i == -1) {
      return HConstants.EMPTY_BYTE_ARRAY;
    }
    return Bytes.toBytes(rowStr(i));
  }

  private final String getQualStr(int i) {
    return ("qual" + i).intern();
  }

  /**
   * For every row/column, writes the range endpoints plus PUTS_PER_ROW_COL
   * random timestamps in [minTS, maxTS], point-deletes exactly
   * DELETES_PER_ROW_COL of them, optionally range-deletes everything up to
   * deleteUpToTS (inclusive; -1 to skip), records the surviving timestamps in
   * expectedKVs, and finally flushes so each call produces one store file.
   */
  public void createTimestampRange(long minTS, long maxTS,
      long deleteUpToTS) throws IOException {
    assertTrue(minTS < maxTS);
    assertTrue(deleteUpToTS == -1
        || (minTS <= deleteUpToTS && deleteUpToTS <= maxTS));

    for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
      final String row = rowStr(iRow);
      final byte[] rowBytes = Bytes.toBytes(row);
      for (int iCol = 0; iCol < NUM_COLS; ++iCol) {
        final String qual = getQualStr(iCol);
        final byte[] qualBytes = Bytes.toBytes(qual);
        put = new Put(rowBytes);

        // Always include both range endpoints, then fill in random
        // timestamps (duplicates are skipped inside put()).
        putTimestamps.clear();
        put(qual, minTS);
        put(qual, maxTS);
        for (int i = 0; i < PUTS_PER_ROW_COL; ++i) {
          put(qual, randBetween(minTS, maxTS));
        }

        long[] putTimestampList = new long[putTimestamps.size()];
        {
          int i = 0;
          for (long ts : putTimestamps) {
            putTimestampList[i++] = ts;
          }
        }

        // Delete a predetermined number of particular timestamps.
        // Selection sampling: each timestamp is picked with probability
        // numToDel / tsRemaining, yielding exactly DELETES_PER_ROW_COL picks.
        delTimestamps.clear();
        assertTrue(putTimestampList.length >= DELETES_PER_ROW_COL);
        int numToDel = DELETES_PER_ROW_COL;
        int tsRemaining = putTimestampList.length;
        del = new Delete(rowBytes);
        for (long ts : putTimestampList) {
          if (rand.nextInt(tsRemaining) < numToDel) {
            delAtTimestamp(qual, ts);
            putTimestamps.remove(ts);
            --numToDel;
          }

          if (--tsRemaining == 0) {
            break;
          }
        }

        // Another type of delete: everything up to the given timestamp.
        if (deleteUpToTS != -1) {
          delUpToTimestamp(qual, deleteUpToTS);
        }

        region.put(put);
        if (!del.isEmpty()) {
          region.delete(del, null, true);
        }

        // Add remaining timestamps (those we have not deleted) to expected
        // results
        for (long ts : putTimestamps) {
          expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts,
              KeyValue.Type.Put));
        }
      }
    }

    // One store file per createTimestampRange() call.
    region.flushcache();
  }

  @After
  public void tearDown() throws IOException {
    if (region != null) {
      region.close();
      region.getLog().closeAndDelete();
    }

    // We have to re-set the lazy seek flag back to the default so that other
    // unit tests are not affected.
    StoreScanner.enableLazySeekGlobally(
        StoreScanner.LAZY_SEEK_ENABLED_BY_DEFAULT);
  }


  // Fails the test if the test leaks threads, file handles, etc.
  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}
458