View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertTrue;
24  
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.Collection;
28  import java.util.Collections;
29  import java.util.HashMap;
30  import java.util.HashSet;
31  import java.util.List;
32  import java.util.Map;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.TreeSet;
36  
37  import org.apache.commons.lang.ArrayUtils;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.hbase.CellComparator;
41  import org.apache.hadoop.hbase.HBaseTestingUtility;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.KeyValue;
44  import org.apache.hadoop.hbase.KeyValueTestUtil;
45  import org.apache.hadoop.hbase.MediumTests;
46  import org.apache.hadoop.hbase.client.Delete;
47  import org.apache.hadoop.hbase.client.Put;
48  import org.apache.hadoop.hbase.client.Scan;
49  import org.apache.hadoop.hbase.io.compress.Compression;
50  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
51  import org.apache.hadoop.hbase.util.Bytes;
52  import org.junit.Test;
53  import org.junit.experimental.categories.Category;
54  import org.junit.runner.RunWith;
55  import org.junit.runners.Parameterized;
56  import org.junit.runners.Parameterized.Parameters;
57  
58  /**
59   * Tests optimized scanning of multiple columns.
60   */
61  @RunWith(Parameterized.class)
62  @Category(MediumTests.class)
63  public class TestMultiColumnScanner {
64  
65    private static final Log LOG = LogFactory.getLog(TestMultiColumnScanner.class);
66  
67    private static final String TABLE_NAME =
68        TestMultiColumnScanner.class.getSimpleName();
69  
70    static final int MAX_VERSIONS = 50;
71  
72    private static final String FAMILY = "CF";
73    private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
74  
75    /**
76     * The size of the column qualifier set used. Increasing this parameter
77     * exponentially increases test time.
78     */
79    private static final int NUM_COLUMNS = 8;
80  
81    private static final int MAX_COLUMN_BIT_MASK = 1 << NUM_COLUMNS - 1;
82    private static final int NUM_FLUSHES = 10;
83    private static final int NUM_ROWS = 20;
84  
85    /** A large value of type long for use as a timestamp */
86    private static final long BIG_LONG = 9111222333444555666L;
87  
88    /**
89     * Timestamps to test with. Cannot use {@link Long#MAX_VALUE} here, because
90     * it will be replaced by an timestamp auto-generated based on the time.
91     */
92    private static final long[] TIMESTAMPS = new long[] { 1, 3, 5,
93        Integer.MAX_VALUE, BIG_LONG, Long.MAX_VALUE - 1 };
94  
95    /** The probability that a column is skipped in a store file. */
96    private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;
97  
98    /** The probability of skipping a column in a single row */
99    private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1;
100 
101   /** The probability of skipping a column everywhere */
102   private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1;
103 
104   /** The probability to delete a row/column pair */
105   private static final double DELETE_PROBABILITY = 0.02;
106 
107   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
108 
109   private final Compression.Algorithm comprAlgo;
110   private final BloomType bloomType;
111   private final DataBlockEncoding dataBlockEncoding;
112 
113   // Some static sanity-checking.
114   static {
115     assertTrue(BIG_LONG > 0.9 * Long.MAX_VALUE); // Guard against typos.
116 
117     // Ensure TIMESTAMPS are sorted.
118     for (int i = 0; i < TIMESTAMPS.length - 1; ++i)
119       assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
120   }
121 
122   @Parameters
123   public static final Collection<Object[]> parameters() {
124     List<Object[]> parameters = new ArrayList<Object[]>();
125     for (Object[] bloomAndCompressionParams :
126         HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) {
127       for (boolean useDataBlockEncoding : new boolean[]{false, true}) {
128         parameters.add(ArrayUtils.add(bloomAndCompressionParams,
129             useDataBlockEncoding));
130       }
131     }
132     return parameters;
133   }
134 
135   public TestMultiColumnScanner(Compression.Algorithm comprAlgo,
136       BloomType bloomType, boolean useDataBlockEncoding) {
137     this.comprAlgo = comprAlgo;
138     this.bloomType = bloomType;
139     this.dataBlockEncoding = useDataBlockEncoding ? DataBlockEncoding.PREFIX :
140         DataBlockEncoding.NONE;
141   }
142 
143   @Test
144   public void testMultiColumnScanner() throws IOException {
145     HRegion region = TEST_UTIL.createTestRegion(TABLE_NAME,
146         new HColumnDescriptor(FAMILY)
147             .setCompressionType(comprAlgo)
148             .setBloomFilterType(bloomType)
149             .setMaxVersions(MAX_VERSIONS)
150             .setDataBlockEncoding(dataBlockEncoding)
151     );
152     List<String> rows = sequentialStrings("row", NUM_ROWS);
153     List<String> qualifiers = sequentialStrings("qual", NUM_COLUMNS);
154     List<KeyValue> kvs = new ArrayList<KeyValue>();
155     Set<String> keySet = new HashSet<String>();
156 
157     // A map from <row>_<qualifier> to the most recent delete timestamp for
158     // that column.
159     Map<String, Long> lastDelTimeMap = new HashMap<String, Long>();
160 
161     Random rand = new Random(29372937L);
162     Set<String> rowQualSkip = new HashSet<String>();
163 
164     // Skip some columns in some rows. We need to test scanning over a set
165     // of columns when some of the columns are not there.
166     for (String row : rows)
167       for (String qual : qualifiers)
168         if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) {
169           LOG.info("Skipping " + qual + " in row " + row);
170           rowQualSkip.add(rowQualKey(row, qual));
171         }
172 
173     // Also skip some columns in all rows.
174     for (String qual : qualifiers)
175       if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) {
176         LOG.info("Skipping " + qual + " in all rows");
177         for (String row : rows)
178           rowQualSkip.add(rowQualKey(row, qual));
179       }
180 
181     for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
182       for (String qual : qualifiers) {
183         // This is where we decide to include or not include this column into
184         // this store file, regardless of row and timestamp.
185         if (rand.nextDouble() < COLUMN_SKIP_IN_STORE_FILE_PROB)
186           continue;
187 
188         byte[] qualBytes = Bytes.toBytes(qual);
189         for (String row : rows) {
190           Put p = new Put(Bytes.toBytes(row));
191           for (long ts : TIMESTAMPS) {
192             String value = createValue(row, qual, ts);
193             KeyValue kv = KeyValueTestUtil.create(row, FAMILY, qual, ts,
194                 value);
195             assertEquals(kv.getTimestamp(), ts);
196             p.add(kv);
197             String keyAsString = kv.toString();
198             if (!keySet.contains(keyAsString)) {
199               keySet.add(keyAsString);
200               kvs.add(kv);
201             }
202           }
203           region.put(p);
204 
205           Delete d = new Delete(Bytes.toBytes(row));
206           boolean deletedSomething = false;
207           for (long ts : TIMESTAMPS)
208             if (rand.nextDouble() < DELETE_PROBABILITY) {
209               d.deleteColumns(FAMILY_BYTES, qualBytes, ts);
210               String rowAndQual = row + "_" + qual;
211               Long whenDeleted = lastDelTimeMap.get(rowAndQual);
212               lastDelTimeMap.put(rowAndQual, whenDeleted == null ? ts
213                   : Math.max(ts, whenDeleted));
214               deletedSomething = true;
215             }
216           if (deletedSomething)
217             region.delete(d);
218         }
219       }
220       region.flushcache();
221     }
222 
223     Collections.sort(kvs, KeyValue.COMPARATOR);
224     for (int maxVersions = 1; maxVersions <= TIMESTAMPS.length; ++maxVersions) {
225       for (int columnBitMask = 1; columnBitMask <= MAX_COLUMN_BIT_MASK; ++columnBitMask) {
226         Scan scan = new Scan();
227         scan.setMaxVersions(maxVersions);
228         Set<String> qualSet = new TreeSet<String>();
229         {
230           int columnMaskTmp = columnBitMask;
231           for (String qual : qualifiers) {
232             if ((columnMaskTmp & 1) != 0) {
233               scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qual));
234               qualSet.add(qual);
235             }
236             columnMaskTmp >>= 1;
237           }
238           assertEquals(0, columnMaskTmp);
239         }
240 
241         InternalScanner scanner = region.getScanner(scan);
242         List<KeyValue> results = new ArrayList<KeyValue>();
243 
244         int kvPos = 0;
245         int numResults = 0;
246         String queryInfo = "columns queried: " + qualSet + " (columnBitMask="
247             + columnBitMask + "), maxVersions=" + maxVersions;
248 
249         while (scanner.next(results) || results.size() > 0) {
250           for (KeyValue kv : results) {
251             while (kvPos < kvs.size()
252                 && !matchesQuery(kvs.get(kvPos), qualSet, maxVersions,
253                     lastDelTimeMap)) {
254               ++kvPos;
255             }
256             String rowQual = getRowQualStr(kv);
257             String deleteInfo = "";
258             Long lastDelTS = lastDelTimeMap.get(rowQual);
259             if (lastDelTS != null) {
260               deleteInfo = "; last timestamp when row/column " + rowQual
261                   + " was deleted: " + lastDelTS;
262             }
263             assertTrue("Scanner returned additional key/value: " + kv + ", "
264                 + queryInfo + deleteInfo + ";", kvPos < kvs.size());
265             assertTrue("Scanner returned wrong key/value; " + queryInfo
266                 + deleteInfo + ";", CellComparator.equalsIgnoreMvccVersion(kvs.get(kvPos), (kv)));
267             ++kvPos;
268             ++numResults;
269           }
270           results.clear();
271         }
272         for (; kvPos < kvs.size(); ++kvPos) {
273           KeyValue remainingKV = kvs.get(kvPos);
274           assertFalse("Matching column not returned by scanner: "
275               + remainingKV + ", " + queryInfo + ", results returned: "
276               + numResults, matchesQuery(remainingKV, qualSet, maxVersions,
277               lastDelTimeMap));
278         }
279       }
280     }
281     assertTrue("This test is supposed to delete at least some row/column " +
282         "pairs", lastDelTimeMap.size() > 0);
283     LOG.info("Number of row/col pairs deleted at least once: " +
284        lastDelTimeMap.size());
285     HRegion.closeHRegion(region);
286   }
287 
288   private static String getRowQualStr(KeyValue kv) {
289     String rowStr = Bytes.toString(kv.getBuffer(), kv.getRowOffset(),
290         kv.getRowLength());
291     String qualStr = Bytes.toString(kv.getBuffer(), kv.getQualifierOffset(),
292         kv.getQualifierLength());
293     return rowStr + "_" + qualStr;
294   }
295 
296   private static boolean matchesQuery(KeyValue kv, Set<String> qualSet,
297       int maxVersions, Map<String, Long> lastDelTimeMap) {
298     Long lastDelTS = lastDelTimeMap.get(getRowQualStr(kv));
299     long ts = kv.getTimestamp();
300     return qualSet.contains(qualStr(kv))
301         && ts >= TIMESTAMPS[TIMESTAMPS.length - maxVersions]
302         && (lastDelTS == null || ts > lastDelTS);
303   }
304 
305   private static String qualStr(KeyValue kv) {
306     return Bytes.toString(kv.getBuffer(), kv.getQualifierOffset(),
307         kv.getQualifierLength());
308   }
309 
310   private static String rowQualKey(String row, String qual) {
311     return row + "_" + qual;
312   }
313 
314   static String createValue(String row, String qual, long ts) {
315     return "value_for_" + row + "_" + qual + "_" + ts;
316   }
317 
318   private static List<String> sequentialStrings(String prefix, int n) {
319     List<String> lst = new ArrayList<String>();
320     for (int i = 0; i < n; ++i) {
321       StringBuilder sb = new StringBuilder();
322       sb.append(prefix + i);
323 
324       // Make column length depend on i.
325       int iBitShifted = i;
326       while (iBitShifted != 0) {
327         sb.append((iBitShifted & 1) == 0 ? 'a' : 'b');
328         iBitShifted >>= 1;
329       }
330 
331       lst.add(sb.toString());
332     }
333 
334     return lst;
335   }
336 
337 
338 }
339