View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.filter;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataInput;
24  import java.io.DataInputStream;
25  import java.io.DataOutput;
26  import java.io.DataOutputStream;
27  import java.io.IOException;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.List;
31  
32  import static org.junit.Assert.assertEquals;
33  import static org.junit.Assert.assertFalse;
34  import static org.junit.Assert.assertNotNull;
35  import static org.junit.Assert.assertTrue;
36  import static org.junit.Assert.assertNull;
37  
38  import org.apache.hadoop.hbase.Cell;
39  import org.apache.hadoop.hbase.KeyValue;
40  import org.apache.hadoop.hbase.KeyValueUtil;
41  import org.apache.hadoop.hbase.SmallTests;
42  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
43  import org.apache.hadoop.hbase.filter.FilterList.Operator;
44  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.junit.Test;
47  import org.junit.experimental.categories.Category;
48  
49  import com.google.common.collect.Lists;
50  
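/**
 * Tests {@link FilterList}: adding filters, MUST_PASS_ONE and MUST_PASS_ALL evaluation,
 * filter ordering, serialization round trips, seek-hint pass-through and transform().
 */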
55  @Category(SmallTests.class)
56  public class TestFilterList {
57    static final int MAX_PAGES = 2;
58    static final char FIRST_CHAR = 'a';
59    static final char LAST_CHAR = 'e';
60    static byte[] GOOD_BYTES = Bytes.toBytes("abc");
61    static byte[] BAD_BYTES = Bytes.toBytes("def");
62  
63  
64    @Test
65    public void testAddFilter() throws Exception {
66      Filter filter1 = new FirstKeyOnlyFilter();
67      Filter filter2 = new FirstKeyOnlyFilter();
68  
69      FilterList filterList = new FilterList(filter1, filter2);
70      filterList.addFilter(new FirstKeyOnlyFilter());
71  
72      filterList = new FilterList(Arrays.asList(filter1, filter2));
73      filterList.addFilter(new FirstKeyOnlyFilter());
74  
75      filterList = new FilterList(Operator.MUST_PASS_ALL, filter1, filter2);
76      filterList.addFilter(new FirstKeyOnlyFilter());
77  
78      filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(filter1, filter2));
79      filterList.addFilter(new FirstKeyOnlyFilter());
80  
81    }
82  
83  
84    /**
85     * Test "must pass one"
86     * @throws Exception
87     */
88    @Test
89    public void testMPONE() throws Exception {
90      mpOneTest(getFilterMPONE());
91    }
92  
93    private Filter getFilterMPONE() {
94      List<Filter> filters = new ArrayList<Filter>();
95      filters.add(new PageFilter(MAX_PAGES));
96      filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
97      Filter filterMPONE =
98        new FilterList(FilterList.Operator.MUST_PASS_ONE, filters);
99      return filterMPONE;
100   }
101 
102   private void mpOneTest(Filter filterMPONE) throws Exception {
103     /* Filter must do all below steps:
104      * <ul>
105      * <li>{@link #reset()}</li>
106      * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
107      * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
108      * if false, we will also call</li>
109      * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
110      * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
111      * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
112      * </li>
113      * </ul>
114     */
115     filterMPONE.reset();
116     assertFalse(filterMPONE.filterAllRemaining());
117 
118     /* Will pass both */
119     byte [] rowkey = Bytes.toBytes("yyyyyyyyy");
120     for (int i = 0; i < MAX_PAGES - 1; i++) {
121       assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
122       KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
123         Bytes.toBytes(i));
124       assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
125       assertFalse(filterMPONE.filterRow());
126     }
127 
128     /* Only pass PageFilter */
129     rowkey = Bytes.toBytes("z");
130     assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
131     KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(0),
132         Bytes.toBytes(0));
133     assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
134     assertFalse(filterMPONE.filterRow());
135 
136     /* reach MAX_PAGES already, should filter any rows */
137     rowkey = Bytes.toBytes("yyy");
138     assertTrue(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
139     kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(0),
140         Bytes.toBytes(0));
141     assertFalse(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
142     assertFalse(filterMPONE.filterRow());
143 
144     /* We should filter any row */
145     rowkey = Bytes.toBytes("z");
146     assertTrue(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
147     assertTrue(filterMPONE.filterAllRemaining());
148   }
149 
150   /**
151    * Test "must pass all"
152    * @throws Exception
153    */
154   @Test
155   public void testMPALL() throws Exception {
156     mpAllTest(getMPALLFilter());
157   }
158 
159   private Filter getMPALLFilter() {
160     List<Filter> filters = new ArrayList<Filter>();
161     filters.add(new PageFilter(MAX_PAGES));
162     filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
163     Filter filterMPALL =
164       new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
165     return filterMPALL;
166   }
167 
168   private void mpAllTest(Filter filterMPALL) throws Exception {
169     /* Filter must do all below steps:
170      * <ul>
171      * <li>{@link #reset()}</li>
172      * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
173      * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
174      * if false, we will also call</li>
175      * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
176      * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
177      * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
178      * </li>
179      * </ul>
180     */
181     filterMPALL.reset();
182     assertFalse(filterMPALL.filterAllRemaining());
183     byte [] rowkey = Bytes.toBytes("yyyyyyyyy");
184     for (int i = 0; i < MAX_PAGES - 1; i++) {
185       assertFalse(filterMPALL.filterRowKey(rowkey, 0, rowkey.length));
186       KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
187         Bytes.toBytes(i));
188       assertTrue(Filter.ReturnCode.INCLUDE == filterMPALL.filterKeyValue(kv));
189     }
190     filterMPALL.reset();
191     rowkey = Bytes.toBytes("z");
192     assertTrue(filterMPALL.filterRowKey(rowkey, 0, rowkey.length));
193     // Should fail here; row should be filtered out.
194     KeyValue kv = new KeyValue(rowkey, rowkey, rowkey, rowkey);
195     assertTrue(Filter.ReturnCode.NEXT_ROW == filterMPALL.filterKeyValue(kv));
196   }
197 
198   /**
199    * Test list ordering
200    * @throws Exception
201    */
202   @Test
203   public void testOrdering() throws Exception {
204     orderingTest(getOrderingFilter());
205   }
206 
207   public Filter getOrderingFilter() {
208     List<Filter> filters = new ArrayList<Filter>();
209     filters.add(new PrefixFilter(Bytes.toBytes("yyy")));
210     filters.add(new PageFilter(MAX_PAGES));
211     Filter filterMPONE =
212       new FilterList(FilterList.Operator.MUST_PASS_ONE, filters);
213     return filterMPONE;
214   }
215 
216   public void orderingTest(Filter filterMPONE) throws Exception {
217     /* Filter must do all below steps:
218      * <ul>
219      * <li>{@link #reset()}</li>
220      * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
221      * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
222      * if false, we will also call</li>
223      * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
224      * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
225      * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
226      * </li>
227      * </ul>
228     */
229     filterMPONE.reset();
230     assertFalse(filterMPONE.filterAllRemaining());
231 
232     /* We should be able to fill MAX_PAGES without incrementing page counter */
233     byte [] rowkey = Bytes.toBytes("yyyyyyyy");
234     for (int i = 0; i < MAX_PAGES; i++) {
235       assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
236       KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
237           Bytes.toBytes(i));
238         assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
239       assertFalse(filterMPONE.filterRow());
240     }
241 
242     /* Now let's fill the page filter */
243     rowkey = Bytes.toBytes("xxxxxxx");
244     for (int i = 0; i < MAX_PAGES; i++) {
245       assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
246       KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
247           Bytes.toBytes(i));
248         assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
249       assertFalse(filterMPONE.filterRow());
250     }
251 
252     /* We should still be able to include even though page filter is at max */
253     rowkey = Bytes.toBytes("yyy");
254     for (int i = 0; i < MAX_PAGES; i++) {
255       assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
256       KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
257           Bytes.toBytes(i));
258         assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
259       assertFalse(filterMPONE.filterRow());
260     }
261   }
262 
263   /**
264    * Test serialization
265    * @throws Exception
266    */
267   @Test
268   public void testSerialization() throws Exception {
269     List<Filter> filters = new ArrayList<Filter>();
270     filters.add(new PageFilter(MAX_PAGES));
271     filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
272     Filter filterMPALL =
273       new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
274 
275     // Decompose filterMPALL to bytes.
276     byte[] buffer = filterMPALL.toByteArray();
277 
278     // Recompose filterMPALL.
279     FilterList newFilter = FilterList.parseFrom(buffer);
280 
281     // Run tests
282     mpOneTest(ProtobufUtil.toFilter(ProtobufUtil.toFilter(getFilterMPONE())));
283     mpAllTest(ProtobufUtil.toFilter(ProtobufUtil.toFilter(getMPALLFilter())));
284     orderingTest(ProtobufUtil.toFilter(ProtobufUtil.toFilter(getOrderingFilter())));
285   }
286 
287   /**
288    * Test filterKeyValue logic.
289    * @throws Exception
290    */
291   public void testFilterKeyValue() throws Exception {
292     Filter includeFilter = new FilterBase() {
293       @Override
294       public Filter.ReturnCode filterKeyValue(Cell v) {
295         return Filter.ReturnCode.INCLUDE;
296       }
297     };
298 
299     Filter alternateFilter = new FilterBase() {
300       boolean returnInclude = true;
301 
302       @Override
303       public Filter.ReturnCode filterKeyValue(Cell v) {
304         Filter.ReturnCode returnCode = returnInclude ? Filter.ReturnCode.INCLUDE :
305                                                        Filter.ReturnCode.SKIP;
306         returnInclude = !returnInclude;
307         return returnCode;
308       }
309     };
310 
311     Filter alternateIncludeFilter = new FilterBase() {
312       boolean returnIncludeOnly = false;
313 
314       @Override
315       public Filter.ReturnCode filterKeyValue(Cell v) {
316         Filter.ReturnCode returnCode = returnIncludeOnly ? Filter.ReturnCode.INCLUDE :
317                                                            Filter.ReturnCode.INCLUDE_AND_NEXT_COL;
318         returnIncludeOnly = !returnIncludeOnly;
319         return returnCode;
320       }
321     };
322 
323     // Check must pass one filter.
324     FilterList mpOnefilterList = new FilterList(Operator.MUST_PASS_ONE,
325         Arrays.asList(new Filter[] { includeFilter, alternateIncludeFilter, alternateFilter }));
326     // INCLUDE, INCLUDE, INCLUDE_AND_NEXT_COL.
327     assertEquals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL, mpOnefilterList.filterKeyValue(null));
328     // INCLUDE, SKIP, INCLUDE. 
329     assertEquals(Filter.ReturnCode.INCLUDE, mpOnefilterList.filterKeyValue(null));
330 
331     // Check must pass all filter.
332     FilterList mpAllfilterList = new FilterList(Operator.MUST_PASS_ALL,
333         Arrays.asList(new Filter[] { includeFilter, alternateIncludeFilter, alternateFilter }));
334     // INCLUDE, INCLUDE, INCLUDE_AND_NEXT_COL.
335     assertEquals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL, mpAllfilterList.filterKeyValue(null));
336     // INCLUDE, SKIP, INCLUDE. 
337     assertEquals(Filter.ReturnCode.SKIP, mpAllfilterList.filterKeyValue(null));
338   }
339 
340   /**
341    * Test pass-thru of hints.
342    */
343   @Test
344   public void testHintPassThru() throws Exception {
345 
346     final KeyValue minKeyValue = new KeyValue(Bytes.toBytes(0L), null, null);
347     final KeyValue maxKeyValue = new KeyValue(Bytes.toBytes(Long.MAX_VALUE),
348         null, null);
349 
350     Filter filterNoHint = new FilterBase() {
351       @Override
352       public byte [] toByteArray() {return null;}
353     };
354 
355     Filter filterMinHint = new FilterBase() {
356       @Override
357       public ReturnCode filterKeyValue(Cell ignored) {
358         return ReturnCode.SEEK_NEXT_USING_HINT;
359       }
360 
361       @Override
362       public Cell getNextCellHint(Cell currentKV) {
363         return minKeyValue;
364       }
365 
366       @Override
367       public byte [] toByteArray() {return null;}
368     };
369 
370     Filter filterMaxHint = new FilterBase() {
371       @Override
372       public ReturnCode filterKeyValue(Cell ignored) {
373         return ReturnCode.SEEK_NEXT_USING_HINT;
374       }
375 
376       @Override
377       public Cell getNextCellHint(Cell currentKV) {
378         return new KeyValue(Bytes.toBytes(Long.MAX_VALUE), null, null);
379       }
380 
381       @Override
382       public byte [] toByteArray() {return null;}
383     };
384 
385     // MUST PASS ONE
386 
387     // Should take the min if given two hints
388     FilterList filterList = new FilterList(Operator.MUST_PASS_ONE,
389         Arrays.asList(new Filter [] { filterMinHint, filterMaxHint } ));
390     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
391         minKeyValue));
392 
393     // Should have no hint if any filter has no hint
394     filterList = new FilterList(Operator.MUST_PASS_ONE,
395         Arrays.asList(
396             new Filter [] { filterMinHint, filterMaxHint, filterNoHint } ));
397     assertNull(filterList.getNextKeyHint(null));
398     filterList = new FilterList(Operator.MUST_PASS_ONE,
399         Arrays.asList(new Filter [] { filterNoHint, filterMaxHint } ));
400     assertNull(filterList.getNextKeyHint(null));
401 
402     // Should give max hint if its the only one
403     filterList = new FilterList(Operator.MUST_PASS_ONE,
404         Arrays.asList(new Filter [] { filterMaxHint, filterMaxHint } ));
405     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
406         maxKeyValue));
407 
408     // MUST PASS ALL
409 
410     // Should take the first hint
411     filterList = new FilterList(Operator.MUST_PASS_ALL,
412         Arrays.asList(new Filter [] { filterMinHint, filterMaxHint } ));
413     filterList.filterKeyValue(null);
414     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
415         minKeyValue));
416 
417     filterList = new FilterList(Operator.MUST_PASS_ALL,
418         Arrays.asList(new Filter [] { filterMaxHint, filterMinHint } ));
419     filterList.filterKeyValue(null);
420     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
421         maxKeyValue));
422 
423     // Should have first hint even if a filter has no hint
424     filterList = new FilterList(Operator.MUST_PASS_ALL,
425         Arrays.asList(
426             new Filter [] { filterNoHint, filterMinHint, filterMaxHint } ));
427     filterList.filterKeyValue(null);
428     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
429         minKeyValue));
430     filterList = new FilterList(Operator.MUST_PASS_ALL,
431         Arrays.asList(new Filter [] { filterNoHint, filterMaxHint } ));
432     filterList.filterKeyValue(null);
433     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
434         maxKeyValue));
435     filterList = new FilterList(Operator.MUST_PASS_ALL,
436         Arrays.asList(new Filter [] { filterNoHint, filterMinHint } ));
437     filterList.filterKeyValue(null);
438     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
439         minKeyValue));
440   }
441 
442   /**
443    * Tests the behavior of transform() in a hierarchical filter.
444    *
445    * transform() only applies after a filterKeyValue() whose return-code includes the KeyValue.
446    * Lazy evaluation of AND
447    */
448   @Test
449   public void testTransformMPO() throws Exception {
450     // Apply the following filter:
451     //     (family=fam AND qualifier=qual1 AND KeyOnlyFilter)
452     //  OR (family=fam AND qualifier=qual2)
453     final FilterList flist = new FilterList(Operator.MUST_PASS_ONE, Lists.<Filter>newArrayList(
454         new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
455             new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
456             new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual1"))),
457             new KeyOnlyFilter())),
458         new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
459             new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
460             new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual2")))))));
461 
462     final KeyValue kvQual1 = new KeyValue(
463         Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual1"), Bytes.toBytes("value"));
464     final KeyValue kvQual2 = new KeyValue(
465         Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual2"), Bytes.toBytes("value"));
466     final KeyValue kvQual3 = new KeyValue(
467         Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual3"), Bytes.toBytes("value"));
468 
469     // Value for fam:qual1 should be stripped:
470     assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual1));
471     final KeyValue transformedQual1 = KeyValueUtil.ensureKeyValue(flist.transform(kvQual1));
472     assertEquals(0, transformedQual1.getValue().length);
473 
474     // Value for fam:qual2 should not be stripped:
475     assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual2));
476     final KeyValue transformedQual2 = KeyValueUtil.ensureKeyValue(flist.transform(kvQual2));
477     assertEquals("value", Bytes.toString(transformedQual2.getValue()));
478 
479     // Other keys should be skipped:
480     assertEquals(Filter.ReturnCode.SKIP, flist.filterKeyValue(kvQual3));
481   }
482 
483 }
484