/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import com.google.common.collect.Lists;

/**
 * Tests {@link FilterList}.
 * <p>
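 * A minimal usage sketch (the scan setup below is illustrative only and not
 * part of this test):
 * <pre>
 *   Scan scan = new Scan();
 *   scan.setFilter(new FilterList(Operator.MUST_PASS_ALL,
 *       new PageFilter(10),
 *       new PrefixFilter(Bytes.toBytes("row-"))));
 * </pre>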
 */
@Category(SmallTests.class)
public class TestFilterList {
  static final int MAX_PAGES = 2;
  static final char FIRST_CHAR = 'a';
  static final char LAST_CHAR = 'e';
  static byte[] GOOD_BYTES = Bytes.toBytes("abc");
  static byte[] BAD_BYTES = Bytes.toBytes("def");

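  /**
   * addFilter() should work no matter which of the public constructors built
   * the FilterList: varargs, List, or either form combined with an explicit
   * Operator.
   */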
  @Test
  public void testAddFilter() throws Exception {
    Filter filter1 = new FirstKeyOnlyFilter();
    Filter filter2 = new FirstKeyOnlyFilter();

    FilterList filterList = new FilterList(filter1, filter2);
    filterList.addFilter(new FirstKeyOnlyFilter());

    filterList = new FilterList(Arrays.asList(filter1, filter2));
    filterList.addFilter(new FirstKeyOnlyFilter());

    filterList = new FilterList(Operator.MUST_PASS_ALL, filter1, filter2);
    filterList.addFilter(new FirstKeyOnlyFilter());

    filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(filter1, filter2));
    filterList.addFilter(new FirstKeyOnlyFilter());
  }

  /**
   * Test "must pass one"
   * @throws Exception
   */
  @Test
  public void testMPONE() throws Exception {
    mpOneTest(getFilterMPONE());
  }

  private Filter getFilterMPONE() {
    List<Filter> filters = new ArrayList<Filter>();
    filters.add(new PageFilter(MAX_PAGES));
    filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
    Filter filterMPONE =
      new FilterList(FilterList.Operator.MUST_PASS_ONE, filters);
    return filterMPONE;
  }

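  /**
   * Drives a MUST_PASS_ONE (logical OR) list of PageFilter(MAX_PAGES) and
   * WhileMatchFilter(PrefixFilter("yyy")) through the scan lifecycle: a row is
   * only rejected once every filter in the list rejects it.
   */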
  private void mpOneTest(Filter filterMPONE) throws Exception {
    /* Filter must do all of the steps below:
     * <ul>
     * <li>{@link #reset()}</li>
     * <li>{@link #filterAllRemaining()} -> true indicates the scan is over; false means keep going.</li>
     * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row;
     * if false, we will also call</li>
     * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
     * <li>{@link #filterRow()} -> last chance to drop the entire row based on the sequence of
     * filterKeyValue() calls, e.g. filter a row if it doesn't contain a specified column.
     * </li>
     * </ul>
     */
    filterMPONE.reset();
    assertFalse(filterMPONE.filterAllRemaining());

    /* Will pass both */
    byte [] rowkey = Bytes.toBytes("yyyyyyyyy");
    for (int i = 0; i < MAX_PAGES - 1; i++) {
      assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
        Bytes.toBytes(i));
      assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
      assertFalse(filterMPONE.filterRow());
    }

    /* Will only pass the PageFilter */
    rowkey = Bytes.toBytes("z");
    assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
    KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(0),
        Bytes.toBytes(0));
    assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
    assertFalse(filterMPONE.filterRow());

    /* MAX_PAGES has been reached, so any further row should be filtered */
    rowkey = Bytes.toBytes("yyy");
    assertTrue(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
    kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(0),
        Bytes.toBytes(0));
    assertFalse(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
    assertFalse(filterMPONE.filterRow());

    /* Every remaining row should be filtered */
    rowkey = Bytes.toBytes("z");
    assertTrue(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
    assertTrue(filterMPONE.filterAllRemaining());
  }

  /**
   * Test "must pass all"
   * @throws Exception
   */
  @Test
  public void testMPALL() throws Exception {
    mpAllTest(getMPALLFilter());
  }

  private Filter getMPALLFilter() {
    List<Filter> filters = new ArrayList<Filter>();
    filters.add(new PageFilter(MAX_PAGES));
    filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
    Filter filterMPALL =
      new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
    return filterMPALL;
  }

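  /**
   * Drives a MUST_PASS_ALL (logical AND) list of the same two filters through a
   * scan: a single rejecting filter is enough to drop a row or key/value.
   */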
  private void mpAllTest(Filter filterMPALL) throws Exception {
    /* Filter must do all of the steps below:
     * <ul>
     * <li>{@link #reset()}</li>
     * <li>{@link #filterAllRemaining()} -> true indicates the scan is over; false means keep going.</li>
     * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row;
     * if false, we will also call</li>
     * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
     * <li>{@link #filterRow()} -> last chance to drop the entire row based on the sequence of
     * filterKeyValue() calls, e.g. filter a row if it doesn't contain a specified column.
     * </li>
     * </ul>
     */
    filterMPALL.reset();
    assertFalse(filterMPALL.filterAllRemaining());
    byte [] rowkey = Bytes.toBytes("yyyyyyyyy");
    for (int i = 0; i < MAX_PAGES - 1; i++) {
      assertFalse(filterMPALL.filterRowKey(rowkey, 0, rowkey.length));
      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
        Bytes.toBytes(i));
      assertTrue(Filter.ReturnCode.INCLUDE == filterMPALL.filterKeyValue(kv));
    }
    filterMPALL.reset();
    rowkey = Bytes.toBytes("z");
    assertTrue(filterMPALL.filterRowKey(rowkey, 0, rowkey.length));
    // The row is filtered out, so filterKeyValue() should say NEXT_ROW.
    KeyValue kv = new KeyValue(rowkey, rowkey, rowkey, rowkey);
    assertTrue(Filter.ReturnCode.NEXT_ROW == filterMPALL.filterKeyValue(kv));
  }

  /**
   * Test list ordering
   * @throws Exception
   */
  @Test
  public void testOrdering() throws Exception {
    orderingTest(getOrderingFilter());
  }

  public Filter getOrderingFilter() {
    List<Filter> filters = new ArrayList<Filter>();
    filters.add(new PrefixFilter(Bytes.toBytes("yyy")));
    filters.add(new PageFilter(MAX_PAGES));
    Filter filterMPONE =
      new FilterList(FilterList.Operator.MUST_PASS_ONE, filters);
    return filterMPONE;
  }

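  /**
   * Because a MUST_PASS_ONE list is evaluated lazily, rows matching the leading
   * PrefixFilter never advance the PageFilter, and prefix matches keep being
   * included even after the page limit has been reached.
   */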
  public void orderingTest(Filter filterMPONE) throws Exception {
    /* Filter must do all of the steps below:
     * <ul>
     * <li>{@link #reset()}</li>
     * <li>{@link #filterAllRemaining()} -> true indicates the scan is over; false means keep going.</li>
     * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row;
     * if false, we will also call</li>
     * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
     * <li>{@link #filterRow()} -> last chance to drop the entire row based on the sequence of
     * filterKeyValue() calls, e.g. filter a row if it doesn't contain a specified column.
     * </li>
     * </ul>
     */
    filterMPONE.reset();
    assertFalse(filterMPONE.filterAllRemaining());

    /* We should be able to fill MAX_PAGES without incrementing the page counter */
    byte [] rowkey = Bytes.toBytes("yyyyyyyy");
    for (int i = 0; i < MAX_PAGES; i++) {
      assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
          Bytes.toBytes(i));
      assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
      assertFalse(filterMPONE.filterRow());
    }

    /* Now let's fill the page filter */
    rowkey = Bytes.toBytes("xxxxxxx");
    for (int i = 0; i < MAX_PAGES; i++) {
      assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
          Bytes.toBytes(i));
      assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
      assertFalse(filterMPONE.filterRow());
    }

    /* We should still be able to include even though the page filter is at its max */
    rowkey = Bytes.toBytes("yyy");
    for (int i = 0; i < MAX_PAGES; i++) {
      assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
          Bytes.toBytes(i));
      assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
      assertFalse(filterMPONE.filterRow());
    }
  }

  /**
   * Test serialization
   * @throws Exception
   */
  @Test
  public void testSerialization() throws Exception {
    List<Filter> filters = new ArrayList<Filter>();
    filters.add(new PageFilter(MAX_PAGES));
    filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
    Filter filterMPALL =
      new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);

    // Decompose filterMPALL to bytes.
    byte[] buffer = filterMPALL.toByteArray();

    // Recompose filterMPALL.
    FilterList newFilter = FilterList.parseFrom(buffer);

    // Run tests
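    // ProtobufUtil.toFilter is overloaded: toFilter(Filter) produces the
    // protobuf message and toFilter(FilterProtos.Filter) turns it back into a
    // Filter, so each nested call below is a full serialize/deserialize round
    // trip before the behavioral checks are re-run.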
    mpOneTest(ProtobufUtil.toFilter(ProtobufUtil.toFilter(getFilterMPONE())));
    mpAllTest(ProtobufUtil.toFilter(ProtobufUtil.toFilter(getMPALLFilter())));
    orderingTest(ProtobufUtil.toFilter(ProtobufUtil.toFilter(getOrderingFilter())));
  }

  /**
   * Test filterKeyValue logic.
   * @throws Exception
   */
  @Test
  public void testFilterKeyValue() throws Exception {
    Filter includeFilter = new FilterBase() {
      @Override
      public Filter.ReturnCode filterKeyValue(KeyValue v) {
        return Filter.ReturnCode.INCLUDE;
      }
    };

    Filter alternateFilter = new FilterBase() {
      boolean returnInclude = true;

      @Override
      public Filter.ReturnCode filterKeyValue(KeyValue v) {
        Filter.ReturnCode returnCode = returnInclude ? Filter.ReturnCode.INCLUDE :
                                                       Filter.ReturnCode.SKIP;
        returnInclude = !returnInclude;
        return returnCode;
      }
    };

    Filter alternateIncludeFilter = new FilterBase() {
      boolean returnIncludeOnly = false;

      @Override
      public Filter.ReturnCode filterKeyValue(KeyValue v) {
        Filter.ReturnCode returnCode = returnIncludeOnly ? Filter.ReturnCode.INCLUDE :
                                                           Filter.ReturnCode.INCLUDE_AND_NEXT_COL;
        returnIncludeOnly = !returnIncludeOnly;
        return returnCode;
      }
    };

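    // The expected values below document how FilterList combines heterogeneous
    // return codes from its children under each operator.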
    // Check must pass one filter.
    FilterList mpOnefilterList = new FilterList(Operator.MUST_PASS_ONE,
        Arrays.asList(new Filter[] { includeFilter, alternateIncludeFilter, alternateFilter }));
    // INCLUDE, INCLUDE, INCLUDE_AND_NEXT_COL.
    assertEquals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL, mpOnefilterList.filterKeyValue(null));
    // INCLUDE, SKIP, INCLUDE.
    assertEquals(Filter.ReturnCode.INCLUDE, mpOnefilterList.filterKeyValue(null));

    // Check must pass all filter.
    FilterList mpAllfilterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(new Filter[] { includeFilter, alternateIncludeFilter, alternateFilter }));
    // INCLUDE, INCLUDE, INCLUDE_AND_NEXT_COL.
    assertEquals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL, mpAllfilterList.filterKeyValue(null));
    // INCLUDE, SKIP, INCLUDE.
    assertEquals(Filter.ReturnCode.SKIP, mpAllfilterList.filterKeyValue(null));
  }

  /**
   * Test pass-thru of hints.
   */
  @Test
  public void testHintPassThru() throws Exception {

    final KeyValue minKeyValue = new KeyValue(Bytes.toBytes(0L), null, null);
    final KeyValue maxKeyValue = new KeyValue(Bytes.toBytes(Long.MAX_VALUE),
        null, null);

    Filter filterNoHint = new FilterBase() {
      @Override
      public byte [] toByteArray() {return null;}
    };

    Filter filterMinHint = new FilterBase() {
      @Override
      public ReturnCode filterKeyValue(KeyValue ignored) {
        return ReturnCode.SEEK_NEXT_USING_HINT;
      }

      @Override
      public KeyValue getNextKeyHint(KeyValue currentKV) {
        return minKeyValue;
      }

      @Override
      public byte [] toByteArray() {return null;}
    };

    Filter filterMaxHint = new FilterBase() {
      @Override
      public ReturnCode filterKeyValue(KeyValue ignored) {
        return ReturnCode.SEEK_NEXT_USING_HINT;
      }

      @Override
      public KeyValue getNextKeyHint(KeyValue currentKV) {
        return new KeyValue(Bytes.toBytes(Long.MAX_VALUE), null, null);
      }

      @Override
      public byte [] toByteArray() {return null;}
    };

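    // For MUST_PASS_ONE the list may only seek as far as the smallest hint of
    // its children, and cannot seek at all if any child offers no hint; for
    // MUST_PASS_ALL any single child's hint is safe to use.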
    // MUST PASS ONE

    // Should take the min if given two hints
    FilterList filterList = new FilterList(Operator.MUST_PASS_ONE,
        Arrays.asList(new Filter [] { filterMinHint, filterMaxHint } ));
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        minKeyValue));

    // Should have no hint if any filter has no hint
    filterList = new FilterList(Operator.MUST_PASS_ONE,
        Arrays.asList(
            new Filter [] { filterMinHint, filterMaxHint, filterNoHint } ));
    assertNull(filterList.getNextKeyHint(null));
    filterList = new FilterList(Operator.MUST_PASS_ONE,
        Arrays.asList(new Filter [] { filterNoHint, filterMaxHint } ));
    assertNull(filterList.getNextKeyHint(null));

    // Should give the max hint if it's the only one
    filterList = new FilterList(Operator.MUST_PASS_ONE,
        Arrays.asList(new Filter [] { filterMaxHint, filterMaxHint } ));
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        maxKeyValue));

    // MUST PASS ALL

    // Should take the first hint
    filterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(new Filter [] { filterMinHint, filterMaxHint } ));
    filterList.filterKeyValue(null);
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        minKeyValue));

    filterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(new Filter [] { filterMaxHint, filterMinHint } ));
    filterList.filterKeyValue(null);
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        maxKeyValue));

    // Should have the first hint even if a filter has no hint
    filterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(
            new Filter [] { filterNoHint, filterMinHint, filterMaxHint } ));
    filterList.filterKeyValue(null);
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        minKeyValue));
    filterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(new Filter [] { filterNoHint, filterMaxHint } ));
    filterList.filterKeyValue(null);
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        maxKeyValue));
    filterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(new Filter [] { filterNoHint, filterMinHint } ));
    filterList.filterKeyValue(null);
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        minKeyValue));
  }

  /**
   * Tests the behavior of transform() in a hierarchical filter.
   *
   * transform() only applies after a filterKeyValue() whose return code
   * includes the KeyValue, and the test also exercises lazy evaluation of the
   * MUST_PASS_ALL (AND) sub-lists.
   */
  @Test
  public void testTransformMPO() throws Exception {
    // Apply the following filter:
    //     (family=fam AND qualifier=qual1 AND KeyOnlyFilter)
    //  OR (family=fam AND qualifier=qual2)
    final FilterList flist = new FilterList(Operator.MUST_PASS_ONE, Lists.<Filter>newArrayList(
        new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
            new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
            new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual1"))),
            new KeyOnlyFilter())),
        new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
            new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
            new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual2")))))));

    final KeyValue kvQual1 = new KeyValue(
        Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual1"), Bytes.toBytes("value"));
    final KeyValue kvQual2 = new KeyValue(
        Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual2"), Bytes.toBytes("value"));
    final KeyValue kvQual3 = new KeyValue(
        Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual3"), Bytes.toBytes("value"));

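    // KeyOnlyFilter's transform() strips the value, so the first branch both
    // includes fam:qual1 and empties its value, while the second branch
    // includes fam:qual2 with its value intact.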
    // Value for fam:qual1 should be stripped:
    assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual1));
    final KeyValue transformedQual1 = flist.transform(kvQual1);
    assertEquals(0, transformedQual1.getValue().length);

    // Value for fam:qual2 should not be stripped:
    assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual2));
    final KeyValue transformedQual2 = flist.transform(kvQual2);
    assertEquals("value", Bytes.toString(transformedQual2.getValue()));

    // Other keys should be skipped:
    assertEquals(Filter.ReturnCode.SKIP, flist.filterKeyValue(kvQual3));
  }

}