1   /**
2    * Copyright 2009 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.filter;
21  
22  import java.io.ByteArrayInputStream;
23  import java.io.ByteArrayOutputStream;
24  import java.io.DataInput;
25  import java.io.DataInputStream;
26  import java.io.DataOutput;
27  import java.io.DataOutputStream;
28  import java.io.IOException;
29  import java.util.ArrayList;
30  import java.util.Arrays;
31  import java.util.List;
32  
33  import junit.framework.TestCase;
34  
35  import org.apache.hadoop.hbase.KeyValue;
36  import org.apache.hadoop.hbase.SmallTests;
37  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
38  import org.apache.hadoop.hbase.filter.FilterList.Operator;
39  import org.apache.hadoop.hbase.util.Bytes;
40  import org.junit.experimental.categories.Category;
41  
42  import com.google.common.collect.Lists;
43  
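/**
 * Tests for {@link FilterList}: MUST_PASS_ONE and MUST_PASS_ALL evaluation,
 * member ordering, serialization, next-key hints and transform() handling.
 */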
48  @Category(SmallTests.class)
49  public class TestFilterList extends TestCase {
  // Page size handed to every PageFilter built in this class; also used as a loop bound.
  static final int MAX_PAGES = 2;
  // NOTE(review): FIRST_CHAR, LAST_CHAR, GOOD_BYTES and BAD_BYTES are not referenced
  // anywhere in this class -- confirm whether other code in the package still uses them.
  static final char FIRST_CHAR = 'a';
  static final char LAST_CHAR = 'e';
  static byte[] GOOD_BYTES = Bytes.toBytes("abc");
  static byte[] BAD_BYTES = Bytes.toBytes("def");
55  
56    /**
57     * Test "must pass one"
58     * @throws Exception
59     */
60    public void testMPONE() throws Exception {
61      List<Filter> filters = new ArrayList<Filter>();
62      filters.add(new PageFilter(MAX_PAGES));
63      filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
64      Filter filterMPONE =
65          new FilterList(FilterList.Operator.MUST_PASS_ONE, filters);
66      /* Filter must do all below steps:
67       * <ul>
68       * <li>{@link #reset()}</li>
69       * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
70       * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
71       * if false, we will also call</li>
72       * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
73       * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
74       * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
75       * </li>
76       * </ul>
77      */
78      filterMPONE.reset();
79      assertFalse(filterMPONE.filterAllRemaining());
80  
81      /* Will pass both */
82      byte [] rowkey = Bytes.toBytes("yyyyyyyyy");
83      for (int i = 0; i < MAX_PAGES - 1; i++) {
84        assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
85        KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
86          Bytes.toBytes(i));
87        assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
88        assertFalse(filterMPONE.filterRow());
89      }
90  
91      /* Only pass PageFilter */
92      rowkey = Bytes.toBytes("z");
93      assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
94      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(0),
95          Bytes.toBytes(0));
96      assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
97      assertFalse(filterMPONE.filterRow());
98  
99      /* reach MAX_PAGES already, should filter any rows */
100     rowkey = Bytes.toBytes("yyy");
101     assertTrue(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
102     kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(0),
103         Bytes.toBytes(0));
104     assertFalse(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
105 
106     /* We should filter any row */
107     rowkey = Bytes.toBytes("z");
108     assertTrue(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
109     assertTrue(filterMPONE.filterAllRemaining());
110   }
111 
112   /**
113    * Test "must pass all"
114    * @throws Exception
115    */
116   public void testMPALL() throws Exception {
117     List<Filter> filters = new ArrayList<Filter>();
118     filters.add(new PageFilter(MAX_PAGES));
119     filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
120     Filter filterMPALL =
121       new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
122     /* Filter must do all below steps:
123      * <ul>
124      * <li>{@link #reset()}</li>
125      * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
126      * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
127      * if false, we will also call</li>
128      * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
129      * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
130      * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
131      * </li>
132      * </ul>
133     */
134     filterMPALL.reset();
135     assertFalse(filterMPALL.filterAllRemaining());
136     byte [] rowkey = Bytes.toBytes("yyyyyyyyy");
137     for (int i = 0; i < MAX_PAGES - 1; i++) {
138       assertFalse(filterMPALL.filterRowKey(rowkey, 0, rowkey.length));
139       KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
140         Bytes.toBytes(i));
141       assertTrue(Filter.ReturnCode.INCLUDE == filterMPALL.filterKeyValue(kv));
142     }
143     filterMPALL.reset();
144     rowkey = Bytes.toBytes("z");
145     assertTrue(filterMPALL.filterRowKey(rowkey, 0, rowkey.length));
146     // Should fail here; row should be filtered out.
147     KeyValue kv = new KeyValue(rowkey, rowkey, rowkey, rowkey);
148     assertTrue(Filter.ReturnCode.NEXT_ROW == filterMPALL.filterKeyValue(kv));
149   }
150 
151   /**
152    * Test list ordering
153    * @throws Exception
154    */
155   public void testOrdering() throws Exception {
156     List<Filter> filters = new ArrayList<Filter>();
157     filters.add(new PrefixFilter(Bytes.toBytes("yyy")));
158     filters.add(new PageFilter(MAX_PAGES));
159     Filter filterMPONE =
160         new FilterList(FilterList.Operator.MUST_PASS_ONE, filters);
161     /* Filter must do all below steps:
162      * <ul>
163      * <li>{@link #reset()}</li>
164      * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
165      * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
166      * if false, we will also call</li>
167      * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
168      * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
169      * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
170      * </li>
171      * </ul>
172     */
173     filterMPONE.reset();
174     assertFalse(filterMPONE.filterAllRemaining());
175 
176     /* We should be able to fill MAX_PAGES without incrementing page counter */
177     byte [] rowkey = Bytes.toBytes("yyyyyyyy");
178     for (int i = 0; i < MAX_PAGES; i++) {
179       assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
180       KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
181           Bytes.toBytes(i));
182         assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
183       assertFalse(filterMPONE.filterRow());
184     }
185 
186     /* Now let's fill the page filter */
187     rowkey = Bytes.toBytes("xxxxxxx");
188     for (int i = 0; i < MAX_PAGES; i++) {
189       assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
190       KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
191           Bytes.toBytes(i));
192         assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
193       assertFalse(filterMPONE.filterRow());
194     }
195 
196     /* We should still be able to include even though page filter is at max */
197     rowkey = Bytes.toBytes("yyy");
198     for (int i = 0; i < MAX_PAGES; i++) {
199       assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
200       KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
201           Bytes.toBytes(i));
202         assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
203       assertFalse(filterMPONE.filterRow());
204     }
205   }
206 
207   /**
208    * Test serialization
209    * @throws Exception
210    */
211   public void testSerialization() throws Exception {
212     List<Filter> filters = new ArrayList<Filter>();
213     filters.add(new PageFilter(MAX_PAGES));
214     filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
215     Filter filterMPALL =
216       new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
217 
218     // Decompose filterMPALL to bytes.
219     ByteArrayOutputStream stream = new ByteArrayOutputStream();
220     DataOutputStream out = new DataOutputStream(stream);
221     filterMPALL.write(out);
222     out.close();
223     byte[] buffer = stream.toByteArray();
224 
225     // Recompose filterMPALL.
226     DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer));
227     FilterList newFilter = new FilterList();
228     newFilter.readFields(in);
229 
230     // TODO: Run TESTS!!!
231   }
232 
233   /**
234    * Test filterKeyValue logic.
235    * @throws Exception
236    */
237   public void testFilterKeyValue() throws Exception {
238     Filter includeFilter = new FilterBase() {
239       @Override
240       public Filter.ReturnCode filterKeyValue(KeyValue v) {
241         return Filter.ReturnCode.INCLUDE;
242       }
243 
244       @Override
245       public void readFields(DataInput arg0) throws IOException {}
246 
247       @Override
248       public void write(DataOutput arg0) throws IOException {}
249     };
250 
251     Filter alternateFilter = new FilterBase() {
252       boolean returnInclude = true;
253 
254       @Override
255       public Filter.ReturnCode filterKeyValue(KeyValue v) {
256         Filter.ReturnCode returnCode = returnInclude ? Filter.ReturnCode.INCLUDE :
257                                                        Filter.ReturnCode.SKIP;
258         returnInclude = !returnInclude;
259         return returnCode;
260       }
261 
262       @Override
263       public void readFields(DataInput arg0) throws IOException {}
264 
265       @Override
266       public void write(DataOutput arg0) throws IOException {}
267     };
268 
269     Filter alternateIncludeFilter = new FilterBase() {
270       boolean returnIncludeOnly = false;
271 
272       @Override
273       public Filter.ReturnCode filterKeyValue(KeyValue v) {
274         Filter.ReturnCode returnCode = returnIncludeOnly ? Filter.ReturnCode.INCLUDE :
275                                                            Filter.ReturnCode.INCLUDE_AND_NEXT_COL;
276         returnIncludeOnly = !returnIncludeOnly;
277         return returnCode;
278       }
279 
280       @Override
281       public void readFields(DataInput arg0) throws IOException {}
282 
283       @Override
284       public void write(DataOutput arg0) throws IOException {}
285     };
286 
287     // Check must pass one filter.
288     FilterList mpOnefilterList = new FilterList(Operator.MUST_PASS_ONE,
289         Arrays.asList(new Filter[] { includeFilter, alternateIncludeFilter, alternateFilter }));
290     // INCLUDE, INCLUDE, INCLUDE_AND_NEXT_COL.
291     assertEquals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL, mpOnefilterList.filterKeyValue(null));
292     // INCLUDE, SKIP, INCLUDE. 
293     assertEquals(Filter.ReturnCode.INCLUDE, mpOnefilterList.filterKeyValue(null));
294 
295     // Check must pass all filter.
296     FilterList mpAllfilterList = new FilterList(Operator.MUST_PASS_ALL,
297         Arrays.asList(new Filter[] { includeFilter, alternateIncludeFilter, alternateFilter }));
298     // INCLUDE, INCLUDE, INCLUDE_AND_NEXT_COL.
299     assertEquals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL, mpAllfilterList.filterKeyValue(null));
300     // INCLUDE, SKIP, INCLUDE. 
301     assertEquals(Filter.ReturnCode.SKIP, mpAllfilterList.filterKeyValue(null));
302   }
303 
304   /**
305    * Test pass-thru of hints.
306    */
307   public void testHintPassThru() throws Exception {
308 
309     final KeyValue minKeyValue = new KeyValue(Bytes.toBytes(0L), null, null);
310     final KeyValue maxKeyValue = new KeyValue(Bytes.toBytes(Long.MAX_VALUE),
311         null, null);
312 
313     Filter filterNoHint = new FilterBase() {
314       @Override
315       public void readFields(DataInput arg0) throws IOException {}
316 
317       @Override
318       public void write(DataOutput arg0) throws IOException {}
319     };
320 
321     Filter filterMinHint = new FilterBase() {
322       @Override
323       public KeyValue getNextKeyHint(KeyValue currentKV) {
324         return minKeyValue;
325       }
326 
327       @Override
328       public void readFields(DataInput arg0) throws IOException {}
329 
330       @Override
331       public void write(DataOutput arg0) throws IOException {}
332     };
333 
334     Filter filterMaxHint = new FilterBase() {
335       @Override
336       public KeyValue getNextKeyHint(KeyValue currentKV) {
337         return new KeyValue(Bytes.toBytes(Long.MAX_VALUE), null, null);
338       }
339 
340       @Override
341       public void readFields(DataInput arg0) throws IOException {}
342 
343       @Override
344       public void write(DataOutput arg0) throws IOException {}
345     };
346 
347     // MUST PASS ONE
348 
349     // Should take the min if given two hints
350     FilterList filterList = new FilterList(Operator.MUST_PASS_ONE,
351         Arrays.asList(new Filter [] { filterMinHint, filterMaxHint } ));
352     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
353         minKeyValue));
354 
355     // Should have no hint if any filter has no hint
356     filterList = new FilterList(Operator.MUST_PASS_ONE,
357         Arrays.asList(
358             new Filter [] { filterMinHint, filterMaxHint, filterNoHint } ));
359     assertNull(filterList.getNextKeyHint(null));
360     filterList = new FilterList(Operator.MUST_PASS_ONE,
361         Arrays.asList(new Filter [] { filterNoHint, filterMaxHint } ));
362     assertNull(filterList.getNextKeyHint(null));
363 
364     // Should give max hint if its the only one
365     filterList = new FilterList(Operator.MUST_PASS_ONE,
366         Arrays.asList(new Filter [] { filterMaxHint, filterMaxHint } ));
367     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
368         maxKeyValue));
369 
370     // MUST PASS ALL
371 
372     // Should take the max if given two hints
373     filterList = new FilterList(Operator.MUST_PASS_ALL,
374         Arrays.asList(new Filter [] { filterMinHint, filterMaxHint } ));
375     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
376         maxKeyValue));
377 
378     // Should have max hint even if a filter has no hint
379     filterList = new FilterList(Operator.MUST_PASS_ALL,
380         Arrays.asList(
381             new Filter [] { filterMinHint, filterMaxHint, filterNoHint } ));
382     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
383         maxKeyValue));
384     filterList = new FilterList(Operator.MUST_PASS_ALL,
385         Arrays.asList(new Filter [] { filterNoHint, filterMaxHint } ));
386     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
387         maxKeyValue));
388     filterList = new FilterList(Operator.MUST_PASS_ALL,
389         Arrays.asList(new Filter [] { filterNoHint, filterMinHint } ));
390     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
391         minKeyValue));
392 
393     // Should give min hint if its the only one
394     filterList = new FilterList(Operator.MUST_PASS_ALL,
395         Arrays.asList(new Filter [] { filterNoHint, filterMinHint } ));
396     assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
397         minKeyValue));
398   }
399 
400   /**
401    * Tests the behavior of transform() in a hierarchical filter.
402    *
403    * transform() only applies after a filterKeyValue() whose return-code includes the KeyValue.
404    * Lazy evaluation of AND
405    */
406   public void testTransformMPO() throws Exception {
407     // Apply the following filter:
408     //     (family=fam AND qualifier=qual1 AND KeyOnlyFilter)
409     //  OR (family=fam AND qualifier=qual2)
410     final FilterList flist = new FilterList(Operator.MUST_PASS_ONE, Lists.<Filter>newArrayList(
411         new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
412             new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
413             new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual1"))),
414             new KeyOnlyFilter())),
415         new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
416             new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
417             new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual2")))))));
418 
419     final KeyValue kvQual1 = new KeyValue(
420         Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual1"), Bytes.toBytes("value"));
421     final KeyValue kvQual2 = new KeyValue(
422         Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual2"), Bytes.toBytes("value"));
423     final KeyValue kvQual3 = new KeyValue(
424         Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual3"), Bytes.toBytes("value"));
425 
426     // Value for fam:qual1 should be stripped:
427     assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual1));
428     final KeyValue transformedQual1 = flist.transform(kvQual1);
429     assertEquals(0, transformedQual1.getValue().length);
430 
431     // Value for fam:qual2 should not be stripped:
432     assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual2));
433     final KeyValue transformedQual2 = flist.transform(kvQual2);
434     assertEquals("value", Bytes.toString(transformedQual2.getValue()));
435 
436     // Other keys should be skipped:
437     assertEquals(Filter.ReturnCode.SKIP, flist.filterKeyValue(kvQual3));
438   }
439 
  // JUnit rule field; presumably checks resource usage around each test (going by its
  // class name only -- verify against ResourceCheckerJUnitRule).
  // NOTE(review): this class extends junit.framework.TestCase; confirm that the runner
  // in use actually applies @Rule fields to JUnit 3 style test classes.
  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
443 }
444