View Javadoc

1   /*
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.filter;
22  
23  import org.apache.hadoop.hbase.KeyValue;
24  import org.apache.hadoop.io.Writable;
25  
26  import java.util.List;
27  
28  /**
29   * Interface for row and column filters directly applied within the regionserver.
30   * A filter can expect the following call sequence:
31   *<ul>
32   * <li>{@link #reset()}</li>
33   * <li>{@link #filterAllRemaining()} -&gt; true indicates scan is over, false, keep going on.</li>
34   * <li>{@link #filterRowKey(byte[],int,int)} -&gt; true to drop this row,
35   * if false, we will also call</li>
36   * <li>{@link #filterKeyValue(KeyValue)} -&gt; true to drop this key/value</li>
37   * <li>{@link #filterRow(List)} -&gt; allows direct modification of the final list to be submitted</li>
38   * <li>{@link #filterRow()} -&gt; last chance to drop entire row based on the sequence of
39   * filterKeyValue() calls. Eg: filter a row if it doesn't contain a specified column.
40   * </li>
41   * </ul>
42   *
43   * Filter instances are created one per region/scan.  This interface replaces
44   * the old RowFilterInterface.
45   *
46   * When implementing your own filters, consider inheriting {@link FilterBase} to help
47   * you reduce boilerplate.
48   * 
49   * @see FilterBase
50   */
51  public interface Filter extends Writable {
52    /**
53     * Resets the state of the filter between rows.
54     */
55    public void reset();
56  
57    /**
58     * Filters a row based on the row key. If this returns true, the entire
59     * row will be excluded.  If false, each KeyValue in the row will be
60     * passed to {@link #filterKeyValue(KeyValue)} below.
61     *
62     * @param buffer buffer containing row key
63     * @param offset offset into buffer where row key starts
64     * @param length length of the row key
65     * @return true to remove the entire row, false to include the row (maybe).
66     */
67    public boolean filterRowKey(byte [] buffer, int offset, int length);
68  
69    /**
70     * If this returns true, the scan will terminate.
71     *
72     * @return true to end scan, false to continue.
73     */
74    public boolean filterAllRemaining();
75  
76    /**
77     * A way to filter based on the column family, column qualifier and/or the
78     * column value. Return code is described below.  This allows filters to
79     * filter only a certain number of columns, then terminate without matching
80     * every column.
81     *
82     * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
83     * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called
84     * just in case the caller calls for the next row.
85     *
86     * @param v the KeyValue in question
87     * @return code as described below
88     * @see Filter.ReturnCode
89     */
90    public ReturnCode filterKeyValue(KeyValue v);
91  
92    /**
93     * Return codes for {@link Filter#filterKeyValue(KeyValue)}.
94     */
95    public enum ReturnCode {
96      /**
97       * Include the KeyValue.
98       */
99      INCLUDE,
100     /**
101      * Skip this KeyValue.
102      */
103     SKIP,
104     /**
105      * Skip this column. Go to the next column in this row.
106      */
107     NEXT_COL,
108     /**
109      * Done with columns, skip to next row. Note that filterRow() will
110      * still be called.
111      */
112     NEXT_ROW,
113     /**
114      * Seek to the next key, which is given as a hint by the filter.
115      */
116     SEEK_NEXT_USING_HINT,
117 }
118 
119   /**
120    * Chance to alter the list of keyvalues to be submitted.
121    * Modifications made to the list carry over to what is submitted.
122    * @param kvs the list of keyvalues to be filtered
123    */
124   public void filterRow(List<KeyValue> kvs);
125 
126   /**
127    * Primarily used to check for conflicts with scans (such as scans
128    * that do not read a full row at a time).
129    * @return true if this filter actively uses {@link #filterRow(List)}.
130    */
131   public boolean hasFilterRow();
132 
133   /**
134    * Last chance to veto row based on previous {@link #filterKeyValue(KeyValue)}
135    * calls. The filter needs to retain state then return a particular value for
136    * this call if it wishes to exclude a row if a certain column is missing
137    * (for example).
138    * @return true to exclude row, false to include row.
139    */
140   public boolean filterRow();
141 
142   /**
143    * Supplies the seek target after this filter has returned the match code
144    * SEEK_NEXT_USING_HINT. After receiving that code, the QueryMatcher
145    * calls this method to find out which key it must seek to next.
146    * @param currentKV presumably the KeyValue being examined when the hint is requested (TODO confirm)
147    * @return the KeyValue to seek to next, or null if the filter is not sure
148    * which key to seek to next.
149    */
150   public KeyValue getNextKeyHint(KeyValue currentKV);
151 }