View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.filter;
21  
22  import java.io.IOException;
23  import java.util.List;
24  
25  import org.apache.hadoop.classification.InterfaceAudience;
26  import org.apache.hadoop.classification.InterfaceStability;
27  import org.apache.hadoop.hbase.KeyValue;
28  import org.apache.hadoop.hbase.exceptions.DeserializationException;
29  
30  /**
31   * Interface for row and column filters directly applied within the regionserver.
32   *
33   * A filter can expect the following call sequence:
34   * <ul>
35   *   <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
36   *   <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
37   *   <li> {@link #filterRowKey(byte[],int,int)}: true means drop this row; false means include.</li>
38   *   <li> {@link #filterKeyValue(KeyValue)}: decides whether to include or exclude this KeyValue.
39   *        See {@link ReturnCode}. </li>
40   *   <li> {@link #transform(KeyValue)}: if the KeyValue is included, let the filter transform the
41   *        KeyValue. </li>
42   *   <li> {@link #filterRow(List)}: allows direct modification of the final list to be submitted
43   *   <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
44   *        filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
45   * </ul>
46   *
47   * Filter instances are created one per region/scan.  This abstract class replaces
48   * the old RowFilterInterface.
49   *
50   * When implementing your own filters, consider inheriting {@link FilterBase} to help
51   * you reduce boilerplate.
52   *
53   * @see FilterBase
54   */
55  @InterfaceAudience.Public
56  @InterfaceStability.Stable
57  public abstract class Filter {
58    /**
59     * Reset the state of the filter between rows.
60     * 
61     * Concrete implementers can signal a failure condition in their code by throwing an
62     * {@link IOException}.
63     * 
64     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
65     */
66    abstract public void reset() throws IOException;
67  
68    /**
69     * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
70     * false, each KeyValue in the row will be passed to {@link #filterKeyValue(KeyValue)} below.
71     * 
72     * Concrete implementers can signal a failure condition in their code by throwing an
73     * {@link IOException}.
74     * 
75     * @param buffer buffer containing row key
76     * @param offset offset into buffer where row key starts
77     * @param length length of the row key
78     * @return true, remove entire row, false, include the row (maybe).
79     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
80     */
81    abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
82  
83    /**
84     * If this returns true, the scan will terminate.
85     * 
86     * Concrete implementers can signal a failure condition in their code by throwing an
87     * {@link IOException}.
88     * 
89     * @return true to end scan, false to continue.
90     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
91     */
92    abstract public boolean filterAllRemaining() throws IOException;
93  
94    /**
95     * A way to filter based on the column family, column qualifier and/or the column value. Return
96     * code is described below. This allows filters to filter only certain number of columns, then
97     * terminate without matching ever column.
98     * 
99     * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
100    * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
101    * for the next row.
102    * 
103    * Concrete implementers can signal a failure condition in their code by throwing an
104    * {@link IOException}.
105    * 
106    * @param v the KeyValue in question
107    * @return code as described below
108    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
109    * @see Filter.ReturnCode
110    */
111   abstract public ReturnCode filterKeyValue(final KeyValue v) throws IOException;
112 
113   /**
114    * Give the filter a chance to transform the passed KeyValue. If the KeyValue is changed a new
115    * KeyValue object must be returned.
116    * 
117    * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
118    *      The transformed KeyValue is what is eventually returned to the client. Most filters will
119    *      return the passed KeyValue unchanged.
120    * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue) for an example of a
121    *      transformation.
122    * 
123    *      Concrete implementers can signal a failure condition in their code by throwing an
124    *      {@link IOException}.
125    * 
126    * @param v the KeyValue in question
127    * @return the changed KeyValue
128    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
129    */
130   abstract public KeyValue transform(final KeyValue v) throws IOException;
131 
132   /**
133    * Return codes for filterValue().
134    */
135   public enum ReturnCode {
136     /**
137      * Include the KeyValue
138      */
139     INCLUDE,
140     /**
141      * Include the KeyValue and seek to the next column skipping older versions.
142      */
143     INCLUDE_AND_NEXT_COL,
144     /**
145      * Skip this KeyValue
146      */
147     SKIP,
148     /**
149      * Skip this column. Go to the next column in this row.
150      */
151     NEXT_COL,
152     /**
153      * Done with columns, skip to next row. Note that filterRow() will
154      * still be called.
155      */
156     NEXT_ROW,
157     /**
158      * Seek to next key which is given as hint by the filter.
159      */
160     SEEK_NEXT_USING_HINT,
161 }
162 
163   /**
164    * Chance to alter the list of keyvalues to be submitted. Modifications to the list will carry on
165    * 
166    * Concrete implementers can signal a failure condition in their code by throwing an
167    * {@link IOException}.
168    * 
169    * @param kvs the list of keyvalues to be filtered
170    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
171    */
172   abstract public void filterRow(List<KeyValue> kvs) throws IOException;
173 
174   /**
175    * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
176    * time).
177    * 
178    * @return True if this filter actively uses filterRow(List) or filterRow().
179    */
180   abstract public boolean hasFilterRow();
181 
182   /**
183    * Last chance to veto row based on previous {@link #filterKeyValue(KeyValue)} calls. The filter
184    * needs to retain state then return a particular value for this call if they wish to exclude a
185    * row if a certain column is missing (for example).
186    * 
187    * Concrete implementers can signal a failure condition in their code by throwing an
188    * {@link IOException}.
189    * 
190    * @return true to exclude row, false to include row.
191    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
192    */
193   abstract public boolean filterRow() throws IOException;
194 
195   /**
196    * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
197    * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
198    * QueryMatcher would call this function to find out which key it must next seek to.
199    * 
200    * Concrete implementers can signal a failure condition in their code by throwing an
201    * {@link IOException}.
202    * 
203    * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
204    *         seek to next.
205    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
206    */
207   abstract public KeyValue getNextKeyHint(final KeyValue currentKV) throws IOException;
208 
209   /**
210    * Check that given column family is essential for filter to check row. Most filters always return
211    * true here. But some could have more sophisticated logic which could significantly reduce
212    * scanning process by not even touching columns until we are 100% sure that it's data is needed
213    * in result.
214    * 
215    * Concrete implementers can signal a failure condition in their code by throwing an
216    * {@link IOException}.
217    * 
218    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
219    */
220   abstract public boolean isFamilyEssential(byte[] name) throws IOException;
221 
222   /**
223    * TODO: JAVADOC
224    * 
225    * Concrete implementers can signal a failure condition in their code by throwing an
226    * {@link IOException}.
227    * 
228    * @return The filter serialized using pb
229    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
230    */
231   abstract public byte[] toByteArray() throws IOException;
232 
233   /**
234    * 
235    * Concrete implementers can signal a failure condition in their code by throwing an
236    * {@link IOException}.
237    * 
238    * @param pbBytes A pb serialized {@link Filter} instance
239    * @return An instance of {@link Filter} made from <code>bytes</code>
240    * @throws DeserializationException
241    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
242    * @see #toByteArray
243    */
244   public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
245     throw new DeserializationException(
246       "parseFrom called on base Filter, but should be called on derived type");
247   }
248 
249   /**
250    * Concrete implementers can signal a failure condition in their code by throwing an
251    * {@link IOException}.
252    * 
253    * @param other
254    * @return true if and only if the fields of the filter that are serialized are equal to the
255    *         corresponding fields in other. Used for testing.
256    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
257    */
258   abstract boolean areSerializedFieldsEqual(Filter other);
259 }