View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.filter;
21  
22  import java.io.IOException;
23  import java.util.List;
24  
25  import org.apache.hadoop.classification.InterfaceAudience;
26  import org.apache.hadoop.classification.InterfaceStability;
27  import org.apache.hadoop.hbase.Cell;
28  import org.apache.hadoop.hbase.KeyValue;
29  import org.apache.hadoop.hbase.exceptions.DeserializationException;
30  
31  /**
32   * Interface for row and column filters directly applied within the regionserver.
33   *
34   * A filter can expect the following call sequence:
35   * <ul>
36   *   <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
37   *   <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
38   *   <li> {@link #filterRowKey(byte[],int,int)}: true means drop this row; false means include.</li>
39   *   <li> {@link #filterKeyValue(Cell)}: decides whether to include or exclude this KeyValue.
40   *        See {@link ReturnCode}. </li>
41   *   <li> {@link #transform(KeyValue)}: if the KeyValue is included, let the filter transform the
42   *        KeyValue. </li>
43   *   <li> {@link #filterRowCells(List)}: allows direct modification of the final list to be submitted
44   *   <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
45   *        filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
46   * </ul>
47   *
48   * Filter instances are created one per region/scan.  This abstract class replaces
49   * the old RowFilterInterface.
50   *
51   * When implementing your own filters, consider inheriting {@link FilterBase} to help
52   * you reduce boilerplate.
53   *
54   * @see FilterBase
55   */
56  @InterfaceAudience.Public
57  @InterfaceStability.Stable
58  public abstract class Filter {
59    /**
60     * Reset the state of the filter between rows.
61     * 
62     * Concrete implementers can signal a failure condition in their code by throwing an
63     * {@link IOException}.
64     * 
65     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
66     */
67    abstract public void reset() throws IOException;
68  
69    /**
70     * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
71     * false, each KeyValue in the row will be passed to {@link #filterKeyValue(Cell)} below.
72     * 
73     * Concrete implementers can signal a failure condition in their code by throwing an
74     * {@link IOException}.
75     * 
76     * @param buffer buffer containing row key
77     * @param offset offset into buffer where row key starts
78     * @param length length of the row key
79     * @return true, remove entire row, false, include the row (maybe).
80     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
81     */
82    abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
83  
84    /**
85     * If this returns true, the scan will terminate.
86     * 
87     * Concrete implementers can signal a failure condition in their code by throwing an
88     * {@link IOException}.
89     * 
90     * @return true to end scan, false to continue.
91     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
92     */
93    abstract public boolean filterAllRemaining() throws IOException;
94  
95    /**
96     * A way to filter based on the column family, column qualifier and/or the column value. Return
97     * code is described below. This allows filters to filter only certain number of columns, then
98     * terminate without matching ever column.
99     * 
100    * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
101    * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
102    * for the next row.
103    * 
104    * Concrete implementers can signal a failure condition in their code by throwing an
105    * {@link IOException}.
106    * 
107    * @param v the Cell in question
108    * @return code as described below
109    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
110    * @see Filter.ReturnCode
111    */
112   abstract public ReturnCode filterKeyValue(final Cell v) throws IOException;
113 
114   /**
115    * Give the filter a chance to transform the passed KeyValue. If the Cell is changed a new
116    * Cell object must be returned.
117    * 
118    * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
119    *      The transformed KeyValue is what is eventually returned to the client. Most filters will
120    *      return the passed KeyValue unchanged.
121    * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue) for an example of a
122    *      transformation.
123    * 
124    *      Concrete implementers can signal a failure condition in their code by throwing an
125    *      {@link IOException}.
126    * 
127    * @param v the KeyValue in question
128    * @return the changed KeyValue
129    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
130    */
131   abstract public Cell transformCell(final Cell v) throws IOException;
132 
133   /**
134    * WARNING: please to not override this method.  Instead override {@link #transformCell(Cell)}.
135    * This is for transition from 0.94 -> 0.96
136    **/
137   @Deprecated // use Cell transformCell(final Cell)
138   abstract public KeyValue transform(final KeyValue currentKV) throws IOException;
139  
140   
141   /**
142    * Return codes for filterValue().
143    */
144   @InterfaceAudience.Public
145   @InterfaceStability.Stable
146   public enum ReturnCode {
147     /**
148      * Include the Cell
149      */
150     INCLUDE,
151     /**
152      * Include the Cell and seek to the next column skipping older versions.
153      */
154     INCLUDE_AND_NEXT_COL,
155     /**
156      * Skip this Cell
157      */
158     SKIP,
159     /**
160      * Skip this column. Go to the next column in this row.
161      */
162     NEXT_COL,
163     /**
164      * Done with columns, skip to next row. Note that filterRow() will
165      * still be called.
166      */
167     NEXT_ROW,
168     /**
169      * Seek to next key which is given as hint by the filter.
170      */
171     SEEK_NEXT_USING_HINT,
172 }
173 
174   /**
175    * Chance to alter the list of Cells to be submitted. Modifications to the list will carry on
176    * 
177    * Concrete implementers can signal a failure condition in their code by throwing an
178    * {@link IOException}.
179    * 
180    * @param kvs the list of Cells to be filtered
181    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
182    */
183   abstract public void filterRowCells(List<Cell> kvs) throws IOException;
184 
185   /**
186    * WARNING: please to not override this method.  Instead override {@link #filterRowCells(List)}.
187    * This is for transition from 0.94 -> 0.96
188    **/
189   @Deprecated
190   abstract public void filterRow(List<KeyValue> kvs) throws IOException;
191 
192   /**
193    * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
194    * time).
195    * 
196    * @return True if this filter actively uses filterRow(List) or filterRow().
197    */
198   abstract public boolean hasFilterRow();
199 
200   /**
201    * Last chance to veto row based on previous {@link #filterKeyValue(Cell)} calls. The filter
202    * needs to retain state then return a particular value for this call if they wish to exclude a
203    * row if a certain column is missing (for example).
204    * 
205    * Concrete implementers can signal a failure condition in their code by throwing an
206    * {@link IOException}.
207    * 
208    * @return true to exclude row, false to include row.
209    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
210    */
211   abstract public boolean filterRow() throws IOException;
212 
213   @Deprecated // use Cell GetNextKeyHint(final Cell)
214   abstract public KeyValue getNextKeyHint(final KeyValue currentKV) throws IOException;
215 
216   /**
217    * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
218    * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
219    * QueryMatcher would call this function to find out which key it must next seek to.
220    * 
221    * Concrete implementers can signal a failure condition in their code by throwing an
222    * {@link IOException}.
223    * 
224    * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
225    *         seek to next.
226    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
227    */
228   abstract public Cell getNextCellHint(final Cell currentKV) throws IOException;
229 
230   /**
231    * Check that given column family is essential for filter to check row. Most filters always return
232    * true here. But some could have more sophisticated logic which could significantly reduce
233    * scanning process by not even touching columns until we are 100% sure that it's data is needed
234    * in result.
235    * 
236    * Concrete implementers can signal a failure condition in their code by throwing an
237    * {@link IOException}.
238    * 
239    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
240    */
241   abstract public boolean isFamilyEssential(byte[] name) throws IOException;
242 
243   /**
244    * TODO: JAVADOC
245    * 
246    * Concrete implementers can signal a failure condition in their code by throwing an
247    * {@link IOException}.
248    * 
249    * @return The filter serialized using pb
250    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
251    */
252   abstract public byte[] toByteArray() throws IOException;
253 
254   /**
255    * 
256    * Concrete implementers can signal a failure condition in their code by throwing an
257    * {@link IOException}.
258    * 
259    * @param pbBytes A pb serialized {@link Filter} instance
260    * @return An instance of {@link Filter} made from <code>bytes</code>
261    * @throws DeserializationException
262    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
263    * @see #toByteArray
264    */
265   public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
266     throw new DeserializationException(
267       "parseFrom called on base Filter, but should be called on derived type");
268   }
269 
270   /**
271    * Concrete implementers can signal a failure condition in their code by throwing an
272    * {@link IOException}.
273    * 
274    * @param other
275    * @return true if and only if the fields of the filter that are serialized are equal to the
276    *         corresponding fields in other. Used for testing.
277    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
278    */
279   abstract boolean areSerializedFieldsEqual(Filter other);
280 }