View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.filter;
21  
22  import java.io.IOException;
23  import java.util.List;
24  
25  import org.apache.hadoop.classification.InterfaceAudience;
26  import org.apache.hadoop.classification.InterfaceStability;
27  import org.apache.hadoop.hbase.KeyValue;
28  import org.apache.hadoop.hbase.exceptions.DeserializationException;
29  
30  /**
31   * Interface for row and column filters directly applied within the regionserver.
32   * A filter can expect the following call sequence:
33   *<ul>
34   * <li>{@link #reset()}</li>
35   * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
36   * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
37   * if false, we will also call</li>
38   * <li>{@link #filterKeyValue(KeyValue)} -> true to drop this key/value</li>
39   * <li>{@link #filterRow(List)} -> allows directmodification of the final list to be submitted
40   * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
41   * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
42   * </li>
43   * </ul>
44   *
45   * Filter instances are created one per region/scan.  This abstract class replaces
46   * the old RowFilterInterface.
47   *
48   * When implementing your own filters, consider inheriting {@link FilterBase} to help
49   * you reduce boilerplate.
50   * 
51   * @see FilterBase
52   */
53  @InterfaceAudience.Public
54  @InterfaceStability.Stable
55  public abstract class Filter {
56    /**
57     * Reset the state of the filter between rows.
58     * 
59     * Concrete implementers can signal a failure condition in their code by throwing an
60     * {@link IOException}.
61     * 
62     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
63     */
64    abstract public void reset() throws IOException;
65  
66    /**
67     * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
68     * false, each KeyValue in the row will be passed to {@link #filterKeyValue(KeyValue)} below.
69     * 
70     * Concrete implementers can signal a failure condition in their code by throwing an
71     * {@link IOException}.
72     * 
73     * @param buffer buffer containing row key
74     * @param offset offset into buffer where row key starts
75     * @param length length of the row key
76     * @return true, remove entire row, false, include the row (maybe).
77     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
78     */
79    abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
80  
81    /**
82     * If this returns true, the scan will terminate.
83     * 
84     * Concrete implementers can signal a failure condition in their code by throwing an
85     * {@link IOException}.
86     * 
87     * @return true to end scan, false to continue.
88     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
89     */
90    abstract public boolean filterAllRemaining() throws IOException;
91  
92    /**
93     * A way to filter based on the column family, column qualifier and/or the column value. Return
94     * code is described below. This allows filters to filter only certain number of columns, then
95     * terminate without matching ever column.
96     * 
97     * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
98     * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
99     * for the next row.
100    * 
101    * Concrete implementers can signal a failure condition in their code by throwing an
102    * {@link IOException}.
103    * 
104    * @param v the KeyValue in question
105    * @return code as described below
106    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
107    * @see Filter.ReturnCode
108    */
109   abstract public ReturnCode filterKeyValue(final KeyValue v) throws IOException;
110 
111   /**
112    * Give the filter a chance to transform the passed KeyValue. If the KeyValue is changed a new
113    * KeyValue object must be returned.
114    * 
115    * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
116    *      The transformed KeyValue is what is eventually returned to the client. Most filters will
117    *      return the passed KeyValue unchanged.
118    * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue) for an example of a
119    *      transformation.
120    * 
121    *      Concrete implementers can signal a failure condition in their code by throwing an
122    *      {@link IOException}.
123    * 
124    * @param v the KeyValue in question
125    * @return the changed KeyValue
126    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
127    */
128   abstract public KeyValue transform(final KeyValue v) throws IOException;
129 
130   /**
131    * Return codes for filterValue().
132    */
133   public enum ReturnCode {
134     /**
135      * Include the KeyValue
136      */
137     INCLUDE,
138     /**
139      * Include the KeyValue and seek to the next column skipping older versions.
140      */
141     INCLUDE_AND_NEXT_COL,
142     /**
143      * Skip this KeyValue
144      */
145     SKIP,
146     /**
147      * Skip this column. Go to the next column in this row.
148      */
149     NEXT_COL,
150     /**
151      * Done with columns, skip to next row. Note that filterRow() will
152      * still be called.
153      */
154     NEXT_ROW,
155     /**
156      * Seek to next key which is given as hint by the filter.
157      */
158     SEEK_NEXT_USING_HINT,
159 }
160 
161   /**
162    * Chance to alter the list of keyvalues to be submitted. Modifications to the list will carry on
163    * 
164    * Concrete implementers can signal a failure condition in their code by throwing an
165    * {@link IOException}.
166    * 
167    * @param kvs the list of keyvalues to be filtered
168    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
169    */
170   abstract public void filterRow(List<KeyValue> kvs) throws IOException;
171 
172   /**
173    * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
174    * time).
175    * 
176    * @return True if this filter actively uses filterRow(List) or filterRow().
177    */
178   abstract public boolean hasFilterRow();
179 
180   /**
181    * Last chance to veto row based on previous {@link #filterKeyValue(KeyValue)} calls. The filter
182    * needs to retain state then return a particular value for this call if they wish to exclude a
183    * row if a certain column is missing (for example).
184    * 
185    * Concrete implementers can signal a failure condition in their code by throwing an
186    * {@link IOException}.
187    * 
188    * @return true to exclude row, false to include row.
189    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
190    */
191   abstract public boolean filterRow() throws IOException;
192 
193   /**
194    * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
195    * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
196    * QueryMatcher would call this function to find out which key it must next seek to.
197    * 
198    * Concrete implementers can signal a failure condition in their code by throwing an
199    * {@link IOException}.
200    * 
201    * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
202    *         seek to next.
203    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
204    */
205   abstract public KeyValue getNextKeyHint(final KeyValue currentKV) throws IOException;
206 
207   /**
208    * Check that given column family is essential for filter to check row. Most filters always return
209    * true here. But some could have more sophisticated logic which could significantly reduce
210    * scanning process by not even touching columns until we are 100% sure that it's data is needed
211    * in result.
212    * 
213    * Concrete implementers can signal a failure condition in their code by throwing an
214    * {@link IOException}.
215    * 
216    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
217    */
218   abstract public boolean isFamilyEssential(byte[] name) throws IOException;
219 
220   /**
221    * TODO: JAVADOC
222    * 
223    * Concrete implementers can signal a failure condition in their code by throwing an
224    * {@link IOException}.
225    * 
226    * @return The filter serialized using pb
227    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
228    */
229   abstract public byte[] toByteArray() throws IOException;
230 
231   /**
232    * 
233    * Concrete implementers can signal a failure condition in their code by throwing an
234    * {@link IOException}.
235    * 
236    * @param pbBytes A pb serialized {@link Filter} instance
237    * @return An instance of {@link Filter} made from <code>bytes</code>
238    * @throws DeserializationException
239    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
240    * @see #toByteArray
241    */
242   public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
243     throw new DeserializationException(
244       "parseFrom called on base Filter, but should be called on derived type");
245   }
246 
247   /**
248    * Concrete implementers can signal a failure condition in their code by throwing an
249    * {@link IOException}.
250    * 
251    * @param other
252    * @return true if and only if the fields of the filter that are serialized are equal to the
253    *         corresponding fields in other. Used for testing.
254    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
255    */
256   abstract boolean areSerializedFieldsEqual(Filter other);
257 }