1 /*
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 package org.apache.hadoop.hbase.filter;
21
22 import java.io.IOException;
23 import java.util.List;
24
25 import org.apache.hadoop.classification.InterfaceAudience;
26 import org.apache.hadoop.classification.InterfaceStability;
27 import org.apache.hadoop.hbase.Cell;
28 import org.apache.hadoop.hbase.KeyValue;
29 import org.apache.hadoop.hbase.exceptions.DeserializationException;
30
31 /**
32 * Interface for row and column filters directly applied within the regionserver.
33 *
34 * A filter can expect the following call sequence:
35 * <ul>
36 * <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
37 * <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
38 * <li> {@link #filterRowKey(byte[],int,int)}: true means drop this row; false means include.</li>
39 * <li> {@link #filterKeyValue(Cell)}: decides whether to include or exclude this KeyValue.
40 * See {@link ReturnCode}. </li>
41 * <li> {@link #transform(KeyValue)}: if the KeyValue is included, let the filter transform the
42 * KeyValue. </li>
43 * <li> {@link #filterRowCells(List)}: allows direct modification of the final list to be submitted
44 * <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
45 * filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
46 * </ul>
47 *
48 * Filter instances are created one per region/scan. This abstract class replaces
49 * the old RowFilterInterface.
50 *
51 * When implementing your own filters, consider inheriting {@link FilterBase} to help
52 * you reduce boilerplate.
53 *
54 * @see FilterBase
55 */
56 @InterfaceAudience.Public
57 @InterfaceStability.Stable
58 public abstract class Filter {
59 /**
60 * Reset the state of the filter between rows.
61 *
62 * Concrete implementers can signal a failure condition in their code by throwing an
63 * {@link IOException}.
64 *
65 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
66 */
67 abstract public void reset() throws IOException;
68
69 /**
70 * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
71 * false, each KeyValue in the row will be passed to {@link #filterKeyValue(Cell)} below.
72 *
73 * Concrete implementers can signal a failure condition in their code by throwing an
74 * {@link IOException}.
75 *
76 * @param buffer buffer containing row key
77 * @param offset offset into buffer where row key starts
78 * @param length length of the row key
79 * @return true, remove entire row, false, include the row (maybe).
80 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
81 */
82 abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
83
84 /**
85 * If this returns true, the scan will terminate.
86 *
87 * Concrete implementers can signal a failure condition in their code by throwing an
88 * {@link IOException}.
89 *
90 * @return true to end scan, false to continue.
91 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
92 */
93 abstract public boolean filterAllRemaining() throws IOException;
94
95 /**
96 * A way to filter based on the column family, column qualifier and/or the column value. Return
97 * code is described below. This allows filters to filter only certain number of columns, then
98 * terminate without matching ever column.
99 *
100 * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
101 * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
102 * for the next row.
103 *
104 * Concrete implementers can signal a failure condition in their code by throwing an
105 * {@link IOException}.
106 *
107 * @param v the Cell in question
108 * @return code as described below
109 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
110 * @see Filter.ReturnCode
111 */
112 abstract public ReturnCode filterKeyValue(final Cell v) throws IOException;
113
114 /**
115 * Give the filter a chance to transform the passed KeyValue. If the Cell is changed a new
116 * Cell object must be returned.
117 *
118 * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
119 * The transformed KeyValue is what is eventually returned to the client. Most filters will
120 * return the passed KeyValue unchanged.
121 * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue) for an example of a
122 * transformation.
123 *
124 * Concrete implementers can signal a failure condition in their code by throwing an
125 * {@link IOException}.
126 *
127 * @param v the KeyValue in question
128 * @return the changed KeyValue
129 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
130 */
131 abstract public Cell transformCell(final Cell v) throws IOException;
132
133 /**
134 * WARNING: please to not override this method. Instead override {@link #transformCell(Cell)}.
135 * This is for transition from 0.94 -> 0.96
136 **/
137 @Deprecated // use Cell transformCell(final Cell)
138 abstract public KeyValue transform(final KeyValue currentKV) throws IOException;
139
140
141 /**
142 * Return codes for filterValue().
143 */
144 @InterfaceAudience.Public
145 @InterfaceStability.Stable
146 public enum ReturnCode {
147 /**
148 * Include the Cell
149 */
150 INCLUDE,
151 /**
152 * Include the Cell and seek to the next column skipping older versions.
153 */
154 INCLUDE_AND_NEXT_COL,
155 /**
156 * Skip this Cell
157 */
158 SKIP,
159 /**
160 * Skip this column. Go to the next column in this row.
161 */
162 NEXT_COL,
163 /**
164 * Done with columns, skip to next row. Note that filterRow() will
165 * still be called.
166 */
167 NEXT_ROW,
168 /**
169 * Seek to next key which is given as hint by the filter.
170 */
171 SEEK_NEXT_USING_HINT,
172 }
173
174 /**
175 * Chance to alter the list of Cells to be submitted. Modifications to the list will carry on
176 *
177 * Concrete implementers can signal a failure condition in their code by throwing an
178 * {@link IOException}.
179 *
180 * @param kvs the list of Cells to be filtered
181 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
182 */
183 abstract public void filterRowCells(List<Cell> kvs) throws IOException;
184
185 /**
186 * WARNING: please to not override this method. Instead override {@link #filterRowCells(List)}.
187 * This is for transition from 0.94 -> 0.96
188 **/
189 @Deprecated
190 abstract public void filterRow(List<KeyValue> kvs) throws IOException;
191
192 /**
193 * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
194 * time).
195 *
196 * @return True if this filter actively uses filterRow(List) or filterRow().
197 */
198 abstract public boolean hasFilterRow();
199
200 /**
201 * Last chance to veto row based on previous {@link #filterKeyValue(Cell)} calls. The filter
202 * needs to retain state then return a particular value for this call if they wish to exclude a
203 * row if a certain column is missing (for example).
204 *
205 * Concrete implementers can signal a failure condition in their code by throwing an
206 * {@link IOException}.
207 *
208 * @return true to exclude row, false to include row.
209 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
210 */
211 abstract public boolean filterRow() throws IOException;
212
213 @Deprecated // use Cell GetNextKeyHint(final Cell)
214 abstract public KeyValue getNextKeyHint(final KeyValue currentKV) throws IOException;
215
216 /**
217 * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
218 * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
219 * QueryMatcher would call this function to find out which key it must next seek to.
220 *
221 * Concrete implementers can signal a failure condition in their code by throwing an
222 * {@link IOException}.
223 *
224 * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
225 * seek to next.
226 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
227 */
228 abstract public Cell getNextCellHint(final Cell currentKV) throws IOException;
229
230 /**
231 * Check that given column family is essential for filter to check row. Most filters always return
232 * true here. But some could have more sophisticated logic which could significantly reduce
233 * scanning process by not even touching columns until we are 100% sure that it's data is needed
234 * in result.
235 *
236 * Concrete implementers can signal a failure condition in their code by throwing an
237 * {@link IOException}.
238 *
239 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
240 */
241 abstract public boolean isFamilyEssential(byte[] name) throws IOException;
242
243 /**
244 * TODO: JAVADOC
245 *
246 * Concrete implementers can signal a failure condition in their code by throwing an
247 * {@link IOException}.
248 *
249 * @return The filter serialized using pb
250 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
251 */
252 abstract public byte[] toByteArray() throws IOException;
253
254 /**
255 *
256 * Concrete implementers can signal a failure condition in their code by throwing an
257 * {@link IOException}.
258 *
259 * @param pbBytes A pb serialized {@link Filter} instance
260 * @return An instance of {@link Filter} made from <code>bytes</code>
261 * @throws DeserializationException
262 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
263 * @see #toByteArray
264 */
265 public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
266 throw new DeserializationException(
267 "parseFrom called on base Filter, but should be called on derived type");
268 }
269
270 /**
271 * Concrete implementers can signal a failure condition in their code by throwing an
272 * {@link IOException}.
273 *
274 * @param other
275 * @return true if and only if the fields of the filter that are serialized are equal to the
276 * corresponding fields in other. Used for testing.
277 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
278 */
279 abstract boolean areSerializedFieldsEqual(Filter other);
280 }