1 /*
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 package org.apache.hadoop.hbase.filter;
21
22 import java.io.IOException;
23 import java.util.List;
24
25 import org.apache.hadoop.classification.InterfaceAudience;
26 import org.apache.hadoop.classification.InterfaceStability;
27 import org.apache.hadoop.hbase.KeyValue;
28 import org.apache.hadoop.hbase.exceptions.DeserializationException;
29
30 /**
31 * Interface for row and column filters directly applied within the regionserver.
32 *
33 * A filter can expect the following call sequence:
34 * <ul>
35 * <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
36 * <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
37 * <li> {@link #filterRowKey(byte[],int,int)}: true means drop this row; false means include.</li>
38 * <li> {@link #filterKeyValue(KeyValue)}: decides whether to include or exclude this KeyValue.
39 * See {@link ReturnCode}. </li>
40 * <li> {@link #transform(KeyValue)}: if the KeyValue is included, let the filter transform the
41 * KeyValue. </li>
42 * <li> {@link #filterRow(List)}: allows direct modification of the final list to be submitted
43 * <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
44 * filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
45 * </ul>
46 *
47 * Filter instances are created one per region/scan. This abstract class replaces
48 * the old RowFilterInterface.
49 *
50 * When implementing your own filters, consider inheriting {@link FilterBase} to help
51 * you reduce boilerplate.
52 *
53 * @see FilterBase
54 */
55 @InterfaceAudience.Public
56 @InterfaceStability.Stable
57 public abstract class Filter {
58 /**
59 * Reset the state of the filter between rows.
60 *
61 * Concrete implementers can signal a failure condition in their code by throwing an
62 * {@link IOException}.
63 *
64 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
65 */
66 abstract public void reset() throws IOException;
67
68 /**
69 * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
70 * false, each KeyValue in the row will be passed to {@link #filterKeyValue(KeyValue)} below.
71 *
72 * Concrete implementers can signal a failure condition in their code by throwing an
73 * {@link IOException}.
74 *
75 * @param buffer buffer containing row key
76 * @param offset offset into buffer where row key starts
77 * @param length length of the row key
78 * @return true, remove entire row, false, include the row (maybe).
79 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
80 */
81 abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
82
83 /**
84 * If this returns true, the scan will terminate.
85 *
86 * Concrete implementers can signal a failure condition in their code by throwing an
87 * {@link IOException}.
88 *
89 * @return true to end scan, false to continue.
90 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
91 */
92 abstract public boolean filterAllRemaining() throws IOException;
93
94 /**
95 * A way to filter based on the column family, column qualifier and/or the column value. Return
96 * code is described below. This allows filters to filter only certain number of columns, then
97 * terminate without matching ever column.
98 *
99 * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
100 * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
101 * for the next row.
102 *
103 * Concrete implementers can signal a failure condition in their code by throwing an
104 * {@link IOException}.
105 *
106 * @param v the KeyValue in question
107 * @return code as described below
108 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
109 * @see Filter.ReturnCode
110 */
111 abstract public ReturnCode filterKeyValue(final KeyValue v) throws IOException;
112
113 /**
114 * Give the filter a chance to transform the passed KeyValue. If the KeyValue is changed a new
115 * KeyValue object must be returned.
116 *
117 * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
118 * The transformed KeyValue is what is eventually returned to the client. Most filters will
119 * return the passed KeyValue unchanged.
120 * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue) for an example of a
121 * transformation.
122 *
123 * Concrete implementers can signal a failure condition in their code by throwing an
124 * {@link IOException}.
125 *
126 * @param v the KeyValue in question
127 * @return the changed KeyValue
128 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
129 */
130 abstract public KeyValue transform(final KeyValue v) throws IOException;
131
132 /**
133 * Return codes for filterValue().
134 */
135 public enum ReturnCode {
136 /**
137 * Include the KeyValue
138 */
139 INCLUDE,
140 /**
141 * Include the KeyValue and seek to the next column skipping older versions.
142 */
143 INCLUDE_AND_NEXT_COL,
144 /**
145 * Skip this KeyValue
146 */
147 SKIP,
148 /**
149 * Skip this column. Go to the next column in this row.
150 */
151 NEXT_COL,
152 /**
153 * Done with columns, skip to next row. Note that filterRow() will
154 * still be called.
155 */
156 NEXT_ROW,
157 /**
158 * Seek to next key which is given as hint by the filter.
159 */
160 SEEK_NEXT_USING_HINT,
161 }
162
163 /**
164 * Chance to alter the list of keyvalues to be submitted. Modifications to the list will carry on
165 *
166 * Concrete implementers can signal a failure condition in their code by throwing an
167 * {@link IOException}.
168 *
169 * @param kvs the list of keyvalues to be filtered
170 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
171 */
172 abstract public void filterRow(List<KeyValue> kvs) throws IOException;
173
174 /**
175 * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
176 * time).
177 *
178 * @return True if this filter actively uses filterRow(List) or filterRow().
179 */
180 abstract public boolean hasFilterRow();
181
182 /**
183 * Last chance to veto row based on previous {@link #filterKeyValue(KeyValue)} calls. The filter
184 * needs to retain state then return a particular value for this call if they wish to exclude a
185 * row if a certain column is missing (for example).
186 *
187 * Concrete implementers can signal a failure condition in their code by throwing an
188 * {@link IOException}.
189 *
190 * @return true to exclude row, false to include row.
191 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
192 */
193 abstract public boolean filterRow() throws IOException;
194
195 /**
196 * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
197 * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
198 * QueryMatcher would call this function to find out which key it must next seek to.
199 *
200 * Concrete implementers can signal a failure condition in their code by throwing an
201 * {@link IOException}.
202 *
203 * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
204 * seek to next.
205 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
206 */
207 abstract public KeyValue getNextKeyHint(final KeyValue currentKV) throws IOException;
208
209 /**
210 * Check that given column family is essential for filter to check row. Most filters always return
211 * true here. But some could have more sophisticated logic which could significantly reduce
212 * scanning process by not even touching columns until we are 100% sure that it's data is needed
213 * in result.
214 *
215 * Concrete implementers can signal a failure condition in their code by throwing an
216 * {@link IOException}.
217 *
218 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
219 */
220 abstract public boolean isFamilyEssential(byte[] name) throws IOException;
221
222 /**
223 * TODO: JAVADOC
224 *
225 * Concrete implementers can signal a failure condition in their code by throwing an
226 * {@link IOException}.
227 *
228 * @return The filter serialized using pb
229 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
230 */
231 abstract public byte[] toByteArray() throws IOException;
232
233 /**
234 *
235 * Concrete implementers can signal a failure condition in their code by throwing an
236 * {@link IOException}.
237 *
238 * @param pbBytes A pb serialized {@link Filter} instance
239 * @return An instance of {@link Filter} made from <code>bytes</code>
240 * @throws DeserializationException
241 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
242 * @see #toByteArray
243 */
244 public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
245 throw new DeserializationException(
246 "parseFrom called on base Filter, but should be called on derived type");
247 }
248
249 /**
250 * Concrete implementers can signal a failure condition in their code by throwing an
251 * {@link IOException}.
252 *
253 * @param other
254 * @return true if and only if the fields of the filter that are serialized are equal to the
255 * corresponding fields in other. Used for testing.
256 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
257 */
258 abstract boolean areSerializedFieldsEqual(Filter other);
259 }