1 /*
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 package org.apache.hadoop.hbase.filter;
21
22 import java.io.IOException;
23 import java.util.List;
24
25 import org.apache.hadoop.classification.InterfaceAudience;
26 import org.apache.hadoop.classification.InterfaceStability;
27 import org.apache.hadoop.hbase.KeyValue;
28 import org.apache.hadoop.hbase.exceptions.DeserializationException;
29
30 /**
31 * Interface for row and column filters directly applied within the regionserver.
32 * A filter can expect the following call sequence:
33 *<ul>
34 * <li>{@link #reset()}</li>
35 * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
36 * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
37 * if false, we will also call</li>
38 * <li>{@link #filterKeyValue(KeyValue)} -> true to drop this key/value</li>
39 * <li>{@link #filterRow(List)} -> allows directmodification of the final list to be submitted
40 * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
41 * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
42 * </li>
43 * </ul>
44 *
45 * Filter instances are created one per region/scan. This abstract class replaces
46 * the old RowFilterInterface.
47 *
48 * When implementing your own filters, consider inheriting {@link FilterBase} to help
49 * you reduce boilerplate.
50 *
51 * @see FilterBase
52 */
53 @InterfaceAudience.Public
54 @InterfaceStability.Stable
55 public abstract class Filter {
56 /**
57 * Reset the state of the filter between rows.
58 *
59 * Concrete implementers can signal a failure condition in their code by throwing an
60 * {@link IOException}.
61 *
62 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
63 */
64 abstract public void reset() throws IOException;
65
66 /**
67 * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
68 * false, each KeyValue in the row will be passed to {@link #filterKeyValue(KeyValue)} below.
69 *
70 * Concrete implementers can signal a failure condition in their code by throwing an
71 * {@link IOException}.
72 *
73 * @param buffer buffer containing row key
74 * @param offset offset into buffer where row key starts
75 * @param length length of the row key
76 * @return true, remove entire row, false, include the row (maybe).
77 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
78 */
79 abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
80
81 /**
82 * If this returns true, the scan will terminate.
83 *
84 * Concrete implementers can signal a failure condition in their code by throwing an
85 * {@link IOException}.
86 *
87 * @return true to end scan, false to continue.
88 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
89 */
90 abstract public boolean filterAllRemaining() throws IOException;
91
92 /**
93 * A way to filter based on the column family, column qualifier and/or the column value. Return
94 * code is described below. This allows filters to filter only certain number of columns, then
95 * terminate without matching ever column.
96 *
97 * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
98 * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
99 * for the next row.
100 *
101 * Concrete implementers can signal a failure condition in their code by throwing an
102 * {@link IOException}.
103 *
104 * @param v the KeyValue in question
105 * @return code as described below
106 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
107 * @see Filter.ReturnCode
108 */
109 abstract public ReturnCode filterKeyValue(final KeyValue v) throws IOException;
110
111 /**
112 * Give the filter a chance to transform the passed KeyValue. If the KeyValue is changed a new
113 * KeyValue object must be returned.
114 *
115 * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
116 * The transformed KeyValue is what is eventually returned to the client. Most filters will
117 * return the passed KeyValue unchanged.
118 * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue) for an example of a
119 * transformation.
120 *
121 * Concrete implementers can signal a failure condition in their code by throwing an
122 * {@link IOException}.
123 *
124 * @param v the KeyValue in question
125 * @return the changed KeyValue
126 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
127 */
128 abstract public KeyValue transform(final KeyValue v) throws IOException;
129
130 /**
131 * Return codes for filterValue().
132 */
133 public enum ReturnCode {
134 /**
135 * Include the KeyValue
136 */
137 INCLUDE,
138 /**
139 * Include the KeyValue and seek to the next column skipping older versions.
140 */
141 INCLUDE_AND_NEXT_COL,
142 /**
143 * Skip this KeyValue
144 */
145 SKIP,
146 /**
147 * Skip this column. Go to the next column in this row.
148 */
149 NEXT_COL,
150 /**
151 * Done with columns, skip to next row. Note that filterRow() will
152 * still be called.
153 */
154 NEXT_ROW,
155 /**
156 * Seek to next key which is given as hint by the filter.
157 */
158 SEEK_NEXT_USING_HINT,
159 }
160
161 /**
162 * Chance to alter the list of keyvalues to be submitted. Modifications to the list will carry on
163 *
164 * Concrete implementers can signal a failure condition in their code by throwing an
165 * {@link IOException}.
166 *
167 * @param kvs the list of keyvalues to be filtered
168 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
169 */
170 abstract public void filterRow(List<KeyValue> kvs) throws IOException;
171
172 /**
173 * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
174 * time).
175 *
176 * @return True if this filter actively uses filterRow(List) or filterRow().
177 */
178 abstract public boolean hasFilterRow();
179
180 /**
181 * Last chance to veto row based on previous {@link #filterKeyValue(KeyValue)} calls. The filter
182 * needs to retain state then return a particular value for this call if they wish to exclude a
183 * row if a certain column is missing (for example).
184 *
185 * Concrete implementers can signal a failure condition in their code by throwing an
186 * {@link IOException}.
187 *
188 * @return true to exclude row, false to include row.
189 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
190 */
191 abstract public boolean filterRow() throws IOException;
192
193 /**
194 * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
195 * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
196 * QueryMatcher would call this function to find out which key it must next seek to.
197 *
198 * Concrete implementers can signal a failure condition in their code by throwing an
199 * {@link IOException}.
200 *
201 * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
202 * seek to next.
203 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
204 */
205 abstract public KeyValue getNextKeyHint(final KeyValue currentKV) throws IOException;
206
207 /**
208 * Check that given column family is essential for filter to check row. Most filters always return
209 * true here. But some could have more sophisticated logic which could significantly reduce
210 * scanning process by not even touching columns until we are 100% sure that it's data is needed
211 * in result.
212 *
213 * Concrete implementers can signal a failure condition in their code by throwing an
214 * {@link IOException}.
215 *
216 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
217 */
218 abstract public boolean isFamilyEssential(byte[] name) throws IOException;
219
220 /**
221 * TODO: JAVADOC
222 *
223 * Concrete implementers can signal a failure condition in their code by throwing an
224 * {@link IOException}.
225 *
226 * @return The filter serialized using pb
227 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
228 */
229 abstract public byte[] toByteArray() throws IOException;
230
231 /**
232 *
233 * Concrete implementers can signal a failure condition in their code by throwing an
234 * {@link IOException}.
235 *
236 * @param pbBytes A pb serialized {@link Filter} instance
237 * @return An instance of {@link Filter} made from <code>bytes</code>
238 * @throws DeserializationException
239 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
240 * @see #toByteArray
241 */
242 public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
243 throw new DeserializationException(
244 "parseFrom called on base Filter, but should be called on derived type");
245 }
246
247 /**
248 * Concrete implementers can signal a failure condition in their code by throwing an
249 * {@link IOException}.
250 *
251 * @param other
252 * @return true if and only if the fields of the filter that are serialized are equal to the
253 * corresponding fields in other. Used for testing.
254 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
255 */
256 abstract boolean areSerializedFieldsEqual(Filter other);
257 }