1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.List;
23
24 import com.google.common.annotations.VisibleForTesting;
25 import com.google.protobuf.InvalidProtocolBufferException;
26 import org.apache.hadoop.hbase.classification.InterfaceAudience;
27 import org.apache.hadoop.hbase.classification.InterfaceStability;
28 import org.apache.hadoop.hbase.Cell;
29 import org.apache.hadoop.hbase.KeyValueUtil;
30 import org.apache.hadoop.hbase.exceptions.DeserializationException;
31 import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
32 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
33 import org.apache.hadoop.hbase.util.ByteStringer;
34 import org.apache.hadoop.hbase.util.Bytes;
35 import org.apache.hadoop.hbase.util.Pair;
36
37 import java.util.ArrayList;
38 import java.util.Arrays;
39 import java.util.List;
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69 @InterfaceAudience.Public
70 @InterfaceStability.Evolving
71 public class FuzzyRowFilter extends FilterBase {
72 private List<Pair<byte[], byte[]>> fuzzyKeysData;
73 private boolean done = false;
74
75 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
76 Pair<byte[], byte[]> p;
77 for (int i = 0; i < fuzzyKeysData.size(); i++) {
78 p = fuzzyKeysData.get(i);
79 if (p.getFirst().length != p.getSecond().length) {
80 Pair<String, String> readable = new Pair<String, String>(
81 Bytes.toStringBinary(p.getFirst()),
82 Bytes.toStringBinary(p.getSecond()));
83 throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
84 }
85 }
86 this.fuzzyKeysData = fuzzyKeysData;
87 }
88
89
90 @Override
91 public ReturnCode filterKeyValue(Cell cell) {
92
93 SatisfiesCode bestOption = SatisfiesCode.NO_NEXT;
94 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
95 SatisfiesCode satisfiesCode = satisfies(isReversed(), cell.getRowArray(),
96 cell.getRowOffset(), cell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
97 if (satisfiesCode == SatisfiesCode.YES) {
98 return ReturnCode.INCLUDE;
99 }
100
101 if (satisfiesCode == SatisfiesCode.NEXT_EXISTS) {
102 bestOption = SatisfiesCode.NEXT_EXISTS;
103 }
104 }
105
106 if (bestOption == SatisfiesCode.NEXT_EXISTS) {
107 return ReturnCode.SEEK_NEXT_USING_HINT;
108 }
109
110
111 done = true;
112 return ReturnCode.NEXT_ROW;
113 }
114
115
116
117 @Override
118 public Cell transformCell(Cell v) {
119 return v;
120 }
121
122 @Override
123 public Cell getNextCellHint(Cell curCell) {
124 byte[] nextRowKey = null;
125
126 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
127 byte[] nextRowKeyCandidate = getNextForFuzzyRule(isReversed(), curCell.getRowArray(),
128 curCell.getRowOffset(), curCell.getRowLength(), fuzzyData.getFirst(),
129 fuzzyData.getSecond());
130 if (nextRowKeyCandidate == null) {
131 continue;
132 }
133 if (nextRowKey == null ||
134 (reversed && Bytes.compareTo(nextRowKeyCandidate, nextRowKey) > 0) ||
135 (!reversed && Bytes.compareTo(nextRowKeyCandidate, nextRowKey) < 0)) {
136 nextRowKey = nextRowKeyCandidate;
137 }
138 }
139
140 if (!reversed && nextRowKey == null) {
141
142
143
144
145 throw new IllegalStateException("No next row key that satisfies fuzzy exists when" +
146 " getNextKeyHint() is invoked." +
147 " Filter: " + this.toString() +
148 " currentKV: " + curCell);
149 }
150
151 return nextRowKey == null ? null : KeyValueUtil.createFirstOnRow(nextRowKey);
152 }
153
154 @Override
155 public boolean filterAllRemaining() {
156 return done;
157 }
158
159
160
161
162 public byte [] toByteArray() {
163 FilterProtos.FuzzyRowFilter.Builder builder =
164 FilterProtos.FuzzyRowFilter.newBuilder();
165 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
166 BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
167 bbpBuilder.setFirst(ByteStringer.wrap(fuzzyData.getFirst()));
168 bbpBuilder.setSecond(ByteStringer.wrap(fuzzyData.getSecond()));
169 builder.addFuzzyKeysData(bbpBuilder);
170 }
171 return builder.build().toByteArray();
172 }
173
174
175
176
177
178
179
180 public static FuzzyRowFilter parseFrom(final byte [] pbBytes)
181 throws DeserializationException {
182 FilterProtos.FuzzyRowFilter proto;
183 try {
184 proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
185 } catch (InvalidProtocolBufferException e) {
186 throw new DeserializationException(e);
187 }
188 int count = proto.getFuzzyKeysDataCount();
189 ArrayList<Pair<byte[], byte[]>> fuzzyKeysData= new ArrayList<Pair<byte[], byte[]>>(count);
190 for (int i = 0; i < count; ++i) {
191 BytesBytesPair current = proto.getFuzzyKeysData(i);
192 byte[] keyBytes = current.getFirst().toByteArray();
193 byte[] keyMeta = current.getSecond().toByteArray();
194 fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
195 }
196 return new FuzzyRowFilter(fuzzyKeysData);
197 }
198
199 @Override
200 public String toString() {
201 final StringBuilder sb = new StringBuilder();
202 sb.append("FuzzyRowFilter");
203 sb.append("{fuzzyKeysData=");
204 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
205 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
206 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
207 }
208 sb.append("}, ");
209 return sb.toString();
210 }
211
212
213
214 static enum SatisfiesCode {
215
216 YES,
217
218 NEXT_EXISTS,
219
220 NO_NEXT
221 }
222
223 @VisibleForTesting
224 static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
225 return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
226 }
227
228 @VisibleForTesting
229 static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
230 byte[] fuzzyKeyMeta) {
231 return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
232 }
233
234 private static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
235 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
236 if (row == null) {
237
238 return SatisfiesCode.YES;
239 }
240
241 Order order = Order.orderFor(reverse);
242 boolean nextRowKeyCandidateExists = false;
243
244 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
245
246 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
247 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
248 if (fixedByteIncorrect) {
249
250 if (nextRowKeyCandidateExists) {
251 return SatisfiesCode.NEXT_EXISTS;
252 }
253
254
255
256
257 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
258 if (rowByteLessThanFixed && !reverse) {
259 return SatisfiesCode.NEXT_EXISTS;
260 } else if (!rowByteLessThanFixed && reverse) {
261 return SatisfiesCode.NEXT_EXISTS;
262 } else {
263 return SatisfiesCode.NO_NEXT;
264 }
265 }
266
267
268
269
270
271
272
273 if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
274 nextRowKeyCandidateExists = true;
275 }
276 }
277
278 return SatisfiesCode.YES;
279 }
280
281 @VisibleForTesting
282 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
283 return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
284 }
285
286 @VisibleForTesting
287 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
288 byte[] fuzzyKeyMeta) {
289 return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
290 }
291
292
293 private enum Order {
294 ASC {
295 public boolean lt(int lhs, int rhs) {
296 return lhs < rhs;
297 }
298 public boolean gt(int lhs, int rhs) {
299 return lhs > rhs;
300 }
301 public byte inc(byte val) {
302
303 return (byte) (val + 1);
304 }
305 public boolean isMax(byte val) {
306 return val == (byte) 0xff;
307 }
308 public byte min() {
309 return 0;
310 }
311 },
312 DESC {
313 public boolean lt(int lhs, int rhs) {
314 return lhs > rhs;
315 }
316 public boolean gt(int lhs, int rhs) {
317 return lhs < rhs;
318 }
319 public byte inc(byte val) {
320
321 return (byte) (val - 1);
322 }
323 public boolean isMax(byte val) {
324 return val == 0;
325 }
326 public byte min() {
327 return (byte) 0xFF;
328 }
329 };
330
331 public static Order orderFor(boolean reverse) {
332 return reverse ? DESC : ASC;
333 }
334
335
336 public abstract boolean lt(int lhs, int rhs);
337
338 public abstract boolean gt(int lhs, int rhs);
339
340 public abstract byte inc(byte val);
341
342 public abstract boolean isMax(byte val);
343
344 public abstract byte min();
345 }
346
347
348
349
350
351 @VisibleForTesting
352 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
353 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
354
355
356
357
358
359
360
361
362 byte[] result = Arrays.copyOf(fuzzyKeyBytes,
363 length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
364 if (reverse && length > fuzzyKeyBytes.length) {
365
366 for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
367 result[i] = (byte) 0xFF;
368 }
369 }
370 int toInc = -1;
371 final Order order = Order.orderFor(reverse);
372
373 boolean increased = false;
374 for (int i = 0; i < result.length; i++) {
375 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
376 result[i] = row[offset + i];
377 if (!order.isMax(row[offset + i])) {
378
379 toInc = i;
380 }
381 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == 0) {
382 if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
383
384
385 increased = true;
386 break;
387 }
388
389 if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
390
391
392
393 break;
394 }
395 }
396 }
397
398 if (!increased) {
399 if (toInc < 0) {
400 return null;
401 }
402 result[toInc] = order.inc(result[toInc]);
403
404
405
406 for (int i = toInc + 1; i < result.length; i++) {
407 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
408 result[i] = order.min();
409 }
410 }
411 }
412
413 return result;
414 }
415
416
417
418
419
420 boolean areSerializedFieldsEqual(Filter o) {
421 if (o == this) return true;
422 if (!(o instanceof FuzzyRowFilter)) return false;
423
424 FuzzyRowFilter other = (FuzzyRowFilter)o;
425 if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
426 for (int i = 0; i < fuzzyKeysData.size(); ++i) {
427 Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
428 Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
429 if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst())
430 && Bytes.equals(thisData.getSecond(), otherData.getSecond()))) {
431 return false;
432 }
433 }
434 return true;
435 }
436 }