1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Comparator;
23 import java.util.List;
24 import java.util.PriorityQueue;
25
26 import org.apache.hadoop.hbase.Cell;
27 import org.apache.hadoop.hbase.KeyValue;
28 import org.apache.hadoop.hbase.classification.InterfaceAudience;
29 import org.apache.hadoop.hbase.classification.InterfaceStability;
30 import org.apache.hadoop.hbase.exceptions.DeserializationException;
31 import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
32 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
33 import org.apache.hadoop.hbase.util.ByteStringer;
34 import org.apache.hadoop.hbase.util.Bytes;
35 import org.apache.hadoop.hbase.util.Pair;
36 import org.apache.hadoop.hbase.util.UnsafeAccess;
37
38 import com.google.common.annotations.VisibleForTesting;
39 import com.google.protobuf.InvalidProtocolBufferException;
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59 @InterfaceAudience.Public
60 @InterfaceStability.Evolving
61 public class FuzzyRowFilter extends FilterBase {
62 private List<Pair<byte[], byte[]>> fuzzyKeysData;
63 private boolean done = false;
64
65
66
67
68
69
70 private int lastFoundIndex = -1;
71
72
73
74
75 private RowTracker tracker;
76
77 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
78 Pair<byte[], byte[]> p;
79 for (int i = 0; i < fuzzyKeysData.size(); i++) {
80 p = fuzzyKeysData.get(i);
81 if (p.getFirst().length != p.getSecond().length) {
82 Pair<String, String> readable =
83 new Pair<String, String>(Bytes.toStringBinary(p.getFirst()), Bytes.toStringBinary(p
84 .getSecond()));
85 throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
86 }
87
88 p.setSecond(preprocessMask(p.getSecond()));
89 preprocessSearchKey(p);
90 }
91 this.fuzzyKeysData = fuzzyKeysData;
92 this.tracker = new RowTracker();
93 }
94
95 private void preprocessSearchKey(Pair<byte[], byte[]> p) {
96 if (UnsafeAccess.isAvailable() == false) {
97 return;
98 }
99 byte[] key = p.getFirst();
100 byte[] mask = p.getSecond();
101 for (int i = 0; i < mask.length; i++) {
102
103 if (mask[i] == 0) key[i] = 0;
104 }
105 }
106
107
108
109
110
111
112
113 private byte[] preprocessMask(byte[] mask) {
114 if (UnsafeAccess.isAvailable() == false) {
115 return mask;
116 }
117 if (isPreprocessedMask(mask)) return mask;
118 for (int i = 0; i < mask.length; i++) {
119 if (mask[i] == 0) {
120 mask[i] = -1;
121 } else if (mask[i] == 1) {
122 mask[i] = 0;
123 }
124 }
125 return mask;
126 }
127
128 private boolean isPreprocessedMask(byte[] mask) {
129 for (int i = 0; i < mask.length; i++) {
130 if (mask[i] != -1 && mask[i] != 0) {
131 return false;
132 }
133 }
134 return true;
135 }
136
137 @Override
138 public ReturnCode filterKeyValue(Cell c) {
139 final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
140 final int size = fuzzyKeysData.size();
141 for (int i = startIndex; i < size + startIndex; i++) {
142 final int index = i % size;
143 Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
144 SatisfiesCode satisfiesCode =
145 satisfies(isReversed(), c.getRowArray(), c.getRowOffset(), c.getRowLength(),
146 fuzzyData.getFirst(), fuzzyData.getSecond());
147 if (satisfiesCode == SatisfiesCode.YES) {
148 lastFoundIndex = index;
149 return ReturnCode.INCLUDE;
150 }
151 }
152
153 lastFoundIndex = -1;
154 return ReturnCode.SEEK_NEXT_USING_HINT;
155
156 }
157
158 @Override
159 public Cell getNextCellHint(Cell currentCell) {
160 boolean result = tracker.updateTracker(currentCell);
161 if (result == false) {
162 done = true;
163 return null;
164 }
165 byte[] nextRowKey = tracker.nextRow();
166 return KeyValue.createFirstOnRow(nextRowKey);
167 }
168
169
170
171
172
173
174
175
176
177 private class RowTracker {
178 private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows;
179 private boolean initialized = false;
180
181 RowTracker() {
182 nextRows =
183 new PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>>(fuzzyKeysData.size(),
184 new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() {
185 @Override
186 public int compare(Pair<byte[], Pair<byte[], byte[]>> o1,
187 Pair<byte[], Pair<byte[], byte[]>> o2) {
188 int compare = Bytes.compareTo(o1.getFirst(), o2.getFirst());
189 if (!isReversed()) {
190 return compare;
191 } else {
192 return -compare;
193 }
194 }
195 });
196 }
197
198 byte[] nextRow() {
199 if (nextRows.isEmpty()) {
200 throw new IllegalStateException(
201 "NextRows should not be empty, make sure to call nextRow() after updateTracker() return true");
202 } else {
203 return nextRows.peek().getFirst();
204 }
205 }
206
207 boolean updateTracker(Cell currentCell) {
208 if (!initialized) {
209 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
210 updateWith(currentCell, fuzzyData);
211 }
212 initialized = true;
213 } else {
214 while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) {
215 Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll();
216 Pair<byte[], byte[]> fuzzyData = head.getSecond();
217 updateWith(currentCell, fuzzyData);
218 }
219 }
220 return !nextRows.isEmpty();
221 }
222
223 boolean lessThan(Cell currentCell, byte[] nextRowKey) {
224 int compareResult =
225 Bytes.compareTo(currentCell.getRowArray(), currentCell.getRowOffset(),
226 currentCell.getRowLength(), nextRowKey, 0, nextRowKey.length);
227 return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0);
228 }
229
230 void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
231 byte[] nextRowKeyCandidate =
232 getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
233 currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
234 if (nextRowKeyCandidate != null) {
235 nextRows.add(new Pair<byte[], Pair<byte[], byte[]>>(nextRowKeyCandidate, fuzzyData));
236 }
237 }
238
239 }
240
241 @Override
242 public boolean filterAllRemaining() {
243 return done;
244 }
245
246
247
248
249 public byte[] toByteArray() {
250 FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter.newBuilder();
251 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
252 BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
253 bbpBuilder.setFirst(ByteStringer.wrap(fuzzyData.getFirst()));
254 bbpBuilder.setSecond(ByteStringer.wrap(fuzzyData.getSecond()));
255 builder.addFuzzyKeysData(bbpBuilder);
256 }
257 return builder.build().toByteArray();
258 }
259
260
261
262
263
264
265
266 public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
267 FilterProtos.FuzzyRowFilter proto;
268 try {
269 proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
270 } catch (InvalidProtocolBufferException e) {
271 throw new DeserializationException(e);
272 }
273 int count = proto.getFuzzyKeysDataCount();
274 ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<Pair<byte[], byte[]>>(count);
275 for (int i = 0; i < count; ++i) {
276 BytesBytesPair current = proto.getFuzzyKeysData(i);
277 byte[] keyBytes = current.getFirst().toByteArray();
278 byte[] keyMeta = current.getSecond().toByteArray();
279 fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
280 }
281 return new FuzzyRowFilter(fuzzyKeysData);
282 }
283
284 @Override
285 public String toString() {
286 final StringBuilder sb = new StringBuilder();
287 sb.append("FuzzyRowFilter");
288 sb.append("{fuzzyKeysData=");
289 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
290 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
291 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
292 }
293 sb.append("}, ");
294 return sb.toString();
295 }
296
297
298
/** Outcome of testing a row against one fuzzy (key, mask) pair. */
enum SatisfiesCode {
  /** The row satisfies the fuzzy rule. */
  YES,
  /** The row does not satisfy the rule, but a later row (in scan direction) may. */
  NEXT_EXISTS,
  /** The row does not satisfy the rule and no later row (in scan direction) can. */
  NO_NEXT
}
307
308 @VisibleForTesting
309 static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
310 return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
311 }
312
313 @VisibleForTesting
314 static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
315 byte[] fuzzyKeyMeta) {
316 return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
317 }
318
319 static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
320 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
321
322 if (UnsafeAccess.isAvailable() == false) {
323 return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta);
324 }
325
326 if (row == null) {
327
328 return SatisfiesCode.YES;
329 }
330 length = Math.min(length, fuzzyKeyBytes.length);
331 int numWords = length / Bytes.SIZEOF_LONG;
332 int offsetAdj = offset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
333
334 int j = numWords << 3;
335
336 for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
337
338 long fuzzyBytes =
339 UnsafeAccess.theUnsafe.getLong(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
340 + (long) i);
341 long fuzzyMeta =
342 UnsafeAccess.theUnsafe.getLong(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
343 + (long) i);
344 long rowValue = UnsafeAccess.theUnsafe.getLong(row, offsetAdj + (long) i);
345 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
346
347 return SatisfiesCode.NEXT_EXISTS;
348 }
349 }
350
351 int off = j;
352
353 if (length - off >= Bytes.SIZEOF_INT) {
354 int fuzzyBytes =
355 UnsafeAccess.theUnsafe.getInt(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
356 + (long) off);
357 int fuzzyMeta =
358 UnsafeAccess.theUnsafe.getInt(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
359 + (long) off);
360 int rowValue = UnsafeAccess.theUnsafe.getInt(row, offsetAdj + (long) off);
361 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
362
363 return SatisfiesCode.NEXT_EXISTS;
364 }
365 off += Bytes.SIZEOF_INT;
366 }
367
368 if (length - off >= Bytes.SIZEOF_SHORT) {
369 short fuzzyBytes =
370 UnsafeAccess.theUnsafe.getShort(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
371 + (long) off);
372 short fuzzyMeta =
373 UnsafeAccess.theUnsafe.getShort(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
374 + (long) off);
375 short rowValue = UnsafeAccess.theUnsafe.getShort(row, offsetAdj + (long) off);
376 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
377
378
379
380 return SatisfiesCode.NEXT_EXISTS;
381 }
382 off += Bytes.SIZEOF_SHORT;
383 }
384
385 if (length - off >= Bytes.SIZEOF_BYTE) {
386 int fuzzyBytes = fuzzyKeyBytes[off] & 0xff;
387 int fuzzyMeta = fuzzyKeyMeta[off] & 0xff;
388 int rowValue = row[offset + off] & 0xff;
389 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
390
391 return SatisfiesCode.NEXT_EXISTS;
392 }
393 }
394 return SatisfiesCode.YES;
395 }
396
397 static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
398 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
399 if (row == null) {
400
401 return SatisfiesCode.YES;
402 }
403
404 Order order = Order.orderFor(reverse);
405 boolean nextRowKeyCandidateExists = false;
406
407 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
408
409 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
410 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
411 if (fixedByteIncorrect) {
412
413 if (nextRowKeyCandidateExists) {
414 return SatisfiesCode.NEXT_EXISTS;
415 }
416
417
418
419
420 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
421 if (rowByteLessThanFixed && !reverse) {
422 return SatisfiesCode.NEXT_EXISTS;
423 } else if (!rowByteLessThanFixed && reverse) {
424 return SatisfiesCode.NEXT_EXISTS;
425 } else {
426 return SatisfiesCode.NO_NEXT;
427 }
428 }
429
430
431
432
433
434
435
436 if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
437 nextRowKeyCandidateExists = true;
438 }
439 }
440 return SatisfiesCode.YES;
441 }
442
443 @VisibleForTesting
444 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
445 return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
446 }
447
448 @VisibleForTesting
449 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
450 byte[] fuzzyKeyMeta) {
451 return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
452 }
453
454
455 private enum Order {
456 ASC {
457 public boolean lt(int lhs, int rhs) {
458 return lhs < rhs;
459 }
460
461 public boolean gt(int lhs, int rhs) {
462 return lhs > rhs;
463 }
464
465 public byte inc(byte val) {
466
467 return (byte) (val + 1);
468 }
469
470 public boolean isMax(byte val) {
471 return val == (byte) 0xff;
472 }
473
474 public byte min() {
475 return 0;
476 }
477 },
478 DESC {
479 public boolean lt(int lhs, int rhs) {
480 return lhs > rhs;
481 }
482
483 public boolean gt(int lhs, int rhs) {
484 return lhs < rhs;
485 }
486
487 public byte inc(byte val) {
488
489 return (byte) (val - 1);
490 }
491
492 public boolean isMax(byte val) {
493 return val == 0;
494 }
495
496 public byte min() {
497 return (byte) 0xFF;
498 }
499 };
500
501 public static Order orderFor(boolean reverse) {
502 return reverse ? DESC : ASC;
503 }
504
505
506 public abstract boolean lt(int lhs, int rhs);
507
508
509 public abstract boolean gt(int lhs, int rhs);
510
511
512 public abstract byte inc(byte val);
513
514
515 public abstract boolean isMax(byte val);
516
517
518 public abstract byte min();
519 }
520
521
522
523
524
525 @VisibleForTesting
526 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
527 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
528
529
530
531
532
533
534
535
536 byte[] result =
537 Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
538 if (reverse && length > fuzzyKeyBytes.length) {
539
540 for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
541 result[i] = (byte) 0xFF;
542 }
543 }
544 int toInc = -1;
545 final Order order = Order.orderFor(reverse);
546
547 boolean increased = false;
548 for (int i = 0; i < result.length; i++) {
549 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
550 result[i] = row[offset + i];
551 if (!order.isMax(row[offset + i])) {
552
553 toInc = i;
554 }
555 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1
556 if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
557
558
559 increased = true;
560 break;
561 }
562
563 if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
564
565
566
567 break;
568 }
569 }
570 }
571
572 if (!increased) {
573 if (toInc < 0) {
574 return null;
575 }
576 result[toInc] = order.inc(result[toInc]);
577
578
579
580 for (int i = toInc + 1; i < result.length; i++) {
581 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
582 result[i] = order.min();
583 }
584 }
585 }
586
587 return result;
588 }
589
590
591
592
593
594 boolean areSerializedFieldsEqual(Filter o) {
595 if (o == this) return true;
596 if (!(o instanceof FuzzyRowFilter)) return false;
597
598 FuzzyRowFilter other = (FuzzyRowFilter) o;
599 if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
600 for (int i = 0; i < fuzzyKeysData.size(); ++i) {
601 Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
602 Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
603 if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst()) && Bytes.equals(
604 thisData.getSecond(), otherData.getSecond()))) {
605 return false;
606 }
607 }
608 return true;
609 }
610 }