1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Comparator;
23 import java.util.List;
24 import java.util.PriorityQueue;
25
26 import org.apache.hadoop.hbase.Cell;
27 import org.apache.hadoop.hbase.KeyValue;
28 import org.apache.hadoop.hbase.classification.InterfaceAudience;
29 import org.apache.hadoop.hbase.classification.InterfaceStability;
30 import org.apache.hadoop.hbase.exceptions.DeserializationException;
31 import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
32 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
33 import org.apache.hadoop.hbase.util.ByteStringer;
34 import org.apache.hadoop.hbase.util.Bytes;
35 import org.apache.hadoop.hbase.util.Pair;
36 import org.apache.hadoop.hbase.util.UnsafeAccess;
37 import org.apache.hadoop.hbase.util.UnsafeAvailChecker;
38
39 import com.google.common.annotations.VisibleForTesting;
40 import com.google.protobuf.InvalidProtocolBufferException;
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60 @InterfaceAudience.Public
61 @InterfaceStability.Evolving
62 public class FuzzyRowFilter extends FilterBase {
63 private static final boolean UNSAFE_UNALIGNED = UnsafeAvailChecker.unaligned();
64 private List<Pair<byte[], byte[]>> fuzzyKeysData;
65 private boolean done = false;
66
67
68
69
70
71
72 private int lastFoundIndex = -1;
73
74
75
76
77 private RowTracker tracker;
78
79 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
80 Pair<byte[], byte[]> p;
81 for (int i = 0; i < fuzzyKeysData.size(); i++) {
82 p = fuzzyKeysData.get(i);
83 if (p.getFirst().length != p.getSecond().length) {
84 Pair<String, String> readable =
85 new Pair<String, String>(Bytes.toStringBinary(p.getFirst()), Bytes.toStringBinary(p
86 .getSecond()));
87 throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
88 }
89
90 p.setSecond(preprocessMask(p.getSecond()));
91 preprocessSearchKey(p);
92 }
93 this.fuzzyKeysData = fuzzyKeysData;
94 this.tracker = new RowTracker();
95 }
96
97 private void preprocessSearchKey(Pair<byte[], byte[]> p) {
98 if (!UNSAFE_UNALIGNED) {
99 return;
100 }
101 byte[] key = p.getFirst();
102 byte[] mask = p.getSecond();
103 for (int i = 0; i < mask.length; i++) {
104
105 if (mask[i] == 0) key[i] = 0;
106 }
107 }
108
109
110
111
112
113
114
115 private byte[] preprocessMask(byte[] mask) {
116 if (!UNSAFE_UNALIGNED) {
117 return mask;
118 }
119 if (isPreprocessedMask(mask)) return mask;
120 for (int i = 0; i < mask.length; i++) {
121 if (mask[i] == 0) {
122 mask[i] = -1;
123 } else if (mask[i] == 1) {
124 mask[i] = 0;
125 }
126 }
127 return mask;
128 }
129
130 private boolean isPreprocessedMask(byte[] mask) {
131 for (int i = 0; i < mask.length; i++) {
132 if (mask[i] != -1 && mask[i] != 0) {
133 return false;
134 }
135 }
136 return true;
137 }
138
139 @Override
140 public ReturnCode filterKeyValue(Cell c) {
141 final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
142 final int size = fuzzyKeysData.size();
143 for (int i = startIndex; i < size + startIndex; i++) {
144 final int index = i % size;
145 Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
146 SatisfiesCode satisfiesCode =
147 satisfies(isReversed(), c.getRowArray(), c.getRowOffset(), c.getRowLength(),
148 fuzzyData.getFirst(), fuzzyData.getSecond());
149 if (satisfiesCode == SatisfiesCode.YES) {
150 lastFoundIndex = index;
151 return ReturnCode.INCLUDE;
152 }
153 }
154
155 lastFoundIndex = -1;
156
157 return ReturnCode.SEEK_NEXT_USING_HINT;
158
159 }
160
161 @Override
162 public Cell getNextCellHint(Cell currentCell) {
163 boolean result = tracker.updateTracker(currentCell);
164 if (result == false) {
165 done = true;
166 return null;
167 }
168 byte[] nextRowKey = tracker.nextRow();
169 return KeyValue.createFirstOnRow(nextRowKey);
170 }
171
172
173
174
175
176
177
178
179
180 private class RowTracker {
181 private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows;
182 private boolean initialized = false;
183
184 RowTracker() {
185 nextRows =
186 new PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>>(fuzzyKeysData.size(),
187 new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() {
188 @Override
189 public int compare(Pair<byte[], Pair<byte[], byte[]>> o1,
190 Pair<byte[], Pair<byte[], byte[]>> o2) {
191 int compare = Bytes.compareTo(o1.getFirst(), o2.getFirst());
192 if (!isReversed()) {
193 return compare;
194 } else {
195 return -compare;
196 }
197 }
198 });
199 }
200
201 byte[] nextRow() {
202 if (nextRows.isEmpty()) {
203 throw new IllegalStateException(
204 "NextRows should not be empty, make sure to call nextRow() after updateTracker() return true");
205 } else {
206 return nextRows.peek().getFirst();
207 }
208 }
209
210 boolean updateTracker(Cell currentCell) {
211 if (!initialized) {
212 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
213 updateWith(currentCell, fuzzyData);
214 }
215 initialized = true;
216 } else {
217 while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) {
218 Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll();
219 Pair<byte[], byte[]> fuzzyData = head.getSecond();
220 updateWith(currentCell, fuzzyData);
221 }
222 }
223 return !nextRows.isEmpty();
224 }
225
226 boolean lessThan(Cell currentCell, byte[] nextRowKey) {
227 int compareResult =
228 Bytes.compareTo(currentCell.getRowArray(), currentCell.getRowOffset(),
229 currentCell.getRowLength(), nextRowKey, 0, nextRowKey.length);
230 return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0);
231 }
232
233 void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
234 byte[] nextRowKeyCandidate =
235 getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
236 currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
237 if (nextRowKeyCandidate != null) {
238 nextRows.add(new Pair<byte[], Pair<byte[], byte[]>>(nextRowKeyCandidate, fuzzyData));
239 }
240 }
241
242 }
243
244 @Override
245 public boolean filterAllRemaining() {
246 return done;
247 }
248
249
250
251
252 public byte[] toByteArray() {
253 FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter.newBuilder();
254 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
255 BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
256 bbpBuilder.setFirst(ByteStringer.wrap(fuzzyData.getFirst()));
257 bbpBuilder.setSecond(ByteStringer.wrap(fuzzyData.getSecond()));
258 builder.addFuzzyKeysData(bbpBuilder);
259 }
260 return builder.build().toByteArray();
261 }
262
263
264
265
266
267
268
269 public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
270 FilterProtos.FuzzyRowFilter proto;
271 try {
272 proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
273 } catch (InvalidProtocolBufferException e) {
274 throw new DeserializationException(e);
275 }
276 int count = proto.getFuzzyKeysDataCount();
277 ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<Pair<byte[], byte[]>>(count);
278 for (int i = 0; i < count; ++i) {
279 BytesBytesPair current = proto.getFuzzyKeysData(i);
280 byte[] keyBytes = current.getFirst().toByteArray();
281 byte[] keyMeta = current.getSecond().toByteArray();
282 fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
283 }
284 return new FuzzyRowFilter(fuzzyKeysData);
285 }
286
287 @Override
288 public String toString() {
289 final StringBuilder sb = new StringBuilder();
290 sb.append("FuzzyRowFilter");
291 sb.append("{fuzzyKeysData=");
292 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
293 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
294 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
295 }
296 sb.append("}, ");
297 return sb.toString();
298 }
299
300
301
/** Outcome of testing a row against a single fuzzy rule. */
enum SatisfiesCode {
  /** The row satisfies the rule. */
  YES,
  /** The row does not satisfy the rule, but a later row in scan order may. */
  NEXT_EXISTS,
  /** The row does not satisfy the rule and no later row in scan order can. */
  NO_NEXT
}
310
311 @VisibleForTesting
312 static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
313 return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
314 }
315
316 @VisibleForTesting
317 static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
318 byte[] fuzzyKeyMeta) {
319 return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
320 }
321
322 static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
323 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
324
325 if (!UNSAFE_UNALIGNED) {
326 return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta);
327 }
328
329 if (row == null) {
330
331 return SatisfiesCode.YES;
332 }
333 length = Math.min(length, fuzzyKeyBytes.length);
334 int numWords = length / Bytes.SIZEOF_LONG;
335 int offsetAdj = offset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
336
337 int j = numWords << 3;
338
339 for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
340
341 long fuzzyBytes =
342 UnsafeAccess.theUnsafe.getLong(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
343 + (long) i);
344 long fuzzyMeta =
345 UnsafeAccess.theUnsafe.getLong(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
346 + (long) i);
347 long rowValue = UnsafeAccess.theUnsafe.getLong(row, offsetAdj + (long) i);
348 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
349
350 return SatisfiesCode.NEXT_EXISTS;
351 }
352 }
353
354 int off = j;
355
356 if (length - off >= Bytes.SIZEOF_INT) {
357 int fuzzyBytes =
358 UnsafeAccess.theUnsafe.getInt(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
359 + (long) off);
360 int fuzzyMeta =
361 UnsafeAccess.theUnsafe.getInt(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
362 + (long) off);
363 int rowValue = UnsafeAccess.theUnsafe.getInt(row, offsetAdj + (long) off);
364 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
365
366 return SatisfiesCode.NEXT_EXISTS;
367 }
368 off += Bytes.SIZEOF_INT;
369 }
370
371 if (length - off >= Bytes.SIZEOF_SHORT) {
372 short fuzzyBytes =
373 UnsafeAccess.theUnsafe.getShort(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
374 + (long) off);
375 short fuzzyMeta =
376 UnsafeAccess.theUnsafe.getShort(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
377 + (long) off);
378 short rowValue = UnsafeAccess.theUnsafe.getShort(row, offsetAdj + (long) off);
379 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
380
381
382
383 return SatisfiesCode.NEXT_EXISTS;
384 }
385 off += Bytes.SIZEOF_SHORT;
386 }
387
388 if (length - off >= Bytes.SIZEOF_BYTE) {
389 int fuzzyBytes = fuzzyKeyBytes[off] & 0xff;
390 int fuzzyMeta = fuzzyKeyMeta[off] & 0xff;
391 int rowValue = row[offset + off] & 0xff;
392 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
393
394 return SatisfiesCode.NEXT_EXISTS;
395 }
396 }
397 return SatisfiesCode.YES;
398 }
399
400 static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
401 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
402 if (row == null) {
403
404 return SatisfiesCode.YES;
405 }
406
407 Order order = Order.orderFor(reverse);
408 boolean nextRowKeyCandidateExists = false;
409
410 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
411
412 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
413 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
414 if (fixedByteIncorrect) {
415
416 if (nextRowKeyCandidateExists) {
417 return SatisfiesCode.NEXT_EXISTS;
418 }
419
420
421
422
423 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
424 if (rowByteLessThanFixed && !reverse) {
425 return SatisfiesCode.NEXT_EXISTS;
426 } else if (!rowByteLessThanFixed && reverse) {
427 return SatisfiesCode.NEXT_EXISTS;
428 } else {
429 return SatisfiesCode.NO_NEXT;
430 }
431 }
432
433
434
435
436
437
438
439 if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
440 nextRowKeyCandidateExists = true;
441 }
442 }
443 return SatisfiesCode.YES;
444 }
445
446 @VisibleForTesting
447 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
448 return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
449 }
450
451 @VisibleForTesting
452 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
453 byte[] fuzzyKeyMeta) {
454 return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
455 }
456
457
458 private enum Order {
459 ASC {
460 public boolean lt(int lhs, int rhs) {
461 return lhs < rhs;
462 }
463
464 public boolean gt(int lhs, int rhs) {
465 return lhs > rhs;
466 }
467
468 public byte inc(byte val) {
469
470 return (byte) (val + 1);
471 }
472
473 public boolean isMax(byte val) {
474 return val == (byte) 0xff;
475 }
476
477 public byte min() {
478 return 0;
479 }
480 },
481 DESC {
482 public boolean lt(int lhs, int rhs) {
483 return lhs > rhs;
484 }
485
486 public boolean gt(int lhs, int rhs) {
487 return lhs < rhs;
488 }
489
490 public byte inc(byte val) {
491
492 return (byte) (val - 1);
493 }
494
495 public boolean isMax(byte val) {
496 return val == 0;
497 }
498
499 public byte min() {
500 return (byte) 0xFF;
501 }
502 };
503
504 public static Order orderFor(boolean reverse) {
505 return reverse ? DESC : ASC;
506 }
507
508
509 public abstract boolean lt(int lhs, int rhs);
510
511
512 public abstract boolean gt(int lhs, int rhs);
513
514
515 public abstract byte inc(byte val);
516
517
518 public abstract boolean isMax(byte val);
519
520
521 public abstract byte min();
522 }
523
524
525
526
527
528 @VisibleForTesting
529 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
530 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
531
532
533
534
535
536
537
538
539 byte[] result =
540 Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
541 if (reverse && length > fuzzyKeyBytes.length) {
542
543 for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
544 result[i] = (byte) 0xFF;
545 }
546 }
547 int toInc = -1;
548 final Order order = Order.orderFor(reverse);
549
550 boolean increased = false;
551 for (int i = 0; i < result.length; i++) {
552 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
553 result[i] = row[offset + i];
554 if (!order.isMax(row[offset + i])) {
555
556 toInc = i;
557 }
558 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1
559 if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
560
561
562 increased = true;
563 break;
564 }
565
566 if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
567
568
569
570 break;
571 }
572 }
573 }
574
575 if (!increased) {
576 if (toInc < 0) {
577 return null;
578 }
579 result[toInc] = order.inc(result[toInc]);
580
581
582
583 for (int i = toInc + 1; i < result.length; i++) {
584 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
585 result[i] = order.min();
586 }
587 }
588 }
589
590 return reverse? result: trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
591 }
592
593
594
595
596
597
598
599
600
601
602
603
604
605 private static byte[] trimTrailingZeroes(byte[] result, byte[] fuzzyKeyMeta, int toInc) {
606 int off = fuzzyKeyMeta.length >= result.length? result.length -1:
607 fuzzyKeyMeta.length -1;
608 for( ; off >= 0; off--){
609 if(fuzzyKeyMeta[off] != 0) break;
610 }
611 if (off < toInc) off = toInc;
612 byte[] retValue = new byte[off+1];
613 System.arraycopy(result, 0, retValue, 0, retValue.length);
614 return retValue;
615 }
616
617
618
619
620
621 boolean areSerializedFieldsEqual(Filter o) {
622 if (o == this) return true;
623 if (!(o instanceof FuzzyRowFilter)) return false;
624
625 FuzzyRowFilter other = (FuzzyRowFilter) o;
626 if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
627 for (int i = 0; i < fuzzyKeysData.size(); ++i) {
628 Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
629 Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
630 if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst()) && Bytes.equals(
631 thisData.getSecond(), otherData.getSecond()))) {
632 return false;
633 }
634 }
635 return true;
636 }
637 }