1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import com.google.protobuf.ByteString;
21 import com.google.protobuf.InvalidProtocolBufferException;
22 import org.apache.hadoop.classification.InterfaceAudience;
23 import org.apache.hadoop.classification.InterfaceStability;
24 import org.apache.hadoop.hbase.KeyValue;
25 import org.apache.hadoop.hbase.exceptions.DeserializationException;
26 import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
27 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
28 import org.apache.hadoop.hbase.util.Bytes;
29 import org.apache.hadoop.hbase.util.Pair;
30
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.List;
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63 @InterfaceAudience.Public
64 @InterfaceStability.Evolving
65 public class FuzzyRowFilter extends FilterBase {
66 private List<Pair<byte[], byte[]>> fuzzyKeysData;
67 private boolean done = false;
68
69 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
70 this.fuzzyKeysData = fuzzyKeysData;
71 }
72
73
74 @Override
75 public ReturnCode filterKeyValue(KeyValue kv) {
76 byte[] rowKey = kv.getRow();
77
78 SatisfiesCode bestOption = SatisfiesCode.NO_NEXT;
79 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
80 SatisfiesCode satisfiesCode =
81 satisfies(rowKey, fuzzyData.getFirst(), fuzzyData.getSecond());
82 if (satisfiesCode == SatisfiesCode.YES) {
83 return ReturnCode.INCLUDE;
84 }
85
86 if (satisfiesCode == SatisfiesCode.NEXT_EXISTS) {
87 bestOption = SatisfiesCode.NEXT_EXISTS;
88 }
89 }
90
91 if (bestOption == SatisfiesCode.NEXT_EXISTS) {
92 return ReturnCode.SEEK_NEXT_USING_HINT;
93 }
94
95
96 done = true;
97 return ReturnCode.NEXT_ROW;
98 }
99
100 @Override
101 public KeyValue getNextKeyHint(KeyValue currentKV) {
102 byte[] rowKey = currentKV.getRow();
103 byte[] nextRowKey = null;
104
105 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
106 byte[] nextRowKeyCandidate = getNextForFuzzyRule(rowKey,
107 fuzzyData.getFirst(), fuzzyData.getSecond());
108 if (nextRowKeyCandidate == null) {
109 continue;
110 }
111 if (nextRowKey == null || Bytes.compareTo(nextRowKeyCandidate, nextRowKey) < 0) {
112 nextRowKey = nextRowKeyCandidate;
113 }
114 }
115
116 if (nextRowKey == null) {
117
118
119 throw new IllegalStateException("No next row key that satisfies fuzzy exists when" +
120 " getNextKeyHint() is invoked." +
121 " Filter: " + this.toString() +
122 " currentKV: " + currentKV.toString());
123 }
124
125 return KeyValue.createFirstOnRow(nextRowKey);
126 }
127
128 @Override
129 public boolean filterAllRemaining() {
130 return done;
131 }
132
133
134
135
136 public byte [] toByteArray() {
137 FilterProtos.FuzzyRowFilter.Builder builder =
138 FilterProtos.FuzzyRowFilter.newBuilder();
139 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
140 BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
141 bbpBuilder.setFirst(ByteString.copyFrom(fuzzyData.getFirst()));
142 bbpBuilder.setSecond(ByteString.copyFrom(fuzzyData.getSecond()));
143 builder.addFuzzyKeysData(bbpBuilder);
144 }
145 return builder.build().toByteArray();
146 }
147
148
149
150
151
152
153
154 public static FuzzyRowFilter parseFrom(final byte [] pbBytes)
155 throws DeserializationException {
156 FilterProtos.FuzzyRowFilter proto;
157 try {
158 proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
159 } catch (InvalidProtocolBufferException e) {
160 throw new DeserializationException(e);
161 }
162 int count = proto.getFuzzyKeysDataCount();
163 ArrayList<Pair<byte[], byte[]>> fuzzyKeysData= new ArrayList<Pair<byte[], byte[]>>(count);
164 for (int i = 0; i < count; ++i) {
165 BytesBytesPair current = proto.getFuzzyKeysData(i);
166 byte[] keyBytes = current.getFirst().toByteArray();
167 byte[] keyMeta = current.getSecond().toByteArray();
168 fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
169 }
170 return new FuzzyRowFilter(fuzzyKeysData);
171 }
172
173 @Override
174 public String toString() {
175 final StringBuilder sb = new StringBuilder();
176 sb.append("FuzzyRowFilter");
177 sb.append("{fuzzyKeysData=");
178 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
179 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
180 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
181 }
182 sb.append("}, ");
183 return sb.toString();
184 }
185
186
187
188 static enum SatisfiesCode {
189
190 YES,
191
192 NEXT_EXISTS,
193
194 NO_NEXT
195 }
196
197 static SatisfiesCode satisfies(byte[] row,
198 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
199 return satisfies(row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
200 }
201
202 private static SatisfiesCode satisfies(byte[] row, int offset, int length,
203 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
204 if (row == null) {
205
206 return SatisfiesCode.YES;
207 }
208
209 boolean nextRowKeyCandidateExists = false;
210
211 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
212
213 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
214 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
215 if (fixedByteIncorrect) {
216
217 if (nextRowKeyCandidateExists) {
218 return SatisfiesCode.NEXT_EXISTS;
219 }
220
221
222
223
224 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
225 return rowByteLessThanFixed ? SatisfiesCode.NEXT_EXISTS : SatisfiesCode.NO_NEXT;
226 }
227
228
229
230
231
232
233
234 if (fuzzyKeyMeta[i] == 1 && !isMax(fuzzyKeyBytes[i])) {
235 nextRowKeyCandidateExists = true;
236 }
237 }
238
239 return SatisfiesCode.YES;
240 }
241
242 private static boolean isMax(byte fuzzyKeyByte) {
243 return (fuzzyKeyByte & 0xFF) == 255;
244 }
245
246 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
247 return getNextForFuzzyRule(row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
248 }
249
250
251
252
253
254 private static byte[] getNextForFuzzyRule(byte[] row, int offset, int length,
255 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
256
257
258
259
260
261
262
263
264 byte[] result = Arrays.copyOf(fuzzyKeyBytes,
265 length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
266 int toInc = -1;
267
268 boolean increased = false;
269 for (int i = 0; i < result.length; i++) {
270 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
271 result[i] = row[offset + i];
272 if (!isMax(row[i])) {
273
274 toInc = i;
275 }
276 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == 0) {
277 if ((row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF)) {
278
279
280 increased = true;
281 break;
282 }
283 if ((row[i + offset] & 0xFF) > (fuzzyKeyBytes[i] & 0xFF)) {
284
285
286
287 break;
288 }
289 }
290 }
291
292 if (!increased) {
293 if (toInc < 0) {
294 return null;
295 }
296 result[toInc]++;
297
298
299
300 for (int i = toInc + 1; i < result.length; i++) {
301 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
302 result[i] = 0;
303 }
304 }
305 }
306
307 return result;
308 }
309
310
311
312
313
314
315 boolean areSerializedFieldsEqual(Filter o) {
316 if (o == this) return true;
317 if (!(o instanceof FuzzyRowFilter)) return false;
318
319 FuzzyRowFilter other = (FuzzyRowFilter)o;
320 if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
321 for (int i = 0; i < fuzzyKeysData.size(); ++i) {
322 Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
323 Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
324 if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst())
325 && Bytes.equals(thisData.getSecond(), otherData.getSecond()))) {
326 return false;
327 }
328 }
329 return true;
330 }
331
332 }