1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import java.io.DataInput;
21 import java.io.DataOutput;
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.List;
26
27 import org.apache.hadoop.hbase.KeyValue;
28 import org.apache.hadoop.hbase.util.Bytes;
29 import org.apache.hadoop.hbase.util.Pair;
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59 public class FuzzyRowFilter extends FilterBase {
60 private List<Pair<byte[], byte[]>> fuzzyKeysData;
61 private boolean done = false;
62
63
64
65
66 public FuzzyRowFilter() {
67 }
68
69 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
70 this.fuzzyKeysData = fuzzyKeysData;
71 }
72
73
74 @Override
75 public ReturnCode filterKeyValue(KeyValue kv) {
76 byte[] rowKey = kv.getRow();
77
78 SatisfiesCode bestOption = SatisfiesCode.NO_NEXT;
79 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
80 SatisfiesCode satisfiesCode =
81 satisfies(rowKey, fuzzyData.getFirst(), fuzzyData.getSecond());
82 if (satisfiesCode == SatisfiesCode.YES) {
83 return ReturnCode.INCLUDE;
84 }
85
86 if (satisfiesCode == SatisfiesCode.NEXT_EXISTS) {
87 bestOption = SatisfiesCode.NEXT_EXISTS;
88 }
89 }
90
91 if (bestOption == SatisfiesCode.NEXT_EXISTS) {
92 return ReturnCode.SEEK_NEXT_USING_HINT;
93 }
94
95
96 done = true;
97 return ReturnCode.NEXT_ROW;
98 }
99
100 @Override
101 public KeyValue getNextKeyHint(KeyValue currentKV) {
102 byte[] rowKey = currentKV.getRow();
103 byte[] nextRowKey = null;
104
105 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
106 byte[] nextRowKeyCandidate = getNextForFuzzyRule(rowKey,
107 fuzzyData.getFirst(), fuzzyData.getSecond());
108 if (nextRowKeyCandidate == null) {
109 continue;
110 }
111 if (nextRowKey == null || Bytes.compareTo(nextRowKeyCandidate, nextRowKey) < 0) {
112 nextRowKey = nextRowKeyCandidate;
113 }
114 }
115
116 if (nextRowKey == null) {
117
118
119 throw new IllegalStateException("No next row key that satisfies fuzzy exists when" +
120 " getNextKeyHint() is invoked." +
121 " Filter: " + this.toString() +
122 " currentKV: " + currentKV.toString());
123 }
124
125 return KeyValue.createFirstOnRow(nextRowKey);
126 }
127
128 @Override
129 public boolean filterAllRemaining() {
130 return done;
131 }
132
133 @Override
134 public void write(DataOutput dataOutput) throws IOException {
135 dataOutput.writeInt(this.fuzzyKeysData.size());
136 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
137 Bytes.writeByteArray(dataOutput, fuzzyData.getFirst());
138 Bytes.writeByteArray(dataOutput, fuzzyData.getSecond());
139 }
140 }
141
142 @Override
143 public void readFields(DataInput dataInput) throws IOException {
144 int count = dataInput.readInt();
145 this.fuzzyKeysData = new ArrayList<Pair<byte[], byte[]>>(count);
146 for (int i = 0; i < count; i++) {
147 byte[] keyBytes = Bytes.readByteArray(dataInput);
148 byte[] keyMeta = Bytes.readByteArray(dataInput);
149 this.fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
150 }
151 }
152
153 @Override
154 public String toString() {
155 final StringBuilder sb = new StringBuilder();
156 sb.append("FuzzyRowFilter");
157 sb.append("{fuzzyKeysData=");
158 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
159 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
160 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
161 }
162 sb.append("}, ");
163 return sb.toString();
164 }
165
166
167
168 static enum SatisfiesCode {
169
170 YES,
171
172 NEXT_EXISTS,
173
174 NO_NEXT
175 }
176
177 static SatisfiesCode satisfies(byte[] row,
178 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
179 return satisfies(row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
180 }
181
182 private static SatisfiesCode satisfies(byte[] row, int offset, int length,
183 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
184 if (row == null) {
185
186 return SatisfiesCode.YES;
187 }
188
189 boolean nextRowKeyCandidateExists = false;
190
191 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
192
193 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
194 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
195 if (fixedByteIncorrect) {
196
197 if (nextRowKeyCandidateExists) {
198 return SatisfiesCode.NEXT_EXISTS;
199 }
200
201
202
203
204 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
205 return rowByteLessThanFixed ? SatisfiesCode.NEXT_EXISTS : SatisfiesCode.NO_NEXT;
206 }
207
208
209
210
211
212
213
214 if (fuzzyKeyMeta[i] == 1 && !isMax(fuzzyKeyBytes[i])) {
215 nextRowKeyCandidateExists = true;
216 }
217 }
218
219 return SatisfiesCode.YES;
220 }
221
222 private static boolean isMax(byte fuzzyKeyByte) {
223 return (fuzzyKeyByte & 0xFF) == 255;
224 }
225
226 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
227 return getNextForFuzzyRule(row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
228 }
229
230
231
232
233
234 private static byte[] getNextForFuzzyRule(byte[] row, int offset, int length,
235 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
236
237
238
239
240
241
242
243
244 byte[] result = Arrays.copyOf(fuzzyKeyBytes,
245 length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
246 int toInc = -1;
247
248 boolean increased = false;
249 for (int i = 0; i < result.length; i++) {
250 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
251 result[i] = row[offset + i];
252 if (!isMax(row[i])) {
253
254 toInc = i;
255 }
256 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == 0) {
257 if ((row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF)) {
258
259
260 increased = true;
261 break;
262 }
263 if ((row[i + offset] & 0xFF) > (fuzzyKeyBytes[i] & 0xFF)) {
264
265
266
267 break;
268 }
269 }
270 }
271
272 if (!increased) {
273 if (toInc < 0) {
274 return null;
275 }
276 result[toInc]++;
277
278
279
280 for (int i = toInc + 1; i < result.length; i++) {
281 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
282 result[i] = 0;
283 }
284 }
285 }
286
287 return result;
288 }
289 }