1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
20
21 import java.util.ArrayList;
22 import java.util.List;
23
24 import org.apache.hadoop.classification.InterfaceAudience;
25 import org.apache.hadoop.hbase.util.ByteRange;
26 import org.apache.hadoop.hbase.util.Bytes;
27 import org.apache.hadoop.hbase.util.CollectionUtils;
28 import org.apache.hadoop.hbase.util.Strings;
29
30 import com.google.common.collect.Lists;
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59 @InterfaceAudience.Private
60 public class TokenizerNode{
61
62
63
64
65 protected Tokenizer builder;
66
67
68
69
70
71
72
73
74 protected TokenizerNode parent;
75
76
77
78
79 protected int nodeDepth;
80
81
82
83
84 protected int tokenStartOffset;
85
86
87
88
89 protected ByteRange token;
90
91
92
93
94
95
96 protected int numOccurrences;
97
98
99
100
101
102 protected ArrayList<TokenizerNode> children;
103
104
105
106
107
108
109
110
111
112
113
114 protected long id;
115
116
117
118
119 protected int firstInsertionIndex = -1;
120
121
122
123
124
125 protected int negativeIndex = 0;
126
127
128
129
130
131 protected int outputArrayOffset = -1;
132
133
134
135
136 public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth,
137 int tokenStartOffset, int tokenOffset, int tokenLength) {
138 this.token = new ByteRange();
139 reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength);
140 this.children = Lists.newArrayList();
141 }
142
143
144
145
146
147 public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth,
148 int tokenStartOffset, int tokenOffset, int tokenLength) {
149 this.builder = builder;
150 this.id = builder.nextNodeId();
151 this.parent = parent;
152 this.nodeDepth = nodeDepth;
153 builder.submitMaxNodeDepthCandidate(nodeDepth);
154 this.tokenStartOffset = tokenStartOffset;
155 this.token.set(builder.tokens, tokenOffset, tokenLength);
156 this.numOccurrences = 1;
157 }
158
159
160
161
162 public void reset() {
163 builder = null;
164 parent = null;
165 nodeDepth = 0;
166 tokenStartOffset = 0;
167 token.clear();
168 numOccurrences = 0;
169 children.clear();
170
171
172 id = 0;
173 firstInsertionIndex = -1;
174 negativeIndex = 0;
175 outputArrayOffset = -1;
176 }
177
178
179
180
181
182
183
184
185
186
187 public void addSorted(final ByteRange bytes) {
188
189
190
191
192 if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) {
193 TokenizerNode lastChild = CollectionUtils.getLast(children);
194 if (lastChild.partiallyMatchesToken(bytes)) {
195 lastChild.addSorted(bytes);
196 return;
197 }
198 }
199
200
201
202
203
204
205
206
207
208 int numIdenticalTokenBytes = numIdenticalBytes(bytes);
209 int tailOffset = tokenStartOffset + numIdenticalTokenBytes;
210 int tailLength = bytes.getLength() - tailOffset;
211
212 if (numIdenticalTokenBytes == token.getLength()) {
213 if (tailLength == 0) {
214 incrementNumOccurrences(1);
215 } else {
216 int childNodeDepth = nodeDepth + 1;
217 int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes;
218 TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset,
219 bytes, tailOffset);
220 addChild(newChildNode);
221 }
222 } else {
223 split(numIdenticalTokenBytes, bytes);
224 }
225 }
226
227
228 protected void addChild(TokenizerNode node) {
229 node.setParent(this);
230 children.add(node);
231 }
232
233
234
235
236
237
238
239
240
241
242
243
244
245 protected void split(int numTokenBytesToRetain, final ByteRange bytes) {
246 int childNodeDepth = nodeDepth;
247 int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain;
248
249
250 TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
251 token, numTokenBytesToRetain);
252 firstChild.setNumOccurrences(numOccurrences);
253 token.setLength(numTokenBytesToRetain);
254 numOccurrences = 0;
255
256 moveChildrenToDifferentParent(firstChild);
257 addChild(firstChild);
258
259
260 TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
261 bytes, tokenStartOffset + numTokenBytesToRetain);
262 addChild(secondChild);
263
264
265
266 firstChild.incrementNodeDepthRecursively();
267 secondChild.incrementNodeDepthRecursively();
268 }
269
270
271 protected void incrementNodeDepthRecursively() {
272 ++nodeDepth;
273 builder.submitMaxNodeDepthCandidate(nodeDepth);
274 for (int i = 0; i < children.size(); ++i) {
275 children.get(i).incrementNodeDepthRecursively();
276 }
277 }
278
279
280 protected void moveChildrenToDifferentParent(TokenizerNode newParent) {
281 for (int i = 0; i < children.size(); ++i) {
282 TokenizerNode child = children.get(i);
283 child.setParent(newParent);
284 newParent.children.add(child);
285 }
286 children.clear();
287 }
288
289
290
291
292 protected boolean partiallyMatchesToken(ByteRange bytes) {
293 return numIdenticalBytes(bytes) > 0;
294 }
295
296 protected boolean matchesToken(ByteRange bytes) {
297 return numIdenticalBytes(bytes) == getTokenLength();
298 }
299
300 protected int numIdenticalBytes(ByteRange bytes) {
301 return token.numEqualPrefixBytes(bytes, tokenStartOffset);
302 }
303
304
305
306
307 public void appendNodesToExternalList(List<TokenizerNode> appendTo, boolean includeNonLeaves,
308 boolean includeLeaves) {
309 if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) {
310 appendTo.add(this);
311 }
312 for (int i = 0; i < children.size(); ++i) {
313 TokenizerNode child = children.get(i);
314 child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
315 }
316 }
317
318 public int setInsertionIndexes(int nextIndex) {
319 int newNextIndex = nextIndex;
320 if (hasOccurrences()) {
321 setFirstInsertionIndex(nextIndex);
322 newNextIndex += numOccurrences;
323 }
324 for (int i = 0; i < children.size(); ++i) {
325 TokenizerNode child = children.get(i);
326 newNextIndex = child.setInsertionIndexes(newNextIndex);
327 }
328 return newNextIndex;
329 }
330
331 public void appendOutputArrayOffsets(List<Integer> offsets) {
332 if (hasOccurrences()) {
333 offsets.add(outputArrayOffset);
334 }
335 for (int i = 0; i < children.size(); ++i) {
336 TokenizerNode child = children.get(i);
337 child.appendOutputArrayOffsets(offsets);
338 }
339 }
340
341
342
343
344
345
346
347
348
349
350 public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
351 int keyLength) {
352 int thisNodeDepthPlusLength = tokenStartOffset + token.getLength();
353
354
355 if (CollectionUtils.isEmpty(children)) {
356 if (thisNodeDepthPlusLength < keyLength) {
357 resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
358 return;
359 }
360 }
361
362
363 for (int i = 0; i < token.getLength(); ++i) {
364 if (key[tokenStartOffset + keyOffset + i] != token.get(i)) {
365
366 resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
367 return;
368 }
369 }
370
371 if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) {
372 resultHolder.set(TokenizerRowSearchPosition.MATCH, this);
373 return;
374 }
375
376 if (CollectionUtils.notEmpty(children)) {
377
378 for (int i = 0; i < children.size(); ++i) {
379 TokenizerNode child = children.get(i);
380 child.getNode(resultHolder, key, keyOffset, keyLength);
381 if (resultHolder.isMatch()) {
382 return;
383 } else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) {
384
385 resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
386 return;
387 }
388
389 }
390 }
391
392
393 resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
394 return;
395 }
396
397
398
399
400 public byte[] getNewByteArray() {
401 byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()];
402 fillInBytes(arrayToFill);
403 return arrayToFill;
404 }
405
406 public void fillInBytes(byte[] arrayToFill) {
407 for (int i = 0; i < token.getLength(); ++i) {
408 arrayToFill[tokenStartOffset + i] = token.get(i);
409 }
410 if (parent != null) {
411 parent.fillInBytes(arrayToFill);
412 }
413 }
414
415
416
417
418 @Override
419 public String toString() {
420 String s = "";
421 if (parent == null) {
422 s += "R ";
423 } else {
424 s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray());
425 }
426 s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]";
427 if (numOccurrences > 0) {
428 s += "x" + numOccurrences;
429 }
430 return s;
431 }
432
433 public String getPaddedTokenAndOccurrenceString() {
434 StringBuilder sb = new StringBuilder();
435 sb.append(getBnlIndicator(true));
436 sb.append(Strings.padFront(numOccurrences + "", ' ', 3));
437 sb.append(Strings.padFront(nodeDepth + "", ' ', 3));
438 if (outputArrayOffset >= 0) {
439 sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3));
440 }
441 sb.append(" ");
442 for (int i = 0; i < tokenStartOffset; ++i) {
443 sb.append(" ");
444 }
445 sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_"));
446 return sb.toString();
447 }
448
449 public String getBnlIndicator(boolean indent) {
450 if (indent) {
451 if (isNub()) {
452 return " N ";
453 }
454 return isBranch() ? "B " : " L";
455 }
456 if (isNub()) {
457 return "N";
458 }
459 return isBranch() ? "B" : "L";
460 }
461
462
463
464
465 public int getNumBranchNodesIncludingThisNode() {
466 if (isLeaf()) {
467 return 0;
468 }
469 int totalFromThisPlusChildren = isBranch() ? 1 : 0;
470 for (int i = 0; i < children.size(); ++i) {
471 TokenizerNode child = children.get(i);
472 totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode();
473 }
474 return totalFromThisPlusChildren;
475 }
476
477 public int getNumNubNodesIncludingThisNode() {
478 if (isLeaf()) {
479 return 0;
480 }
481 int totalFromThisPlusChildren = isNub() ? 1 : 0;
482 for (int i = 0; i < children.size(); ++i) {
483 TokenizerNode child = children.get(i);
484 totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode();
485 }
486 return totalFromThisPlusChildren;
487 }
488
489 public int getNumLeafNodesIncludingThisNode() {
490 if (isLeaf()) {
491 return 1;
492 }
493 int totalFromChildren = 0;
494 for (int i = 0; i < children.size(); ++i) {
495 TokenizerNode child = children.get(i);
496 totalFromChildren += child.getNumLeafNodesIncludingThisNode();
497 }
498 return totalFromChildren;
499 }
500
501
502
503
504 public int getNodeDepth() {
505 return nodeDepth;
506 }
507
508 public int getTokenLength() {
509 return token.getLength();
510 }
511
512 public boolean hasOccurrences() {
513 return numOccurrences > 0;
514 }
515
516 public boolean isRoot() {
517 return this.parent == null;
518 }
519
520 public int getNumChildren() {
521 return CollectionUtils.nullSafeSize(children);
522 }
523
524 public TokenizerNode getLastChild() {
525 if (CollectionUtils.isEmpty(children)) {
526 return null;
527 }
528 return CollectionUtils.getLast(children);
529 }
530
531 public boolean isLeaf() {
532 return CollectionUtils.isEmpty(children) && hasOccurrences();
533 }
534
535 public boolean isBranch() {
536 return CollectionUtils.notEmpty(children) && !hasOccurrences();
537 }
538
539 public boolean isNub() {
540 return CollectionUtils.notEmpty(children) && hasOccurrences();
541 }
542
543
544
545
546
547
548
549
550
551
552
553 public void incrementNumOccurrences(int d) {
554 numOccurrences += d;
555 }
556
557
558
559
560 public int getTokenOffset() {
561 return tokenStartOffset;
562 }
563
564 public TokenizerNode getParent() {
565 return parent;
566 }
567
568 public ByteRange getToken() {
569 return token;
570 }
571
572 public int getNumOccurrences() {
573 return numOccurrences;
574 }
575
576 public void setParent(TokenizerNode parent) {
577 this.parent = parent;
578 }
579
580 public void setNumOccurrences(int numOccurrences) {
581 this.numOccurrences = numOccurrences;
582 }
583
584 public ArrayList<TokenizerNode> getChildren() {
585 return children;
586 }
587
588 public long getId() {
589 return id;
590 }
591
592 public int getFirstInsertionIndex() {
593 return firstInsertionIndex;
594 }
595
596 public void setFirstInsertionIndex(int firstInsertionIndex) {
597 this.firstInsertionIndex = firstInsertionIndex;
598 }
599
600 public int getNegativeIndex() {
601 return negativeIndex;
602 }
603
604 public void setNegativeIndex(int negativeIndex) {
605 this.negativeIndex = negativeIndex;
606 }
607
608 public int getOutputArrayOffset() {
609 return outputArrayOffset;
610 }
611
612 public void setOutputArrayOffset(int outputArrayOffset) {
613 this.outputArrayOffset = outputArrayOffset;
614 }
615
616 public void setId(long id) {
617 this.id = id;
618 }
619
620 public void setBuilder(Tokenizer builder) {
621 this.builder = builder;
622 }
623
624 public void setTokenOffset(int tokenOffset) {
625 this.tokenStartOffset = tokenOffset;
626 }
627
628 public void setToken(ByteRange token) {
629 this.token = token;
630 }
631
632 }