View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.codec.prefixtree.decode;
20  
21  import org.apache.hadoop.classification.InterfaceAudience;
22  import org.apache.hadoop.hbase.Cell;
23  import org.apache.hadoop.hbase.CellUtil;
24  import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
25  import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
26  import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
27  
28  import com.google.common.primitives.UnsignedBytes;
29  
30  /**
31   * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
32   * position itself on a requested Cell without scanning through cells before it. The PrefixTree is
33   * set up to be a Trie of rows, so finding a particular row is extremely cheap.
 * <p>
 * Once it finds the row, it does a binary search through the cells inside the row, which is not as
 * fast as the trie search, but faster than iterating through every cell like existing block
 * formats do. For this reason, this implementation is targeted towards schemas where rows are
 * narrow enough to have several or many per block, and where you are generally looking for the
 * entire row or the first cell. It will still be fast for wide rows or point queries, but could be
 * improved upon.
43   */
44  @InterfaceAudience.Private
45  public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
46      CellSearcher {
47  
48    /*************** construct ******************************/
49  
  /**
   * Creates a searcher by handing every argument straight to the superclass constructor; this
   * class sets up no state of its own.
   * @param blockMeta forwarded unchanged to the superclass
   * @param rowTreeDepth forwarded unchanged to the superclass
   * @param rowBufferLength forwarded unchanged to the superclass
   * @param qualifierBufferLength forwarded unchanged to the superclass
   */
  public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
      int rowBufferLength, int qualifierBufferLength) {
    super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
  }
54  
55  
56    /********************* CellSearcher methods *******************/
57  
58    @Override
59    public boolean positionAt(Cell key) {
60      return CellScannerPosition.AT == positionAtOrAfter(key);
61    }
62  
63    @Override
64    public CellScannerPosition positionAtOrBefore(Cell key) {
65      reInitFirstNode();
66      int fanIndex = -1;
67  
68      while(true){
69        //detect row mismatch.  break loop if mismatch
70        int currentNodeDepth = rowLength;
71        int rowTokenComparison = compareToCurrentToken(key);
72        if(rowTokenComparison != 0){
73          return fixRowTokenMissReverse(rowTokenComparison);
74        }
75  
76        //exact row found, move on to qualifier & ts
77        if(rowMatchesAfterCurrentPosition(key)){
78          return positionAtQualifierTimestamp(key, true);
79        }
80  
81        //detect dead end (no fan to descend into)
82        if(!currentRowNode.hasFan()){
83          if(hasOccurrences()){//must be leaf or nub
84            populateLastNonRowFields();
85            return CellScannerPosition.BEFORE;
86          }else{
87            //TODO i don't think this case is exercised by any tests
88            return fixRowFanMissReverse(0);
89          }
90        }
91  
92        //keep hunting for the rest of the row
93        byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
94        fanIndex = currentRowNode.whichFanNode(searchForByte);
95        if(fanIndex < 0){//no matching row.  return early
96          int insertionPoint = -fanIndex;
97          return fixRowFanMissReverse(insertionPoint);
98        }
99        //found a match, so dig deeper into the tree
100       followFan(fanIndex);
101     }
102   }
103 
104   /**
105    * Identical workflow as positionAtOrBefore, but split them to avoid having ~10 extra
106    * if-statements. Priority on readability and debugability.
107    */
108   @Override
109   public CellScannerPosition positionAtOrAfter(Cell key) {
110     reInitFirstNode();
111     int fanIndex = -1;
112 
113     while(true){
114       //detect row mismatch.  break loop if mismatch
115       int currentNodeDepth = rowLength;
116       int rowTokenComparison = compareToCurrentToken(key);
117       if(rowTokenComparison != 0){
118         return fixRowTokenMissForward(rowTokenComparison);
119       }
120 
121       //exact row found, move on to qualifier & ts
122       if(rowMatchesAfterCurrentPosition(key)){
123         return positionAtQualifierTimestamp(key, false);
124       }
125 
126       //detect dead end (no fan to descend into)
127       if(!currentRowNode.hasFan()){
128         if(hasOccurrences()){
129           populateFirstNonRowFields();
130           return CellScannerPosition.AFTER;
131         }else{
132           //TODO i don't think this case is exercised by any tests
133           return fixRowFanMissForward(0);
134         }
135       }
136 
137       //keep hunting for the rest of the row
138       byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
139       fanIndex = currentRowNode.whichFanNode(searchForByte);
140       if(fanIndex < 0){//no matching row.  return early
141         int insertionPoint = -fanIndex;
142         return fixRowFanMissForward(insertionPoint);
143       }
144       //found a match, so dig deeper into the tree
145       followFan(fanIndex);
146     }
147   }
148 
149   @Override
150   public boolean seekForwardTo(Cell key) {
151     if(currentPositionIsAfter(key)){
152       //our position is after the requested key, so can't do anything
153       return false;
154     }
155     return positionAt(key);
156   }
157 
158   @Override
159   public CellScannerPosition seekForwardToOrBefore(Cell key) {
160     //Do we even need this check or should upper layers avoid this situation.  It's relatively
161     //expensive compared to the rest of the seek operation.
162     if(currentPositionIsAfter(key)){
163       //our position is after the requested key, so can't do anything
164       return CellScannerPosition.AFTER;
165     }
166 
167     return positionAtOrBefore(key);
168   }
169 
170   @Override
171   public CellScannerPosition seekForwardToOrAfter(Cell key) {
172     //Do we even need this check or should upper layers avoid this situation.  It's relatively
173     //expensive compared to the rest of the seek operation.
174     if(currentPositionIsAfter(key)){
175       //our position is after the requested key, so can't do anything
176       return CellScannerPosition.AFTER;
177     }
178 
179     return positionAtOrAfter(key);
180   }
181 
  /**
   * Marks the searcher as sitting past the final cell. The content of the buffers doesn't matter
   * here, only that afterLast=true and beforeFirst=false when the method returns.
   */
  @Override
  public void positionAfterLastCell() {
    // reset first, then overwrite the two position flags so they end up as documented above
    resetToBeforeFirstEntry();
    beforeFirst = false;
    afterLast = true;
  }
191 
192 
193   /***************** Object methods ***************************/
194 
  /**
   * Delegates to the superclass implementation; this class adds no equality rules of its own.
   * @param obj the object to compare against
   * @return whatever {@code super.equals(obj)} returns
   */
  @Override
  public boolean equals(Object obj) {
    //trivial override to confirm intent (findbugs)
    return super.equals(obj);
  }
200 
201 
202   /****************** internal methods ************************/
203 
  /**
   * Tells the seekForward* methods whether the searcher has already moved past {@code cell}.
   * @param cell the Cell to compare the current position against
   * @return true when the inherited {@code compareTo(cell)} yields a positive value
   */
  protected boolean currentPositionIsAfter(Cell cell){
    // NOTE(review): compareTo is inherited; presumably it orders the scanner's current cell
    // against the argument - confirm in the superclass
    return compareTo(cell) > 0;
  }
207 
  /**
   * Binary-searches the cells of the current row node (indexes 0 through
   * {@code currentRowNode.getLastCellIndex()}) for {@code key}, using
   * {@code populateNonRowFieldsAndCompareTo} to load a candidate cell and compare it with the key.
   * @param key the Cell being searched for
   * @param beforeOnMiss when no exact match exists: true to settle on the neighbouring cell before
   *          the key, false to settle on the one after it
   * @return AT on an exact match; otherwise BEFORE/BEFORE_FIRST when {@code beforeOnMiss} is true
   *         or AFTER/AFTER_LAST when it is false, depending on whether a neighbouring cell exists
   */
  protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
    int minIndex = 0;
    int maxIndex = currentRowNode.getLastCellIndex();
    // comparison result for the most recently populated cell; negative is treated below as
    // "we are before key", positive as "we are after key"
    int diff;
    while (true) {
      int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow
      diff = populateNonRowFieldsAndCompareTo(midIndex, key);

      if (diff == 0) {// found exact match
        return CellScannerPosition.AT;
      } else if (minIndex == maxIndex) {// even termination case
        // window is a single cell and it did not match
        break;
      } else if ((minIndex + 1) == maxIndex) {// odd termination case
        // window is two cells; midIndex equals minIndex here, so minIndex was just tried.
        // Try maxIndex, and fall back to minIndex if maxIndex turns out to be past the key.
        diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
        if(diff > 0){
          diff = populateNonRowFieldsAndCompareTo(minIndex, key);
        }
        break;
      } else if (diff < 0) {// keep going forward
        // NOTE(review): relies on currentCellIndex having been set by the populate call above,
        // presumably to midIndex - confirm in the superclass
        minIndex = currentCellIndex;
      } else {// went past it, back up
        maxIndex = currentCellIndex;
      }
    }

    // diff now describes the cell the searcher is left on
    if (diff == 0) {
      return CellScannerPosition.AT;

    } else if (diff < 0) {// we are before key
      if (beforeOnMiss) {
        return CellScannerPosition.BEFORE;
      }
      // caller wants the cell after the key, so step forward one cell if there is one
      if (advance()) {
        return CellScannerPosition.AFTER;
      }
      return CellScannerPosition.AFTER_LAST;

    } else {// we are after key
      if (!beforeOnMiss) {
        return CellScannerPosition.AFTER;
      }
      // caller wants the cell before the key, so step back one cell if there is one
      if (previous()) {
        return CellScannerPosition.BEFORE;
      }
      return CellScannerPosition.BEFORE_FIRST;
    }
  }
255 
256   /**
257    * compare this.row to key.row but starting at the current rowLength
258    * @param key Cell being searched for
259    * @return true if row buffer contents match key.row
260    */
261   protected boolean rowMatchesAfterCurrentPosition(Cell key) {
262     if (!currentRowNode.hasOccurrences()) {
263       return false;
264     }
265     int thatRowLength = key.getRowLength();
266     if (rowLength != thatRowLength) {
267       return false;
268     }
269     return true;
270   }
271 
272   // TODO move part of this to Cell comparator?
273   /**
274    * Compare only the bytes within the window of the current token
275    * @param key
276    * @return return -1 if key is lessThan (before) this, 0 if equal, and 1 if key is after
277    */
278   protected int compareToCurrentToken(Cell key) {
279     int startIndex = rowLength - currentRowNode.getTokenLength();
280     int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
281     for (int i = startIndex; i < endIndexExclusive; ++i) {
282       if (i >= key.getRowLength()) {// key was shorter, so it's first
283         return -1;
284       }
285       byte keyByte = CellUtil.getRowByte(key, i);
286       byte thisByte = rowBuffer[i];
287       if (keyByte == thisByte) {
288         continue;
289       }
290       return UnsignedBytes.compare(keyByte, thisByte);
291     }
292     return 0;
293   }
294 
  /**
   * Keeps calling {@code followLastFan()} for as long as the current row node reports a fan, so
   * the searcher ends up on a node with no fan.
   */
  protected void followLastFansUntilExhausted(){
    while(currentRowNode.hasFan()){
      followLastFan();
    }
  }
300 
301 
302 	/****************** complete seek when token mismatch ******************/
303 
304   /**
305    * @param searcherIsAfterInputKey <0: input key is before the searcher's position<br/>
306    *          >0: input key is after the searcher's position
307    */
308   protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
309     if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up
310       boolean foundPreviousRow = previousRow(true);
311       if(foundPreviousRow){
312         populateLastNonRowFields();
313         return CellScannerPosition.BEFORE;
314       }else{
315         return CellScannerPosition.BEFORE_FIRST;
316       }
317 
318     }else{//searcher position is before the input key
319       if(currentRowNode.hasOccurrences()){
320         populateFirstNonRowFields();
321         return CellScannerPosition.BEFORE;
322       }
323       boolean foundNextRow = nextRow();
324       if(foundNextRow){
325         return CellScannerPosition.AFTER;
326       }else{
327         return CellScannerPosition.AFTER_LAST;
328       }
329     }
330   }
331 
332   /**
333    * @param searcherIsAfterInputKey <0: input key is before the searcher's position<br/>
334    *                   >0: input key is after the searcher's position
335    */
336   protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
337     if (searcherIsAfterInputKey < 0) {//searcher position is after the input key
338       if(currentRowNode.hasOccurrences()){
339         populateFirstNonRowFields();
340         return CellScannerPosition.AFTER;
341       }
342       boolean foundNextRow = nextRow();
343       if(foundNextRow){
344         return CellScannerPosition.AFTER;
345       }else{
346         return CellScannerPosition.AFTER_LAST;
347       }
348 
349     }else{//searcher position is before the input key, so go forward
350       discardCurrentRowNode(true);
351       boolean foundNextRow = nextRow();
352       if(foundNextRow){
353         return CellScannerPosition.AFTER;
354       }else{
355         return CellScannerPosition.AFTER_LAST;
356       }
357     }
358   }
359 
360 
361   /****************** complete seek when fan mismatch ******************/
362 
363   protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){
364     if(fanInsertionPoint == 0){//we need to back up a row
365       boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row
366       if(foundPreviousRow){
367         populateLastNonRowFields();
368         return CellScannerPosition.BEFORE;
369       }
370       return CellScannerPosition.BEFORE_FIRST;
371     }
372 
373     //follow the previous fan, but then descend recursively forward
374     followFan(fanInsertionPoint - 1);
375     followLastFansUntilExhausted();
376     populateLastNonRowFields();
377     return CellScannerPosition.BEFORE;
378   }
379 
380   protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){
381     if(fanInsertionPoint >= currentRowNode.getFanOut()){
382       discardCurrentRowNode(true);
383       if (!nextRow()) {
384         return CellScannerPosition.AFTER_LAST;
385       } else {
386         return CellScannerPosition.AFTER;
387       }
388     }
389 
390     followFan(fanInsertionPoint);
391     if(hasOccurrences()){
392       populateFirstNonRowFields();
393       return CellScannerPosition.AFTER;
394     }
395 
396     if(nextRowInternal()){
397       populateFirstNonRowFields();
398       return CellScannerPosition.AFTER;
399 
400     }else{
401       return CellScannerPosition.AFTER_LAST;
402     }
403   }
404 
405 }