View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import org.apache.hadoop.hbase.HConstants;
24  import org.apache.hadoop.hbase.KeyValue;
25  import org.apache.hadoop.hbase.client.Scan;
26  import org.apache.hadoop.hbase.filter.Filter;
27  import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
28  import org.apache.hadoop.hbase.io.TimeRange;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  import java.util.NavigableSet;
32  
33  /**
34   * A query matcher that is specifically designed for the scan case.
35   */
36  public class ScanQueryMatcher {
37    // Optimization so we can skip lots of compares when we decide to skip
38    // to the next row.
39    private boolean stickyNextRow;
40    private byte[] stopRow;
41  
42    protected TimeRange tr;
43  
44    protected Filter filter;
45  
46    /** Keeps track of deletes */
47    protected DeleteTracker deletes;
48  
49    /** Keeps track of columns and versions */
50    protected ColumnTracker columns;
51  
52    /** Key to seek to in memstore and StoreFiles */
53    protected KeyValue startKey;
54  
55    /** Oldest allowed version stamp for TTL enforcement */
56    protected long oldestStamp;
57  
58    /** Row comparator for the region this query is for */
59    KeyValue.KeyComparator rowComparator;
60  
61    /** Row the query is on */
62    protected byte [] row;
63  
64    /**
65     * Constructs a ScanQueryMatcher for a Scan.
66     * @param scan
67     * @param family
68     * @param columns
69     * @param ttl
70     * @param rowComparator
71     */
72    public ScanQueryMatcher(Scan scan, byte [] family,
73        NavigableSet<byte[]> columns, long ttl,
74        KeyValue.KeyComparator rowComparator, int maxVersions) {
75      this.tr = scan.getTimeRange();
76      this.oldestStamp = System.currentTimeMillis() - ttl;
77      this.rowComparator = rowComparator;
78      this.deletes =  new ScanDeleteTracker();
79      this.stopRow = scan.getStopRow();
80      this.startKey = KeyValue.createFirstOnRow(scan.getStartRow());
81      this.filter = scan.getFilter();
82  
83      // Single branch to deal with two types of reads (columns vs all in family)
84      if (columns == null || columns.size() == 0) {
85        // use a specialized scan for wildcard column tracker.
86        this.columns = new ScanWildcardColumnTracker(maxVersions);
87      } else {
88        // We can share the ExplicitColumnTracker, diff is we reset
89        // between rows, not between storefiles.
90        this.columns = new ExplicitColumnTracker(columns,maxVersions);
91      }
92    }
93  
94    /**
95     * Determines if the caller should do one of several things:
96     * - seek/skip to the next row (MatchCode.SEEK_NEXT_ROW)
97     * - seek/skip to the next column (MatchCode.SEEK_NEXT_COL)
98     * - include the current KeyValue (MatchCode.INCLUDE)
99     * - ignore the current KeyValue (MatchCode.SKIP)
100    * - got to the next row (MatchCode.DONE)
101    *
102    * @param kv KeyValue to check
103    * @return The match code instance.
104    */
105   public MatchCode match(KeyValue kv) {
106     if (filter != null && filter.filterAllRemaining()) {
107       return MatchCode.DONE_SCAN;
108     }
109 
110     byte [] bytes = kv.getBuffer();
111     int offset = kv.getOffset();
112     int initialOffset = offset;
113 
114     int keyLength = Bytes.toInt(bytes, offset, Bytes.SIZEOF_INT);
115     offset += KeyValue.ROW_OFFSET;
116 
117     short rowLength = Bytes.toShort(bytes, offset, Bytes.SIZEOF_SHORT);
118     offset += Bytes.SIZEOF_SHORT;
119 
120     int ret = this.rowComparator.compareRows(row, 0, row.length,
121         bytes, offset, rowLength);
122     if (ret <= -1) {
123       return MatchCode.DONE;
124     } else if (ret >= 1) {
125       // could optimize this, if necessary?
126       // Could also be called SEEK_TO_CURRENT_ROW, but this
127       // should be rare/never happens.
128       return MatchCode.SKIP;
129     }
130 
131     // optimize case.
132     if (this.stickyNextRow)
133         return MatchCode.SEEK_NEXT_ROW;
134 
135     if (this.columns.done()) {
136       stickyNextRow = true;
137       return MatchCode.SEEK_NEXT_ROW;
138     }
139 
140     //Passing rowLength
141     offset += rowLength;
142 
143     //Skipping family
144     byte familyLength = bytes [offset];
145     offset += familyLength + 1;
146 
147     int qualLength = keyLength + KeyValue.ROW_OFFSET -
148       (offset - initialOffset) - KeyValue.TIMESTAMP_TYPE_SIZE;
149 
150     long timestamp = kv.getTimestamp();
151     if (isExpired(timestamp)) {
152       // done, the rest of this column will also be expired as well.
153       return MatchCode.SEEK_NEXT_COL;
154     }
155 
156     byte type = kv.getType();
157     if (isDelete(type)) {
158       if (tr.withinOrAfterTimeRange(timestamp)) {
159         this.deletes.add(bytes, offset, qualLength, timestamp, type);
160         // Can't early out now, because DelFam come before any other keys
161       }
162       // May be able to optimize the SKIP here, if we matched
163       // due to a DelFam, we can skip to next row
164       // due to a DelCol, we can skip to next col
165       // But it requires more info out of isDelete().
166       // needful -> million column challenge.
167       return MatchCode.SKIP;
168     }
169 
170     if (!this.deletes.isEmpty() &&
171         deletes.isDeleted(bytes, offset, qualLength, timestamp)) {
172       return MatchCode.SKIP;
173     }
174 
175     int timestampComparison = tr.compare(timestamp);
176     if (timestampComparison >= 1) {
177       return MatchCode.SKIP;
178     } else if (timestampComparison <= -1) {
179       return getNextRowOrNextColumn(bytes, offset, qualLength);
180     }
181 
182     /**
183      * Filters should be checked before checking column trackers. If we do
184      * otherwise, as was previously being done, ColumnTracker may increment its
185      * counter for even that KV which may be discarded later on by Filter. This
186      * would lead to incorrect results in certain cases.
187      */
188     if (filter != null) {
189       ReturnCode filterResponse = filter.filterKeyValue(kv);
190       if (filterResponse == ReturnCode.SKIP) {
191         return MatchCode.SKIP;
192       } else if (filterResponse == ReturnCode.NEXT_COL) {
193         return getNextRowOrNextColumn(bytes, offset, qualLength);
194       } else if (filterResponse == ReturnCode.NEXT_ROW) {
195         stickyNextRow = true;
196         return MatchCode.SEEK_NEXT_ROW;
197       }
198     }
199 
200     MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength);
201     // if SKIP -> SEEK_NEXT_COL
202     // if (NEXT,DONE) -> SEEK_NEXT_ROW
203     // if (INCLUDE) -> INCLUDE
204     if (colChecker == MatchCode.SKIP) {
205       return MatchCode.SEEK_NEXT_COL;
206     } else if (colChecker == MatchCode.NEXT || colChecker == MatchCode.DONE) {
207       stickyNextRow = true;
208       return MatchCode.SEEK_NEXT_ROW;
209     }
210 
211     return MatchCode.INCLUDE;
212 
213   }
214 
215   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
216       int qualLength) {
217     if (columns instanceof ExplicitColumnTracker) {
218       //We only come here when we know that columns is an instance of
219       //ExplicitColumnTracker so we should never have a cast exception
220       ((ExplicitColumnTracker)columns).doneWithColumn(bytes, offset,
221           qualLength);
222       if (columns.getColumnHint() == null) {
223         return MatchCode.SEEK_NEXT_ROW;
224       } else {
225         return MatchCode.SEEK_NEXT_COL;
226       }
227     } else {
228       return MatchCode.SEEK_NEXT_COL;
229     }
230   }
231 
232   public boolean moreRowsMayExistAfter(KeyValue kv) {
233     if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
234         rowComparator.compareRows(kv.getBuffer(),kv.getRowOffset(),
235             kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
236       return false;
237     } else {
238       return true;
239     }
240   }
241 
242   /**
243    * Set current row
244    * @param row
245    */
246   public void setRow(byte [] row) {
247     this.row = row;
248     reset();
249   }
250 
251   public void reset() {
252     this.deletes.reset();
253     this.columns.reset();
254 
255     stickyNextRow = false;
256   }
257 
258   // should be in KeyValue.
259   protected boolean isDelete(byte type) {
260     return (type != KeyValue.Type.Put.getCode());
261   }
262 
263   protected boolean isExpired(long timestamp) {
264     return (timestamp < oldestStamp);
265   }
266 
267   /**
268    *
269    * @return the start key
270    */
271   public KeyValue getStartKey() {
272     return this.startKey;
273   }
274 
275   /**
276    * {@link #match} return codes.  These instruct the scanner moving through
277    * memstores and StoreFiles what to do with the current KeyValue.
278    * <p>
279    * Additionally, this contains "early-out" language to tell the scanner to
280    * move on to the next File (memstore or Storefile), or to return immediately.
281    */
282   public static enum MatchCode {
283     /**
284      * Include KeyValue in the returned result
285      */
286     INCLUDE,
287 
288     /**
289      * Do not include KeyValue in the returned result
290      */
291     SKIP,
292 
293     /**
294      * Do not include, jump to next StoreFile or memstore (in time order)
295      */
296     NEXT,
297 
298     /**
299      * Do not include, return current result
300      */
301     DONE,
302 
303     /**
304      * These codes are used by the ScanQueryMatcher
305      */
306 
307     /**
308      * Done with the row, seek there.
309      */
310     SEEK_NEXT_ROW,
311     /**
312      * Done with column, seek to next.
313      */
314     SEEK_NEXT_COL,
315 
316     /**
317      * Done with scan, thanks to the row filter.
318      */
319     DONE_SCAN,
320   }
321 }