View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import org.apache.hadoop.hbase.HConstants;
24  import org.apache.hadoop.hbase.KeyValue;
25  import org.apache.hadoop.hbase.client.Scan;
26  import org.apache.hadoop.hbase.filter.Filter;
27  import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
28  import org.apache.hadoop.hbase.io.TimeRange;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  import java.util.NavigableSet;
32  
33  /**
34   * A query matcher that is specifically designed for the scan case.
35   */
36  public class ScanQueryMatcher {
37    // Optimization so we can skip lots of compares when we decide to skip
38    // to the next row.
39    private boolean stickyNextRow;
40    private byte[] stopRow;
41  
42    protected TimeRange tr;
43  
44    protected Filter filter;
45  
46    /** Keeps track of deletes */
47    protected DeleteTracker deletes;
48    protected boolean retainDeletesInOutput;
49  
50    /** Keeps track of columns and versions */
51    protected ColumnTracker columns;
52  
53    /** Key to seek to in memstore and StoreFiles */
54    protected KeyValue startKey;
55  
56    /** Oldest allowed version stamp for TTL enforcement */
57    protected long oldestStamp;
58  
59    /** Row comparator for the region this query is for */
60    KeyValue.KeyComparator rowComparator;
61  
62    /** Row the query is on */
63    protected byte [] row;
64  
65    /**
66     * Constructs a ScanQueryMatcher for a Scan.
67     * @param scan
68     * @param family
69     * @param columns
70     * @param ttl
71     * @param rowComparator
72     */
73    public ScanQueryMatcher(Scan scan, byte [] family,
74        NavigableSet<byte[]> columns, long ttl,
75        KeyValue.KeyComparator rowComparator, int maxVersions,
76        boolean retainDeletesInOutput) {
77      this.tr = scan.getTimeRange();
78      this.oldestStamp = System.currentTimeMillis() - ttl;
79      this.rowComparator = rowComparator;
80      this.deletes =  new ScanDeleteTracker();
81      this.stopRow = scan.getStopRow();
82      this.startKey = KeyValue.createFirstOnRow(scan.getStartRow());
83      this.filter = scan.getFilter();
84      this.retainDeletesInOutput = retainDeletesInOutput;
85  
86      // Single branch to deal with two types of reads (columns vs all in family)
87      if (columns == null || columns.size() == 0) {
88        // use a specialized scan for wildcard column tracker.
89        this.columns = new ScanWildcardColumnTracker(maxVersions);
90      } else {
91        // We can share the ExplicitColumnTracker, diff is we reset
92        // between rows, not between storefiles.
93        this.columns = new ExplicitColumnTracker(columns,maxVersions);
94      }
95    }
96    public ScanQueryMatcher(Scan scan, byte [] family,
97        NavigableSet<byte[]> columns, long ttl,
98        KeyValue.KeyComparator rowComparator, int maxVersions) {
99        /* By default we will not include deletes */
100       /* deletes are included explicitly (for minor compaction) */
101       this(scan, family, columns, ttl, rowComparator, maxVersions, false);
102   }
103 
104   /**
105    * Determines if the caller should do one of several things:
106    * - seek/skip to the next row (MatchCode.SEEK_NEXT_ROW)
107    * - seek/skip to the next column (MatchCode.SEEK_NEXT_COL)
108    * - include the current KeyValue (MatchCode.INCLUDE)
109    * - ignore the current KeyValue (MatchCode.SKIP)
110    * - got to the next row (MatchCode.DONE)
111    *
112    * @param kv KeyValue to check
113    * @return The match code instance.
114    */
115   public MatchCode match(KeyValue kv) {
116     if (filter != null && filter.filterAllRemaining()) {
117       return MatchCode.DONE_SCAN;
118     }
119 
120     byte [] bytes = kv.getBuffer();
121     int offset = kv.getOffset();
122     int initialOffset = offset;
123 
124     int keyLength = Bytes.toInt(bytes, offset, Bytes.SIZEOF_INT);
125     offset += KeyValue.ROW_OFFSET;
126 
127     short rowLength = Bytes.toShort(bytes, offset, Bytes.SIZEOF_SHORT);
128     offset += Bytes.SIZEOF_SHORT;
129 
130     int ret = this.rowComparator.compareRows(row, 0, row.length,
131         bytes, offset, rowLength);
132     if (ret <= -1) {
133       return MatchCode.DONE;
134     } else if (ret >= 1) {
135       // could optimize this, if necessary?
136       // Could also be called SEEK_TO_CURRENT_ROW, but this
137       // should be rare/never happens.
138       return MatchCode.SEEK_NEXT_ROW;
139     }
140 
141     // optimize case.
142     if (this.stickyNextRow)
143         return MatchCode.SEEK_NEXT_ROW;
144 
145     if (this.columns.done()) {
146       stickyNextRow = true;
147       return MatchCode.SEEK_NEXT_ROW;
148     }
149 
150     //Passing rowLength
151     offset += rowLength;
152 
153     //Skipping family
154     byte familyLength = bytes [offset];
155     offset += familyLength + 1;
156 
157     int qualLength = keyLength + KeyValue.ROW_OFFSET -
158       (offset - initialOffset) - KeyValue.TIMESTAMP_TYPE_SIZE;
159 
160     long timestamp = kv.getTimestamp();
161     if (isExpired(timestamp)) {
162       // done, the rest of this column will also be expired as well.
163       return getNextRowOrNextColumn(bytes, offset, qualLength);
164     }
165 
166     byte type = kv.getType();
167     if (isDelete(type)) {
168       if (tr.withinOrAfterTimeRange(timestamp)) {
169         this.deletes.add(bytes, offset, qualLength, timestamp, type);
170         // Can't early out now, because DelFam come before any other keys
171       }
172       if (retainDeletesInOutput) {
173         return MatchCode.INCLUDE;
174       }
175       else {
176         return MatchCode.SKIP;
177       }
178     }
179 
180     if (!this.deletes.isEmpty() &&
181         deletes.isDeleted(bytes, offset, qualLength, timestamp)) {
182 
183       // May be able to optimize the SKIP here, if we matched
184       // due to a DelFam, we can skip to next row
185       // due to a DelCol, we can skip to next col
186       // But it requires more info out of isDelete().
187       // needful -> million column challenge.
188       return MatchCode.SKIP;
189     }
190 
191     int timestampComparison = tr.compare(timestamp);
192     if (timestampComparison >= 1) {
193       return MatchCode.SKIP;
194     } else if (timestampComparison <= -1) {
195       return getNextRowOrNextColumn(bytes, offset, qualLength);
196     }
197 
198     /**
199      * Filters should be checked before checking column trackers. If we do
200      * otherwise, as was previously being done, ColumnTracker may increment its
201      * counter for even that KV which may be discarded later on by Filter. This
202      * would lead to incorrect results in certain cases.
203      */
204     if (filter != null) {
205       ReturnCode filterResponse = filter.filterKeyValue(kv);
206       if (filterResponse == ReturnCode.SKIP) {
207         return MatchCode.SKIP;
208       } else if (filterResponse == ReturnCode.NEXT_COL) {
209         return getNextRowOrNextColumn(bytes, offset, qualLength);
210       } else if (filterResponse == ReturnCode.NEXT_ROW) {
211         stickyNextRow = true;
212         return MatchCode.SEEK_NEXT_ROW;
213       } else if (filterResponse == ReturnCode.SEEK_NEXT_USING_HINT) {
214         return MatchCode.SEEK_NEXT_USING_HINT;
215       }
216     }
217 
218     MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength, timestamp);
219     /*
220      * According to current implementation, colChecker can only be
221      * SEEK_NEXT_COL, SEEK_NEXT_ROW, SKIP or INCLUDE. Therefore, always return
222      * the MatchCode. If it is SEEK_NEXT_ROW, also set stickyNextRow.
223      */
224     if (colChecker == MatchCode.SEEK_NEXT_ROW) {
225       stickyNextRow = true;
226     }
227     return colChecker;
228 
229   }
230 
231   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
232       int qualLength) {
233     if (columns instanceof ExplicitColumnTracker) {
234       //We only come here when we know that columns is an instance of
235       //ExplicitColumnTracker so we should never have a cast exception
236       ((ExplicitColumnTracker)columns).doneWithColumn(bytes, offset,
237           qualLength);
238       if (columns.getColumnHint() == null) {
239         return MatchCode.SEEK_NEXT_ROW;
240       } else {
241         return MatchCode.SEEK_NEXT_COL;
242       }
243     } else {
244       return MatchCode.SEEK_NEXT_COL;
245     }
246   }
247 
248   public boolean moreRowsMayExistAfter(KeyValue kv) {
249     if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
250         rowComparator.compareRows(kv.getBuffer(),kv.getRowOffset(),
251             kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
252       // KV >= STOPROW
253       // then NO there is nothing left.
254       return false;
255     } else {
256       return true;
257     }
258   }
259 
260   /**
261    * Set current row
262    * @param row
263    */
264   public void setRow(byte [] row) {
265     this.row = row;
266     reset();
267   }
268 
269   public void reset() {
270     this.deletes.reset();
271     this.columns.reset();
272 
273     stickyNextRow = false;
274   }
275 
276   // should be in KeyValue.
277   protected boolean isDelete(byte type) {
278     return (type != KeyValue.Type.Put.getCode());
279   }
280 
281   protected boolean isExpired(long timestamp) {
282     return (timestamp < oldestStamp);
283   }
284 
285   /**
286    *
287    * @return the start key
288    */
289   public KeyValue getStartKey() {
290     return this.startKey;
291   }
292 
293   public KeyValue getNextKeyHint(KeyValue kv) {
294     if (filter == null) {
295       return null;
296     } else {
297       return filter.getNextKeyHint(kv);
298     }
299   }
300 
301   public KeyValue getKeyForNextColumn(KeyValue kv) {
302     ColumnCount nextColumn = columns.getColumnHint();
303     if (nextColumn == null) {
304       return KeyValue.createLastOnRow(
305           kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
306           kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
307           kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength());
308     } else {
309       return KeyValue.createFirstOnRow(
310           kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
311           kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
312           nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength());
313     }
314   }
315 
316   public KeyValue getKeyForNextRow(KeyValue kv) {
317     return KeyValue.createLastOnRow(
318         kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
319         null, 0, 0,
320         null, 0, 0);
321   }
322 
323   /**
324    * {@link #match} return codes.  These instruct the scanner moving through
325    * memstores and StoreFiles what to do with the current KeyValue.
326    * <p>
327    * Additionally, this contains "early-out" language to tell the scanner to
328    * move on to the next File (memstore or Storefile), or to return immediately.
329    */
330   public static enum MatchCode {
331     /**
332      * Include KeyValue in the returned result
333      */
334     INCLUDE,
335 
336     /**
337      * Do not include KeyValue in the returned result
338      */
339     SKIP,
340 
341     /**
342      * Do not include, jump to next StoreFile or memstore (in time order)
343      */
344     NEXT,
345 
346     /**
347      * Do not include, return current result
348      */
349     DONE,
350 
351     /**
352      * These codes are used by the ScanQueryMatcher
353      */
354 
355     /**
356      * Done with the row, seek there.
357      */
358     SEEK_NEXT_ROW,
359     /**
360      * Done with column, seek to next.
361      */
362     SEEK_NEXT_COL,
363 
364     /**
365      * Done with scan, thanks to the row filter.
366      */
367     DONE_SCAN,
368 
369     /*
370      * Seek to next key which is given as hint.
371      */
372     SEEK_NEXT_USING_HINT,
373   }
374 }