View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.util.ArrayList;
22  import java.util.List;
23  import java.util.NavigableSet;
24  
25  import org.apache.hadoop.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.KeyValue;
28  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  /**
32   * This class is used for the tracking and enforcement of columns and numbers
33   * of versions during the course of a Get or Scan operation, when explicit
34   * column qualifiers have been asked for in the query.
35   *
36   * With a little magic (see {@link ScanQueryMatcher}), we can use this matcher
37   * for both scans and gets.  The main difference is 'next' and 'done' collapse
38   * for the scan case (since we see all columns in order), and we only reset
39   * between rows.
40   *
41   * <p>
42   * This class is utilized by {@link ScanQueryMatcher} mainly through two methods:
43   * <ul><li>{@link #checkColumn} is called when a Put satisfies all other
44   * conditions of the query.
45   * <ul><li>{@link #getNextRowOrNextColumn} is called whenever ScanQueryMatcher
46   * believes that the current column should be skipped (by timestamp, filter etc.)
47   * <p>
48   * These two methods returns a 
49   * {@link org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode}
50   * to define what action should be taken.
51   * <p>
52   * This class is NOT thread-safe as queries are never multi-threaded
53   */
54  @InterfaceAudience.Private
55  public class ExplicitColumnTracker implements ColumnTracker {
56  
57    private final int maxVersions;
58    private final int minVersions;
59  
60   /**
61    * Contains the list of columns that the ExplicitColumnTracker is tracking.
62    * Each ColumnCount instance also tracks how many versions of the requested
63    * column have been returned.
64    */
65    private final List<ColumnCount> columns;
66    private int index;
67    private ColumnCount column;
68    /** Keeps track of the latest timestamp included for current column.
69     * Used to eliminate duplicates. */
70    private long latestTSOfCurrentColumn;
71    private long oldestStamp;
72  
73    /**
74     * Default constructor.
75     * @param columns columns specified user in query
76     * @param minVersions minimum number of versions to keep
77     * @param maxVersions maximum versions to return per column
78     * @param oldestUnexpiredTS the oldest timestamp we are interested in,
79     *  based on TTL 
80     */
81    public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions,
82        int maxVersions, long oldestUnexpiredTS) {
83      this.maxVersions = maxVersions;
84      this.minVersions = minVersions;
85      this.oldestStamp = oldestUnexpiredTS;
86      this.columns = new ArrayList<ColumnCount>(columns.size());
87      for(byte [] column : columns) {
88        this.columns.add(new ColumnCount(column));
89      }
90      reset();
91    }
92  
93      /**
94     * Done when there are no more columns to match against.
95     */
96    public boolean done() {
97      return this.index >= this.columns.size();
98    }
99  
100   public ColumnCount getColumnHint() {
101     return this.column;
102   }
103 
104   /**
105    * {@inheritDoc}
106    */
107   @Override
108   public ScanQueryMatcher.MatchCode checkColumn(byte [] bytes, int offset,
109       int length, long timestamp, byte type, boolean ignoreCount) {
110     // delete markers should never be passed to an
111     // *Explicit*ColumnTracker
112     assert !KeyValue.isDelete(type);
113     do {
114       // No more columns left, we are done with this query
115       if(done()) {
116         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
117       }
118 
119       // No more columns to match against, done with storefile
120       if(this.column == null) {
121         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
122       }
123 
124       // Compare specific column to current column
125       int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(),
126           column.getLength(), bytes, offset, length);
127 
128       // Column Matches. If it is not a duplicate key, increment the version count
129       // and include.
130       if(ret == 0) {
131         if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
132 
133         //If column matches, check if it is a duplicate timestamp
134         if (sameAsPreviousTS(timestamp)) {
135           //If duplicate, skip this Key
136           return ScanQueryMatcher.MatchCode.SKIP;
137         }
138         int count = this.column.increment();
139         if(count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
140           // Done with versions for this column
141           ++this.index;
142           resetTS();
143           if (done()) {
144             // We have served all the requested columns.
145             this.column = null;
146             return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
147           } else {
148             // We are done with current column; advance to next column
149             // of interest.
150             this.column = this.columns.get(this.index);
151             return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
152           }
153         } else {
154           setTS(timestamp);
155         }
156         return ScanQueryMatcher.MatchCode.INCLUDE;
157       }
158 
159       resetTS();
160 
161       if (ret > 0) {
162         // The current KV is smaller than the column the ExplicitColumnTracker
163         // is interested in, so seek to that column of interest.
164         return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
165       }
166 
167       // The current KV is bigger than the column the ExplicitColumnTracker
168       // is interested in. That means there is no more data for the column
169       // of interest. Advance the ExplicitColumnTracker state to next
170       // column of interest, and check again.
171       if (ret <= -1) {
172         ++this.index;
173         if (done()) {
174           // No more to match, do not include, done with this row.
175           return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
176         }
177         // This is the recursive case.
178         this.column = this.columns.get(this.index);
179       }
180     } while(true);
181   }
182 
183   // Called between every row.
184   public void reset() {
185     this.index = 0;
186     this.column = this.columns.get(this.index);
187     for(ColumnCount col : this.columns) {
188       col.setCount(0);
189     }
190     resetTS();
191   }
192 
193   private void resetTS() {
194     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
195   }
196 
197   private void setTS(long timestamp) {
198     latestTSOfCurrentColumn = timestamp;
199   }
200 
201   private boolean sameAsPreviousTS(long timestamp) {
202     return timestamp == latestTSOfCurrentColumn;
203   }
204 
205   private boolean isExpired(long timestamp) {
206     return timestamp < oldestStamp;
207   }
208 
209   /**
210    * This method is used to inform the column tracker that we are done with
211    * this column. We may get this information from external filters or
212    * timestamp range and we then need to indicate this information to
213    * tracker. It is required only in case of ExplicitColumnTracker.
214    * @param bytes
215    * @param offset
216    * @param length
217    */
218   public void doneWithColumn(byte [] bytes, int offset, int length) {
219     while (this.column != null) {
220       int compare = Bytes.compareTo(column.getBuffer(), column.getOffset(),
221           column.getLength(), bytes, offset, length);
222       resetTS();
223       if (compare <= 0) {
224         ++this.index;
225         if (done()) {
226           // Will not hit any more columns in this storefile
227           this.column = null;
228         } else {
229           this.column = this.columns.get(this.index);
230         }
231         if (compare <= -1)
232           continue;
233       }
234       return;
235     }
236   }
237 
238   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
239       int qualLength) {
240     doneWithColumn(bytes, offset,qualLength);
241 
242     if (getColumnHint() == null) {
243       return MatchCode.SEEK_NEXT_ROW;
244     } else {
245       return MatchCode.SEEK_NEXT_COL;
246     }
247   }
248 
249   public boolean isDone(long timestamp) {
250     return minVersions <= 0 && isExpired(timestamp);
251   }
252 }