View Javadoc

1   /*
2    * Copyright 2009 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.util.ArrayList;
23  import java.util.List;
24  import java.util.NavigableSet;
25  
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.KeyValue;
28  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  /**
32   * This class is used for the tracking and enforcement of columns and numbers
33   * of versions during the course of a Get or Scan operation, when explicit
34   * column qualifiers have been asked for in the query.
35   *
36   * With a little magic (see {@link ScanQueryMatcher}), we can use this matcher
37   * for both scans and gets.  The main difference is 'next' and 'done' collapse
38   * for the scan case (since we see all columns in order), and we only reset
39   * between rows.
40   *
41   * <p>
42   * This class is utilized by {@link ScanQueryMatcher} through two methods:
43   * <ul><li>{@link #checkColumn} is called when a Put satisfies all other
44   * conditions of the query.  This method returns a {@link org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode} to define
45   * what action should be taken.
46   * <li>{@link #update} is called at the end of every StoreFile or memstore.
47   * <p>
48   * This class is NOT thread-safe as queries are never multi-threaded
49   */
50  public class ExplicitColumnTracker implements ColumnTracker {
51  
52    private final int maxVersions;
53    private final int minVersions;
54  
55   /**
56    * Contains the list of columns that the ExplicitColumnTracker is tracking.
57    * Each ColumnCount instance also tracks how many versions of the requested
58    * column have been returned.
59    */
60    private final List<ColumnCount> columns;
61    private final List<ColumnCount> columnsToReuse;
62    private int index;
63    private ColumnCount column;
64    /** Keeps track of the latest timestamp included for current column.
65     * Used to eliminate duplicates. */
66    private long latestTSOfCurrentColumn;
67    private long oldestStamp;
68  
69    /**
70     * Default constructor.
71     * @param columns columns specified user in query
72     * @param minVersions minimum number of versions to keep
73     * @param maxVersions maximum versions to return per column
74     * @param oldestUnexpiredTS the oldest timestamp we are interested in,
75     *  based on TTL 
76     * @param ttl The timeToLive to enforce
77     */
78    public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions,
79        int maxVersions, long oldestUnexpiredTS) {
80      this.maxVersions = maxVersions;
81      this.minVersions = minVersions;
82      this.oldestStamp = oldestUnexpiredTS;
83      this.columns = new ArrayList<ColumnCount>(columns.size());
84      this.columnsToReuse = new ArrayList<ColumnCount>(columns.size());
85      for(byte [] column : columns) {
86        this.columnsToReuse.add(new ColumnCount(column));
87      }
88      reset();
89    }
90  
91      /**
92     * Done when there are no more columns to match against.
93     */
94    public boolean done() {
95      return this.columns.size() == 0;
96    }
97  
98    public ColumnCount getColumnHint() {
99      return this.column;
100   }
101 
102   /**
103    * {@inheritDoc}
104    */
105   @Override
106   public ScanQueryMatcher.MatchCode checkColumn(byte [] bytes, int offset,
107       int length, long timestamp, byte type, boolean ignoreCount) {
108     // delete markers should never be passed to an
109     // *Explicit*ColumnTracker
110     assert !KeyValue.isDelete(type);
111     do {
112       // No more columns left, we are done with this query
113       if(this.columns.size() == 0) {
114         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
115       }
116 
117       // No more columns to match against, done with storefile
118       if(this.column == null) {
119         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
120       }
121 
122       // Compare specific column to current column
123       int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(),
124           column.getLength(), bytes, offset, length);
125 
126       // Column Matches. If it is not a duplicate key, increment the version count
127       // and include.
128       if(ret == 0) {
129         if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
130 
131         //If column matches, check if it is a duplicate timestamp
132         if (sameAsPreviousTS(timestamp)) {
133           //If duplicate, skip this Key
134           return ScanQueryMatcher.MatchCode.SKIP;
135         }
136         int count = this.column.increment();
137         if(count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
138           // Done with versions for this column
139           // Note: because we are done with this column, and are removing
140           // it from columns, we don't do a ++this.index. The index stays
141           // the same but the columns have shifted within the array such
142           // that index now points to the next column we are interested in.
143           this.columns.remove(this.index);
144 
145           resetTS();
146           if (this.columns.size() == this.index) {
147             // We have served all the requested columns.
148             this.column = null;
149             return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
150           } else {
151             // We are done with current column; advance to next column
152             // of interest.
153             this.column = this.columns.get(this.index);
154             return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
155           }
156         } else {
157           setTS(timestamp);
158         }
159         return ScanQueryMatcher.MatchCode.INCLUDE;
160       }
161 
162       resetTS();
163 
164       if (ret > 0) {
165         // The current KV is smaller than the column the ExplicitColumnTracker
166         // is interested in, so seek to that column of interest.
167         return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
168       }
169 
170       // The current KV is bigger than the column the ExplicitColumnTracker
171       // is interested in. That means there is no more data for the column
172       // of interest. Advance the ExplicitColumnTracker state to next
173       // column of interest, and check again.
174       if (ret <= -1) {
175         if (++this.index >= this.columns.size()) {
176           // No more to match, do not include, done with this row.
177           return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
178         }
179         // This is the recursive case.
180         this.column = this.columns.get(this.index);
181       }
182     } while(true);
183   }
184 
185   /**
186    * Called at the end of every StoreFile or memstore.
187    */
188   public void update() {
189     if(this.columns.size() != 0) {
190       this.index = 0;
191       this.column = this.columns.get(this.index);
192     } else {
193       this.index = -1;
194       this.column = null;
195     }
196   }
197 
198   // Called between every row.
199   public void reset() {
200     buildColumnList();
201     this.index = 0;
202     this.column = this.columns.get(this.index);
203     resetTS();
204   }
205 
206   private void resetTS() {
207     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
208   }
209 
210   private void setTS(long timestamp) {
211     latestTSOfCurrentColumn = timestamp;
212   }
213 
214   private boolean sameAsPreviousTS(long timestamp) {
215     return timestamp == latestTSOfCurrentColumn;
216   }
217 
218   private boolean isExpired(long timestamp) {
219     return timestamp < oldestStamp;
220   }
221 
222   private void buildColumnList() {
223     this.columns.clear();
224     this.columns.addAll(this.columnsToReuse);
225     for(ColumnCount col : this.columns) {
226       col.setCount(0);
227     }
228   }
229 
230   /**
231    * This method is used to inform the column tracker that we are done with
232    * this column. We may get this information from external filters or
233    * timestamp range and we then need to indicate this information to
234    * tracker. It is required only in case of ExplicitColumnTracker.
235    * @param bytes
236    * @param offset
237    * @param length
238    */
239   public void doneWithColumn(byte [] bytes, int offset, int length) {
240     while (this.column != null) {
241       int compare = Bytes.compareTo(column.getBuffer(), column.getOffset(),
242           column.getLength(), bytes, offset, length);
243       resetTS();
244       if (compare == 0) {
245         this.columns.remove(this.index);
246         if (this.columns.size() == this.index) {
247           // Will not hit any more columns in this storefile
248           this.column = null;
249         } else {
250           this.column = this.columns.get(this.index);
251         }
252         return;
253       } else if ( compare <= -1) {
254         if(++this.index != this.columns.size()) {
255           this.column = this.columns.get(this.index);
256         } else {
257           this.column = null;
258         }
259       } else {
260         return;
261       }
262     }
263   }
264 
265   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
266       int qualLength) {
267     doneWithColumn(bytes, offset,qualLength);
268 
269     if (getColumnHint() == null) {
270       return MatchCode.SEEK_NEXT_ROW;
271     } else {
272       return MatchCode.SEEK_NEXT_COL;
273     }
274   }
275 
276   public boolean isDone(long timestamp) {
277     return minVersions <= 0 && isExpired(timestamp);
278   }
279 }