View Javadoc

1   /*
2    * Copyright 2009 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.util.NavigableSet;
23  
24  import org.apache.hadoop.hbase.HConstants;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
27  import org.apache.hadoop.hbase.util.Bytes;
28  
29  /**
30   * This class is used for the tracking and enforcement of columns and numbers
31   * of versions during the course of a Get or Scan operation, when explicit
32   * column qualifiers have been asked for in the query.
33   *
34   * With a little magic (see {@link ScanQueryMatcher}), we can use this matcher
35   * for both scans and gets.  The main difference is 'next' and 'done' collapse
36   * for the scan case (since we see all columns in order), and we only reset
37   * between rows.
38   *
39   * <p>
40   * This class is utilized by {@link ScanQueryMatcher} mainly through two methods:
41   * <ul><li>{@link #checkColumn} is called when a Put satisfies all other
42   * conditions of the query.
43   * <ul><li>{@link #getNextRowOrNextColumn} is called whenever ScanQueryMatcher
44   * believes that the current column should be skipped (by timestamp, filter etc.)
45   * <p>
46   * These two methods returns a 
47   * {@link org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode}
48   * to define what action should be taken.
49   * <p>
50   * This class is NOT thread-safe as queries are never multi-threaded
51   */
52  public class ExplicitColumnTracker implements ColumnTracker {
53  
54    private final int maxVersions;
55    private final int minVersions;
56  
57   /**
58    * Contains the list of columns that the ExplicitColumnTracker is tracking.
59    * Each ColumnCount instance also tracks how many versions of the requested
60    * column have been returned.
61    */
62    private final ColumnCount[] columns;
63    private int index;
64    private ColumnCount column;
65    /** Keeps track of the latest timestamp included for current column.
66     * Used to eliminate duplicates. */
67    private long latestTSOfCurrentColumn;
68    private long oldestStamp;
69  
70    /**
71     * Default constructor.
72     * @param columns columns specified user in query
73     * @param minVersions minimum number of versions to keep
74     * @param maxVersions maximum versions to return per column
75     * @param oldestUnexpiredTS the oldest timestamp we are interested in,
76     *  based on TTL 
77     * @param ttl The timeToLive to enforce
78     */
79    public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions,
80        int maxVersions, long oldestUnexpiredTS) {
81      this.maxVersions = maxVersions;
82      this.minVersions = minVersions;
83      this.oldestStamp = oldestUnexpiredTS;
84      this.columns = new ColumnCount[columns.size()];
85      int i=0;
86      for(byte [] column : columns) {
87        this.columns[i++] = new ColumnCount(column);
88      }
89      reset();
90    }
91  
92      /**
93     * Done when there are no more columns to match against.
94     */
95    public boolean done() {
96      return this.index >= columns.length;
97    }
98  
99    public ColumnCount getColumnHint() {
100     return this.column;
101   }
102 
103   /**
104    * {@inheritDoc}
105    */
106   @Override
107   public ScanQueryMatcher.MatchCode checkColumn(byte [] bytes, int offset,
108       int length, byte type) {
109     // delete markers should never be passed to an
110     // *Explicit*ColumnTracker
111     assert !KeyValue.isDelete(type);
112     do {
113       // No more columns left, we are done with this query
114       if(done()) {
115         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
116       }
117 
118       // No more columns to match against, done with storefile
119       if(this.column == null) {
120         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
121       }
122 
123       // Compare specific column to current column
124       int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(),
125           column.getLength(), bytes, offset, length);
126 
127       // Column Matches. Return include code. The caller would call checkVersions
128       // to limit the number of versions.
129       if(ret == 0) {
130         return ScanQueryMatcher.MatchCode.INCLUDE;
131       }
132 
133       resetTS();
134 
135       if (ret > 0) {
136         // The current KV is smaller than the column the ExplicitColumnTracker
137         // is interested in, so seek to that column of interest.
138         return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
139       }
140 
141       // The current KV is bigger than the column the ExplicitColumnTracker
142       // is interested in. That means there is no more data for the column
143       // of interest. Advance the ExplicitColumnTracker state to next
144       // column of interest, and check again.
145       if (ret <= -1) {
146         ++this.index;
147         if (done()) {
148           // No more to match, do not include, done with this row.
149           return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
150         }
151         // This is the recursive case.
152         this.column = this.columns[this.index];
153       }
154     } while(true);
155   }
156 
157   @Override
158   public ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length,
159       long timestamp, byte type, boolean ignoreCount) {
160     assert !KeyValue.isDelete(type);
161     if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
162     // Check if it is a duplicate timestamp
163     if (sameAsPreviousTS(timestamp)) {
164       // If duplicate, skip this Key
165       return ScanQueryMatcher.MatchCode.SKIP;
166     }
167     int count = this.column.increment();
168     if (count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
169       // Done with versions for this column
170       ++this.index;
171       resetTS();
172       if (done()) {
173         // We have served all the requested columns.
174         this.column = null;
175         return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
176       }
177       // We are done with current column; advance to next column
178       // of interest.
179       this.column = this.columns[this.index];
180       return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
181     }
182     setTS(timestamp);
183     return ScanQueryMatcher.MatchCode.INCLUDE;
184   }
185 
186   // Called between every row.
187   public void reset() {
188     this.index = 0;
189     this.column = this.columns[this.index];
190     for(ColumnCount col : this.columns) {
191       col.setCount(0);
192     }
193     resetTS();
194   }
195 
196   private void resetTS() {
197     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
198   }
199 
200   private void setTS(long timestamp) {
201     latestTSOfCurrentColumn = timestamp;
202   }
203 
204   private boolean sameAsPreviousTS(long timestamp) {
205     return timestamp == latestTSOfCurrentColumn;
206   }
207 
208   private boolean isExpired(long timestamp) {
209     return timestamp < oldestStamp;
210   }
211 
212   /**
213    * This method is used to inform the column tracker that we are done with
214    * this column. We may get this information from external filters or
215    * timestamp range and we then need to indicate this information to
216    * tracker. It is required only in case of ExplicitColumnTracker.
217    * @param bytes
218    * @param offset
219    * @param length
220    */
221   public void doneWithColumn(byte [] bytes, int offset, int length) {
222     while (this.column != null) {
223       int compare = Bytes.compareTo(column.getBuffer(), column.getOffset(),
224           column.getLength(), bytes, offset, length);
225       resetTS();
226       if (compare <= 0) {
227         ++this.index;
228         if (done()) {
229           // Will not hit any more columns in this storefile
230           this.column = null;
231         } else {
232           this.column = this.columns[this.index];
233         }
234         if (compare <= -1)
235           continue;
236       }
237       return;
238     }
239   }
240 
241   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
242       int qualLength) {
243     doneWithColumn(bytes, offset,qualLength);
244 
245     if (getColumnHint() == null) {
246       return MatchCode.SEEK_NEXT_ROW;
247     } else {
248       return MatchCode.SEEK_NEXT_COL;
249     }
250   }
251 
252   public boolean isDone(long timestamp) {
253     return minVersions <= 0 && isExpired(timestamp);
254   }
255 }