View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.KeyValue;
27  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
28  import org.apache.hadoop.hbase.util.Bytes;
29  
30  /**
31   * Keeps track of the columns for a scan if they are not explicitly specified
32   */
33  @InterfaceAudience.Private
34  public class ScanWildcardColumnTracker implements ColumnTracker {
35    private byte [] columnBuffer = null;
36    private int columnOffset = 0;
37    private int columnLength = 0;
38    private int currentCount = 0;
39    private int maxVersions;
40    private int minVersions;
41    /* Keeps track of the latest timestamp and type included for current column.
42     * Used to eliminate duplicates. */
43    private long latestTSOfCurrentColumn;
44    private byte latestTypeOfCurrentColumn;
45  
46    private long oldestStamp;
47  
48    /**
49     * Return maxVersions of every row.
50     * @param minVersion Minimum number of versions to keep
51     * @param maxVersion Maximum number of versions to return
52     * @param oldestUnexpiredTS oldest timestamp that has not expired according
53     *          to the TTL.
54     */
55    public ScanWildcardColumnTracker(int minVersion, int maxVersion,
56        long oldestUnexpiredTS) {
57      this.maxVersions = maxVersion;
58      this.minVersions = minVersion;
59      this.oldestStamp = oldestUnexpiredTS;
60    }
61  
62    /**
63     * {@inheritDoc}
64     * This receives puts *and* deletes.
65     * Deletes do not count as a version, but rather take the version
66     * of the previous put (so eventually all but the last can be reclaimed).
67     */
68    @Override
69    public MatchCode checkColumn(byte[] bytes, int offset, int length,
70        long timestamp, byte type, boolean ignoreCount) throws IOException {
71      
72      if (columnBuffer == null) {
73        // first iteration.
74        resetBuffer(bytes, offset, length);
75        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
76        // do not count a delete marker as another version
77        return checkVersion(type, timestamp);
78      }
79      int cmp = Bytes.compareTo(bytes, offset, length,
80          columnBuffer, columnOffset, columnLength);
81      if (cmp == 0) {
82        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
83  
84        //If column matches, check if it is a duplicate timestamp
85        if (sameAsPreviousTSAndType(timestamp, type)) {
86          return ScanQueryMatcher.MatchCode.SKIP;
87        }
88        return checkVersion(type, timestamp);
89      }
90  
91      resetTSAndType();
92  
93      // new col > old col
94      if (cmp > 0) {
95        // switched columns, lets do something.x
96        resetBuffer(bytes, offset, length);
97        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
98        return checkVersion(type, timestamp);
99      }
100 
101     // new col < oldcol
102     // WARNING: This means that very likely an edit for some other family
103     // was incorrectly stored into the store for this one. Throw an exception,
104     // because this might lead to data corruption.
105     throw new IOException(
106         "ScanWildcardColumnTracker.checkColumn ran into a column actually " +
107         "smaller than the previous column: " +
108         Bytes.toStringBinary(bytes, offset, length));
109   }
110 
111   private void resetBuffer(byte[] bytes, int offset, int length) {
112     columnBuffer = bytes;
113     columnOffset = offset;
114     columnLength = length;
115     currentCount = 0;
116   }
117 
118   /**
119    * Check whether this version should be retained.
120    * There are 4 variables considered:
121    * If this version is past max versions -> skip it
122    * If this kv has expired or was deleted, check min versions
123    * to decide whther to skip it or not.
124    *
125    * Increase the version counter unless this is a delete
126    */
127   private MatchCode checkVersion(byte type, long timestamp) {
128     if (!KeyValue.isDelete(type)) {
129       currentCount++;
130     }
131     if (currentCount > maxVersions) {
132       return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
133     }
134     // keep the KV if required by minversions or it is not expired, yet
135     if (currentCount <= minVersions || !isExpired(timestamp)) {
136       setTSAndType(timestamp, type);
137       return ScanQueryMatcher.MatchCode.INCLUDE;
138     } else {
139       return MatchCode.SEEK_NEXT_COL;
140     }
141 
142   }
143 
144   @Override
145   public void reset() {
146     columnBuffer = null;
147     resetTSAndType();
148   }
149 
150   private void resetTSAndType() {
151     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
152     latestTypeOfCurrentColumn = 0;
153   }
154 
155   private void setTSAndType(long timestamp, byte type) {
156     latestTSOfCurrentColumn = timestamp;
157     latestTypeOfCurrentColumn = type;
158   }
159 
160   private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
161     return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
162   }
163 
164   private boolean isExpired(long timestamp) {
165     return timestamp < oldestStamp;
166   }
167 
168   /**
169    * Used by matcher and scan/get to get a hint of the next column
170    * to seek to after checkColumn() returns SKIP.  Returns the next interesting
171    * column we want, or NULL there is none (wildcard scanner).
172    *
173    * @return The column count.
174    */
175   public ColumnCount getColumnHint() {
176     return null;
177   }
178 
179 
180   /**
181    * We can never know a-priori if we are done, so always return false.
182    * @return false
183    */
184   @Override
185   public boolean done() {
186     return false;
187   }
188 
189   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
190       int qualLength) {
191     return MatchCode.SEEK_NEXT_COL;
192   }
193 
194   public boolean isDone(long timestamp) {
195     return minVersions <= 0 && isExpired(timestamp);
196   }
197 }