View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import java.io.IOException;
24  
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.KeyValue;
27  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
28  import org.apache.hadoop.hbase.util.Bytes;
29  
30  /**
31   * Keeps track of the columns for a scan if they are not explicitly specified
32   */
33  public class ScanWildcardColumnTracker implements ColumnTracker {
34    private byte [] columnBuffer = null;
35    private int columnOffset = 0;
36    private int columnLength = 0;
37    private int currentCount = 0;
38    private int maxVersions;
39    private int minVersions;
40    /* Keeps track of the latest timestamp and type included for current column.
41     * Used to eliminate duplicates. */
42    private long latestTSOfCurrentColumn;
43    private byte latestTypeOfCurrentColumn;
44  
45    private long oldestStamp;
46  
47    /**
48     * Return maxVersions of every row.
49     * @param minVersion Minimum number of versions to keep
50     * @param maxVersion Maximum number of versions to return
51     * @param oldestUnexpiredTS oldest timestamp that has not expired according
52     *          to the TTL.
53     */
54    public ScanWildcardColumnTracker(int minVersion, int maxVersion,
55        long oldestUnexpiredTS) {
56      this.maxVersions = maxVersion;
57      this.minVersions = minVersion;
58      this.oldestStamp = oldestUnexpiredTS;
59    }
60  
61    /**
62     * {@inheritDoc}
63     * This receives puts *and* deletes.
64     * Deletes do not count as a version, but rather take the version
65     * of the previous put (so eventually all but the last can be reclaimed).
66     */
67    @Override
68    public MatchCode checkColumn(byte[] bytes, int offset, int length,
69        long timestamp, byte type, boolean ignoreCount) throws IOException {
70      
71      if (columnBuffer == null) {
72        // first iteration.
73        resetBuffer(bytes, offset, length);
74        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
75        // do not count a delete marker as another version
76        return checkVersion(type, timestamp);
77      }
78      int cmp = Bytes.compareTo(bytes, offset, length,
79          columnBuffer, columnOffset, columnLength);
80      if (cmp == 0) {
81        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
82  
83        //If column matches, check if it is a duplicate timestamp
84        if (sameAsPreviousTSAndType(timestamp, type)) {
85          return ScanQueryMatcher.MatchCode.SKIP;
86        }
87        return checkVersion(type, timestamp);
88      }
89  
90      resetTSAndType();
91  
92      // new col > old col
93      if (cmp > 0) {
94        // switched columns, lets do something.x
95        resetBuffer(bytes, offset, length);
96        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
97        return checkVersion(type, timestamp);
98      }
99  
100     // new col < oldcol
101     // WARNING: This means that very likely an edit for some other family
102     // was incorrectly stored into the store for this one. Throw an exception,
103     // because this might lead to data corruption.
104     throw new IOException(
105         "ScanWildcardColumnTracker.checkColumn ran into a column actually " +
106         "smaller than the previous column: " +
107         Bytes.toStringBinary(bytes, offset, length));
108   }
109 
110   private void resetBuffer(byte[] bytes, int offset, int length) {
111     columnBuffer = bytes;
112     columnOffset = offset;
113     columnLength = length;
114     currentCount = 0;
115   }
116 
117   /**
118    * Check whether this version should be retained.
119    * There are 4 variables considered:
120    * If this version is past max versions -> skip it
121    * If this kv has expired or was deleted, check min versions
122    * to decide whther to skip it or not.
123    *
124    * Increase the version counter unless this is a delete
125    */
126   private MatchCode checkVersion(byte type, long timestamp) {
127     if (!KeyValue.isDelete(type)) {
128       currentCount++;
129     }
130     if (currentCount > maxVersions) {
131       return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
132     }
133     // keep the KV if required by minversions or it is not expired, yet
134     if (currentCount <= minVersions || !isExpired(timestamp)) {
135       setTSAndType(timestamp, type);
136       return ScanQueryMatcher.MatchCode.INCLUDE;
137     } else {
138       return MatchCode.SEEK_NEXT_COL;
139     }
140 
141   }
142 
143   @Override
144   public void reset() {
145     columnBuffer = null;
146     resetTSAndType();
147   }
148 
149   private void resetTSAndType() {
150     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
151     latestTypeOfCurrentColumn = 0;
152   }
153 
154   private void setTSAndType(long timestamp, byte type) {
155     latestTSOfCurrentColumn = timestamp;
156     latestTypeOfCurrentColumn = type;
157   }
158 
159   private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
160     return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
161   }
162 
163   private boolean isExpired(long timestamp) {
164     return timestamp < oldestStamp;
165   }
166 
167   /**
168    * Used by matcher and scan/get to get a hint of the next column
169    * to seek to after checkColumn() returns SKIP.  Returns the next interesting
170    * column we want, or NULL there is none (wildcard scanner).
171    *
172    * @return The column count.
173    */
174   public ColumnCount getColumnHint() {
175     return null;
176   }
177 
178 
179   /**
180    * We can never know a-priori if we are done, so always return false.
181    * @return false
182    */
183   @Override
184   public boolean done() {
185     return false;
186   }
187 
188   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
189       int qualLength) {
190     return MatchCode.SEEK_NEXT_COL;
191   }
192 
193   public boolean isDone(long timestamp) {
194     return minVersions <= 0 && isExpired(timestamp);
195   }
196 }