View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import java.io.IOException;
24  
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.KeyValue;
27  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
28  import org.apache.hadoop.hbase.util.Bytes;
29  
30  /**
31   * Keeps track of the columns for a scan if they are not explicitly specified
32   */
33  public class ScanWildcardColumnTracker implements ColumnTracker {
34    private byte [] columnBuffer = null;
35    private int columnOffset = 0;
36    private int columnLength = 0;
37    private int currentCount = 0;
38    private int maxVersions;
39    private int minVersions;
40    /* Keeps track of the latest timestamp and type included for current column.
41     * Used to eliminate duplicates. */
42    private long latestTSOfCurrentColumn;
43    private byte latestTypeOfCurrentColumn;
44  
45    private long oldestStamp;
46  
47    /**
48     * Return maxVersions of every row.
49     * @param minVersion Minimum number of versions to keep
50     * @param maxVersion Maximum number of versions to return
51     * @param oldestUnexpiredTS oldest timestamp that has not expired according
52     *          to the TTL.
53     */
54    public ScanWildcardColumnTracker(int minVersion, int maxVersion,
55        long oldestUnexpiredTS) {
56      this.maxVersions = maxVersion;
57      this.minVersions = minVersion;
58      this.oldestStamp = oldestUnexpiredTS;
59    }
60  
61    /**
62     * {@inheritDoc}
63     * This receives puts *and* deletes.
64     * Deletes do not count as a version, but rather take the version
65     * of the previous put (so eventually all but the last can be reclaimed).
66     */
67    @Override
68    public MatchCode checkColumn(byte[] bytes, int offset, int length,
69        long timestamp, byte type, boolean ignoreCount) throws IOException {
70      
71      if (columnBuffer == null) {
72        // first iteration.
73        resetBuffer(bytes, offset, length);
74        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
75        // do not count a delete marker as another version
76        return checkVersion(type, timestamp);
77      }
78      int cmp = Bytes.compareTo(bytes, offset, length,
79          columnBuffer, columnOffset, columnLength);
80      if (cmp == 0) {
81        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
82  
83        //If column matches, check if it is a duplicate timestamp
84        if (sameAsPreviousTSAndType(timestamp, type)) {
85          return ScanQueryMatcher.MatchCode.SKIP;
86        }
87        return checkVersion(type, timestamp);
88      }
89  
90      resetTSAndType();
91  
92      // new col > old col
93      if (cmp > 0) {
94        // switched columns, lets do something.x
95        resetBuffer(bytes, offset, length);
96        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
97        return checkVersion(type, timestamp);
98      }
99  
100     // new col < oldcol
101     // WARNING: This means that very likely an edit for some other family
102     // was incorrectly stored into the store for this one. Throw an exception,
103     // because this might lead to data corruption.
104     throw new IOException(
105         "ScanWildcardColumnTracker.checkColumn ran into a column actually " +
106         "smaller than the previous column: " +
107         Bytes.toStringBinary(bytes, offset, length));
108   }
109 
110   private void resetBuffer(byte[] bytes, int offset, int length) {
111     columnBuffer = bytes;
112     columnOffset = offset;
113     columnLength = length;
114     currentCount = 0;
115   }
116 
117   /**
118    * Check whether this version should be retained.
119    * There are 4 variables considered:
120    * If this version is past max versions -> skip it
121    * If this kv has expired or was deleted, check min versions
122    * to decide whther to skip it or not.
123    *
124    * Increase the version counter unless this is a delete
125    */
126   private MatchCode checkVersion(byte type, long timestamp) {
127     if (!KeyValue.isDelete(type)) {
128       currentCount++;
129     }
130     if (currentCount > maxVersions) {
131       return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
132     }
133     // keep the KV if required by minversions or it is not expired, yet
134     if (currentCount <= minVersions || !isExpired(timestamp)) {
135       setTSAndType(timestamp, type);
136       return ScanQueryMatcher.MatchCode.INCLUDE;
137     } else {
138       return MatchCode.SEEK_NEXT_COL;
139     }
140 
141   }
142 
143   @Override
144   public void update() {
145     // no-op, shouldn't even be called
146     throw new UnsupportedOperationException(
147         "ScanWildcardColumnTracker.update should never be called!");
148   }
149 
150   @Override
151   public void reset() {
152     columnBuffer = null;
153     resetTSAndType();
154   }
155 
156   private void resetTSAndType() {
157     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
158     latestTypeOfCurrentColumn = 0;
159   }
160 
161   private void setTSAndType(long timestamp, byte type) {
162     latestTSOfCurrentColumn = timestamp;
163     latestTypeOfCurrentColumn = type;
164   }
165 
166   private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
167     return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
168   }
169 
170   private boolean isExpired(long timestamp) {
171     return timestamp < oldestStamp;
172   }
173 
174   /**
175    * Used by matcher and scan/get to get a hint of the next column
176    * to seek to after checkColumn() returns SKIP.  Returns the next interesting
177    * column we want, or NULL there is none (wildcard scanner).
178    *
179    * @return The column count.
180    */
181   public ColumnCount getColumnHint() {
182     return null;
183   }
184 
185 
186   /**
187    * We can never know a-priori if we are done, so always return false.
188    * @return false
189    */
190   @Override
191   public boolean done() {
192     return false;
193   }
194 
195   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
196       int qualLength) {
197     return MatchCode.SEEK_NEXT_COL;
198   }
199 
200   public boolean isDone(long timestamp) {
201     return minVersions <= 0 && isExpired(timestamp);
202   }
203 }