View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Collection;
25  import java.util.List;
26  import java.util.SortedSet;
27  import java.util.concurrent.atomic.AtomicLong;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.classification.InterfaceAudience;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.KeyValue;
34  import org.apache.hadoop.hbase.client.Scan;
35  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
36  import org.apache.hadoop.hbase.regionserver.StoreFile.Reader;
37  
38  /**
39   * KeyValueScanner adaptor over the Reader.  It also provides hooks into
40   * bloom filter things.
41   */
42  @InterfaceAudience.LimitedPrivate("Coprocessor")
43  public class StoreFileScanner implements KeyValueScanner {
44    static final Log LOG = LogFactory.getLog(HStore.class);
45  
46    // the reader it comes from:
47    private final StoreFile.Reader reader;
48    private final HFileScanner hfs;
49    private KeyValue cur = null;
50  
51    private boolean realSeekDone;
52    private boolean delayedReseek;
53    private KeyValue delayedSeekKV;
54  
55    private boolean enforceMVCC = false;
56    private boolean hasMVCCInfo = false;
57  
58    private static AtomicLong seekCount;
59  
60    private ScanQueryMatcher matcher;
61  
62    /**
63     * Implements a {@link KeyValueScanner} on top of the specified {@link HFileScanner}
64     * @param hfs HFile scanner
65     */
66    public StoreFileScanner(StoreFile.Reader reader, HFileScanner hfs, boolean useMVCC, boolean hasMVCC) {
67      this.reader = reader;
68      this.hfs = hfs;
69      this.enforceMVCC = useMVCC;
70      this.hasMVCCInfo = hasMVCC;
71    }
72  
73    /**
74     * Return an array of scanners corresponding to the given
75     * set of store files.
76     */
77    public static List<StoreFileScanner> getScannersForStoreFiles(
78        Collection<StoreFile> files,
79        boolean cacheBlocks,
80        boolean usePread) throws IOException {
81      return getScannersForStoreFiles(files, cacheBlocks,
82                                     usePread, false);
83    }
84  
85    /**
86     * Return an array of scanners corresponding to the given set of store files.
87     */
88    public static List<StoreFileScanner> getScannersForStoreFiles(
89        Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
90        boolean isCompaction) throws IOException {
91      return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction,
92          null);
93    }
94  
95    /**
96     * Return an array of scanners corresponding to the given set of store files,
97     * And set the ScanQueryMatcher for each store file scanner for further
98     * optimization
99     */
100   public static List<StoreFileScanner> getScannersForStoreFiles(
101       Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
102       boolean isCompaction, ScanQueryMatcher matcher) throws IOException {
103     List<StoreFileScanner> scanners = new ArrayList<StoreFileScanner>(
104         files.size());
105     for (StoreFile file : files) {
106       StoreFile.Reader r = file.createReader();
107       StoreFileScanner scanner = r.getStoreFileScanner(cacheBlocks, usePread,
108           isCompaction);
109       scanner.setScanQueryMatcher(matcher);
110       scanners.add(scanner);
111     }
112     return scanners;
113   }
114 
115   public String toString() {
116     return "StoreFileScanner[" + hfs.toString() + ", cur=" + cur + "]";
117   }
118 
119   public KeyValue peek() {
120     return cur;
121   }
122 
123   public KeyValue next() throws IOException {
124     KeyValue retKey = cur;
125 
126     try {
127       // only seek if we aren't at the end. cur == null implies 'end'.
128       if (cur != null) {
129         hfs.next();
130         cur = hfs.getKeyValue();
131         if (hasMVCCInfo)
132           skipKVsNewerThanReadpoint();
133       }
134     } catch(IOException e) {
135       throw new IOException("Could not iterate " + this, e);
136     }
137     return retKey;
138   }
139 
140   public boolean seek(KeyValue key) throws IOException {
141     if (seekCount != null) seekCount.incrementAndGet();
142 
143     try {
144       try {
145         if(!seekAtOrAfter(hfs, key)) {
146           close();
147           return false;
148         }
149 
150         cur = hfs.getKeyValue();
151 
152         return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
153       } finally {
154         realSeekDone = true;
155       }
156     } catch (IOException ioe) {
157       throw new IOException("Could not seek " + this + " to key " + key, ioe);
158     }
159   }
160 
161   public boolean reseek(KeyValue key) throws IOException {
162     if (seekCount != null) seekCount.incrementAndGet();
163 
164     try {
165       try {
166         if (!reseekAtOrAfter(hfs, key)) {
167           close();
168           return false;
169         }
170         cur = hfs.getKeyValue();
171 
172         return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
173       } finally {
174         realSeekDone = true;
175       }
176     } catch (IOException ioe) {
177       throw new IOException("Could not reseek " + this + " to key " + key,
178           ioe);
179     }
180   }
181 
182   protected boolean skipKVsNewerThanReadpoint() throws IOException {
183     long readPoint = MultiVersionConsistencyControl.getThreadReadPoint();
184 
185     // We want to ignore all key-values that are newer than our current
186     // readPoint
187     while(enforceMVCC
188         && cur != null
189         && (cur.getMvccVersion() > readPoint)) {
190       hfs.next();
191       cur = hfs.getKeyValue();
192     }
193 
194     if (cur == null) {
195       close();
196       return false;
197     }
198 
199     // For the optimisation in HBASE-4346, we set the KV's memstoreTS to
200     // 0, if it is older than all the scanners' read points. It is possible
201     // that a newer KV's memstoreTS was reset to 0. But, there is an
202     // older KV which was not reset to 0 (because it was
203     // not old enough during flush). Make sure that we set it correctly now,
204     // so that the comparision order does not change.
205     if (cur.getMvccVersion() <= readPoint) {
206       cur.setMvccVersion(0);
207     }
208     return true;
209   }
210 
211   public void close() {
212     // Nothing to close on HFileScanner?
213     cur = null;
214   }
215 
216   /**
217    *
218    * @param s
219    * @param k
220    * @return false if not found or if k is after the end.
221    * @throws IOException
222    */
223   public static boolean seekAtOrAfter(HFileScanner s, KeyValue k)
224   throws IOException {
225     int result = s.seekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
226     if(result < 0) {
227       if (result == HConstants.INDEX_KEY_MAGIC) {
228         // using faked key
229         return true;
230       }
231       // Passed KV is smaller than first KV in file, work from start of file
232       return s.seekTo();
233     } else if(result > 0) {
234       // Passed KV is larger than current KV in file, if there is a next
235       // it is the "after", if not then this scanner is done.
236       return s.next();
237     }
238     // Seeked to the exact key
239     return true;
240   }
241 
242   static boolean reseekAtOrAfter(HFileScanner s, KeyValue k)
243   throws IOException {
244     //This function is similar to seekAtOrAfter function
245     int result = s.reseekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
246     if (result <= 0) {
247       if (result == HConstants.INDEX_KEY_MAGIC) {
248         // using faked key
249         return true;
250       }
251       // If up to now scanner is not seeked yet, this means passed KV is smaller
252       // than first KV in file, and it is the first time we seek on this file.
253       // So we also need to work from the start of file.
254       if (!s.isSeeked()) {
255         return  s.seekTo();
256       }
257       return true;
258     }
259     // passed KV is larger than current KV in file, if there is a next
260     // it is after, if not then this scanner is done.
261     return s.next();
262   }
263 
264   @Override
265   public long getSequenceID() {
266     return reader.getSequenceID();
267   }
268 
269   /**
270    * Pretend we have done a seek but don't do it yet, if possible. The hope is
271    * that we find requested columns in more recent files and won't have to seek
272    * in older files. Creates a fake key/value with the given row/column and the
273    * highest (most recent) possible timestamp we might get from this file. When
274    * users of such "lazy scanner" need to know the next KV precisely (e.g. when
275    * this scanner is at the top of the heap), they run {@link #enforceSeek()}.
276    * <p>
277    * Note that this function does guarantee that the current KV of this scanner
278    * will be advanced to at least the given KV. Because of this, it does have
279    * to do a real seek in cases when the seek timestamp is older than the
280    * highest timestamp of the file, e.g. when we are trying to seek to the next
281    * row/column and use OLDEST_TIMESTAMP in the seek key.
282    */
283   @Override
284   public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom)
285       throws IOException {
286     if (kv.getFamilyLength() == 0) {
287       useBloom = false;
288     }
289 
290     boolean haveToSeek = true;
291     if (useBloom) {
292       // check ROWCOL Bloom filter first.
293       if (reader.getBloomFilterType() == BloomType.ROWCOL) {
294         haveToSeek = reader.passesGeneralBloomFilter(kv.getBuffer(),
295             kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
296             kv.getQualifierOffset(), kv.getQualifierLength());
297       } else if (this.matcher != null && !matcher.hasNullColumnInQuery() &&
298           (kv.isDeleteFamily() || kv.isDeleteFamilyVersion())) {
299         // if there is no such delete family kv in the store file,
300         // then no need to seek.
301         haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getBuffer(),
302             kv.getRowOffset(), kv.getRowLength());
303       }
304     }
305 
306     delayedReseek = forward;
307     delayedSeekKV = kv;
308 
309     if (haveToSeek) {
310       // This row/column might be in this store file (or we did not use the
311       // Bloom filter), so we still need to seek.
312       realSeekDone = false;
313       long maxTimestampInFile = reader.getMaxTimestamp();
314       long seekTimestamp = kv.getTimestamp();
315       if (seekTimestamp > maxTimestampInFile) {
316         // Create a fake key that is not greater than the real next key.
317         // (Lower timestamps correspond to higher KVs.)
318         // To understand this better, consider that we are asked to seek to
319         // a higher timestamp than the max timestamp in this file. We know that
320         // the next point when we have to consider this file again is when we
321         // pass the max timestamp of this file (with the same row/column).
322         cur = kv.createFirstOnRowColTS(maxTimestampInFile);
323       } else {
324         // This will be the case e.g. when we need to seek to the next
325         // row/column, and we don't know exactly what they are, so we set the
326         // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this
327         // row/column.
328         enforceSeek();
329       }
330       return cur != null;
331     }
332 
333     // Multi-column Bloom filter optimization.
334     // Create a fake key/value, so that this scanner only bubbles up to the top
335     // of the KeyValueHeap in StoreScanner after we scanned this row/column in
336     // all other store files. The query matcher will then just skip this fake
337     // key/value and the store scanner will progress to the next column. This
338     // is obviously not a "real real" seek, but unlike the fake KV earlier in
339     // this method, we want this to be propagated to ScanQueryMatcher.
340     cur = kv.createLastOnRowCol();
341 
342     realSeekDone = true;
343     return true;
344   }
345 
346   Reader getReaderForTesting() {
347     return reader;
348   }
349 
350   @Override
351   public boolean realSeekDone() {
352     return realSeekDone;
353   }
354 
355   @Override
356   public void enforceSeek() throws IOException {
357     if (realSeekDone)
358       return;
359 
360     if (delayedReseek) {
361       reseek(delayedSeekKV);
362     } else {
363       seek(delayedSeekKV);
364     }
365   }
366 
367   public void setScanQueryMatcher(ScanQueryMatcher matcher) {
368     this.matcher = matcher;
369   }
370 
371   @Override
372   public boolean isFileScanner() {
373     return true;
374   }
375 
376   // Test methods
377 
378   static final long getSeekCount() {
379     return seekCount.get();
380   }
381   static final void instrument() {
382     seekCount = new AtomicLong();
383   }
384 
385   @Override
386   public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns, long oldestUnexpiredTS) {
387     return reader.passesTimerangeFilter(scan, oldestUnexpiredTS)
388         && reader.passesKeyRangeFilter(scan) && reader.passesBloomFilter(scan, columns);
389   }
390 }