View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.Collection;
26  import java.util.List;
27  import java.util.SortedSet;
28  import java.util.concurrent.atomic.AtomicLong;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.KeyValue;
33  import org.apache.hadoop.hbase.client.Scan;
34  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
35  import org.apache.hadoop.hbase.regionserver.StoreFile.Reader;
36  
37  /**
38   * KeyValueScanner adaptor over the Reader.  It also provides hooks into
39   * bloom filter things.
40   */
41  public class StoreFileScanner implements KeyValueScanner {
42    static final Log LOG = LogFactory.getLog(Store.class);
43  
44    // the reader it comes from:
45    private final StoreFile.Reader reader;
46    private final HFileScanner hfs;
47    private KeyValue cur = null;
48  
49    private boolean realSeekDone;
50    private boolean delayedReseek;
51    private KeyValue delayedSeekKV;
52  
53    private boolean enforceMVCC = false;
54  
55    private static final AtomicLong seekCount = new AtomicLong();
56  
57    private ScanQueryMatcher matcher;
58  
59    /**
60     * Implements a {@link KeyValueScanner} on top of the specified {@link HFileScanner}
61     * @param hfs HFile scanner
62     */
63    public StoreFileScanner(StoreFile.Reader reader, HFileScanner hfs, boolean useMVCC) {
64      this.reader = reader;
65      this.hfs = hfs;
66      this.enforceMVCC = useMVCC;
67    }
68  
69    /**
70     * Return an array of scanners corresponding to the given
71     * set of store files.
72     */
73    public static List<StoreFileScanner> getScannersForStoreFiles(
74        Collection<StoreFile> files,
75        boolean cacheBlocks,
76        boolean usePread) throws IOException {
77      return getScannersForStoreFiles(files, cacheBlocks,
78                                     usePread, false);
79    }
80  
81    /**
82     * Return an array of scanners corresponding to the given set of store files.
83     */
84    public static List<StoreFileScanner> getScannersForStoreFiles(
85        Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
86        boolean isCompaction) throws IOException {
87      return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction,
88          null);
89    }
90  
91    /**
92     * Return an array of scanners corresponding to the given set of store files,
93     * And set the ScanQueryMatcher for each store file scanner for further
94     * optimization
95     */
96    public static List<StoreFileScanner> getScannersForStoreFiles(
97        Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
98        boolean isCompaction, ScanQueryMatcher matcher) throws IOException {
99      List<StoreFileScanner> scanners = new ArrayList<StoreFileScanner>(
100         files.size());
101     for (StoreFile file : files) {
102       StoreFile.Reader r = file.createReader();
103       StoreFileScanner scanner = r.getStoreFileScanner(cacheBlocks, usePread,
104           isCompaction);
105       scanner.setScanQueryMatcher(matcher);
106       scanners.add(scanner);
107     }
108     return scanners;
109   }
110 
111   public String toString() {
112     return "StoreFileScanner[" + hfs.toString() + ", cur=" + cur + "]";
113   }
114 
115   public KeyValue peek() {
116     return cur;
117   }
118 
119   public KeyValue next() throws IOException {
120     KeyValue retKey = cur;
121 
122     try {
123       // only seek if we aren't at the end. cur == null implies 'end'.
124       if (cur != null) {
125         hfs.next();
126         cur = hfs.getKeyValue();
127         skipKVsNewerThanReadpoint();
128       }
129     } catch(IOException e) {
130       throw new IOException("Could not iterate " + this, e);
131     }
132     return retKey;
133   }
134 
135   public boolean seek(KeyValue key) throws IOException {
136     seekCount.incrementAndGet();
137 
138     try {
139       try {
140         if(!seekAtOrAfter(hfs, key)) {
141           close();
142           return false;
143         }
144 
145         cur = hfs.getKeyValue();
146 
147         return skipKVsNewerThanReadpoint();
148       } finally {
149         realSeekDone = true;
150       }
151     } catch (IOException ioe) {
152       throw new IOException("Could not seek " + this + " to key " + key, ioe);
153     }
154   }
155 
156   public boolean reseek(KeyValue key) throws IOException {
157     seekCount.incrementAndGet();
158 
159     try {
160       try {
161         if (!reseekAtOrAfter(hfs, key)) {
162           close();
163           return false;
164         }
165         cur = hfs.getKeyValue();
166 
167         return skipKVsNewerThanReadpoint();
168       } finally {
169         realSeekDone = true;
170       }
171     } catch (IOException ioe) {
172       throw new IOException("Could not reseek " + this + " to key " + key,
173           ioe);
174     }
175   }
176 
177   protected boolean skipKVsNewerThanReadpoint() throws IOException {
178     long readPoint = MultiVersionConsistencyControl.getThreadReadPoint();
179 
180     // We want to ignore all key-values that are newer than our current
181     // readPoint
182     while(enforceMVCC
183         && cur != null
184         && (cur.getMemstoreTS() > readPoint)) {
185       hfs.next();
186       cur = hfs.getKeyValue();
187     }
188 
189     if (cur == null) {
190       close();
191       return false;
192     }
193 
194     // For the optimisation in HBASE-4346, we set the KV's memstoreTS to
195     // 0, if it is older than all the scanners' read points. It is possible
196     // that a newer KV's memstoreTS was reset to 0. But, there is an
197     // older KV which was not reset to 0 (because it was
198     // not old enough during flush). Make sure that we set it correctly now,
199     // so that the comparision order does not change.
200     if (cur.getMemstoreTS() <= readPoint) {
201       cur.setMemstoreTS(0);
202     }
203     return true;
204   }
205 
206   public void close() {
207     // Nothing to close on HFileScanner?
208     cur = null;
209   }
210 
211   /**
212    *
213    * @param s
214    * @param k
215    * @return
216    * @throws IOException
217    */
218   public static boolean seekAtOrAfter(HFileScanner s, KeyValue k)
219   throws IOException {
220     int result = s.seekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
221     if(result < 0) {
222       // Passed KV is smaller than first KV in file, work from start of file
223       return s.seekTo();
224     } else if(result > 0) {
225       // Passed KV is larger than current KV in file, if there is a next
226       // it is the "after", if not then this scanner is done.
227       return s.next();
228     }
229     // Seeked to the exact key
230     return true;
231   }
232 
233   static boolean reseekAtOrAfter(HFileScanner s, KeyValue k)
234   throws IOException {
235     //This function is similar to seekAtOrAfter function
236     int result = s.reseekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
237     if (result <= 0) {
238       // If up to now scanner is not seeked yet, this means passed KV is smaller
239       // than first KV in file, and it is the first time we seek on this file.
240       // So we also need to work from the start of file.
241       if (!s.isSeeked()) {
242         return  s.seekTo();
243       }
244       return true;
245     } else {
246       // passed KV is larger than current KV in file, if there is a next
247       // it is after, if not then this scanner is done.
248       return s.next();
249     }
250   }
251 
252   @Override
253   public long getSequenceID() {
254     return reader.getSequenceID();
255   }
256 
257   /**
258    * Pretend we have done a seek but don't do it yet, if possible. The hope is
259    * that we find requested columns in more recent files and won't have to seek
260    * in older files. Creates a fake key/value with the given row/column and the
261    * highest (most recent) possible timestamp we might get from this file. When
262    * users of such "lazy scanner" need to know the next KV precisely (e.g. when
263    * this scanner is at the top of the heap), they run {@link #enforceSeek()}.
264    * <p>
265    * Note that this function does guarantee that the current KV of this scanner
266    * will be advanced to at least the given KV. Because of this, it does have
267    * to do a real seek in cases when the seek timestamp is older than the
268    * highest timestamp of the file, e.g. when we are trying to seek to the next
269    * row/column and use OLDEST_TIMESTAMP in the seek key.
270    */
271   @Override
272   public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom)
273       throws IOException {
274     if (kv.getFamilyLength() == 0) {
275       useBloom = false;
276     }
277 
278     boolean haveToSeek = true;
279     if (useBloom) {
280       // check ROWCOL Bloom filter first.
281       if (reader.getBloomFilterType() == StoreFile.BloomType.ROWCOL) {
282         haveToSeek = reader.passesGeneralBloomFilter(kv.getBuffer(),
283             kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
284             kv.getQualifierOffset(), kv.getQualifierLength());
285       } else if (this.matcher != null && !matcher.hasNullColumnInQuery() &&
286           kv.isDeleteFamily()) {
287         // if there is no such delete family kv in the store file,
288         // then no need to seek.
289         haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getBuffer(),
290             kv.getRowOffset(), kv.getRowLength());
291       }
292     }
293 
294     delayedReseek = forward;
295     delayedSeekKV = kv;
296 
297     if (haveToSeek) {
298       // This row/column might be in this store file (or we did not use the
299       // Bloom filter), so we still need to seek.
300       realSeekDone = false;
301       long maxTimestampInFile = reader.getMaxTimestamp();
302       long seekTimestamp = kv.getTimestamp();
303       if (seekTimestamp > maxTimestampInFile) {
304         // Create a fake key that is not greater than the real next key.
305         // (Lower timestamps correspond to higher KVs.)
306         // To understand this better, consider that we are asked to seek to
307         // a higher timestamp than the max timestamp in this file. We know that
308         // the next point when we have to consider this file again is when we
309         // pass the max timestamp of this file (with the same row/column).
310         cur = kv.createFirstOnRowColTS(maxTimestampInFile);
311       } else {
312         // This will be the case e.g. when we need to seek to the next
313         // row/column, and we don't know exactly what they are, so we set the
314         // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this
315         // row/column.
316         enforceSeek();
317       }
318       return cur != null;
319     }
320 
321     // Multi-column Bloom filter optimization.
322     // Create a fake key/value, so that this scanner only bubbles up to the top
323     // of the KeyValueHeap in StoreScanner after we scanned this row/column in
324     // all other store files. The query matcher will then just skip this fake
325     // key/value and the store scanner will progress to the next column. This
326     // is obviously not a "real real" seek, but unlike the fake KV earlier in
327     // this method, we want this to be propagated to ScanQueryMatcher.
328     cur = kv.createLastOnRowCol();
329 
330     realSeekDone = true;
331     return true;
332   }
333 
334   Reader getReaderForTesting() {
335     return reader;
336   }
337 
338   @Override
339   public boolean realSeekDone() {
340     return realSeekDone;
341   }
342 
343   @Override
344   public void enforceSeek() throws IOException {
345     if (realSeekDone)
346       return;
347 
348     if (delayedReseek) {
349       reseek(delayedSeekKV);
350     } else {
351       seek(delayedSeekKV);
352     }
353   }
354 
355   public void setScanQueryMatcher(ScanQueryMatcher matcher) {
356     this.matcher = matcher;
357   }
358 
359   @Override
360   public boolean isFileScanner() {
361     return true;
362   }
363 
364   // Test methods
365 
366   static final long getSeekCount() {
367     return seekCount.get();
368   }
369 
370   @Override
371   public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns, long oldestUnexpiredTS) {
372     return reader.passesTimerangeFilter(scan, oldestUnexpiredTS)
373         && reader.passesKeyRangeFilter(scan) && reader.passesBloomFilter(scan, columns);
374   }
375 }