View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.Collection;
26  import java.util.List;
27  import java.util.SortedSet;
28  import java.util.concurrent.atomic.AtomicLong;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.KeyValue;
33  import org.apache.hadoop.hbase.client.Scan;
34  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
35  import org.apache.hadoop.hbase.regionserver.StoreFile.Reader;
36  
37  /**
38   * KeyValueScanner adaptor over the Reader.  It also provides hooks into
39   * bloom filter things.
40   */
41  public class StoreFileScanner implements KeyValueScanner {
42    static final Log LOG = LogFactory.getLog(Store.class);
43  
44    // the reader it comes from:
45    private final StoreFile.Reader reader;
46    private final HFileScanner hfs;
47    private KeyValue cur = null;
48  
49    private boolean realSeekDone;
50    private boolean delayedReseek;
51    private KeyValue delayedSeekKV;
52  
53    private boolean enforceMVCC = false;
54  
55    //The variable, realSeekDone, may cheat on store file scanner for the
56    // multi-column bloom-filter optimization.
57    // So this flag shows whether this storeFileScanner could do a reseek.
58    private boolean isReseekable = false;
59  
60    private static final AtomicLong seekCount = new AtomicLong();
61  
62    private ScanQueryMatcher matcher;
63  
64    /**
65     * Implements a {@link KeyValueScanner} on top of the specified {@link HFileScanner}
66     * @param hfs HFile scanner
67     */
68    public StoreFileScanner(StoreFile.Reader reader, HFileScanner hfs, boolean useMVCC) {
69      this.reader = reader;
70      this.hfs = hfs;
71      this.enforceMVCC = useMVCC;
72    }
73  
74    /**
75     * Return an array of scanners corresponding to the given
76     * set of store files.
77     */
78    public static List<StoreFileScanner> getScannersForStoreFiles(
79        Collection<StoreFile> files,
80        boolean cacheBlocks,
81        boolean usePread) throws IOException {
82      return getScannersForStoreFiles(files, cacheBlocks,
83                                     usePread, false);
84    }
85  
86    /**
87     * Return an array of scanners corresponding to the given set of store files.
88     */
89    public static List<StoreFileScanner> getScannersForStoreFiles(
90        Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
91        boolean isCompaction) throws IOException {
92      return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction,
93          null);
94    }
95  
96    /**
97     * Return an array of scanners corresponding to the given set of store files,
98     * And set the ScanQueryMatcher for each store file scanner for further
99     * optimization
100    */
101   public static List<StoreFileScanner> getScannersForStoreFiles(
102       Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
103       boolean isCompaction, ScanQueryMatcher matcher) throws IOException {
104     List<StoreFileScanner> scanners = new ArrayList<StoreFileScanner>(
105         files.size());
106     for (StoreFile file : files) {
107       StoreFile.Reader r = file.createReader();
108       StoreFileScanner scanner = r.getStoreFileScanner(cacheBlocks, usePread,
109           isCompaction);
110       scanner.setScanQueryMatcher(matcher);
111       scanners.add(scanner);
112     }
113     return scanners;
114   }
115 
116   public String toString() {
117     return "StoreFileScanner[" + hfs.toString() + ", cur=" + cur + "]";
118   }
119 
120   public KeyValue peek() {
121     return cur;
122   }
123 
124   public KeyValue next() throws IOException {
125     KeyValue retKey = cur;
126 
127     try {
128       // only seek if we aren't at the end. cur == null implies 'end'.
129       if (cur != null) {
130         hfs.next();
131         cur = hfs.getKeyValue();
132         skipKVsNewerThanReadpoint();
133       }
134     } catch(IOException e) {
135       throw new IOException("Could not iterate " + this, e);
136     }
137     return retKey;
138   }
139 
140   public boolean seek(KeyValue key) throws IOException {
141     seekCount.incrementAndGet();
142 
143     try {
144       try {
145         if(!seekAtOrAfter(hfs, key)) {
146           close();
147           return false;
148         }
149 
150         this.isReseekable = true;
151         cur = hfs.getKeyValue();
152 
153         return skipKVsNewerThanReadpoint();
154       } finally {
155         realSeekDone = true;
156       }
157     } catch (IOException ioe) {
158       throw new IOException("Could not seek " + this + " to key " + key, ioe);
159     }
160   }
161 
162   public boolean reseek(KeyValue key) throws IOException {
163     seekCount.incrementAndGet();
164 
165     try {
166       try {
167         if (!reseekAtOrAfter(hfs, key)) {
168           close();
169           return false;
170         }
171         cur = hfs.getKeyValue();
172 
173         return skipKVsNewerThanReadpoint();
174       } finally {
175         realSeekDone = true;
176       }
177     } catch (IOException ioe) {
178       throw new IOException("Could not reseek " + this + " to key " + key,
179           ioe);
180     }
181   }
182 
183   protected boolean skipKVsNewerThanReadpoint() throws IOException {
184     long readPoint = MultiVersionConsistencyControl.getThreadReadPoint();
185 
186     // We want to ignore all key-values that are newer than our current
187     // readPoint
188     while(enforceMVCC
189         && cur != null
190         && (cur.getMemstoreTS() > readPoint)) {
191       hfs.next();
192       cur = hfs.getKeyValue();
193     }
194 
195     if (cur == null) {
196       close();
197       return false;
198     }
199 
200     // For the optimisation in HBASE-4346, we set the KV's memstoreTS to
201     // 0, if it is older than all the scanners' read points. It is possible
202     // that a newer KV's memstoreTS was reset to 0. But, there is an
203     // older KV which was not reset to 0 (because it was
204     // not old enough during flush). Make sure that we set it correctly now,
205     // so that the comparision order does not change.
206     if (cur.getMemstoreTS() <= readPoint) {
207       cur.setMemstoreTS(0);
208     }
209     return true;
210   }
211 
212   public void close() {
213     // Nothing to close on HFileScanner?
214     cur = null;
215   }
216 
217   /**
218    *
219    * @param s
220    * @param k
221    * @return
222    * @throws IOException
223    */
224   public static boolean seekAtOrAfter(HFileScanner s, KeyValue k)
225   throws IOException {
226     int result = s.seekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
227     if(result < 0) {
228       // Passed KV is smaller than first KV in file, work from start of file
229       return s.seekTo();
230     } else if(result > 0) {
231       // Passed KV is larger than current KV in file, if there is a next
232       // it is the "after", if not then this scanner is done.
233       return s.next();
234     }
235     // Seeked to the exact key
236     return true;
237   }
238 
239   static boolean reseekAtOrAfter(HFileScanner s, KeyValue k)
240   throws IOException {
241     //This function is similar to seekAtOrAfter function
242     int result = s.reseekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
243     if (result <= 0) {
244       return true;
245     } else {
246       // passed KV is larger than current KV in file, if there is a next
247       // it is after, if not then this scanner is done.
248       return s.next();
249     }
250   }
251 
252   @Override
253   public long getSequenceID() {
254     return reader.getSequenceID();
255   }
256 
257   /**
258    * Pretend we have done a seek but don't do it yet, if possible. The hope is
259    * that we find requested columns in more recent files and won't have to seek
260    * in older files. Creates a fake key/value with the given row/column and the
261    * highest (most recent) possible timestamp we might get from this file. When
262    * users of such "lazy scanner" need to know the next KV precisely (e.g. when
263    * this scanner is at the top of the heap), they run {@link #enforceSeek()}.
264    * <p>
265    * Note that this function does guarantee that the current KV of this scanner
266    * will be advanced to at least the given KV. Because of this, it does have
267    * to do a real seek in cases when the seek timestamp is older than the
268    * highest timestamp of the file, e.g. when we are trying to seek to the next
269    * row/column and use OLDEST_TIMESTAMP in the seek key.
270    */
271   @Override
272   public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom)
273       throws IOException {
274     if (kv.getFamilyLength() == 0) {
275       useBloom = false;
276     }
277 
278     boolean haveToSeek = true;
279     if (useBloom) {
280       // check ROWCOL Bloom filter first.
281       if (reader.getBloomFilterType() == StoreFile.BloomType.ROWCOL) {
282         haveToSeek = reader.passesGeneralBloomFilter(kv.getBuffer(),
283             kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
284             kv.getQualifierOffset(), kv.getQualifierLength());
285       } else if (this.matcher != null && !matcher.hasNullColumnInQuery() &&
286           kv.isDeleteFamily()) {
287         // if there is no such delete family kv in the store file,
288         // then no need to seek.
289         haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getBuffer(),
290             kv.getRowOffset(), kv.getRowLength());
291       }
292     }
293 
294     delayedReseek = forward;
295     delayedSeekKV = kv;
296 
297     if (haveToSeek) {
298       // This row/column might be in this store file (or we did not use the
299       // Bloom filter), so we still need to seek.
300       realSeekDone = false;
301       long maxTimestampInFile = reader.getMaxTimestamp();
302       long seekTimestamp = kv.getTimestamp();
303       if (seekTimestamp > maxTimestampInFile) {
304         // Create a fake key that is not greater than the real next key.
305         // (Lower timestamps correspond to higher KVs.)
306         // To understand this better, consider that we are asked to seek to
307         // a higher timestamp than the max timestamp in this file. We know that
308         // the next point when we have to consider this file again is when we
309         // pass the max timestamp of this file (with the same row/column).
310         cur = kv.createFirstOnRowColTS(maxTimestampInFile);
311       } else {
312         // This will be the case e.g. when we need to seek to the next
313         // row/column, and we don't know exactly what they are, so we set the
314         // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this
315         // row/column.
316         enforceSeek();
317       }
318       return cur != null;
319     }
320 
321     // Multi-column Bloom filter optimization.
322     // Create a fake key/value, so that this scanner only bubbles up to the top
323     // of the KeyValueHeap in StoreScanner after we scanned this row/column in
324     // all other store files. The query matcher will then just skip this fake
325     // key/value and the store scanner will progress to the next column. This
326     // is obviously not a "real real" seek, but unlike the fake KV earlier in
327     // this method, we want this to be propagated to ScanQueryMatcher.
328     cur = kv.createLastOnRowCol();
329 
330     realSeekDone = true;
331     return true;
332   }
333 
334   Reader getReaderForTesting() {
335     return reader;
336   }
337 
338   @Override
339   public boolean realSeekDone() {
340     return realSeekDone;
341   }
342 
343   @Override
344   public void enforceSeek() throws IOException {
345     if (realSeekDone)
346       return;
347 
348     if (delayedReseek && this.isReseekable) {
349       reseek(delayedSeekKV);
350     } else {
351       seek(delayedSeekKV);
352     }
353   }
354 
355   public void setScanQueryMatcher(ScanQueryMatcher matcher) {
356     this.matcher = matcher;
357   }
358 
359   @Override
360   public boolean isFileScanner() {
361     return true;
362   }
363 
364   // Test methods
365 
366   static final long getSeekCount() {
367     return seekCount.get();
368   }
369 
370   @Override
371   public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns,
372       long oldestUnexpiredTS) {
373     return reader.passesTimerangeFilter(scan, oldestUnexpiredTS) &&
374         reader.passesBloomFilter(scan, columns);
375   }
376 }