View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Collection;
25  import java.util.List;
26  import java.util.SortedSet;
27  import java.util.concurrent.atomic.AtomicLong;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.classification.InterfaceAudience;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.KeyValue;
34  import org.apache.hadoop.hbase.client.Scan;
35  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
36  import org.apache.hadoop.hbase.regionserver.StoreFile.Reader;
37  
38  /**
39   * KeyValueScanner adaptor over the Reader.  It also provides hooks into
40   * bloom filter things.
41   */
42  @InterfaceAudience.LimitedPrivate("Coprocessor")
43  public class StoreFileScanner implements KeyValueScanner {
44    static final Log LOG = LogFactory.getLog(HStore.class);
45  
46    // the reader it comes from:
47    private final StoreFile.Reader reader;
48    private final HFileScanner hfs;
49    private KeyValue cur = null;
50  
51    private boolean realSeekDone;
52    private boolean delayedReseek;
53    private KeyValue delayedSeekKV;
54  
55    private boolean enforceMVCC = false;
56  
57    private static final AtomicLong seekCount = new AtomicLong();
58  
59    private ScanQueryMatcher matcher;
60  
61    /**
62     * Implements a {@link KeyValueScanner} on top of the specified {@link HFileScanner}
63     * @param hfs HFile scanner
64     */
65    public StoreFileScanner(StoreFile.Reader reader, HFileScanner hfs, boolean useMVCC) {
66      this.reader = reader;
67      this.hfs = hfs;
68      this.enforceMVCC = useMVCC;
69    }
70  
71    /**
72     * Return an array of scanners corresponding to the given
73     * set of store files.
74     */
75    public static List<StoreFileScanner> getScannersForStoreFiles(
76        Collection<StoreFile> files,
77        boolean cacheBlocks,
78        boolean usePread) throws IOException {
79      return getScannersForStoreFiles(files, cacheBlocks,
80                                     usePread, false);
81    }
82  
83    /**
84     * Return an array of scanners corresponding to the given set of store files.
85     */
86    public static List<StoreFileScanner> getScannersForStoreFiles(
87        Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
88        boolean isCompaction) throws IOException {
89      return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction,
90          null);
91    }
92  
93    /**
94     * Return an array of scanners corresponding to the given set of store files,
95     * And set the ScanQueryMatcher for each store file scanner for further
96     * optimization
97     */
98    public static List<StoreFileScanner> getScannersForStoreFiles(
99        Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
100       boolean isCompaction, ScanQueryMatcher matcher) throws IOException {
101     List<StoreFileScanner> scanners = new ArrayList<StoreFileScanner>(
102         files.size());
103     for (StoreFile file : files) {
104       StoreFile.Reader r = file.createReader();
105       StoreFileScanner scanner = r.getStoreFileScanner(cacheBlocks, usePread,
106           isCompaction);
107       scanner.setScanQueryMatcher(matcher);
108       scanners.add(scanner);
109     }
110     return scanners;
111   }
112 
113   public String toString() {
114     return "StoreFileScanner[" + hfs.toString() + ", cur=" + cur + "]";
115   }
116 
117   public KeyValue peek() {
118     return cur;
119   }
120 
121   public KeyValue next() throws IOException {
122     KeyValue retKey = cur;
123 
124     try {
125       // only seek if we aren't at the end. cur == null implies 'end'.
126       if (cur != null) {
127         hfs.next();
128         cur = hfs.getKeyValue();
129         skipKVsNewerThanReadpoint();
130       }
131     } catch(IOException e) {
132       throw new IOException("Could not iterate " + this, e);
133     }
134     return retKey;
135   }
136 
137   public boolean seek(KeyValue key) throws IOException {
138     seekCount.incrementAndGet();
139 
140     try {
141       try {
142         if(!seekAtOrAfter(hfs, key)) {
143           close();
144           return false;
145         }
146 
147         cur = hfs.getKeyValue();
148 
149         return skipKVsNewerThanReadpoint();
150       } finally {
151         realSeekDone = true;
152       }
153     } catch (IOException ioe) {
154       throw new IOException("Could not seek " + this + " to key " + key, ioe);
155     }
156   }
157 
158   public boolean reseek(KeyValue key) throws IOException {
159     seekCount.incrementAndGet();
160 
161     try {
162       try {
163         if (!reseekAtOrAfter(hfs, key)) {
164           close();
165           return false;
166         }
167         cur = hfs.getKeyValue();
168 
169         return skipKVsNewerThanReadpoint();
170       } finally {
171         realSeekDone = true;
172       }
173     } catch (IOException ioe) {
174       throw new IOException("Could not reseek " + this + " to key " + key,
175           ioe);
176     }
177   }
178 
179   protected boolean skipKVsNewerThanReadpoint() throws IOException {
180     long readPoint = MultiVersionConsistencyControl.getThreadReadPoint();
181 
182     // We want to ignore all key-values that are newer than our current
183     // readPoint
184     while(enforceMVCC
185         && cur != null
186         && (cur.getMemstoreTS() > readPoint)) {
187       hfs.next();
188       cur = hfs.getKeyValue();
189     }
190 
191     if (cur == null) {
192       close();
193       return false;
194     }
195 
196     // For the optimisation in HBASE-4346, we set the KV's memstoreTS to
197     // 0, if it is older than all the scanners' read points. It is possible
198     // that a newer KV's memstoreTS was reset to 0. But, there is an
199     // older KV which was not reset to 0 (because it was
200     // not old enough during flush). Make sure that we set it correctly now,
201     // so that the comparision order does not change.
202     if (cur.getMemstoreTS() <= readPoint) {
203       cur.setMemstoreTS(0);
204     }
205     return true;
206   }
207 
208   public void close() {
209     // Nothing to close on HFileScanner?
210     cur = null;
211   }
212 
213   /**
214    *
215    * @param s
216    * @param k
217    * @return false if not found or if k is after the end.
218    * @throws IOException
219    */
220   public static boolean seekAtOrAfter(HFileScanner s, KeyValue k)
221   throws IOException {
222     int result = s.seekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
223     if(result < 0) {
224       if (result == HConstants.INDEX_KEY_MAGIC) {
225         // using faked key
226         return true;
227       }
228       // Passed KV is smaller than first KV in file, work from start of file
229       return s.seekTo();
230     } else if(result > 0) {
231       // Passed KV is larger than current KV in file, if there is a next
232       // it is the "after", if not then this scanner is done.
233       return s.next();
234     }
235     // Seeked to the exact key
236     return true;
237   }
238 
239   static boolean reseekAtOrAfter(HFileScanner s, KeyValue k)
240   throws IOException {
241     //This function is similar to seekAtOrAfter function
242     int result = s.reseekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
243     if (result <= 0) {
244       if (result == HConstants.INDEX_KEY_MAGIC) {
245         // using faked key
246         return true;
247       }
248       // If up to now scanner is not seeked yet, this means passed KV is smaller
249       // than first KV in file, and it is the first time we seek on this file.
250       // So we also need to work from the start of file.
251       if (!s.isSeeked()) {
252         return  s.seekTo();
253       }
254       return true;
255     }
256     // passed KV is larger than current KV in file, if there is a next
257     // it is after, if not then this scanner is done.
258     return s.next();
259   }
260 
261   @Override
262   public long getSequenceID() {
263     return reader.getSequenceID();
264   }
265 
266   /**
267    * Pretend we have done a seek but don't do it yet, if possible. The hope is
268    * that we find requested columns in more recent files and won't have to seek
269    * in older files. Creates a fake key/value with the given row/column and the
270    * highest (most recent) possible timestamp we might get from this file. When
271    * users of such "lazy scanner" need to know the next KV precisely (e.g. when
272    * this scanner is at the top of the heap), they run {@link #enforceSeek()}.
273    * <p>
274    * Note that this function does guarantee that the current KV of this scanner
275    * will be advanced to at least the given KV. Because of this, it does have
276    * to do a real seek in cases when the seek timestamp is older than the
277    * highest timestamp of the file, e.g. when we are trying to seek to the next
278    * row/column and use OLDEST_TIMESTAMP in the seek key.
279    */
280   @Override
281   public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom)
282       throws IOException {
283     if (kv.getFamilyLength() == 0) {
284       useBloom = false;
285     }
286 
287     boolean haveToSeek = true;
288     if (useBloom) {
289       // check ROWCOL Bloom filter first.
290       if (reader.getBloomFilterType() == BloomType.ROWCOL) {
291         haveToSeek = reader.passesGeneralBloomFilter(kv.getBuffer(),
292             kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
293             kv.getQualifierOffset(), kv.getQualifierLength());
294       } else if (this.matcher != null && !matcher.hasNullColumnInQuery() &&
295           kv.isDeleteFamily()) {
296         // if there is no such delete family kv in the store file,
297         // then no need to seek.
298         haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getBuffer(),
299             kv.getRowOffset(), kv.getRowLength());
300       }
301     }
302 
303     delayedReseek = forward;
304     delayedSeekKV = kv;
305 
306     if (haveToSeek) {
307       // This row/column might be in this store file (or we did not use the
308       // Bloom filter), so we still need to seek.
309       realSeekDone = false;
310       long maxTimestampInFile = reader.getMaxTimestamp();
311       long seekTimestamp = kv.getTimestamp();
312       if (seekTimestamp > maxTimestampInFile) {
313         // Create a fake key that is not greater than the real next key.
314         // (Lower timestamps correspond to higher KVs.)
315         // To understand this better, consider that we are asked to seek to
316         // a higher timestamp than the max timestamp in this file. We know that
317         // the next point when we have to consider this file again is when we
318         // pass the max timestamp of this file (with the same row/column).
319         cur = kv.createFirstOnRowColTS(maxTimestampInFile);
320       } else {
321         // This will be the case e.g. when we need to seek to the next
322         // row/column, and we don't know exactly what they are, so we set the
323         // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this
324         // row/column.
325         enforceSeek();
326       }
327       return cur != null;
328     }
329 
330     // Multi-column Bloom filter optimization.
331     // Create a fake key/value, so that this scanner only bubbles up to the top
332     // of the KeyValueHeap in StoreScanner after we scanned this row/column in
333     // all other store files. The query matcher will then just skip this fake
334     // key/value and the store scanner will progress to the next column. This
335     // is obviously not a "real real" seek, but unlike the fake KV earlier in
336     // this method, we want this to be propagated to ScanQueryMatcher.
337     cur = kv.createLastOnRowCol();
338 
339     realSeekDone = true;
340     return true;
341   }
342 
343   Reader getReaderForTesting() {
344     return reader;
345   }
346 
347   @Override
348   public boolean realSeekDone() {
349     return realSeekDone;
350   }
351 
352   @Override
353   public void enforceSeek() throws IOException {
354     if (realSeekDone)
355       return;
356 
357     if (delayedReseek) {
358       reseek(delayedSeekKV);
359     } else {
360       seek(delayedSeekKV);
361     }
362   }
363 
364   public void setScanQueryMatcher(ScanQueryMatcher matcher) {
365     this.matcher = matcher;
366   }
367 
368   @Override
369   public boolean isFileScanner() {
370     return true;
371   }
372 
373   // Test methods
374 
375   static final long getSeekCount() {
376     return seekCount.get();
377   }
378 
379   @Override
380   public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns,
381       long oldestUnexpiredTS) {
382     return reader.passesTimerangeFilter(scan, oldestUnexpiredTS) &&
383         reader.passesBloomFilter(scan, columns);
384   }
385 }