View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.Collection;
26  import java.util.List;
27  import java.util.SortedSet;
28  import java.util.concurrent.atomic.AtomicLong;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.client.Scan;
36  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
37  import org.apache.hadoop.hbase.regionserver.StoreFile.Reader;
38  
39  /**
 * KeyValueScanner adaptor over a store file {@link Reader}. It also consults
 * the reader's Bloom filters so that seeks which cannot match may be avoided.
42   */
43  @InterfaceAudience.LimitedPrivate("Coprocessor")
44  public class StoreFileScanner implements KeyValueScanner {
45    static final Log LOG = LogFactory.getLog(HStore.class);
46  
47    // the reader it comes from:
48    private final StoreFile.Reader reader;
49    private final HFileScanner hfs;
50    private KeyValue cur = null;
51  
52    private boolean realSeekDone;
53    private boolean delayedReseek;
54    private KeyValue delayedSeekKV;
55  
56    private boolean enforceMVCC = false;
57    private boolean hasMVCCInfo = false;
58    // A flag represents whether could stop skipping KeyValues for MVCC
59    // if have encountered the next row. Only used for reversed scan
60    private boolean stopSkippingKVsIfNextRow = false;
61  
62    private static AtomicLong seekCount;
63  
64    private ScanQueryMatcher matcher;
65    
66    private long readPt;
67  
68    /**
69     * Implements a {@link KeyValueScanner} on top of the specified {@link HFileScanner}
70     * @param hfs HFile scanner
71     */
72    public StoreFileScanner(StoreFile.Reader reader, HFileScanner hfs, boolean useMVCC,
73        boolean hasMVCC, long readPt) {
74      this.readPt = readPt;
75      this.reader = reader;
76      this.hfs = hfs;
77      this.enforceMVCC = useMVCC;
78      this.hasMVCCInfo = hasMVCC;
79    }
80  
81    /**
82     * Return an array of scanners corresponding to the given
83     * set of store files.
84     */
85    public static List<StoreFileScanner> getScannersForStoreFiles(
86        Collection<StoreFile> files,
87        boolean cacheBlocks,
88        boolean usePread, long readPt) throws IOException {
89      return getScannersForStoreFiles(files, cacheBlocks,
90                                     usePread, false, readPt);
91    }
92  
93    /**
94     * Return an array of scanners corresponding to the given set of store files.
95     */
96    public static List<StoreFileScanner> getScannersForStoreFiles(
97        Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
98        boolean isCompaction, long readPt) throws IOException {
99      return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction,
100         null, readPt);
101   }
102 
103   /**
104    * Return an array of scanners corresponding to the given set of store files,
105    * And set the ScanQueryMatcher for each store file scanner for further
106    * optimization
107    */
108   public static List<StoreFileScanner> getScannersForStoreFiles(
109       Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
110       boolean isCompaction, ScanQueryMatcher matcher, long readPt) throws IOException {
111     List<StoreFileScanner> scanners = new ArrayList<StoreFileScanner>(
112         files.size());
113     for (StoreFile file : files) {
114       StoreFile.Reader r = file.createReader();
115       StoreFileScanner scanner = r.getStoreFileScanner(cacheBlocks, usePread,
116           isCompaction, readPt);
117       scanner.setScanQueryMatcher(matcher);
118       scanners.add(scanner);
119     }
120     return scanners;
121   }
122 
123   public String toString() {
124     return "StoreFileScanner[" + hfs.toString() + ", cur=" + cur + "]";
125   }
126 
127   public KeyValue peek() {
128     return cur;
129   }
130 
131   public KeyValue next() throws IOException {
132     KeyValue retKey = cur;
133 
134     try {
135       // only seek if we aren't at the end. cur == null implies 'end'.
136       if (cur != null) {
137         hfs.next();
138         cur = hfs.getKeyValue();
139         if (hasMVCCInfo)
140           skipKVsNewerThanReadpoint();
141       }
142     } catch (FileNotFoundException e) {
143       throw e;
144     } catch(IOException e) {
145       throw new IOException("Could not iterate " + this, e);
146     }
147     return retKey;
148   }
149 
150   public boolean seek(KeyValue key) throws IOException {
151     if (seekCount != null) seekCount.incrementAndGet();
152 
153     try {
154       try {
155         if(!seekAtOrAfter(hfs, key)) {
156           close();
157           return false;
158         }
159 
160         cur = hfs.getKeyValue();
161 
162         return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
163       } finally {
164         realSeekDone = true;
165       }
166     } catch (FileNotFoundException e) {
167       throw e;
168     } catch (IOException ioe) {
169       throw new IOException("Could not seek " + this + " to key " + key, ioe);
170     }
171   }
172 
173   public boolean reseek(KeyValue key) throws IOException {
174     if (seekCount != null) seekCount.incrementAndGet();
175 
176     try {
177       try {
178         if (!reseekAtOrAfter(hfs, key)) {
179           close();
180           return false;
181         }
182         cur = hfs.getKeyValue();
183 
184         return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
185       } finally {
186         realSeekDone = true;
187       }
188     } catch (FileNotFoundException e) {
189       throw e;
190     } catch (IOException ioe) {
191       throw new IOException("Could not reseek " + this + " to key " + key,
192           ioe);
193     }
194   }
195 
196   protected boolean skipKVsNewerThanReadpoint() throws IOException {
197     // We want to ignore all key-values that are newer than our current
198     // readPoint
199     KeyValue startKV = cur;
200     while(enforceMVCC
201         && cur != null
202         && (cur.getMvccVersion() > readPt)) {
203       hfs.next();
204       cur = hfs.getKeyValue();
205       if (this.stopSkippingKVsIfNextRow
206           && getComparator().compareRows(cur.getBuffer(), cur.getRowOffset(),
207               cur.getRowLength(), startKV.getBuffer(), startKV.getRowOffset(),
208               startKV.getRowLength()) > 0) {
209         return false;
210       }
211     }
212 
213     if (cur == null) {
214       close();
215       return false;
216     }
217 
218     // For the optimisation in HBASE-4346, we set the KV's memstoreTS to
219     // 0, if it is older than all the scanners' read points. It is possible
220     // that a newer KV's memstoreTS was reset to 0. But, there is an
221     // older KV which was not reset to 0 (because it was
222     // not old enough during flush). Make sure that we set it correctly now,
223     // so that the comparision order does not change.
224     if (cur.getMvccVersion() <= readPt) {
225       cur.setMvccVersion(0);
226     }
227     return true;
228   }
229 
230   public void close() {
231     // Nothing to close on HFileScanner?
232     cur = null;
233   }
234 
235   /**
236    *
237    * @param s
238    * @param k
239    * @return false if not found or if k is after the end.
240    * @throws IOException
241    */
242   public static boolean seekAtOrAfter(HFileScanner s, KeyValue k)
243   throws IOException {
244     int result = s.seekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
245     if(result < 0) {
246       if (result == HConstants.INDEX_KEY_MAGIC) {
247         // using faked key
248         return true;
249       }
250       // Passed KV is smaller than first KV in file, work from start of file
251       return s.seekTo();
252     } else if(result > 0) {
253       // Passed KV is larger than current KV in file, if there is a next
254       // it is the "after", if not then this scanner is done.
255       return s.next();
256     }
257     // Seeked to the exact key
258     return true;
259   }
260 
261   static boolean reseekAtOrAfter(HFileScanner s, KeyValue k)
262   throws IOException {
263     //This function is similar to seekAtOrAfter function
264     int result = s.reseekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
265     if (result <= 0) {
266       if (result == HConstants.INDEX_KEY_MAGIC) {
267         // using faked key
268         return true;
269       }
270       // If up to now scanner is not seeked yet, this means passed KV is smaller
271       // than first KV in file, and it is the first time we seek on this file.
272       // So we also need to work from the start of file.
273       if (!s.isSeeked()) {
274         return  s.seekTo();
275       }
276       return true;
277     }
278     // passed KV is larger than current KV in file, if there is a next
279     // it is after, if not then this scanner is done.
280     return s.next();
281   }
282 
283   @Override
284   public long getSequenceID() {
285     return reader.getSequenceID();
286   }
287 
288   /**
289    * Pretend we have done a seek but don't do it yet, if possible. The hope is
290    * that we find requested columns in more recent files and won't have to seek
291    * in older files. Creates a fake key/value with the given row/column and the
292    * highest (most recent) possible timestamp we might get from this file. When
293    * users of such "lazy scanner" need to know the next KV precisely (e.g. when
294    * this scanner is at the top of the heap), they run {@link #enforceSeek()}.
295    * <p>
296    * Note that this function does guarantee that the current KV of this scanner
297    * will be advanced to at least the given KV. Because of this, it does have
298    * to do a real seek in cases when the seek timestamp is older than the
299    * highest timestamp of the file, e.g. when we are trying to seek to the next
300    * row/column and use OLDEST_TIMESTAMP in the seek key.
301    */
302   @Override
303   public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom)
304       throws IOException {
305     if (kv.getFamilyLength() == 0) {
306       useBloom = false;
307     }
308 
309     boolean haveToSeek = true;
310     if (useBloom) {
311       // check ROWCOL Bloom filter first.
312       if (reader.getBloomFilterType() == BloomType.ROWCOL) {
313         haveToSeek = reader.passesGeneralBloomFilter(kv.getBuffer(),
314             kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
315             kv.getQualifierOffset(), kv.getQualifierLength());
316       } else if (this.matcher != null && !matcher.hasNullColumnInQuery() &&
317           (kv.isDeleteFamily() || kv.isDeleteFamilyVersion())) {
318         // if there is no such delete family kv in the store file,
319         // then no need to seek.
320         haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getBuffer(),
321             kv.getRowOffset(), kv.getRowLength());
322       }
323     }
324 
325     delayedReseek = forward;
326     delayedSeekKV = kv;
327 
328     if (haveToSeek) {
329       // This row/column might be in this store file (or we did not use the
330       // Bloom filter), so we still need to seek.
331       realSeekDone = false;
332       long maxTimestampInFile = reader.getMaxTimestamp();
333       long seekTimestamp = kv.getTimestamp();
334       if (seekTimestamp > maxTimestampInFile) {
335         // Create a fake key that is not greater than the real next key.
336         // (Lower timestamps correspond to higher KVs.)
337         // To understand this better, consider that we are asked to seek to
338         // a higher timestamp than the max timestamp in this file. We know that
339         // the next point when we have to consider this file again is when we
340         // pass the max timestamp of this file (with the same row/column).
341         cur = kv.createFirstOnRowColTS(maxTimestampInFile);
342       } else {
343         // This will be the case e.g. when we need to seek to the next
344         // row/column, and we don't know exactly what they are, so we set the
345         // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this
346         // row/column.
347         enforceSeek();
348       }
349       return cur != null;
350     }
351 
352     // Multi-column Bloom filter optimization.
353     // Create a fake key/value, so that this scanner only bubbles up to the top
354     // of the KeyValueHeap in StoreScanner after we scanned this row/column in
355     // all other store files. The query matcher will then just skip this fake
356     // key/value and the store scanner will progress to the next column. This
357     // is obviously not a "real real" seek, but unlike the fake KV earlier in
358     // this method, we want this to be propagated to ScanQueryMatcher.
359     cur = kv.createLastOnRowCol();
360 
361     realSeekDone = true;
362     return true;
363   }
364 
365   Reader getReaderForTesting() {
366     return reader;
367   }
368 
369   KeyValue.KVComparator getComparator() {
370     return reader.getComparator();
371   }
372 
373   @Override
374   public boolean realSeekDone() {
375     return realSeekDone;
376   }
377 
378   @Override
379   public void enforceSeek() throws IOException {
380     if (realSeekDone)
381       return;
382 
383     if (delayedReseek) {
384       reseek(delayedSeekKV);
385     } else {
386       seek(delayedSeekKV);
387     }
388   }
389 
390   public void setScanQueryMatcher(ScanQueryMatcher matcher) {
391     this.matcher = matcher;
392   }
393 
394   @Override
395   public boolean isFileScanner() {
396     return true;
397   }
398 
399   // Test methods
400 
401   static final long getSeekCount() {
402     return seekCount.get();
403   }
404   static final void instrument() {
405     seekCount = new AtomicLong();
406   }
407 
408   @Override
409   public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns, long oldestUnexpiredTS) {
410     return reader.passesTimerangeFilter(scan, oldestUnexpiredTS)
411         && reader.passesKeyRangeFilter(scan) && reader.passesBloomFilter(scan, columns);
412   }
413 
414   @Override
415   public boolean seekToPreviousRow(KeyValue key) throws IOException {
416     try {
417       try {
418         KeyValue seekKey = KeyValue.createFirstOnRow(key.getRow());
419         if (seekCount != null) seekCount.incrementAndGet();
420         if (!hfs.seekBefore(seekKey.getBuffer(), seekKey.getKeyOffset(),
421             seekKey.getKeyLength())) {
422           close();
423           return false;
424         }
425         KeyValue firstKeyOfPreviousRow = KeyValue.createFirstOnRow(hfs
426             .getKeyValue().getRow());
427 
428         if (seekCount != null) seekCount.incrementAndGet();
429         if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) {
430           close();
431           return false;
432         }
433 
434         cur = hfs.getKeyValue();
435         this.stopSkippingKVsIfNextRow = true;
436         boolean resultOfSkipKVs;
437         try {
438           resultOfSkipKVs = skipKVsNewerThanReadpoint();
439         } finally {
440           this.stopSkippingKVsIfNextRow = false;
441         }
442         if (!resultOfSkipKVs
443             || getComparator().compareRows(cur.getBuffer(), cur.getRowOffset(),
444                 cur.getRowLength(), firstKeyOfPreviousRow.getBuffer(),
445                 firstKeyOfPreviousRow.getRowOffset(),
446                 firstKeyOfPreviousRow.getRowLength()) > 0) {
447           return seekToPreviousRow(firstKeyOfPreviousRow);
448         }
449 
450         return true;
451       } finally {
452         realSeekDone = true;
453       }
454     } catch (IOException ioe) {
455       throw new IOException("Could not seekToPreviousRow " + this + " to key "
456           + key, ioe);
457     }
458   }
459 
460   @Override
461   public boolean seekToLastRow() throws IOException {
462     byte[] lastRow = reader.getLastRowKey();
463     if (lastRow == null) {
464       return false;
465     }
466     KeyValue seekKey = KeyValue.createFirstOnRow(lastRow);
467     if (seek(seekKey)) {
468       return true;
469     } else {
470       return seekToPreviousRow(seekKey);
471     }
472   }
473 
474   @Override
475   public boolean backwardSeek(KeyValue key) throws IOException {
476     seek(key);
477     if (cur == null
478         || getComparator().compareRows(cur.getRowArray(), cur.getRowOffset(),
479             cur.getRowLength(), key.getRowArray(), key.getRowOffset(),
480             key.getRowLength()) > 0) {
481       return seekToPreviousRow(key);
482     }
483     return true;
484   }
485 
486   @Override
487   public byte[] getNextIndexedKey() {
488     return hfs.getNextIndexedKey();
489   }
490 }