View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.Collection;
26  import java.util.List;
27  import java.util.SortedSet;
28  import java.util.concurrent.atomic.AtomicLong;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.KeyValue;
33  import org.apache.hadoop.hbase.client.Scan;
34  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
35  import org.apache.hadoop.hbase.regionserver.StoreFile.Reader;
36  
37  /**
38   * KeyValueScanner adaptor over the Reader.  It also provides hooks into
39   * bloom filter things.
40   */
41  public class StoreFileScanner implements KeyValueScanner {
42    static final Log LOG = LogFactory.getLog(Store.class);
43  
44    // the reader it comes from:
45    private final StoreFile.Reader reader;
46    private final HFileScanner hfs;
47    private KeyValue cur = null;
48  
49    private boolean realSeekDone;
50    private boolean delayedReseek;
51    private KeyValue delayedSeekKV;
52  
53    private boolean enforceMVCC = false;
54    private boolean hasMVCCInfo = false;
55  
56    private static AtomicLong seekCount;
57  
58    private ScanQueryMatcher matcher;
59  
60    /**
61     * Implements a {@link KeyValueScanner} on top of the specified {@link HFileScanner}
62     * @param hfs HFile scanner
63     */
64    public StoreFileScanner(StoreFile.Reader reader, HFileScanner hfs, boolean useMVCC, boolean hasMVCC) {
65      this.reader = reader;
66      this.hfs = hfs;
67      this.enforceMVCC = useMVCC;
68      this.hasMVCCInfo = hasMVCC;
69    }
70  
71    /**
72     * Return an array of scanners corresponding to the given
73     * set of store files.
74     */
75    public static List<StoreFileScanner> getScannersForStoreFiles(
76        Collection<StoreFile> files,
77        boolean cacheBlocks,
78        boolean usePread) throws IOException {
79      return getScannersForStoreFiles(files, cacheBlocks,
80                                     usePread, false);
81    }
82  
83    /**
84     * Return an array of scanners corresponding to the given set of store files.
85     */
86    public static List<StoreFileScanner> getScannersForStoreFiles(
87        Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
88        boolean isCompaction) throws IOException {
89      return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction,
90          null);
91    }
92  
93    /**
94     * Return an array of scanners corresponding to the given set of store files,
95     * And set the ScanQueryMatcher for each store file scanner for further
96     * optimization
97     */
98    public static List<StoreFileScanner> getScannersForStoreFiles(
99        Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
100       boolean isCompaction, ScanQueryMatcher matcher) throws IOException {
101     List<StoreFileScanner> scanners = new ArrayList<StoreFileScanner>(
102         files.size());
103     for (StoreFile file : files) {
104       StoreFile.Reader r = file.createReader();
105       StoreFileScanner scanner = r.getStoreFileScanner(cacheBlocks, usePread,
106           isCompaction);
107       scanner.setScanQueryMatcher(matcher);
108       scanners.add(scanner);
109     }
110     return scanners;
111   }
112 
113   public String toString() {
114     return "StoreFileScanner[" + hfs.toString() + ", cur=" + cur + "]";
115   }
116 
117   public KeyValue peek() {
118     return cur;
119   }
120 
121   public KeyValue next() throws IOException {
122     KeyValue retKey = cur;
123 
124     try {
125       // only seek if we aren't at the end. cur == null implies 'end'.
126       if (cur != null) {
127         hfs.next();
128         cur = hfs.getKeyValue();
129         if (hasMVCCInfo)
130           skipKVsNewerThanReadpoint();
131       }
132     } catch(IOException e) {
133       throw new IOException("Could not iterate " + this, e);
134     }
135     return retKey;
136   }
137 
138   public boolean seek(KeyValue key) throws IOException {
139     if (seekCount != null) seekCount.incrementAndGet();
140 
141     try {
142       try {
143         if(!seekAtOrAfter(hfs, key)) {
144           close();
145           return false;
146         }
147 
148         cur = hfs.getKeyValue();
149 
150         return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
151       } finally {
152         realSeekDone = true;
153       }
154     } catch (IOException ioe) {
155       throw new IOException("Could not seek " + this + " to key " + key, ioe);
156     }
157   }
158 
159   public boolean reseek(KeyValue key) throws IOException {
160     if (seekCount != null) seekCount.incrementAndGet();
161 
162     try {
163       try {
164         if (!reseekAtOrAfter(hfs, key)) {
165           close();
166           return false;
167         }
168         cur = hfs.getKeyValue();
169 
170         return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
171       } finally {
172         realSeekDone = true;
173       }
174     } catch (IOException ioe) {
175       throw new IOException("Could not reseek " + this + " to key " + key,
176           ioe);
177     }
178   }
179 
180   protected boolean skipKVsNewerThanReadpoint() throws IOException {
181     long readPoint = MultiVersionConsistencyControl.getThreadReadPoint();
182 
183     // We want to ignore all key-values that are newer than our current
184     // readPoint
185     while(enforceMVCC
186         && cur != null
187         && (cur.getMemstoreTS() > readPoint)) {
188       hfs.next();
189       cur = hfs.getKeyValue();
190     }
191 
192     if (cur == null) {
193       close();
194       return false;
195     }
196 
197     // For the optimisation in HBASE-4346, we set the KV's memstoreTS to
198     // 0, if it is older than all the scanners' read points. It is possible
199     // that a newer KV's memstoreTS was reset to 0. But, there is an
200     // older KV which was not reset to 0 (because it was
201     // not old enough during flush). Make sure that we set it correctly now,
202     // so that the comparision order does not change.
203     if (cur.getMemstoreTS() <= readPoint) {
204       cur.setMemstoreTS(0);
205     }
206     return true;
207   }
208 
209   public void close() {
210     // Nothing to close on HFileScanner?
211     cur = null;
212   }
213 
214   /**
215    *
216    * @param s
217    * @param k
218    * @return
219    * @throws IOException
220    */
221   public static boolean seekAtOrAfter(HFileScanner s, KeyValue k)
222   throws IOException {
223     int result = s.seekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
224     if(result < 0) {
225       // Passed KV is smaller than first KV in file, work from start of file
226       return s.seekTo();
227     } else if(result > 0) {
228       // Passed KV is larger than current KV in file, if there is a next
229       // it is the "after", if not then this scanner is done.
230       return s.next();
231     }
232     // Seeked to the exact key
233     return true;
234   }
235 
236   static boolean reseekAtOrAfter(HFileScanner s, KeyValue k)
237   throws IOException {
238     //This function is similar to seekAtOrAfter function
239     int result = s.reseekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
240     if (result <= 0) {
241       // If up to now scanner is not seeked yet, this means passed KV is smaller
242       // than first KV in file, and it is the first time we seek on this file.
243       // So we also need to work from the start of file.
244       if (!s.isSeeked()) {
245         return  s.seekTo();
246       }
247       return true;
248     } else {
249       // passed KV is larger than current KV in file, if there is a next
250       // it is after, if not then this scanner is done.
251       return s.next();
252     }
253   }
254 
255   @Override
256   public long getSequenceID() {
257     return reader.getSequenceID();
258   }
259 
260   /**
261    * Pretend we have done a seek but don't do it yet, if possible. The hope is
262    * that we find requested columns in more recent files and won't have to seek
263    * in older files. Creates a fake key/value with the given row/column and the
264    * highest (most recent) possible timestamp we might get from this file. When
265    * users of such "lazy scanner" need to know the next KV precisely (e.g. when
266    * this scanner is at the top of the heap), they run {@link #enforceSeek()}.
267    * <p>
268    * Note that this function does guarantee that the current KV of this scanner
269    * will be advanced to at least the given KV. Because of this, it does have
270    * to do a real seek in cases when the seek timestamp is older than the
271    * highest timestamp of the file, e.g. when we are trying to seek to the next
272    * row/column and use OLDEST_TIMESTAMP in the seek key.
273    */
274   @Override
275   public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom)
276       throws IOException {
277     if (kv.getFamilyLength() == 0) {
278       useBloom = false;
279     }
280 
281     boolean haveToSeek = true;
282     if (useBloom) {
283       // check ROWCOL Bloom filter first.
284       if (reader.getBloomFilterType() == StoreFile.BloomType.ROWCOL) {
285         haveToSeek = reader.passesGeneralBloomFilter(kv.getBuffer(),
286             kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
287             kv.getQualifierOffset(), kv.getQualifierLength());
288       } else if (this.matcher != null && !matcher.hasNullColumnInQuery() &&
289           kv.isDeleteFamily()) {
290         // if there is no such delete family kv in the store file,
291         // then no need to seek.
292         haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getBuffer(),
293             kv.getRowOffset(), kv.getRowLength());
294       }
295     }
296 
297     delayedReseek = forward;
298     delayedSeekKV = kv;
299 
300     if (haveToSeek) {
301       // This row/column might be in this store file (or we did not use the
302       // Bloom filter), so we still need to seek.
303       realSeekDone = false;
304       long maxTimestampInFile = reader.getMaxTimestamp();
305       long seekTimestamp = kv.getTimestamp();
306       if (seekTimestamp > maxTimestampInFile) {
307         // Create a fake key that is not greater than the real next key.
308         // (Lower timestamps correspond to higher KVs.)
309         // To understand this better, consider that we are asked to seek to
310         // a higher timestamp than the max timestamp in this file. We know that
311         // the next point when we have to consider this file again is when we
312         // pass the max timestamp of this file (with the same row/column).
313         cur = kv.createFirstOnRowColTS(maxTimestampInFile);
314       } else {
315         // This will be the case e.g. when we need to seek to the next
316         // row/column, and we don't know exactly what they are, so we set the
317         // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this
318         // row/column.
319         enforceSeek();
320       }
321       return cur != null;
322     }
323 
324     // Multi-column Bloom filter optimization.
325     // Create a fake key/value, so that this scanner only bubbles up to the top
326     // of the KeyValueHeap in StoreScanner after we scanned this row/column in
327     // all other store files. The query matcher will then just skip this fake
328     // key/value and the store scanner will progress to the next column. This
329     // is obviously not a "real real" seek, but unlike the fake KV earlier in
330     // this method, we want this to be propagated to ScanQueryMatcher.
331     cur = kv.createLastOnRowCol();
332 
333     realSeekDone = true;
334     return true;
335   }
336 
337   Reader getReaderForTesting() {
338     return reader;
339   }
340 
341   @Override
342   public boolean realSeekDone() {
343     return realSeekDone;
344   }
345 
346   @Override
347   public void enforceSeek() throws IOException {
348     if (realSeekDone)
349       return;
350 
351     if (delayedReseek) {
352       reseek(delayedSeekKV);
353     } else {
354       seek(delayedSeekKV);
355     }
356   }
357 
358   public void setScanQueryMatcher(ScanQueryMatcher matcher) {
359     this.matcher = matcher;
360   }
361 
362   @Override
363   public boolean isFileScanner() {
364     return true;
365   }
366 
367   // Test methods
368 
369   static final long getSeekCount() {
370     return seekCount.get();
371   }
372   static final void instrument() {
373     seekCount = new AtomicLong();
374   }
375 
376   @Override
377   public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns, long oldestUnexpiredTS) {
378     return reader.passesTimerangeFilter(scan, oldestUnexpiredTS)
379         && reader.passesKeyRangeFilter(scan) && reader.passesBloomFilter(scan, columns);
380   }
381 }