View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.io.InterruptedIOException;
24  import java.util.ArrayList;
25  import java.util.List;
26  import java.util.NavigableSet;
27  import java.util.concurrent.CountDownLatch;
28  import java.util.concurrent.locks.ReentrantLock;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.hbase.Cell;
35  import org.apache.hadoop.hbase.DoNotRetryIOException;
36  import org.apache.hadoop.hbase.HConstants;
37  import org.apache.hadoop.hbase.KeyValue;
38  import org.apache.hadoop.hbase.KeyValue.KVComparator;
39  import org.apache.hadoop.hbase.KeyValueUtil;
40  import org.apache.hadoop.hbase.client.IsolationLevel;
41  import org.apache.hadoop.hbase.client.Scan;
42  import org.apache.hadoop.hbase.executor.ExecutorService;
43  import org.apache.hadoop.hbase.filter.Filter;
44  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
45  import org.apache.hadoop.hbase.regionserver.handler.ParallelSeekHandler;
46  import org.apache.hadoop.hbase.util.Bytes;
47  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
48  
49  /**
50   * Scanner that scans both the memstore and the store files of a Store. Coalesces the
51   * KeyValue stream into a {@code List<KeyValue>} for a single row.
52   */
53  @InterfaceAudience.Private
54  public class StoreScanner extends NonReversedNonLazyKeyValueScanner
55      implements KeyValueScanner, InternalScanner, ChangedReadersObserver {
56    static final Log LOG = LogFactory.getLog(StoreScanner.class);
      // Store being scanned; the test-only constructors pass null here.
57    protected Store store;
58    protected ScanQueryMatcher matcher;
      // Heap over the currently open scanners; set to null by close() and updateReaders().
59    protected KeyValueHeap heap;
60    protected boolean cacheBlocks;
61  
      // Number of included cells counted so far for the current row; reset when the row changes.
62    protected long countPerRow = 0;
      // Taken from Scan.getMaxResultsPerColumnFamily(); next() only enforces it when > -1.
63    protected int storeLimit = -1;
      // Taken from Scan.getRowOffsetPerColumnFamily(); that many included cells are skipped per row.
64    protected int storeOffset = 0;
65  
66    // Used to indicate that the scanner has closed (see HBASE-1107)
67    // Does not need to be volatile: close() and updateReaders() only touch it while holding 'lock'
68    protected boolean closing = false;
69    protected final boolean isGet;
70    protected final boolean explicitColumnQuery;
71    protected final boolean useRowColBloom;
72    /**
73     * A flag that enables StoreFileScanner parallel-seeking
74     */
75    protected boolean isParallelSeekEnabled = false;
      // Executor the parallel seek handlers are submitted to; only assigned when parallel seek is on.
76    protected ExecutorService executor;
77    protected final Scan scan;
78    protected final NavigableSet<byte[]> columns;
      // Computed in the internal constructor as (now - ttl).
79    protected final long oldestUnexpiredTS;
80    protected final long now;
81    protected final int minVersions;
82  
83    /**
84     * The number of KVs seen by the scanner. Includes explicitly skipped KVs, but not
85     * KVs skipped via seeking to next row/column. TODO: estimate them?
86     */
87    private long kvsScanned = 0;
      // Last KV examined by next(List, int); used for counting and for the scan-order check.
88    private KeyValue prevKV = null;
89  
90    /** We don't ever expect to change this, the constant is just for clarity. */
91    static final boolean LAZY_SEEK_ENABLED_BY_DEFAULT = true;
      /** Configuration key that switches on parallel seeking of store file scanners. */
92    public static final String STORESCANNER_PARALLEL_SEEK_ENABLE =
93        "hbase.storescanner.parallel.seek.enable";
94  
95    /** Used during unit testing to ensure that lazy seek does save seek ops */
96    protected static boolean lazySeekEnabledGlobally =
97        LAZY_SEEK_ENABLED_BY_DEFAULT;
98  
99    // if heap == null and lastTop != null, you need to reseek given the key below
100   protected KeyValue lastTop = null;
101 
102   // Whether to use pread for this scan
103   private boolean scanUsePread = false;
      // Guards heap, lastTop and closing across peek/seek/reseek/next/close/updateReaders.
104   protected ReentrantLock lock = new ReentrantLock();
105   
      // Read point handed to the store when fetching scanners and to the parallel seek handlers.
106   private final long readPt;
107 
108   // used by the injection framework to test race between StoreScanner construction and compaction
109   enum StoreScannerCompactionRace {
110     BEFORE_SEEK,
111     AFTER_SEEK,
112     COMPACT_COMPLETE
113   }
114   
115   /** An internal constructor. */
116   protected StoreScanner(Store store, boolean cacheBlocks, Scan scan,
117       final NavigableSet<byte[]> columns, long ttl, int minVersions, long readPt) {
118     this.readPt = readPt;
119     this.store = store;
120     this.cacheBlocks = cacheBlocks;
121     isGet = scan.isGetScan();
122     int numCol = columns == null ? 0 : columns.size();
123     explicitColumnQuery = numCol > 0;
124     this.scan = scan;
125     this.columns = columns;
126     this.now = EnvironmentEdgeManager.currentTimeMillis();
127     this.oldestUnexpiredTS = now - ttl;
128     this.minVersions = minVersions;
129 
130     if (store != null && ((HStore)store).getHRegion() != null
131         && ((HStore)store).getHRegion().getBaseConf() != null) {
132       Configuration conf = ((HStore) store).getHRegion().getBaseConf();
133       this.scanUsePread = conf.getBoolean("hbase.storescanner.use.pread", scan.isSmall());
134     } else {
135       this.scanUsePread = scan.isSmall();
136     }
137 
138     // We look up row-column Bloom filters for multi-column queries as part of
139     // the seek operation. However, we also look the row-column Bloom filter
140     // for multi-row (non-"get") scans because this is not done in
141     // StoreFile.passesBloomFilter(Scan, SortedSet<byte[]>).
142     useRowColBloom = numCol > 1 || (!isGet && numCol == 1);
143 
144     // The parallel-seeking is on :
145     // 1) the config value is *true*
146     // 2) store has more than one store file
147     if (store != null && ((HStore)store).getHRegion() != null
148         && store.getStorefilesCount() > 1) {
149       RegionServerServices rsService = ((HStore)store).getHRegion().getRegionServerServices();
150       if (rsService == null || !rsService.getConfiguration().getBoolean(
151             STORESCANNER_PARALLEL_SEEK_ENABLE, false)) return;
152       isParallelSeekEnabled = true;
153       executor = rsService.getExecutorService();
154     }
155   }
156 
157   /**
158    * Opens a scanner across memstore, snapshot, and all StoreFiles. Assumes we
159    * are not in a compaction.
160    *
161    * @param store who we scan
162    * @param scan the spec
163    * @param columns which columns we are scanning
164    * @throws IOException
165    */
166   public StoreScanner(Store store, ScanInfo scanInfo, Scan scan, final NavigableSet<byte[]> columns,
167       long readPt)
168                               throws IOException {
169     this(store, scan.getCacheBlocks(), scan, columns, scanInfo.getTtl(),
170         scanInfo.getMinVersions(), readPt);
171     if (columns != null && scan.isRaw()) {
172       throw new DoNotRetryIOException(
173           "Cannot specify any column for a raw scan");
174     }
175     matcher = new ScanQueryMatcher(scan, scanInfo, columns,
176         ScanType.USER_SCAN, Long.MAX_VALUE, HConstants.LATEST_TIMESTAMP,
177         oldestUnexpiredTS, now, store.getCoprocessorHost());
178 
179     this.store.addChangedReaderObserver(this);
180 
181     // Pass columns to try to filter out unnecessary StoreFiles.
182     List<KeyValueScanner> scanners = getScannersNoCompaction();
183 
184     // Seek all scanners to the start of the Row (or if the exact matching row
185     // key does not exist, then to the start of the next matching Row).
186     // Always check bloom filter to optimize the top row seek for delete
187     // family marker.
188     seekScanners(scanners, matcher.getStartKey(), explicitColumnQuery
189         && lazySeekEnabledGlobally, isParallelSeekEnabled);
190 
191     // set storeLimit
192     this.storeLimit = scan.getMaxResultsPerColumnFamily();
193 
194     // set rowOffset
195     this.storeOffset = scan.getRowOffsetPerColumnFamily();
196 
197     // Combine all seeked scanners with a heap
198     resetKVHeap(scanners, store.getComparator());
199   }
200 
/**
 * Used for compactions.<p>
 *
 * Opens a scanner across the specified StoreFiles, without a drop-deletes row range.
 * @param store who we scan
 * @param scanInfo supplies the TTL and min versions and is handed to the query matcher
 * @param scan the spec
 * @param scanners ancillary scanners
 * @param scanType scan type handed to the query matcher
 * @param smallestReadPoint the readPoint that we should use for tracking
 *          versions
 * @param earliestPutTs earliest put timestamp handed to the query matcher
 */
public StoreScanner(Store store, ScanInfo scanInfo, Scan scan,
    List<? extends KeyValueScanner> scanners, ScanType scanType,
    long smallestReadPoint, long earliestPutTs) throws IOException {
  // No drop-deletes range: both row bounds are null.
  this(store, scanInfo, scan, scanners, scanType, smallestReadPoint, earliestPutTs,
      (byte[]) null, (byte[]) null);
}
216 
/**
 * Used for compactions that drop deletes from a limited range of rows.<p>
 *
 * Opens a scanner across the specified StoreFiles; the scan type is always
 * {@link ScanType#COMPACT_RETAIN_DELETES}.
 * @param store who we scan
 * @param scanInfo supplies the TTL and min versions and is handed to the query matcher
 * @param scan the spec
 * @param scanners ancillary scanners
 * @param smallestReadPoint the readPoint that we should use for tracking versions
 * @param earliestPutTs earliest put timestamp handed to the query matcher
 * @param dropDeletesFromRow The inclusive left bound of the range; can be EMPTY_START_ROW.
 * @param dropDeletesToRow The exclusive right bound of the range; can be EMPTY_END_ROW.
 */
public StoreScanner(Store store, ScanInfo scanInfo, Scan scan,
    List<? extends KeyValueScanner> scanners, long smallestReadPoint, long earliestPutTs,
    byte[] dropDeletesFromRow, byte[] dropDeletesToRow) throws IOException {
  this(store, scanInfo, scan, scanners,
      ScanType.COMPACT_RETAIN_DELETES,
      smallestReadPoint, earliestPutTs,
      dropDeletesFromRow, dropDeletesToRow);
}
234 
235   private StoreScanner(Store store, ScanInfo scanInfo, Scan scan,
236       List<? extends KeyValueScanner> scanners, ScanType scanType, long smallestReadPoint,
237       long earliestPutTs, byte[] dropDeletesFromRow, byte[] dropDeletesToRow) throws IOException {
238     this(store, false, scan, null, scanInfo.getTtl(), scanInfo.getMinVersions(),
239         ((HStore)store).getHRegion().getReadpoint(IsolationLevel.READ_COMMITTED));
240     if (dropDeletesFromRow == null) {
241       matcher = new ScanQueryMatcher(scan, scanInfo, null, scanType, smallestReadPoint,
242           earliestPutTs, oldestUnexpiredTS, now, store.getCoprocessorHost());
243     } else {
244       matcher = new ScanQueryMatcher(scan, scanInfo, null, smallestReadPoint, earliestPutTs,
245           oldestUnexpiredTS, now, dropDeletesFromRow, dropDeletesToRow, store.getCoprocessorHost());
246     }
247 
248     // Filter the list of scanners using Bloom filters, time range, TTL, etc.
249     scanners = selectScannersFrom(scanners);
250 
251     // Seek all scanners to the initial key
252     seekScanners(scanners, matcher.getStartKey(), false, isParallelSeekEnabled);
253 
254     // Combine all seeked scanners with a heap
255     resetKVHeap(scanners, store.getComparator());
256   }
257 
/**
 * Constructor for testing. Uses {@link HConstants#LATEST_TIMESTAMP} as the earliest put
 * timestamp.
 */
StoreScanner(final Scan scan, ScanInfo scanInfo,
    ScanType scanType, final NavigableSet<byte[]> columns,
    final List<KeyValueScanner> scanners) throws IOException {
  // The read point is 0 because the test bypasses Store.
  this(scan, scanInfo, scanType, columns, scanners, HConstants.LATEST_TIMESTAMP, 0L);
}
267 
/** Constructor for testing, with an explicit earliest put timestamp. */
StoreScanner(final Scan scan, ScanInfo scanInfo,
    ScanType scanType, final NavigableSet<byte[]> columns,
    final List<KeyValueScanner> scanners, long earliestPutTs)
    throws IOException {
  // The read point is 0 because the test bypasses Store.
  this(scan, scanInfo, scanType, columns, scanners, earliestPutTs, 0L);
}
277   
278   private StoreScanner(final Scan scan, ScanInfo scanInfo,
279       ScanType scanType, final NavigableSet<byte[]> columns,
280       final List<KeyValueScanner> scanners, long earliestPutTs, long readPt)
281           throws IOException {
282     this(null, scan.getCacheBlocks(), scan, columns, scanInfo.getTtl(),
283         scanInfo.getMinVersions(), readPt);
284     this.matcher = new ScanQueryMatcher(scan, scanInfo, columns, scanType,
285         Long.MAX_VALUE, earliestPutTs, oldestUnexpiredTS, now, null);
286 
287     // In unit tests, the store could be null
288     if (this.store != null) {
289       this.store.addChangedReaderObserver(this);
290     }
291     // Seek all scanners to the initial key
292     seekScanners(scanners, matcher.getStartKey(), false, isParallelSeekEnabled);
293     resetKVHeap(scanners, scanInfo.getComparator());
294   }
295 
296   /**
297    * Get a filtered list of scanners. Assumes we are not in a compaction.
298    * @return list of scanners to seek
299    */
300   protected List<KeyValueScanner> getScannersNoCompaction() throws IOException {
301     final boolean isCompaction = false;
302     boolean usePread = isGet || scanUsePread;
303     return selectScannersFrom(store.getScanners(cacheBlocks, isGet, usePread,
304         isCompaction, matcher, scan.getStartRow(), scan.getStopRow(), this.readPt));
305   }
306 
307   /**
308    * Seek the specified scanners with the given key
309    * @param scanners
310    * @param seekKey
311    * @param isLazy true if using lazy seek
312    * @param isParallelSeek true if using parallel seek
313    * @throws IOException
314    */
315   protected void seekScanners(List<? extends KeyValueScanner> scanners,
316       KeyValue seekKey, boolean isLazy, boolean isParallelSeek)
317       throws IOException {
318     // Seek all scanners to the start of the Row (or if the exact matching row
319     // key does not exist, then to the start of the next matching Row).
320     // Always check bloom filter to optimize the top row seek for delete
321     // family marker.
322     if (isLazy) {
323       for (KeyValueScanner scanner : scanners) {
324         scanner.requestSeek(seekKey, false, true);
325       }
326     } else {
327       if (!isParallelSeek) {
328         for (KeyValueScanner scanner : scanners) {
329           scanner.seek(seekKey);
330         }
331       } else {
332         parallelSeek(scanners, seekKey);
333       }
334     }
335   }
336 
337   protected void resetKVHeap(List<? extends KeyValueScanner> scanners,
338       KVComparator comparator) throws IOException {
339     // Combine all seeked scanners with a heap
340     heap = new KeyValueHeap(scanners, comparator);
341   }
342 
343   /**
344    * Filters the given list of scanners using Bloom filter, time range, and
345    * TTL.
346    */
347   protected List<KeyValueScanner> selectScannersFrom(
348       final List<? extends KeyValueScanner> allScanners) {
349     boolean memOnly;
350     boolean filesOnly;
351     if (scan instanceof InternalScan) {
352       InternalScan iscan = (InternalScan)scan;
353       memOnly = iscan.isCheckOnlyMemStore();
354       filesOnly = iscan.isCheckOnlyStoreFiles();
355     } else {
356       memOnly = false;
357       filesOnly = false;
358     }
359 
360     List<KeyValueScanner> scanners =
361         new ArrayList<KeyValueScanner>(allScanners.size());
362 
363     // We can only exclude store files based on TTL if minVersions is set to 0.
364     // Otherwise, we might have to return KVs that have technically expired.
365     long expiredTimestampCutoff = minVersions == 0 ? oldestUnexpiredTS :
366         Long.MIN_VALUE;
367 
368     // include only those scan files which pass all filters
369     for (KeyValueScanner kvs : allScanners) {
370       boolean isFile = kvs.isFileScanner();
371       if ((!isFile && filesOnly) || (isFile && memOnly)) {
372         continue;
373       }
374 
375       if (kvs.shouldUseScanner(scan, columns, expiredTimestampCutoff)) {
376         scanners.add(kvs);
377       }
378     }
379     return scanners;
380   }
381 
382   @Override
383   public KeyValue peek() {
384     lock.lock();
385     try {
386     if (this.heap == null) {
387       return this.lastTop;
388     }
389     return this.heap.peek();
390     } finally {
391       lock.unlock();
392     }
393   }
394 
395   @Override
396   public KeyValue next() {
397     // throw runtime exception perhaps?
398     throw new RuntimeException("Never call StoreScanner.next()");
399   }
400 
401   @Override
402   public void close() {
403     lock.lock();
404     try {
405     if (this.closing) return;
406     this.closing = true;
407     // under test, we dont have a this.store
408     if (this.store != null)
409       this.store.deleteChangedReaderObserver(this);
410     if (this.heap != null)
411       this.heap.close();
412     this.heap = null; // CLOSED!
413     this.lastTop = null; // If both are null, we are closed.
414     } finally {
415       lock.unlock();
416     }
417   }
418 
419   @Override
420   public boolean seek(KeyValue key) throws IOException {
421     lock.lock();
422     try {
423     // reset matcher state, in case that underlying store changed
424     checkReseek();
425     return this.heap.seek(key);
426     } finally {
427       lock.unlock();
428     }
429   }
430 
431   /**
432    * Get the next row of values from this Store.
       * Runs while holding {@code lock}. Each cell at the top of the heap is passed to the
       * query matcher, and the returned match code decides whether the cell is added to
       * {@code outResult} and how the heap is advanced (next, seek to next column/row, or
       * seek to a hint).
433    * @param outResult list the included cells are appended to
434    * @param limit maximum number of cells to add in this call; a value of 0 or less means
       *          no limit. A negative value also makes the matcher's row be set again on
       *          every call.
435    * @return true if there are more rows, false if scanner is done
436    */
437   @Override
438   public boolean next(List<Cell> outResult, int limit) throws IOException {
439     lock.lock();
440     try {
          // The scanner stack was rebuilt and the top row changed: report "more" without
          // adding anything so the caller looks at the new top.
441     if (checkReseek()) {
442       return true;
443     }
444 
445     // if the heap was left null, then the scanners had previously run out anyways, close and
446     // return.
447     if (this.heap == null) {
448       close();
449       return false;
450     }
451 
452     KeyValue kv = this.heap.peek();
453     if (kv == null) {
454       close();
455       return false;
456     }
457 
458     // only call setRow if the row changes; avoids confusing the query matcher
459     // if scanning intra-row
460     byte[] row = kv.getBuffer();
461     int offset = kv.getRowOffset();
462     short length = kv.getRowLength();
463     if (limit < 0 || matcher.row == null || !Bytes.equals(row, offset, length, matcher.row,
464         matcher.rowOffset, matcher.rowLength)) {
465       this.countPerRow = 0;
466       matcher.setRow(row, offset, length);
467     }
468 
469     // Only do a sanity-check if store and comparator are available.
470     KeyValue.KVComparator comparator =
471         store != null ? store.getComparator() : null;
472 
          // Number of cells added to outResult by this call.
473     int count = 0;
474     LOOP: do {
475       if (prevKV != kv) ++kvsScanned; // Do object compare - we set prevKV from the same heap.
476       checkScanOrder(prevKV, kv, comparator);
477       prevKV = kv;
478 
479       ScanQueryMatcher.MatchCode qcode = matcher.match(kv);
            // May downgrade a seek to a plain SKIP/INCLUDE (see optimize()).
480       qcode = optimize(qcode, kv);
481       switch(qcode) {
482         case INCLUDE:
483         case INCLUDE_AND_SEEK_NEXT_ROW:
484         case INCLUDE_AND_SEEK_NEXT_COL:
485 
486           Filter f = matcher.getFilter();
487           if (f != null) {
488             // TODO convert Scan Query Matcher to be Cell instead of KV based ?
489             kv = KeyValueUtil.ensureKeyValue(f.transformCell(kv));
490           }
491 
492           this.countPerRow++;
                // Past the per-row window of (offset + limit) cells: finish this row.
493           if (storeLimit > -1 &&
494               this.countPerRow > (storeLimit + storeOffset)) {
495             // do what SEEK_NEXT_ROW does.
496             if (!matcher.moreRowsMayExistAfter(kv)) {
497               return false;
498             }
499             seekToNextRow(kv);
500             break LOOP;
501           }
502 
503           // add to results only if we have skipped #storeOffset kvs
504           // also update metric accordingly
505           if (this.countPerRow > storeOffset) {
506             outResult.add(kv);
507             count++;
508           }
509 
                // Advance the heap according to the (possibly optimized) match code.
510           if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW) {
511             if (!matcher.moreRowsMayExistAfter(kv)) {
512               return false;
513             }
514             seekToNextRow(kv);
515           } else if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL) {
516             seekAsDirection(matcher.getKeyForNextColumn(kv));
517           } else {
518             this.heap.next();
519           }
520 
521           if (limit > 0 && (count == limit)) {
522             break LOOP;
523           }
                // Jumps to the loop condition, which re-peeks the heap.
524           continue;
525 
526         case DONE:
527           return true;
528 
529         case DONE_SCAN:
530           close();
531           return false;
532 
533         case SEEK_NEXT_ROW:
534           // This is just a relatively simple end of scan fix, to short-cut end
535           // us if there is an endKey in the scan.
536           if (!matcher.moreRowsMayExistAfter(kv)) {
537             return false;
538           }
539 
540           seekToNextRow(kv);
541           break;
542 
543         case SEEK_NEXT_COL:
544           seekAsDirection(matcher.getKeyForNextColumn(kv));
545           break;
546 
547         case SKIP:
548           this.heap.next();
549           break;
550 
551         case SEEK_NEXT_USING_HINT:
552           // TODO convert reseek to Cell?
553           KeyValue nextKV = KeyValueUtil.ensureKeyValue(matcher.getNextKeyHint(kv));
554           if (nextKV != null) {
555             seekAsDirection(nextKV);
556           } else {
557             heap.next();
558           }
559           break;
560 
561         default:
562           throw new RuntimeException("UNEXPECTED");
563       }
564     } while((kv = this.heap.peek()) != null);
565 
          // Reached when the heap ran dry or on "break LOOP".
566     if (count > 0) {
567       return true;
568     }
569 
570     // No more keys
571     close();
572     return false;
573     } finally {
574       lock.unlock();
575     }
576   }
577 
578   /*
579    * See if we should actually SEEK or rather just SKIP to the next Cell.
580    * (See HBASE-13109)
581    */
582   private ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) {
583     switch(qcode) {
584     case INCLUDE_AND_SEEK_NEXT_COL:
585     case SEEK_NEXT_COL:
586     {
587       byte[] nextIndexedKey = getNextIndexedKey();
588       if (nextIndexedKey != null && nextIndexedKey != HConstants.NO_NEXT_INDEXED_KEY
589           && matcher.compareKeyForNextColumn(nextIndexedKey, cell) >= 0) {
590         return qcode == MatchCode.SEEK_NEXT_COL ? MatchCode.SKIP : MatchCode.INCLUDE;
591       }
592       break;
593     }
594     case INCLUDE_AND_SEEK_NEXT_ROW:
595     case SEEK_NEXT_ROW:
596     {
597       byte[] nextIndexedKey = getNextIndexedKey();
598       if (nextIndexedKey != null && nextIndexedKey != HConstants.NO_NEXT_INDEXED_KEY
599           && matcher.compareKeyForNextRow(nextIndexedKey, cell) >= 0) {
600         return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : MatchCode.INCLUDE;
601       }
602       break;
603     }
604     default:
605       break;
606     }
607     return qcode;
608   }
609 
610   @Override
611   public boolean next(List<Cell> outResult) throws IOException {
612     return next(outResult, -1);
613   }
614 
615   // Implementation of ChangedReadersObserver
616   @Override
617   public void updateReaders() throws IOException {
618     lock.lock();
619     try {
620     if (this.closing) return;
621 
622     // All public synchronized API calls will call 'checkReseek' which will cause
623     // the scanner stack to reseek if this.heap==null && this.lastTop != null.
624     // But if two calls to updateReaders() happen without a 'next' or 'peek' then we
625     // will end up calling this.peek() which would cause a reseek in the middle of a updateReaders
626     // which is NOT what we want, not to mention could cause an NPE. So we early out here.
627     if (this.heap == null) return;
628 
629     // this could be null.
630     this.lastTop = this.peek();
631 
632     //DebugPrint.println("SS updateReaders, topKey = " + lastTop);
633 
634     // close scanners to old obsolete Store files
635     this.heap.close(); // bubble thru and close all scanners.
636     this.heap = null; // the re-seeks could be slow (access HDFS) free up memory ASAP
637 
638     // Let the next() call handle re-creating and seeking
639     } finally {
640       lock.unlock();
641     }
642   }
643 
644   /**
645    * @return true if top of heap has changed (and KeyValueHeap has to try the
646    *         next KV)
647    * @throws IOException
648    */
649   protected boolean checkReseek() throws IOException {
650     if (this.heap == null && this.lastTop != null) {
651       resetScannerStack(this.lastTop);
652       if (this.heap.peek() == null
653           || store.getComparator().compareRows(this.lastTop, this.heap.peek()) != 0) {
654         LOG.debug("Storescanner.peek() is changed where before = "
655             + this.lastTop.toString() + ",and after = " + this.heap.peek());
656         this.lastTop = null;
657         return true;
658       }
659       this.lastTop = null; // gone!
660     }
661     // else dont need to reseek
662     return false;
663   }
664 
665   protected void resetScannerStack(KeyValue lastTopKey) throws IOException {
666     if (heap != null) {
667       throw new RuntimeException("StoreScanner.reseek run on an existing heap!");
668     }
669 
670     /* When we have the scan object, should we not pass it to getScanners()
671      * to get a limited set of scanners? We did so in the constructor and we
672      * could have done it now by storing the scan object from the constructor */
673     List<KeyValueScanner> scanners = getScannersNoCompaction();
674 
675     // Seek all scanners to the initial key
676     seekScanners(scanners, lastTopKey, false, isParallelSeekEnabled);
677 
678     // Combine all seeked scanners with a heap
679     resetKVHeap(scanners, store.getComparator());
680 
681     // Reset the state of the Query Matcher and set to top row.
682     // Only reset and call setRow if the row changes; avoids confusing the
683     // query matcher if scanning intra-row.
684     KeyValue kv = heap.peek();
685     if (kv == null) {
686       kv = lastTopKey;
687     }
688     byte[] row = kv.getBuffer();
689     int offset = kv.getRowOffset();
690     short length = kv.getRowLength();
691     if ((matcher.row == null) || !Bytes.equals(row, offset, length, matcher.row,
692         matcher.rowOffset, matcher.rowLength)) {
693       this.countPerRow = 0;
694       matcher.reset();
695       matcher.setRow(row, offset, length);
696     }
697   }
698 
699   /**
700    * Check whether scan as expected order
701    * @param prevKV
702    * @param kv
703    * @param comparator
704    * @throws IOException
705    */
706   protected void checkScanOrder(KeyValue prevKV, KeyValue kv,
707       KeyValue.KVComparator comparator) throws IOException {
708     // Check that the heap gives us KVs in an increasing order.
709     assert prevKV == null || comparator == null
710         || comparator.compare(prevKV, kv) <= 0 : "Key " + prevKV
711         + " followed by a " + "smaller key " + kv + " in cf " + store;
712   }
713 
714   protected boolean seekToNextRow(KeyValue kv) throws IOException {
715     return reseek(matcher.getKeyForNextRow(kv));
716   }
717 
718   /**
719    * Do a reseek in a normal StoreScanner(scan forward)
720    * @param kv
721    * @return true if scanner has values left, false if end of scanner
722    * @throws IOException
723    */
724   protected boolean seekAsDirection(KeyValue kv)
725       throws IOException {
726     return reseek(kv);
727   }
728 
729   @Override
730   public boolean reseek(KeyValue kv) throws IOException {
731     lock.lock();
732     try {
733     //Heap will not be null, if this is called from next() which.
734     //If called from RegionScanner.reseek(...) make sure the scanner
735     //stack is reset if needed.
736     checkReseek();
737     if (explicitColumnQuery && lazySeekEnabledGlobally) {
738       return heap.requestSeek(kv, true, useRowColBloom);
739     }
740     return heap.reseek(kv);
741     } finally {
742       lock.unlock();
743     }
744   }
745 
746   @Override
747   public long getSequenceID() {
748     return 0;
749   }
750 
751   /**
752    * Seek storefiles in parallel to optimize IO latency as much as possible
753    * @param scanners the list {@link KeyValueScanner}s to be read from
754    * @param kv the KeyValue on which the operation is being requested
755    * @throws IOException
756    */
757   private void parallelSeek(final List<? extends KeyValueScanner>
758       scanners, final KeyValue kv) throws IOException {
759     if (scanners.isEmpty()) return;
760     int storeFileScannerCount = scanners.size();
761     CountDownLatch latch = new CountDownLatch(storeFileScannerCount);
762     List<ParallelSeekHandler> handlers = 
763         new ArrayList<ParallelSeekHandler>(storeFileScannerCount);
764     for (KeyValueScanner scanner : scanners) {
765       if (scanner instanceof StoreFileScanner) {
766         ParallelSeekHandler seekHandler = new ParallelSeekHandler(scanner, kv,
767           this.readPt, latch);
768         executor.submit(seekHandler);
769         handlers.add(seekHandler);
770       } else {
771         scanner.seek(kv);
772         latch.countDown();
773       }
774     }
775 
776     try {
777       latch.await();
778     } catch (InterruptedException ie) {
779       throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
780     }
781 
782     for (ParallelSeekHandler handler : handlers) {
783       if (handler.getErr() != null) {
784         throw new IOException(handler.getErr());
785       }
786     }
787   }
788 
789   /**
790    * Used in testing.
791    * @return all scanners in no particular order
792    */
793   List<KeyValueScanner> getAllScannersForTesting() {
794     List<KeyValueScanner> allScanners = new ArrayList<KeyValueScanner>();
795     KeyValueScanner current = heap.getCurrentForTesting();
796     if (current != null)
797       allScanners.add(current);
798     for (KeyValueScanner scanner : heap.getHeap())
799       allScanners.add(scanner);
800     return allScanners;
801   }
802 
803   static void enableLazySeekGlobally(boolean enable) {
804     lazySeekEnabledGlobally = enable;
805   }
806 
807   /**
808    * @return The estimated number of KVs seen by this scanner (includes some skipped KVs).
809    */
810   public long getEstimatedNumberOfKvsScanned() {
811     return this.kvsScanned;
812   }
813 
814   @Override
815   public byte[] getNextIndexedKey() {
816     return this.heap.getNextIndexedKey();
817   }
818 }
819