View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.client.Scan;
27  
28  import java.io.IOException;
29  import java.util.ArrayList;
30  import java.util.List;
31  import java.util.NavigableSet;
32  
33  /**
34   * Scanner scans both the memstore and the HStore. Coalesce KeyValue stream
35   * into List<KeyValue> for a single row.
36   */
37  class StoreScanner implements KeyValueScanner, InternalScanner, ChangedReadersObserver {
38    static final Log LOG = LogFactory.getLog(StoreScanner.class);
39    private Store store;
40    private ScanQueryMatcher matcher;
41    private KeyValueHeap heap;
42    private boolean cacheBlocks;
43  
44    // Used to indicate that the scanner has closed (see HBASE-1107)
45    // Doesnt need to be volatile because it's always accessed via synchronized methods
46    private boolean closing = false;
47    private final boolean isGet;
48  
49    // if heap == null and lastTop != null, you need to reseek given the key below
50    private KeyValue lastTop = null;
51  
52    /**
53     * Opens a scanner across memstore, snapshot, and all StoreFiles.
54     *
55     * @param store who we scan
56     * @param scan the spec
57     * @param columns which columns we are scanning
58     * @throws IOException
59     */
60    StoreScanner(Store store, Scan scan, final NavigableSet<byte[]> columns) throws IOException {
61      this.store = store;
62      this.cacheBlocks = scan.getCacheBlocks();
63      matcher = new ScanQueryMatcher(scan, store.getFamily().getName(),
64          columns, store.ttl, store.comparator.getRawComparator(),
65          store.versionsToReturn(scan.getMaxVersions()));
66  
67      this.isGet = scan.isGetScan();
68      // pass columns = try to filter out unnecessary ScanFiles
69      List<KeyValueScanner> scanners = getScanners(scan, columns);
70  
71      // Seek all scanners to the initial key
72      for(KeyValueScanner scanner : scanners) {
73        scanner.seek(matcher.getStartKey());
74      }
75  
76      // Combine all seeked scanners with a heap
77      heap = new KeyValueHeap(scanners, store.comparator);
78  
79      this.store.addChangedReaderObserver(this);
80    }
81  
82    /**
83     * Used for major compactions.<p>
84     *
85     * Opens a scanner across specified StoreFiles.
86     * @param store who we scan
87     * @param scan the spec
88     * @param scanners ancilliary scanners
89     */
90    StoreScanner(Store store, Scan scan, List<? extends KeyValueScanner> scanners)
91        throws IOException {
92      this.store = store;
93      this.cacheBlocks = false;
94      this.isGet = false;
95      matcher = new ScanQueryMatcher(scan, store.getFamily().getName(),
96          null, store.ttl, store.comparator.getRawComparator(),
97          store.versionsToReturn(scan.getMaxVersions()));
98  
99      // Seek all scanners to the initial key
100     for(KeyValueScanner scanner : scanners) {
101       scanner.seek(matcher.getStartKey());
102     }
103 
104     // Combine all seeked scanners with a heap
105     heap = new KeyValueHeap(scanners, store.comparator);
106   }
107 
108   // Constructor for testing.
109   StoreScanner(final Scan scan, final byte [] colFamily, final long ttl,
110       final KeyValue.KVComparator comparator,
111       final NavigableSet<byte[]> columns,
112       final List<KeyValueScanner> scanners)
113         throws IOException {
114     this.store = null;
115     this.isGet = false;
116     this.cacheBlocks = scan.getCacheBlocks();
117     this.matcher = new ScanQueryMatcher(scan, colFamily, columns, ttl,
118         comparator.getRawComparator(), scan.getMaxVersions());
119 
120     // Seek all scanners to the initial key
121     for(KeyValueScanner scanner : scanners) {
122       scanner.seek(matcher.getStartKey());
123     }
124     heap = new KeyValueHeap(scanners, comparator);
125   }
126 
127   /*
128    * @return List of scanners ordered properly.
129    */
130   private List<KeyValueScanner> getScanners() throws IOException {
131     // First the store file scanners
132 
133     // TODO this used to get the store files in descending order,
134     // but now we get them in ascending order, which I think is
135     // actually more correct, since memstore get put at the end.
136     List<StoreFileScanner> sfScanners = StoreFileScanner
137       .getScannersForStoreFiles(store.getStorefiles(), cacheBlocks, isGet);
138     List<KeyValueScanner> scanners =
139       new ArrayList<KeyValueScanner>(sfScanners.size()+1);
140     scanners.addAll(sfScanners);
141     // Then the memstore scanners
142     scanners.addAll(this.store.memstore.getScanners());
143     return scanners;
144   }
145 
146   /*
147    * @return List of scanners to seek, possibly filtered by StoreFile.
148    */
149   private List<KeyValueScanner> getScanners(Scan scan,
150       final NavigableSet<byte[]> columns) throws IOException {
151     // First the store file scanners
152     List<StoreFileScanner> sfScanners = StoreFileScanner
153       .getScannersForStoreFiles(store.getStorefiles(), cacheBlocks, isGet);
154     List<KeyValueScanner> scanners =
155       new ArrayList<KeyValueScanner>(sfScanners.size()+1);
156 
157     // include only those scan files which pass all filters
158     for (StoreFileScanner sfs : sfScanners) {
159       if (sfs.shouldSeek(scan, columns)) {
160         scanners.add(sfs);
161       }
162     }
163 
164     // Then the memstore scanners
165     if (this.store.memstore.shouldSeek(scan)) {
166       scanners.addAll(this.store.memstore.getScanners());
167     }
168     return scanners;
169   }
170 
171   public synchronized KeyValue peek() {
172     if (this.heap == null) {
173       return this.lastTop;
174     }
175     return this.heap.peek();
176   }
177 
178   public KeyValue next() {
179     // throw runtime exception perhaps?
180     throw new RuntimeException("Never call StoreScanner.next()");
181   }
182 
183   public synchronized void close() {
184     if (this.closing) return;
185     this.closing = true;
186     // under test, we dont have a this.store
187     if (this.store != null)
188       this.store.deleteChangedReaderObserver(this);
189     if (this.heap != null)
190       this.heap.close();
191     this.heap = null; // CLOSED!
192     this.lastTop = null; // If both are null, we are closed.
193   }
194 
195   public synchronized boolean seek(KeyValue key) throws IOException {
196     if (this.heap == null) {
197 
198       List<KeyValueScanner> scanners = getScanners();
199 
200       heap = new KeyValueHeap(scanners, store.comparator);
201     }
202 
203     return this.heap.seek(key);
204   }
205 
206   /**
207    * Get the next row of values from this Store.
208    * @param outResult
209    * @param limit
210    * @return true if there are more rows, false if scanner is done
211    */
212   public synchronized boolean next(List<KeyValue> outResult, int limit) throws IOException {
213     //DebugPrint.println("SS.next");
214 
215     checkReseek();
216 
217     // if the heap was left null, then the scanners had previously run out anyways, close and
218     // return.
219     if (this.heap == null) {
220       close();
221       return false;
222     }
223 
224     KeyValue peeked = this.heap.peek();
225     if (peeked == null) {
226       close();
227       return false;
228     }
229 
230     // only call setRow if the row changes; avoids confusing the query matcher
231     // if scanning intra-row
232     if ((matcher.row == null) || !peeked.matchingRow(matcher.row)) {
233       matcher.setRow(peeked.getRow());
234     }
235 
236     KeyValue kv;
237     List<KeyValue> results = new ArrayList<KeyValue>();
238     LOOP: while((kv = this.heap.peek()) != null) {
239       ScanQueryMatcher.MatchCode qcode = matcher.match(kv);
240       //DebugPrint.println("SS peek kv = " + kv + " with qcode = " + qcode);
241       switch(qcode) {
242         case INCLUDE:
243           KeyValue next = this.heap.next();
244           results.add(next);
245           if (limit > 0 && (results.size() == limit)) {
246             break LOOP;
247           }
248           continue;
249 
250         case DONE:
251           // copy jazz
252           outResult.addAll(results);
253           return true;
254 
255         case DONE_SCAN:
256           close();
257 
258           // copy jazz
259           outResult.addAll(results);
260 
261           return false;
262 
263         case SEEK_NEXT_ROW:
264           if (!matcher.moreRowsMayExistAfter(kv)) {
265             outResult.addAll(results);
266             return false;
267           }
268           heap.next();
269           break;
270 
271         case SEEK_NEXT_COL:
272           // TODO hfile needs 'hinted' seeking to prevent it from
273           // reseeking from the start of the block on every dang seek.
274           // We need that API and expose it the scanner chain.
275           heap.next();
276           break;
277 
278         case SKIP:
279           this.heap.next();
280           break;
281 
282         case SEEK_NEXT_USING_HINT:
283           KeyValue nextKV = matcher.getNextKeyHint(kv);
284           if (nextKV != null) {
285             reseek(nextKV);
286           } else {
287             heap.next();
288           }
289           break;
290 
291         default:
292           throw new RuntimeException("UNEXPECTED");
293       }
294     }
295 
296     if (!results.isEmpty()) {
297       // copy jazz
298       outResult.addAll(results);
299       return true;
300     }
301 
302     // No more keys
303     close();
304     return false;
305   }
306 
307   public synchronized boolean next(List<KeyValue> outResult) throws IOException {
308     return next(outResult, -1);
309   }
310 
311   // Implementation of ChangedReadersObserver
312   public synchronized void updateReaders() throws IOException {
313     if (this.closing) return;
314 
315     // All public synchronized API calls will call 'checkReseek' which will cause
316     // the scanner stack to reseek if this.heap==null && this.lastTop != null.
317     // But if two calls to updateReaders() happen without a 'next' or 'peek' then we
318     // will end up calling this.peek() which would cause a reseek in the middle of a updateReaders
319     // which is NOT what we want, not to mention could cause an NPE. So we early out here.
320     if (this.heap == null) return;
321 
322     // this could be null.
323     this.lastTop = this.peek();
324 
325     //DebugPrint.println("SS updateReaders, topKey = " + lastTop);
326 
327     // close scanners to old obsolete Store files
328     this.heap.close(); // bubble thru and close all scanners.
329     this.heap = null; // the re-seeks could be slow (access HDFS) free up memory ASAP
330 
331     // Let the next() call handle re-creating and seeking
332   }
333 
334   private void checkReseek() throws IOException {
335     if (this.heap == null && this.lastTop != null) {
336       resetScannerStack(this.lastTop);
337       this.lastTop = null; // gone!
338     }
339     // else dont need to reseek
340   }
341 
342   private void resetScannerStack(KeyValue lastTopKey) throws IOException {
343     if (heap != null) {
344       throw new RuntimeException("StoreScanner.reseek run on an existing heap!");
345     }
346 
347     /* When we have the scan object, should we not pass it to getScanners()
348      * to get a limited set of scanners? We did so in the constructor and we
349      * could have done it now by storing the scan object from the constructor */
350     List<KeyValueScanner> scanners = getScanners();
351 
352     for(KeyValueScanner scanner : scanners) {
353       scanner.seek(lastTopKey);
354     }
355 
356     // Combine all seeked scanners with a heap
357     heap = new KeyValueHeap(scanners, store.comparator);
358 
359     // Reset the state of the Query Matcher and set to top row
360     matcher.reset();
361     KeyValue kv = heap.peek();
362     matcher.setRow((kv == null ? lastTopKey : kv).getRow());
363   }
364 
365   @Override
366   public synchronized boolean reseek(KeyValue kv) throws IOException {
367     //Heap cannot be null, because this is only called from next() which
368     //guarantees that heap will never be null before this call.
369     return this.heap.reseek(kv);
370   }
371 }