View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.client;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.LinkedList;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.conf.Configuration;
27  import org.apache.hadoop.hbase.DoNotRetryIOException;
28  import org.apache.hadoop.hbase.HConstants;
29  import org.apache.hadoop.hbase.HRegionInfo;
30  import org.apache.hadoop.hbase.KeyValue;
31  import org.apache.hadoop.hbase.NotServingRegionException;
32  import org.apache.hadoop.hbase.UnknownScannerException;
33  import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
34  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
35  import org.apache.hadoop.hbase.util.Bytes;
36  import org.apache.hadoop.io.DataOutputBuffer;
37  
38  /**
39   * Implements the scanner interface for the HBase client.
40   * If there are multiple regions in a table, this scanner will iterate
41   * through them all.
42   */
43  public class ClientScanner extends AbstractClientScanner {
44      private final Log LOG = LogFactory.getLog(this.getClass());
45      private Scan scan;
46      private boolean closed = false;
47      // Current region scanner is against.  Gets cleared if current region goes
48      // wonky: e.g. if it splits on us.
49      private HRegionInfo currentRegion = null;
50      private ScannerCallable callable = null;
51      private final LinkedList<Result> cache = new LinkedList<Result>();
52      private final int caching;
53      private long lastNext;
54      // Keep lastResult returned successfully in case we have to reset scanner.
55      private Result lastResult = null;
56      private ScanMetrics scanMetrics = null;
57      private final long maxScannerResultSize;
58      private final HConnection connection;
59      private final byte[] tableName;
60      private final int scannerTimeout;
61  
62      /**
63       * Create a new ClientScanner for the specified table. An HConnection will be
64       * retrieved using the passed Configuration.
65       * Note that the passed {@link Scan}'s start row maybe changed changed. 
66       * 
67       * @param conf The {@link Configuration} to use.
68       * @param scan {@link Scan} to use in this scanner
69       * @param tableName The table that we wish to scan
70       * @throws IOException
71       */
72      public ClientScanner(final Configuration conf, final Scan scan,
73          final byte[] tableName) throws IOException {
74        this(conf, scan, tableName, HConnectionManager.getConnection(conf));
75      }
76   
77      /**
78       * Create a new ClientScanner for the specified table
79       * Note that the passed {@link Scan}'s start row maybe changed changed. 
80       * 
81       * @param conf The {@link Configuration} to use.
82       * @param scan {@link Scan} to use in this scanner
83       * @param tableName The table that we wish to scan
84       * @param connection Connection identifying the cluster
85       * @throws IOException
86       */
87      public ClientScanner(final Configuration conf, final Scan scan,
88        final byte[] tableName, HConnection connection) throws IOException {
89        if (LOG.isDebugEnabled()) {
90          LOG.debug("Creating scanner over "
91              + Bytes.toString(tableName)
92              + " starting at key '" + Bytes.toStringBinary(scan.getStartRow()) + "'");
93        }
94        this.scan = scan;
95        this.tableName = tableName;
96        this.lastNext = System.currentTimeMillis();
97        this.connection = connection;
98        this.maxScannerResultSize = conf.getLong(
99            HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY,
100           HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE);
101       this.scannerTimeout = (int) conf.getLong(
102           HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY,
103           HConstants.DEFAULT_HBASE_REGIONSERVER_LEASE_PERIOD);
104 
105       // check if application wants to collect scan metrics
106       byte[] enableMetrics = scan.getAttribute(
107         Scan.SCAN_ATTRIBUTES_METRICS_ENABLE);
108       if (enableMetrics != null && Bytes.toBoolean(enableMetrics)) {
109         scanMetrics = new ScanMetrics();
110       }
111 
112       // Use the caching from the Scan.  If not set, use the default cache setting for this table.
113       if (this.scan.getCaching() > 0) {
114         this.caching = this.scan.getCaching();
115       } else {
116         this.caching = conf.getInt("hbase.client.scanner.caching", 1);
117       }
118 
119       // initialize the scanner
120       nextScanner(this.caching, false);
121     }
122 
123     protected HConnection getConnection() {
124       return this.connection;
125     }
126 
127     protected byte[] getTableName() {
128       return this.tableName;
129     }
130 
131     protected Scan getScan() {
132       return scan;
133     }
134 
135     protected long getTimestamp() {
136       return lastNext;
137     }
138 
139     // returns true if the passed region endKey
140     private boolean checkScanStopRow(final byte [] endKey) {
141       if (this.scan.getStopRow().length > 0) {
142         // there is a stop row, check to see if we are past it.
143         byte [] stopRow = scan.getStopRow();
144         int cmp = Bytes.compareTo(stopRow, 0, stopRow.length,
145           endKey, 0, endKey.length);
146         if (cmp <= 0) {
147           // stopRow <= endKey (endKey is equals to or larger than stopRow)
148           // This is a stop.
149           return true;
150         }
151       }
152       return false; //unlikely.
153     }
154 
155     /*
156      * Gets a scanner for the next region.  If this.currentRegion != null, then
157      * we will move to the endrow of this.currentRegion.  Else we will get
158      * scanner at the scan.getStartRow().  We will go no further, just tidy
159      * up outstanding scanners, if <code>currentRegion != null</code> and
160      * <code>done</code> is true.
161      * @param nbRows
162      * @param done Server-side says we're done scanning.
163      */
164     private boolean nextScanner(int nbRows, final boolean done)
165     throws IOException {
166       // Close the previous scanner if it's open
167       if (this.callable != null) {
168         this.callable.setClose();
169         callable.withRetries();
170         this.callable = null;
171       }
172 
173       // Where to start the next scanner
174       byte [] localStartKey;
175 
176       // if we're at end of table, close and return false to stop iterating
177       if (this.currentRegion != null) {
178         byte [] endKey = this.currentRegion.getEndKey();
179         if (endKey == null ||
180             Bytes.equals(endKey, HConstants.EMPTY_BYTE_ARRAY) ||
181             checkScanStopRow(endKey) ||
182             done) {
183           close();
184           if (LOG.isDebugEnabled()) {
185             LOG.debug("Finished with scanning at " + this.currentRegion);
186           }
187           return false;
188         }
189         localStartKey = endKey;
190         if (LOG.isDebugEnabled()) {
191           LOG.debug("Finished with region " + this.currentRegion);
192         }
193       } else {
194         localStartKey = this.scan.getStartRow();
195       }
196 
197       if (LOG.isDebugEnabled()) {
198         LOG.debug("Advancing internal scanner to startKey at '" +
199           Bytes.toStringBinary(localStartKey) + "'");
200       }
201       try {
202         callable = getScannerCallable(localStartKey, nbRows);
203         // Open a scanner on the region server starting at the
204         // beginning of the region
205         callable.withRetries();
206         this.currentRegion = callable.getHRegionInfo();
207         if (this.scanMetrics != null) {
208           this.scanMetrics.countOfRegions.inc();
209         }
210       } catch (IOException e) {
211         close();
212         throw e;
213       }
214       return true;
215     }
216 
217     protected ScannerCallable getScannerCallable(byte [] localStartKey,
218         int nbRows) {
219       scan.setStartRow(localStartKey);
220       ScannerCallable s = new ScannerCallable(getConnection(),
221         getTableName(), scan, this.scanMetrics);
222       s.setCaching(nbRows);
223       return s;
224     }
225 
226     /**
227      * Publish the scan metrics. For now, we use scan.setAttribute to pass the metrics back to the
228      * application or TableInputFormat.Later, we could push it to other systems. We don't use metrics
229      * framework because it doesn't support multi-instances of the same metrics on the same machine;
230      * for scan/map reduce scenarios, we will have multiple scans running at the same time.
231      *
232      * By default, scan metrics are disabled; if the application wants to collect them, this behavior
233      * can be turned on by calling calling:
234      *
235      * scan.setAttribute(SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE))
236      */
237     private void writeScanMetrics() throws IOException {
238       if (this.scanMetrics == null) {
239         return;
240       }
241       final DataOutputBuffer d = new DataOutputBuffer();
242       scanMetrics.write(d);
243       scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_DATA, d.getData());
244     }
245 
246     public Result next() throws IOException {
247       // If the scanner is closed and there's nothing left in the cache, next is a no-op.
248       if (cache.size() == 0 && this.closed) {
249         return null;
250       }
251       if (cache.size() == 0) {
252         Result [] values = null;
253         long remainingResultSize = maxScannerResultSize;
254         int countdown = this.caching;
255         // We need to reset it if it's a new callable that was created
256         // with a countdown in nextScanner
257         callable.setCaching(this.caching);
258         // This flag is set when we want to skip the result returned.  We do
259         // this when we reset scanner because it split under us.
260         boolean skipFirst = false;
261         do {
262           try {
263             if (skipFirst) {
264               // Skip only the first row (which was the last row of the last
265               // already-processed batch).
266               callable.setCaching(1);
267               values = callable.withRetries();
268               callable.setCaching(this.caching);
269               skipFirst = false;
270             }
271             // Server returns a null values if scanning is to stop.  Else,
272             // returns an empty array if scanning is to go on and we've just
273             // exhausted current region.
274             values = callable.withRetries();
275           } catch (DoNotRetryIOException e) {
276             if (e instanceof UnknownScannerException) {
277               long timeout = lastNext + scannerTimeout;
278               // If we are over the timeout, throw this exception to the client
279               // Else, it's because the region moved and we used the old id
280               // against the new region server; reset the scanner.
281               if (timeout < System.currentTimeMillis()) {
282                 long elapsed = System.currentTimeMillis() - lastNext;
283                 ScannerTimeoutException ex = new ScannerTimeoutException(
284                     elapsed + "ms passed since the last invocation, " +
285                         "timeout is currently set to " + scannerTimeout);
286                 ex.initCause(e);
287                 throw ex;
288               }
289             } else {
290               Throwable cause = e.getCause();
291               if (cause == null || (!(cause instanceof NotServingRegionException)
292                   && !(cause instanceof RegionServerStoppedException))) {
293                 throw e;
294               }
295             }
296             // Else, its signal from depths of ScannerCallable that we got an
297             // NSRE on a next and that we need to reset the scanner.
298             if (this.lastResult != null) {
299               this.scan.setStartRow(this.lastResult.getRow());
300               // Skip first row returned.  We already let it out on previous
301               // invocation.
302               skipFirst = true;
303             }
304             // Clear region
305             this.currentRegion = null;
306             continue;
307           }
308           long currentTime = System.currentTimeMillis();
309           if (this.scanMetrics != null ) {
310             this.scanMetrics.sumOfMillisSecBetweenNexts.inc(currentTime-lastNext);
311           }
312           lastNext = currentTime;
313           if (values != null && values.length > 0) {
314             for (Result rs : values) {
315               cache.add(rs);
316               for (KeyValue kv : rs.raw()) {
317                   remainingResultSize -= kv.heapSize();
318               }
319               countdown--;
320               this.lastResult = rs;
321             }
322           }
323           // Values == null means server-side filter has determined we must STOP
324         } while (remainingResultSize > 0 && countdown > 0 && nextScanner(countdown, values == null));
325       }
326 
327       if (cache.size() > 0) {
328         return cache.poll();
329       }
330 
331       // if we exhausted this scanner before calling close, write out the scan metrics
332       writeScanMetrics();
333       return null;
334     }
335 
336     /**
337      * Get <param>nbRows</param> rows.
338      * How many RPCs are made is determined by the {@link Scan#setCaching(int)}
339      * setting (or hbase.client.scanner.caching in hbase-site.xml).
340      * @param nbRows number of rows to return
341      * @return Between zero and <param>nbRows</param> RowResults.  Scan is done
342      * if returned array is of zero-length (We never return null).
343      * @throws IOException
344      */
345     public Result [] next(int nbRows) throws IOException {
346       // Collect values to be returned here
347       ArrayList<Result> resultSets = new ArrayList<Result>(nbRows);
348       for(int i = 0; i < nbRows; i++) {
349         Result next = next();
350         if (next != null) {
351           resultSets.add(next);
352         } else {
353           break;
354         }
355       }
356       return resultSets.toArray(new Result[resultSets.size()]);
357     }
358 
359     public void close() {
360       if (callable != null) {
361         callable.setClose();
362         try {
363           callable.withRetries();
364         } catch (IOException e) {
365           // We used to catch this error, interpret, and rethrow. However, we
366           // have since decided that it's not nice for a scanner's close to
367           // throw exceptions. Chances are it was just an UnknownScanner
368           // exception due to lease time out.
369         } finally {
370           // we want to output the scan metrics even if an error occurred on close
371           try {
372             writeScanMetrics();
373           } catch (IOException e) {
374             // As above, we still don't want the scanner close() method to throw.
375           }
376         }
377         callable = null;
378       }
379       closed = true;
380     }
381 }