View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.client;
19  
20  import java.io.IOException;
21  import java.util.LinkedList;
22  
23  import com.google.common.annotations.VisibleForTesting;
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.classification.InterfaceStability;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.Cell;
30  import org.apache.hadoop.hbase.DoNotRetryIOException;
31  import org.apache.hadoop.hbase.HBaseConfiguration;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.KeyValueUtil;
35  import org.apache.hadoop.hbase.NotServingRegionException;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.UnknownScannerException;
38  import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
39  import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
40  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
41  import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos;
42  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
43  import org.apache.hadoop.hbase.util.Bytes;
44  
45  /**
46   * Implements the scanner interface for the HBase client.
47   * If there are multiple regions in a table, this scanner will iterate
48   * through them all.
49   */
50  @InterfaceAudience.Public
51  @InterfaceStability.Stable
52  public class ClientScanner extends AbstractClientScanner {
53      private final Log LOG = LogFactory.getLog(this.getClass());
54      protected Scan scan;
55      protected boolean closed = false;
56      // Current region scanner is against.  Gets cleared if current region goes
57      // wonky: e.g. if it splits on us.
58      protected HRegionInfo currentRegion = null;
59      protected ScannerCallable callable = null;
60      protected final LinkedList<Result> cache = new LinkedList<Result>();
61      protected final int caching;
62      protected long lastNext;
63      // Keep lastResult returned successfully in case we have to reset scanner.
64      protected Result lastResult = null;
65      protected final long maxScannerResultSize;
66      private final HConnection connection;
67      private final TableName tableName;
68      protected final int scannerTimeout;
69      protected boolean scanMetricsPublished = false;
70      protected RpcRetryingCaller<Result []> caller;
71      protected RpcControllerFactory rpcControllerFactory;
72  
73      /**
74       * Create a new ClientScanner for the specified table. An HConnection will be
75       * retrieved using the passed Configuration.
76       * Note that the passed {@link Scan}'s start row maybe changed changed.
77       *
78       * @param conf The {@link Configuration} to use.
79       * @param scan {@link Scan} to use in this scanner
80       * @param tableName The table that we wish to scan
81       * @throws IOException
82       */
83      @Deprecated
84      public ClientScanner(final Configuration conf, final Scan scan,
85          final TableName tableName) throws IOException {
86        this(conf, scan, tableName, HConnectionManager.getConnection(conf));
87      }
88  
89      /**
90       * @deprecated Use {@link #ClientScanner(Configuration, Scan, TableName)}
91       */
92      @Deprecated
93      public ClientScanner(final Configuration conf, final Scan scan,
94          final byte [] tableName) throws IOException {
95        this(conf, scan, TableName.valueOf(tableName));
96      }
97  
98  
99      /**
100      * Create a new ClientScanner for the specified table
101      * Note that the passed {@link Scan}'s start row maybe changed changed.
102      *
103      * @param conf The {@link Configuration} to use.
104      * @param scan {@link Scan} to use in this scanner
105      * @param tableName The table that we wish to scan
106      * @param connection Connection identifying the cluster
107      * @throws IOException
108      */
109   public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName,
110       HConnection connection) throws IOException {
111     this(conf, scan, tableName, connection,
112       RpcRetryingCallerFactory.instantiate(conf, connection.getStatisticsTracker()),
113         RpcControllerFactory.instantiate(conf));
114   }
115 
116   /**
117    * @deprecated Use {@link #ClientScanner(Configuration, Scan, TableName, HConnection)}
118    */
119   @Deprecated
120   public ClientScanner(final Configuration conf, final Scan scan, final byte [] tableName,
121       HConnection connection) throws IOException {
122     this(conf, scan, TableName.valueOf(tableName), connection, new RpcRetryingCallerFactory(conf),
123         RpcControllerFactory.instantiate(conf));
124   }
125 
126   /**
127    * @deprecated Use
128    *             {@link #ClientScanner(Configuration, Scan, TableName, HConnection,
129    *             RpcRetryingCallerFactory, RpcControllerFactory)}
130    *             instead
131    */
132   @Deprecated
133   public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName,
134       HConnection connection, RpcRetryingCallerFactory rpcFactory) throws IOException {
135     this(conf, scan, tableName, connection, rpcFactory, RpcControllerFactory.instantiate(conf));
136   }
137 
138   /**
139    * Create a new ClientScanner for the specified table Note that the passed {@link Scan}'s start
140    * row maybe changed changed.
141    * @param conf The {@link Configuration} to use.
142    * @param scan {@link Scan} to use in this scanner
143    * @param tableName The table that we wish to scan
144    * @param connection Connection identifying the cluster
145    * @throws IOException
146    */
147   public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName,
148       HConnection connection, RpcRetryingCallerFactory rpcFactory,
149       RpcControllerFactory controllerFactory) throws IOException {
150       if (LOG.isTraceEnabled()) {
151         LOG.trace("Scan table=" + tableName
152             + ", startRow=" + Bytes.toStringBinary(scan.getStartRow()));
153       }
154       this.scan = scan;
155       this.tableName = tableName;
156       this.lastNext = System.currentTimeMillis();
157       this.connection = connection;
158       if (scan.getMaxResultSize() > 0) {
159         this.maxScannerResultSize = scan.getMaxResultSize();
160       } else {
161         this.maxScannerResultSize = conf.getLong(
162           HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY,
163           HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE);
164       }
165       this.scannerTimeout = HBaseConfiguration.getInt(conf,
166         HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD,
167         HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY,
168         HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD);
169 
170       // check if application wants to collect scan metrics
171       initScanMetrics(scan);
172 
173       // Use the caching from the Scan.  If not set, use the default cache setting for this table.
174       if (this.scan.getCaching() > 0) {
175         this.caching = this.scan.getCaching();
176       } else {
177         this.caching = conf.getInt(
178             HConstants.HBASE_CLIENT_SCANNER_CACHING,
179             HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING);
180       }
181 
182       this.caller = rpcFactory.<Result[]> newCaller();
183       this.rpcControllerFactory = controllerFactory;
184 
185       initializeScannerInConstruction();
186     }
187 
188     protected void initializeScannerInConstruction() throws IOException{
189       // initialize the scanner
190       nextScanner(this.caching, false);
191     }
192 
193     protected HConnection getConnection() {
194       return this.connection;
195     }
196 
197     /**
198      * @return Table name
199      * @deprecated Since 0.96.0; use {@link #getTable()}
200      */
201     @Deprecated
202     protected byte [] getTableName() {
203       return this.tableName.getName();
204     }
205 
206     protected TableName getTable() {
207       return this.tableName;
208     }
209 
210     protected Scan getScan() {
211       return scan;
212     }
213 
214     protected long getTimestamp() {
215       return lastNext;
216     }
217 
218     protected long getMaxResultSize() {
219       return maxScannerResultSize;
220     }
221 
222     // returns true if the passed region endKey
223     protected boolean checkScanStopRow(final byte [] endKey) {
224       if (this.scan.getStopRow().length > 0) {
225         // there is a stop row, check to see if we are past it.
226         byte [] stopRow = scan.getStopRow();
227         int cmp = Bytes.compareTo(stopRow, 0, stopRow.length,
228           endKey, 0, endKey.length);
229         if (cmp <= 0) {
230           // stopRow <= endKey (endKey is equals to or larger than stopRow)
231           // This is a stop.
232           return true;
233         }
234       }
235       return false; //unlikely.
236     }
237 
238     /*
239      * Gets a scanner for the next region.  If this.currentRegion != null, then
240      * we will move to the endrow of this.currentRegion.  Else we will get
241      * scanner at the scan.getStartRow().  We will go no further, just tidy
242      * up outstanding scanners, if <code>currentRegion != null</code> and
243      * <code>done</code> is true.
244      * @param nbRows
245      * @param done Server-side says we're done scanning.
246      */
247   protected boolean nextScanner(int nbRows, final boolean done)
248     throws IOException {
249       // Close the previous scanner if it's open
250       if (this.callable != null) {
251         this.callable.setClose();
252         this.caller.callWithRetries(callable);
253         this.callable = null;
254       }
255 
256       // Where to start the next scanner
257       byte [] localStartKey;
258 
259       // if we're at end of table, close and return false to stop iterating
260       if (this.currentRegion != null) {
261         byte [] endKey = this.currentRegion.getEndKey();
262         if (endKey == null ||
263             Bytes.equals(endKey, HConstants.EMPTY_BYTE_ARRAY) ||
264             checkScanStopRow(endKey) ||
265             done) {
266           close();
267           if (LOG.isTraceEnabled()) {
268             LOG.trace("Finished " + this.currentRegion);
269           }
270           return false;
271         }
272         localStartKey = endKey;
273         if (LOG.isTraceEnabled()) {
274           LOG.trace("Finished " + this.currentRegion);
275         }
276       } else {
277         localStartKey = this.scan.getStartRow();
278       }
279 
280       if (LOG.isDebugEnabled() && this.currentRegion != null) {
281         // Only worth logging if NOT first region in scan.
282         LOG.debug("Advancing internal scanner to startKey at '" +
283           Bytes.toStringBinary(localStartKey) + "'");
284       }
285       try {
286         callable = getScannerCallable(localStartKey, nbRows);
287         // Open a scanner on the region server starting at the
288         // beginning of the region
289         this.caller.callWithRetries(callable);
290         this.currentRegion = callable.getHRegionInfo();
291         if (this.scanMetrics != null) {
292           this.scanMetrics.countOfRegions.incrementAndGet();
293         }
294       } catch (IOException e) {
295         close();
296         throw e;
297       }
298       return true;
299     }
300 
301     @InterfaceAudience.Private
302     protected ScannerCallable getScannerCallable(byte [] localStartKey,
303         int nbRows) {
304       scan.setStartRow(localStartKey);
305       ScannerCallable s = new ScannerCallable(getConnection(),
306         getTable(), scan, this.scanMetrics, rpcControllerFactory.newController());
307       s.setCaching(nbRows);
308       return s;
309     }
310 
311     /**
312      * Publish the scan metrics. For now, we use scan.setAttribute to pass the metrics back to the
313      * application or TableInputFormat.Later, we could push it to other systems. We don't use metrics
314      * framework because it doesn't support multi-instances of the same metrics on the same machine;
315      * for scan/map reduce scenarios, we will have multiple scans running at the same time.
316      *
317      * By default, scan metrics are disabled; if the application wants to collect them, this behavior
318      * can be turned on by calling calling:
319      *
320      * scan.setAttribute(SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE))
321      */
322     protected void writeScanMetrics() {
323       if (this.scanMetrics == null || scanMetricsPublished) {
324         return;
325       }
326       MapReduceProtos.ScanMetrics pScanMetrics = ProtobufUtil.toScanMetrics(scanMetrics);
327       scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_DATA, pScanMetrics.toByteArray());
328       scanMetricsPublished = true;
329     }
330 
331     @Override
332     public Result next() throws IOException {
333       // If the scanner is closed and there's nothing left in the cache, next is a no-op.
334       if (cache.size() == 0 && this.closed) {
335         return null;
336       }
337       if (cache.size() == 0) {
338         loadCache();
339       }
340 
341       if (cache.size() > 0) {
342         return cache.poll();
343       }
344 
345       // if we exhausted this scanner before calling close, write out the scan metrics
346       writeScanMetrics();
347       return null;
348     }
349 
350   @VisibleForTesting
351   public int getCacheSize() {
352     return cache != null ? cache.size() : 0;
353   }
354 
355   /**
356    * Contact the servers to load more {@link Result}s in the cache.
357    */
358   protected void loadCache() throws IOException {
359     Result [] values = null;
360     long remainingResultSize = maxScannerResultSize;
361     int countdown = this.caching;
362     // We need to reset it if it's a new callable that was created
363     // with a countdown in nextScanner
364     callable.setCaching(this.caching);
365     // This flag is set when we want to skip the result returned.  We do
366     // this when we reset scanner because it split under us.
367     boolean skipFirst = false;
368     boolean retryAfterOutOfOrderException  = true;
369     // We don't expect that the server will have more results for us if
370     // it doesn't tell us otherwise. We rely on the size or count of results
371     boolean serverHasMoreResults = false;
372     do {
373       try {
374         if (skipFirst) {
375           // Skip only the first row (which was the last row of the last
376           // already-processed batch).
377           callable.setCaching(1);
378           values = this.caller.callWithRetries(callable);
379           callable.setCaching(this.caching);
380           skipFirst = false;
381         }
382         // Server returns a null values if scanning is to stop.  Else,
383         // returns an empty array if scanning is to go on and we've just
384         // exhausted current region.
385         values = this.caller.callWithRetries(callable);
386         if (skipFirst && values != null && values.length == 1) {
387           skipFirst = false; // Already skipped, unset it before scanning again
388           values = this.caller.callWithRetries(callable);
389         }
390         retryAfterOutOfOrderException  = true;
391       } catch (DoNotRetryIOException e) {
392         // DNRIOEs are thrown to make us break out of retries.  Some types of DNRIOEs want us
393         // to reset the scanner and come back in again.
394         if (e instanceof UnknownScannerException) {
395           long timeout = lastNext + scannerTimeout;
396           // If we are over the timeout, throw this exception to the client wrapped in
397           // a ScannerTimeoutException. Else, it's because the region moved and we used the old
398           // id against the new region server; reset the scanner.
399           if (timeout < System.currentTimeMillis()) {
400             LOG.info("For hints related to the following exception, please try taking a look at: " +
401                 "https://hbase.apache.org/book.html#trouble.client.scantimeout");
402             long elapsed = System.currentTimeMillis() - lastNext;
403             ScannerTimeoutException ex = new ScannerTimeoutException(
404                 elapsed + "ms passed since the last invocation, " +
405                     "timeout is currently set to " + scannerTimeout);
406             ex.initCause(e);
407             throw ex;
408           }
409         } else {
410           // If exception is any but the list below throw it back to the client; else setup
411           // the scanner and retry.
412           Throwable cause = e.getCause();
413           if ((cause != null && cause instanceof NotServingRegionException) ||
414             (cause != null && cause instanceof RegionServerStoppedException) ||
415             e instanceof OutOfOrderScannerNextException) {
416             // Pass
417             // It is easier writing the if loop test as list of what is allowed rather than
418             // as a list of what is not allowed... so if in here, it means we do not throw.
419           } else {
420             throw e;
421           }
422         }
423         // Else, its signal from depths of ScannerCallable that we need to reset the scanner.
424         if (this.lastResult != null) {
425           // The region has moved. We need to open a brand new scanner at
426           // the new location.
427           // Reset the startRow to the row we've seen last so that the new
428           // scanner starts at the correct row. Otherwise we may see previously
429           // returned rows again.
430           // (ScannerCallable by now has "relocated" the correct region)
431           this.scan.setStartRow(this.lastResult.getRow());
432 
433           // Skip first row returned.  We already let it out on previous
434           // invocation.
435           skipFirst = true;
436         }
437         if (e instanceof OutOfOrderScannerNextException) {
438           if (retryAfterOutOfOrderException) {
439             retryAfterOutOfOrderException = false;
440           } else {
441             // TODO: Why wrap this in a DNRIOE when it already is a DNRIOE?
442             throw new DoNotRetryIOException("Failed after retry of " +
443               "OutOfOrderScannerNextException: was there a rpc timeout?", e);
444           }
445         }
446         // Clear region.
447         this.currentRegion = null;
448         // Set this to zero so we don't try and do an rpc and close on remote server when
449         // the exception we got was UnknownScanner or the Server is going down.
450         callable = null;
451         // This continue will take us to while at end of loop where we will set up new scanner.
452         continue;
453       }
454       long currentTime = System.currentTimeMillis();
455       if (this.scanMetrics != null ) {
456         this.scanMetrics.sumOfMillisSecBetweenNexts.addAndGet(currentTime-lastNext);
457       }
458       lastNext = currentTime;
459       if (values != null && values.length > 0) {
460         for (Result rs : values) {
461           cache.add(rs);
462           for (Cell kv : rs.rawCells()) {
463             // TODO make method in Cell or CellUtil
464             remainingResultSize -= KeyValueUtil.ensureKeyValue(kv).heapSize();
465           }
466           countdown--;
467           this.lastResult = rs;
468         }
469       }
470       // We expect that the server won't have more results for us when we exhaust
471       // the size (bytes or count) of the results returned. If the server *does* inform us that
472       // there are more results, we want to avoid possiblyNextScanner(...). Only when we actually
473       // get results is the moreResults context valid.
474       if (null != values && values.length > 0 && callable.hasMoreResultsContext()) {
475         // Only adhere to more server results when we don't have any partialResults
476         // as it keeps the outer loop logic the same.
477         serverHasMoreResults = callable.getServerHasMoreResults();
478       }
479       // Values == null means server-side filter has determined we must STOP
480       // !partialResults.isEmpty() means that we are still accumulating partial Results for a
481       // row. We should not change scanners before we receive all the partial Results for that
482       // row.
483     } while (remainingResultSize > 0 && countdown > 0 && !serverHasMoreResults
484         && nextScanner(countdown, values == null));
485   }
486 
487     @Override
488     public void close() {
489       if (!scanMetricsPublished) writeScanMetrics();
490       if (callable != null) {
491         callable.setClose();
492         try {
493           this.caller.callWithRetries(callable);
494         } catch (UnknownScannerException e) {
495            // We used to catch this error, interpret, and rethrow. However, we
496            // have since decided that it's not nice for a scanner's close to
497            // throw exceptions. Chances are it was just due to lease time out.
498         } catch (IOException e) {
499            /* An exception other than UnknownScanner is unexpected. */
500            LOG.warn("scanner failed to close. Exception follows: " + e);
501         }
502         callable = null;
503       }
504       closed = true;
505     }
506 
507     @Override
508     public boolean renewLease() {
509       if (callable != null) {
510         // do not return any rows, do not advance the scanner
511         callable.setRenew(true);
512         try {
513           this.caller.callWithoutRetries(callable, this.scannerTimeout);
514         } catch (Exception e) {
515           return false;
516         } finally {
517           callable.setRenew(false);
518         }
519         return true;
520       }
521       return false;
522     }
523 }