View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.client;
19  
20  import java.io.IOException;
21  import java.io.InterruptedIOException;
22  import java.util.LinkedList;
23  import java.util.concurrent.ExecutorService;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.Cell;
30  import org.apache.hadoop.hbase.CellUtil;
31  import org.apache.hadoop.hbase.DoNotRetryIOException;
32  import org.apache.hadoop.hbase.HBaseConfiguration;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.HRegionInfo;
35  import org.apache.hadoop.hbase.NotServingRegionException;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.UnknownScannerException;
38  import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory;
39  import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
40  import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
41  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
42  import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos;
43  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
44  import org.apache.hadoop.hbase.util.Bytes;
45  
46  /**
47   * Implements the scanner interface for the HBase client.
48   * If there are multiple regions in a table, this scanner will iterate
49   * through them all.
50   */
51  @InterfaceAudience.Private
52  public class ClientScanner extends AbstractClientScanner {
53      private final Log LOG = LogFactory.getLog(this.getClass());
54      protected Scan scan;
55      protected boolean closed = false;
56      // Current region scanner is against.  Gets cleared if current region goes
57      // wonky: e.g. if it splits on us.
58      protected HRegionInfo currentRegion = null;
59      protected ScannerCallableWithReplicas callable = null;
60      protected final LinkedList<Result> cache = new LinkedList<Result>();
61      protected final int caching;
62      protected long lastNext;
63      // Keep lastResult returned successfully in case we have to reset scanner.
64      protected Result lastResult = null;
65      protected final long maxScannerResultSize;
66      private final ClusterConnection connection;
67      private final TableName tableName;
68      protected final int scannerTimeout;
69      protected boolean scanMetricsPublished = false;
70      protected RpcRetryingCaller<Result []> caller;
71      protected RpcControllerFactory rpcControllerFactory;
72      protected Configuration conf;
73      //The timeout on the primary. Applicable if there are multiple replicas for a region
74      //In that case, we will only wait for this much timeout on the primary before going
75      //to the replicas and trying the same scan. Note that the retries will still happen
76      //on each replica and the first successful results will be taken. A timeout of 0 is
77      //disallowed.
78      protected final int primaryOperationTimeout;
79      private int retries;
80      protected final ExecutorService pool;
81  
82    /**
83     * Create a new ClientScanner for the specified table Note that the passed {@link Scan}'s start
84     * row maybe changed changed.
85     * @param conf The {@link Configuration} to use.
86     * @param scan {@link Scan} to use in this scanner
87     * @param tableName The table that we wish to scan
88     * @param connection Connection identifying the cluster
89     * @throws IOException
90     */
91    public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName,
92        ClusterConnection connection, RpcRetryingCallerFactory rpcFactory,
93        RpcControllerFactory controllerFactory, ExecutorService pool, int primaryOperationTimeout) throws IOException {
94        if (LOG.isTraceEnabled()) {
95          LOG.trace("Scan table=" + tableName
96              + ", startRow=" + Bytes.toStringBinary(scan.getStartRow()));
97        }
98        this.scan = scan;
99        this.tableName = tableName;
100       this.lastNext = System.currentTimeMillis();
101       this.connection = connection;
102       this.pool = pool;
103       this.primaryOperationTimeout = primaryOperationTimeout;
104       this.retries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
105           HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
106       if (scan.getMaxResultSize() > 0) {
107         this.maxScannerResultSize = scan.getMaxResultSize();
108       } else {
109         this.maxScannerResultSize = conf.getLong(
110           HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY,
111           HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE);
112       }
113       this.scannerTimeout = HBaseConfiguration.getInt(conf,
114         HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD,
115         HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY,
116         HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD);
117 
118       // check if application wants to collect scan metrics
119       initScanMetrics(scan);
120 
121       // Use the caching from the Scan.  If not set, use the default cache setting for this table.
122       if (this.scan.getCaching() > 0) {
123         this.caching = this.scan.getCaching();
124       } else {
125         this.caching = conf.getInt(
126             HConstants.HBASE_CLIENT_SCANNER_CACHING,
127             HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING);
128       }
129 
130       this.caller = rpcFactory.<Result[]> newCaller();
131       this.rpcControllerFactory = controllerFactory;
132 
133       this.conf = conf;
134       initializeScannerInConstruction();
135     }
136 
137     protected void initializeScannerInConstruction() throws IOException{
138       // initialize the scanner
139       nextScanner(this.caching, false);
140     }
141 
142     protected ClusterConnection getConnection() {
143       return this.connection;
144     }
145 
146     /**
147      * @return Table name
148      * @deprecated Since 0.96.0; use {@link #getTable()}
149      */
150     @Deprecated
151     protected byte [] getTableName() {
152       return this.tableName.getName();
153     }
154 
155     protected TableName getTable() {
156       return this.tableName;
157     }
158 
159     protected int getRetries() {
160       return this.retries;
161     }
162 
163     protected int getScannerTimeout() {
164       return this.scannerTimeout;
165     }
166 
167     protected Configuration getConf() {
168       return this.conf;
169     }
170 
171     protected Scan getScan() {
172       return scan;
173     }
174 
175     protected ExecutorService getPool() {
176       return pool;
177     }
178 
179     protected int getPrimaryOperationTimeout() {
180       return primaryOperationTimeout;
181     }
182 
183     protected int getCaching() {
184       return caching;
185     }
186 
187     protected long getTimestamp() {
188       return lastNext;
189     }
190 
191     // returns true if the passed region endKey
192     protected boolean checkScanStopRow(final byte [] endKey) {
193       if (this.scan.getStopRow().length > 0) {
194         // there is a stop row, check to see if we are past it.
195         byte [] stopRow = scan.getStopRow();
196         int cmp = Bytes.compareTo(stopRow, 0, stopRow.length,
197           endKey, 0, endKey.length);
198         if (cmp <= 0) {
199           // stopRow <= endKey (endKey is equals to or larger than stopRow)
200           // This is a stop.
201           return true;
202         }
203       }
204       return false; //unlikely.
205     }
206 
207     private boolean possiblyNextScanner(int nbRows, final boolean done) throws IOException {
208       // If we have just switched replica, don't go to the next scanner yet. Rather, try
209       // the scanner operations on the new replica, from the right point in the scan
210       // Note that when we switched to a different replica we left it at a point
211       // where we just did the "openScanner" with the appropriate startrow
212       if (callable != null && callable.switchedToADifferentReplica()) return true;
213       return nextScanner(nbRows, done);
214     }
215 
216     /*
217      * Gets a scanner for the next region.  If this.currentRegion != null, then
218      * we will move to the endrow of this.currentRegion.  Else we will get
219      * scanner at the scan.getStartRow().  We will go no further, just tidy
220      * up outstanding scanners, if <code>currentRegion != null</code> and
221      * <code>done</code> is true.
222      * @param nbRows
223      * @param done Server-side says we're done scanning.
224      */
225   protected boolean nextScanner(int nbRows, final boolean done)
226     throws IOException {
227       // Close the previous scanner if it's open
228       if (this.callable != null) {
229         this.callable.setClose();
230         call(scan, callable, caller, scannerTimeout);
231         this.callable = null;
232       }
233 
234       // Where to start the next scanner
235       byte [] localStartKey;
236 
237       // if we're at end of table, close and return false to stop iterating
238       if (this.currentRegion != null) {
239         byte [] endKey = this.currentRegion.getEndKey();
240         if (endKey == null ||
241             Bytes.equals(endKey, HConstants.EMPTY_BYTE_ARRAY) ||
242             checkScanStopRow(endKey) ||
243             done) {
244           close();
245           if (LOG.isTraceEnabled()) {
246             LOG.trace("Finished " + this.currentRegion);
247           }
248           return false;
249         }
250         localStartKey = endKey;
251         if (LOG.isTraceEnabled()) {
252           LOG.trace("Finished " + this.currentRegion);
253         }
254       } else {
255         localStartKey = this.scan.getStartRow();
256       }
257 
258       if (LOG.isDebugEnabled() && this.currentRegion != null) {
259         // Only worth logging if NOT first region in scan.
260         LOG.debug("Advancing internal scanner to startKey at '" +
261           Bytes.toStringBinary(localStartKey) + "'");
262       }
263       try {
264         callable = getScannerCallable(localStartKey, nbRows);
265         // Open a scanner on the region server starting at the
266         // beginning of the region
267         call(scan, callable, caller, scannerTimeout);
268         this.currentRegion = callable.getHRegionInfo();
269         if (this.scanMetrics != null) {
270           this.scanMetrics.countOfRegions.incrementAndGet();
271         }
272       } catch (IOException e) {
273         close();
274         throw e;
275       }
276       return true;
277     }
278 
279 
280   Result[] call(Scan scan, ScannerCallableWithReplicas callable,
281       RpcRetryingCaller<Result[]> caller, int scannerTimeout)
282       throws IOException, RuntimeException {
283     if (Thread.interrupted()) {
284       throw new InterruptedIOException();
285     }
286     // callWithoutRetries is at this layer. Within the ScannerCallableWithReplicas,
287     // we do a callWithRetries
288     return caller.callWithoutRetries(callable, scannerTimeout);
289   }
290 
291     @InterfaceAudience.Private
292     protected ScannerCallableWithReplicas getScannerCallable(byte [] localStartKey,
293         int nbRows) {
294       scan.setStartRow(localStartKey);
295       ScannerCallable s =
296           new ScannerCallable(getConnection(), getTable(), scan, this.scanMetrics,
297               this.rpcControllerFactory);
298       s.setCaching(nbRows);
299       ScannerCallableWithReplicas sr = new ScannerCallableWithReplicas(tableName, getConnection(),
300        s, pool, primaryOperationTimeout, scan,
301        retries, scannerTimeout, caching, conf, caller);
302       return sr;
303     }
304 
305     /**
306      * Publish the scan metrics. For now, we use scan.setAttribute to pass the metrics back to the
307      * application or TableInputFormat.Later, we could push it to other systems. We don't use metrics
308      * framework because it doesn't support multi-instances of the same metrics on the same machine;
309      * for scan/map reduce scenarios, we will have multiple scans running at the same time.
310      *
311      * By default, scan metrics are disabled; if the application wants to collect them, this
312      * behavior can be turned on by calling calling {@link Scan#setScanMetricsEnabled(boolean)}
313      *
314      * <p>This invocation clears the scan metrics. Metrics are aggregated in the Scan instance.
315      */
316     protected void writeScanMetrics() {
317       if (this.scanMetrics == null || scanMetricsPublished) {
318         return;
319       }
320       MapReduceProtos.ScanMetrics pScanMetrics = ProtobufUtil.toScanMetrics(scanMetrics);
321       scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_DATA, pScanMetrics.toByteArray());
322       scanMetricsPublished = true;
323     }
324 
325     @Override
326     public Result next() throws IOException {
327       // If the scanner is closed and there's nothing left in the cache, next is a no-op.
328       if (cache.size() == 0 && this.closed) {
329         return null;
330       }
331       if (cache.size() == 0) {
332         loadCache();
333       }
334 
335       if (cache.size() > 0) {
336         return cache.poll();
337       }
338 
339       // if we exhausted this scanner before calling close, write out the scan metrics
340       writeScanMetrics();
341       return null;
342     }
343 
344   /**
345    * Contact the servers to load more {@link Result}s in the cache.
346    */
347   protected void loadCache() throws IOException {
348     Result[] values = null;
349     long remainingResultSize = maxScannerResultSize;
350     int countdown = this.caching;
351 
352     // We need to reset it if it's a new callable that was created
353     // with a countdown in nextScanner
354     callable.setCaching(this.caching);
355     // This flag is set when we want to skip the result returned.  We do
356     // this when we reset scanner because it split under us.
357     boolean skipFirst = false;
358     boolean retryAfterOutOfOrderException = true;
359     // We don't expect that the server will have more results for us if
360     // it doesn't tell us otherwise. We rely on the size or count of results
361     boolean serverHasMoreResults = false;
362     do {
363       try {
364         if (skipFirst) {
365           // Skip only the first row (which was the last row of the last
366           // already-processed batch).
367           callable.setCaching(1);
368           values = call(scan, callable, caller, scannerTimeout);
369           // When the replica switch happens, we need to do certain operations
370           // again. The scannercallable will openScanner with the right startkey
371           // but we need to pick up from there. Bypass the rest of the loop
372           // and let the catch-up happen in the beginning of the loop as it
373           // happens for the cases where we see exceptions. Since only openScanner
374           // would have happened, values would be null
375           if (values == null && callable.switchedToADifferentReplica()) {
376             if (this.lastResult != null) { //only skip if there was something read earlier
377               skipFirst = true;
378             }
379             this.currentRegion = callable.getHRegionInfo();
380             continue;
381           }
382           callable.setCaching(this.caching);
383           skipFirst = false;
384         }
385         // Server returns a null values if scanning is to stop. Else,
386         // returns an empty array if scanning is to go on and we've just
387         // exhausted current region.
388         values = call(scan,   callable, caller, scannerTimeout);
389         if (skipFirst && values != null && values.length == 1) {
390           skipFirst = false; // Already skipped, unset it before scanning again
391           values = call(scan, callable, caller, scannerTimeout);
392         }
393         // When the replica switch happens, we need to do certain operations
394         // again. The callable will openScanner with the right startkey
395         // but we need to pick up from there. Bypass the rest of the loop
396         // and let the catch-up happen in the beginning of the loop as it
397         // happens for the cases where we see exceptions. Since only openScanner
398         // would have happened, values would be null
399         if (values == null && callable.switchedToADifferentReplica()) {
400           if (this.lastResult != null) { //only skip if there was something read earlier
401             skipFirst = true;
402           }
403           this.currentRegion = callable.getHRegionInfo();
404           continue;
405         }
406         retryAfterOutOfOrderException = true;
407       } catch (DoNotRetryIOException e) {
408         // DNRIOEs are thrown to make us break out of retries. Some types of DNRIOEs want us
409         // to reset the scanner and come back in again.
410         if (e instanceof UnknownScannerException) {
411           long timeout = lastNext + scannerTimeout;
412           // If we are over the timeout, throw this exception to the client wrapped in
413           // a ScannerTimeoutException. Else, it's because the region moved and we used the old
414           // id against the new region server; reset the scanner.
415           if (timeout < System.currentTimeMillis()) {
416             long elapsed = System.currentTimeMillis() - lastNext;
417             ScannerTimeoutException ex =
418                 new ScannerTimeoutException(elapsed + "ms passed since the last invocation, "
419                     + "timeout is currently set to " + scannerTimeout);
420             ex.initCause(e);
421             throw ex;
422           }
423         } else {
424           // If exception is any but the list below throw it back to the client; else setup
425           // the scanner and retry.
426           Throwable cause = e.getCause();
427           if ((cause != null && cause instanceof NotServingRegionException) ||
428               (cause != null && cause instanceof RegionServerStoppedException) ||
429               e instanceof OutOfOrderScannerNextException) {
430             // Pass
431             // It is easier writing the if loop test as list of what is allowed rather than
432             // as a list of what is not allowed... so if in here, it means we do not throw.
433           } else {
434             throw e;
435           }
436         }
437         // Else, its signal from depths of ScannerCallable that we need to reset the scanner.
438         if (this.lastResult != null) {
439           // The region has moved. We need to open a brand new scanner at
440           // the new location.
441           // Reset the startRow to the row we've seen last so that the new
442           // scanner starts at the correct row. Otherwise we may see previously
443           // returned rows again.
444           // (ScannerCallable by now has "relocated" the correct region)
445           this.scan.setStartRow(this.lastResult.getRow());
446 
447           // Skip first row returned.  We already let it out on previous
448           // invocation.
449           skipFirst = true;
450         }
451         if (e instanceof OutOfOrderScannerNextException) {
452           if (retryAfterOutOfOrderException) {
453             retryAfterOutOfOrderException = false;
454           } else {
455             // TODO: Why wrap this in a DNRIOE when it already is a DNRIOE?
456             throw new DoNotRetryIOException("Failed after retry of " +
457                 "OutOfOrderScannerNextException: was there a rpc timeout?", e);
458           }
459         }
460         // Clear region.
461         this.currentRegion = null;
462         // Set this to zero so we don't try and do an rpc and close on remote server when
463         // the exception we got was UnknownScanner or the Server is going down.
464         callable = null;
465         // This continue will take us to while at end of loop where we will set up new scanner.
466         continue;
467       }
468       long currentTime = System.currentTimeMillis();
469       if (this.scanMetrics != null) {
470         this.scanMetrics.sumOfMillisSecBetweenNexts.addAndGet(currentTime - lastNext);
471       }
472       lastNext = currentTime;
473       if (values != null && values.length > 0) {
474         for (Result rs : values) {
475           cache.add(rs);
476           // We don't make Iterator here
477           for (Cell cell : rs.rawCells()) {
478             remainingResultSize -= CellUtil.estimatedHeapSizeOf(cell);
479           }
480           countdown--;
481           this.lastResult = rs;
482         }
483       }
484       // We expect that the server won't have more results for us when we exhaust
485       // the size (bytes or count) of the results returned. If the server *does* inform us that
486       // there are more results, we want to avoid possiblyNextScanner(...). Only when we actually
487       // get results is the moreResults context valid.
488       if (null != values && values.length > 0 && callable.hasMoreResultsContext()) {
489         // Only adhere to more server results when we don't have any partialResults
490         // as it keeps the outer loop logic the same.
491         serverHasMoreResults = callable.getServerHasMoreResults();
492       }
493       // Values == null means server-side filter has determined we must STOP
494       // !partialResults.isEmpty() means that we are still accumulating partial Results for a
495       // row. We should not change scanners before we receive all the partial Results for that
496       // row.
497     } while (remainingResultSize > 0 && countdown > 0 && !serverHasMoreResults
498         && possiblyNextScanner(countdown, values == null));
499   }
500 
501     @Override
502     public void close() {
503       if (!scanMetricsPublished) writeScanMetrics();
504       if (callable != null) {
505         callable.setClose();
506         try {
507           call(scan, callable, caller, scannerTimeout);
508         } catch (UnknownScannerException e) {
509            // We used to catch this error, interpret, and rethrow. However, we
510            // have since decided that it's not nice for a scanner's close to
511            // throw exceptions. Chances are it was just due to lease time out.
512         } catch (IOException e) {
513            /* An exception other than UnknownScanner is unexpected. */
514            LOG.warn("scanner failed to close. Exception follows: " + e);
515         }
516         callable = null;
517       }
518       closed = true;
519     }
520 }