View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.client;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.LinkedList;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.classification.InterfaceAudience;
27  import org.apache.hadoop.classification.InterfaceStability;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.TableName;
30  import org.apache.hadoop.hbase.DoNotRetryIOException;
31  import org.apache.hadoop.hbase.HBaseConfiguration;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.NotServingRegionException;
36  import org.apache.hadoop.hbase.UnknownScannerException;
37  import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
38  import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
39  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
40  import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos;
41  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
42  import org.apache.hadoop.hbase.util.Bytes;
43  
44  /**
45   * Implements the scanner interface for the HBase client.
46   * If there are multiple regions in a table, this scanner will iterate
47   * through them all.
48   */
49  @InterfaceAudience.Public
50  @InterfaceStability.Stable
51  public class ClientScanner extends AbstractClientScanner {
52      private final Log LOG = LogFactory.getLog(this.getClass());
53      private Scan scan;
54      private boolean closed = false;
55      // Current region scanner is against.  Gets cleared if current region goes
56      // wonky: e.g. if it splits on us.
57      private HRegionInfo currentRegion = null;
58      private ScannerCallable callable = null;
59      private final LinkedList<Result> cache = new LinkedList<Result>();
60      private final int caching;
61      private long lastNext;
62      // Keep lastResult returned successfully in case we have to reset scanner.
63      private Result lastResult = null;
64      private ScanMetrics scanMetrics = null;
65      private final long maxScannerResultSize;
66      private final HConnection connection;
67      private final TableName tableName;
68      private final int scannerTimeout;
69      private boolean scanMetricsPublished = false;
70      private RpcRetryingCaller<Result []> caller;
71  
72      /**
73       * Create a new ClientScanner for the specified table. An HConnection will be
74       * retrieved using the passed Configuration.
75       * Note that the passed {@link Scan}'s start row maybe changed changed.
76       *
77       * @param conf The {@link Configuration} to use.
78       * @param scan {@link Scan} to use in this scanner
79       * @param tableName The table that we wish to scan
80       * @throws IOException
81       */
82      public ClientScanner(final Configuration conf, final Scan scan,
83          final TableName tableName) throws IOException {
84        this(conf, scan, tableName, HConnectionManager.getConnection(conf));
85      }
86  
87  
88      /**
89       * Create a new ClientScanner for the specified table
90       * Note that the passed {@link Scan}'s start row maybe changed changed.
91       *
92       * @param conf The {@link Configuration} to use.
93       * @param scan {@link Scan} to use in this scanner
94       * @param tableName The table that we wish to scan
95       * @param connection Connection identifying the cluster
96       * @throws IOException
97       */
98    public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName,
99        HConnection connection) throws IOException {
100     this(conf, scan, tableName, connection, new RpcRetryingCallerFactory(conf));
101   }
102 
103   /**
104    * Create a new ClientScanner for the specified table Note that the passed {@link Scan}'s start
105    * row maybe changed changed.
106    * @param conf The {@link Configuration} to use.
107    * @param scan {@link Scan} to use in this scanner
108    * @param tableName The table that we wish to scan
109    * @param connection Connection identifying the cluster
110    * @throws IOException
111    */
112   public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName,
113       HConnection connection, RpcRetryingCallerFactory rpcFactory) throws IOException {
114       if (LOG.isTraceEnabled()) {
115         LOG.trace("Scan table=" + tableName
116             + ", startRow=" + Bytes.toStringBinary(scan.getStartRow()));
117       }
118       this.scan = scan;
119       this.tableName = tableName;
120       this.lastNext = System.currentTimeMillis();
121       this.connection = connection;
122       if (scan.getMaxResultSize() > 0) {
123         this.maxScannerResultSize = scan.getMaxResultSize();
124       } else {
125         this.maxScannerResultSize = conf.getLong(
126           HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY,
127           HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE);
128       }
129       this.scannerTimeout = HBaseConfiguration.getInt(conf,
130         HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD,
131         HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY,
132         HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD);
133 
134       // check if application wants to collect scan metrics
135       byte[] enableMetrics = scan.getAttribute(
136         Scan.SCAN_ATTRIBUTES_METRICS_ENABLE);
137       if (enableMetrics != null && Bytes.toBoolean(enableMetrics)) {
138         scanMetrics = new ScanMetrics();
139       }
140 
141       // Use the caching from the Scan.  If not set, use the default cache setting for this table.
142       if (this.scan.getCaching() > 0) {
143         this.caching = this.scan.getCaching();
144       } else {
145         this.caching = conf.getInt(
146             HConstants.HBASE_CLIENT_SCANNER_CACHING,
147             HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING);
148       }
149 
150     this.caller = rpcFactory.<Result[]> newCaller();
151 
152       // initialize the scanner
153       nextScanner(this.caching, false);
154     }
155 
156     protected HConnection getConnection() {
157       return this.connection;
158     }
159 
160     protected TableName getTableName() {
161       return this.tableName;
162     }
163 
164     protected Scan getScan() {
165       return scan;
166     }
167 
168     protected long getTimestamp() {
169       return lastNext;
170     }
171 
172     // returns true if the passed region endKey
173     private boolean checkScanStopRow(final byte [] endKey) {
174       if (this.scan.getStopRow().length > 0) {
175         // there is a stop row, check to see if we are past it.
176         byte [] stopRow = scan.getStopRow();
177         int cmp = Bytes.compareTo(stopRow, 0, stopRow.length,
178           endKey, 0, endKey.length);
179         if (cmp <= 0) {
180           // stopRow <= endKey (endKey is equals to or larger than stopRow)
181           // This is a stop.
182           return true;
183         }
184       }
185       return false; //unlikely.
186     }
187 
188     /*
189      * Gets a scanner for the next region.  If this.currentRegion != null, then
190      * we will move to the endrow of this.currentRegion.  Else we will get
191      * scanner at the scan.getStartRow().  We will go no further, just tidy
192      * up outstanding scanners, if <code>currentRegion != null</code> and
193      * <code>done</code> is true.
194      * @param nbRows
195      * @param done Server-side says we're done scanning.
196      */
197     private boolean nextScanner(int nbRows, final boolean done)
198     throws IOException {
199       // Close the previous scanner if it's open
200       if (this.callable != null) {
201         this.callable.setClose();
202         this.caller.callWithRetries(callable);
203         this.callable = null;
204       }
205 
206       // Where to start the next scanner
207       byte [] localStartKey;
208 
209       // if we're at end of table, close and return false to stop iterating
210       if (this.currentRegion != null) {
211         byte [] endKey = this.currentRegion.getEndKey();
212         if (endKey == null ||
213             Bytes.equals(endKey, HConstants.EMPTY_BYTE_ARRAY) ||
214             checkScanStopRow(endKey) ||
215             done) {
216           close();
217           if (LOG.isDebugEnabled()) {
218             LOG.debug("Finished region=" + this.currentRegion);
219           }
220           return false;
221         }
222         localStartKey = endKey;
223         if (LOG.isDebugEnabled()) {
224           LOG.debug("Finished with region " + this.currentRegion);
225         }
226       } else {
227         localStartKey = this.scan.getStartRow();
228       }
229 
230       if (LOG.isDebugEnabled() && this.currentRegion != null) {
231         // Only worth logging if NOT first region in scan.
232         LOG.debug("Advancing internal scanner to startKey at '" +
233           Bytes.toStringBinary(localStartKey) + "'");
234       }
235       try {
236         callable = getScannerCallable(localStartKey, nbRows);
237         // Open a scanner on the region server starting at the
238         // beginning of the region
239         this.caller.callWithRetries(callable);
240         this.currentRegion = callable.getHRegionInfo();
241         if (this.scanMetrics != null) {
242           this.scanMetrics.countOfRegions.incrementAndGet();
243         }
244       } catch (IOException e) {
245         close();
246         throw e;
247       }
248       return true;
249     }
250 
251     protected ScannerCallable getScannerCallable(byte [] localStartKey,
252         int nbRows) {
253       scan.setStartRow(localStartKey);
254       ScannerCallable s = new ScannerCallable(getConnection(),
255         getTableName(), scan, this.scanMetrics);
256       s.setCaching(nbRows);
257       return s;
258     }
259 
260     /**
261      * Publish the scan metrics. For now, we use scan.setAttribute to pass the metrics back to the
262      * application or TableInputFormat.Later, we could push it to other systems. We don't use metrics
263      * framework because it doesn't support multi-instances of the same metrics on the same machine;
264      * for scan/map reduce scenarios, we will have multiple scans running at the same time.
265      *
266      * By default, scan metrics are disabled; if the application wants to collect them, this behavior
267      * can be turned on by calling calling:
268      *
269      * scan.setAttribute(SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE))
270      */
271     private void writeScanMetrics() {
272       if (this.scanMetrics == null || scanMetricsPublished) {
273         return;
274       }
275       MapReduceProtos.ScanMetrics pScanMetrics = ProtobufUtil.toScanMetrics(scanMetrics);
276       scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_DATA, pScanMetrics.toByteArray());
277       scanMetricsPublished = true;
278     }
279 
280     @Override
281     public Result next() throws IOException {
282       // If the scanner is closed and there's nothing left in the cache, next is a no-op.
283       if (cache.size() == 0 && this.closed) {
284         return null;
285       }
286       if (cache.size() == 0) {
287         Result [] values = null;
288         long remainingResultSize = maxScannerResultSize;
289         int countdown = this.caching;
290         // We need to reset it if it's a new callable that was created
291         // with a countdown in nextScanner
292         callable.setCaching(this.caching);
293         // This flag is set when we want to skip the result returned.  We do
294         // this when we reset scanner because it split under us.
295         boolean skipFirst = false;
296         boolean retryAfterOutOfOrderException  = true;
297         do {
298           try {
299             if (skipFirst) {
300               // Skip only the first row (which was the last row of the last
301               // already-processed batch).
302               callable.setCaching(1);
303               values = this.caller.callWithRetries(callable);
304               callable.setCaching(this.caching);
305               skipFirst = false;
306             }
307             // Server returns a null values if scanning is to stop.  Else,
308             // returns an empty array if scanning is to go on and we've just
309             // exhausted current region.
310             values = this.caller.callWithRetries(callable);
311             if (skipFirst && values != null && values.length == 1) {
312               skipFirst = false; // Already skipped, unset it before scanning again
313               values = this.caller.callWithRetries(callable);
314             }
315             retryAfterOutOfOrderException  = true;
316           } catch (DoNotRetryIOException e) {
317             // DNRIOEs are thrown to make us break out of retries.  Some types of DNRIOEs want us
318             // to reset the scanner and come back in again.
319             if (e instanceof UnknownScannerException) {
320               long timeout = lastNext + scannerTimeout;
321               // If we are over the timeout, throw this exception to the client wrapped in
322               // a ScannerTimeoutException. Else, it's because the region moved and we used the old
323               // id against the new region server; reset the scanner.
324               if (timeout < System.currentTimeMillis()) {
325                 long elapsed = System.currentTimeMillis() - lastNext;
326                 ScannerTimeoutException ex = new ScannerTimeoutException(
327                     elapsed + "ms passed since the last invocation, " +
328                         "timeout is currently set to " + scannerTimeout);
329                 ex.initCause(e);
330                 throw ex;
331               }
332             } else {
333               // If exception is any but the list below throw it back to the client; else setup
334               // the scanner and retry.
335               Throwable cause = e.getCause();
336               if ((cause != null && cause instanceof NotServingRegionException) ||
337                 (cause != null && cause instanceof RegionServerStoppedException) ||
338                 e instanceof OutOfOrderScannerNextException) {
339                 // Pass
340                 // It is easier writing the if loop test as list of what is allowed rather than
341                 // as a list of what is not allowed... so if in here, it means we do not throw.
342               } else {
343                 throw e;
344               }
345             }
346             // Else, its signal from depths of ScannerCallable that we need to reset the scanner.
347             if (this.lastResult != null) {
348               this.scan.setStartRow(this.lastResult.getRow());
349               // Skip first row returned.  We already let it out on previous
350               // invocation.
351               skipFirst = true;
352             }
353             if (e instanceof OutOfOrderScannerNextException) {
354               if (retryAfterOutOfOrderException) {
355                 retryAfterOutOfOrderException = false;
356               } else {
357                 // TODO: Why wrap this in a DNRIOE when it already is a DNRIOE?
358                 throw new DoNotRetryIOException("Failed after retry of " +
359                   "OutOfOrderScannerNextException: was there a rpc timeout?", e);
360               }
361             }
362             // Clear region.
363             this.currentRegion = null;
364             // Set this to zero so we don't try and do an rpc and close on remote server when
365             // the exception we got was UnknownScanner or the Server is going down.
366             callable = null;
367             // This continue will take us to while at end of loop where we will set up new scanner.
368             continue;
369           }
370           long currentTime = System.currentTimeMillis();
371           if (this.scanMetrics != null ) {
372             this.scanMetrics.sumOfMillisSecBetweenNexts.addAndGet(currentTime-lastNext);
373           }
374           lastNext = currentTime;
375           if (values != null && values.length > 0) {
376             for (Result rs : values) {
377               cache.add(rs);
378               for (KeyValue kv : rs.raw()) {
379                   remainingResultSize -= kv.heapSize();
380               }
381               countdown--;
382               this.lastResult = rs;
383             }
384           }
385           // Values == null means server-side filter has determined we must STOP
386         } while (remainingResultSize > 0 && countdown > 0 && nextScanner(countdown, values == null));
387       }
388 
389       if (cache.size() > 0) {
390         return cache.poll();
391       }
392 
393       // if we exhausted this scanner before calling close, write out the scan metrics
394       writeScanMetrics();
395       return null;
396     }
397 
398     /**
399      * Get <param>nbRows</param> rows.
400      * How many RPCs are made is determined by the {@link Scan#setCaching(int)}
401      * setting (or hbase.client.scanner.caching in hbase-site.xml).
402      * @param nbRows number of rows to return
403      * @return Between zero and <param>nbRows</param> RowResults.  Scan is done
404      * if returned array is of zero-length (We never return null).
405      * @throws IOException
406      */
407     @Override
408     public Result [] next(int nbRows) throws IOException {
409       // Collect values to be returned here
410       ArrayList<Result> resultSets = new ArrayList<Result>(nbRows);
411       for(int i = 0; i < nbRows; i++) {
412         Result next = next();
413         if (next != null) {
414           resultSets.add(next);
415         } else {
416           break;
417         }
418       }
419       return resultSets.toArray(new Result[resultSets.size()]);
420     }
421 
422     @Override
423     public void close() {
424       if (!scanMetricsPublished) writeScanMetrics();
425       if (callable != null) {
426         callable.setClose();
427         try {
428           this.caller.callWithRetries(callable);
429         } catch (IOException e) {
430           // We used to catch this error, interpret, and rethrow. However, we
431           // have since decided that it's not nice for a scanner's close to
432           // throw exceptions. Chances are it was just an UnknownScanner
433           // exception due to lease time out.
434         }
435         callable = null;
436       }
437       closed = true;
438     }
439 }