View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.client;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.LinkedList;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.classification.InterfaceAudience;
27  import org.apache.hadoop.classification.InterfaceStability;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.Cell;
30  import org.apache.hadoop.hbase.DoNotRetryIOException;
31  import org.apache.hadoop.hbase.HBaseConfiguration;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.KeyValueUtil;
35  import org.apache.hadoop.hbase.NotServingRegionException;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.UnknownScannerException;
38  import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
39  import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
40  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
41  import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos;
42  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
43  import org.apache.hadoop.hbase.util.Bytes;
44  
45  /**
46   * Implements the scanner interface for the HBase client.
47   * If there are multiple regions in a table, this scanner will iterate
48   * through them all.
49   */
50  @InterfaceAudience.Public
51  @InterfaceStability.Stable
52  public class ClientScanner extends AbstractClientScanner {
53      private final Log LOG = LogFactory.getLog(this.getClass());
54      protected Scan scan;
55      protected boolean closed = false;
56      // Current region scanner is against.  Gets cleared if current region goes
57      // wonky: e.g. if it splits on us.
58      protected HRegionInfo currentRegion = null;
59      private ScannerCallable callable = null;
60      protected final LinkedList<Result> cache = new LinkedList<Result>();
61      protected final int caching;
62      protected long lastNext;
63      // Keep lastResult returned successfully in case we have to reset scanner.
64      protected Result lastResult = null;
65      protected ScanMetrics scanMetrics = null;
66      protected final long maxScannerResultSize;
67      private final HConnection connection;
68      private final TableName tableName;
69      private final int scannerTimeout;
70      protected boolean scanMetricsPublished = false;
71      protected RpcRetryingCaller<Result []> caller;
72  
73      /**
74       * Create a new ClientScanner for the specified table. An HConnection will be
75       * retrieved using the passed Configuration.
76       * Note that the passed {@link Scan}'s start row maybe changed changed.
77       *
78       * @param conf The {@link Configuration} to use.
79       * @param scan {@link Scan} to use in this scanner
80       * @param tableName The table that we wish to scan
81       * @throws IOException
82       */
83      public ClientScanner(final Configuration conf, final Scan scan,
84          final TableName tableName) throws IOException {
85        this(conf, scan, tableName, HConnectionManager.getConnection(conf));
86      }
87  
88      /**
89       * @deprecated Use {@link #ClientScanner(Configuration, Scan, TableName)}
90       */
91      @Deprecated
92      public ClientScanner(final Configuration conf, final Scan scan,
93          final byte [] tableName) throws IOException {
94        this(conf, scan, TableName.valueOf(tableName));
95      }
96  
97  
98      /**
99       * Create a new ClientScanner for the specified table
100      * Note that the passed {@link Scan}'s start row maybe changed changed.
101      *
102      * @param conf The {@link Configuration} to use.
103      * @param scan {@link Scan} to use in this scanner
104      * @param tableName The table that we wish to scan
105      * @param connection Connection identifying the cluster
106      * @throws IOException
107      */
108   public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName,
109       HConnection connection) throws IOException {
110     this(conf, scan, tableName, connection, new RpcRetryingCallerFactory(conf));
111   }
112 
113   /**
114    * @deprecated Use {@link #ClientScanner(Configuration, Scan, TableName, HConnection)}
115    */
116   @Deprecated
117   public ClientScanner(final Configuration conf, final Scan scan, final byte [] tableName,
118       HConnection connection) throws IOException {
119     this(conf, scan, TableName.valueOf(tableName), connection, new RpcRetryingCallerFactory(conf));
120   }
121 
122   /**
123    * Create a new ClientScanner for the specified table Note that the passed {@link Scan}'s start
124    * row maybe changed changed.
125    * @param conf The {@link Configuration} to use.
126    * @param scan {@link Scan} to use in this scanner
127    * @param tableName The table that we wish to scan
128    * @param connection Connection identifying the cluster
129    * @throws IOException
130    */
131   public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName,
132       HConnection connection, RpcRetryingCallerFactory rpcFactory) throws IOException {
133       if (LOG.isTraceEnabled()) {
134         LOG.trace("Scan table=" + tableName
135             + ", startRow=" + Bytes.toStringBinary(scan.getStartRow()));
136       }
137       this.scan = scan;
138       this.tableName = tableName;
139       this.lastNext = System.currentTimeMillis();
140       this.connection = connection;
141       if (scan.getMaxResultSize() > 0) {
142         this.maxScannerResultSize = scan.getMaxResultSize();
143       } else {
144         this.maxScannerResultSize = conf.getLong(
145           HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY,
146           HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE);
147       }
148       this.scannerTimeout = HBaseConfiguration.getInt(conf,
149         HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD,
150         HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY,
151         HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD);
152 
153       // check if application wants to collect scan metrics
154       byte[] enableMetrics = scan.getAttribute(
155         Scan.SCAN_ATTRIBUTES_METRICS_ENABLE);
156       if (enableMetrics != null && Bytes.toBoolean(enableMetrics)) {
157         scanMetrics = new ScanMetrics();
158       }
159 
160       // Use the caching from the Scan.  If not set, use the default cache setting for this table.
161       if (this.scan.getCaching() > 0) {
162         this.caching = this.scan.getCaching();
163       } else {
164         this.caching = conf.getInt(
165             HConstants.HBASE_CLIENT_SCANNER_CACHING,
166             HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING);
167       }
168 
169     this.caller = rpcFactory.<Result[]> newCaller();
170 
171       initializeScannerInConstruction();
172     }
173   
174     protected void initializeScannerInConstruction() throws IOException{
175       // initialize the scanner
176       nextScanner(this.caching, false);
177     }
178 
179     protected HConnection getConnection() {
180       return this.connection;
181     }
182 
183     /**
184      * @return Table name
185      * @deprecated Since 0.96.0; use {@link #getTable()}
186      */
187     @Deprecated
188     protected byte [] getTableName() {
189       return this.tableName.getName();
190     }
191 
192     protected TableName getTable() {
193       return this.tableName;
194     }
195 
196     protected Scan getScan() {
197       return scan;
198     }
199 
200     protected long getTimestamp() {
201       return lastNext;
202     }
203 
204     // returns true if the passed region endKey
205     protected boolean checkScanStopRow(final byte [] endKey) {
206       if (this.scan.getStopRow().length > 0) {
207         // there is a stop row, check to see if we are past it.
208         byte [] stopRow = scan.getStopRow();
209         int cmp = Bytes.compareTo(stopRow, 0, stopRow.length,
210           endKey, 0, endKey.length);
211         if (cmp <= 0) {
212           // stopRow <= endKey (endKey is equals to or larger than stopRow)
213           // This is a stop.
214           return true;
215         }
216       }
217       return false; //unlikely.
218     }
219 
220     /*
221      * Gets a scanner for the next region.  If this.currentRegion != null, then
222      * we will move to the endrow of this.currentRegion.  Else we will get
223      * scanner at the scan.getStartRow().  We will go no further, just tidy
224      * up outstanding scanners, if <code>currentRegion != null</code> and
225      * <code>done</code> is true.
226      * @param nbRows
227      * @param done Server-side says we're done scanning.
228      */
229     private boolean nextScanner(int nbRows, final boolean done)
230     throws IOException {
231       // Close the previous scanner if it's open
232       if (this.callable != null) {
233         this.callable.setClose();
234         this.caller.callWithRetries(callable);
235         this.callable = null;
236       }
237 
238       // Where to start the next scanner
239       byte [] localStartKey;
240 
241       // if we're at end of table, close and return false to stop iterating
242       if (this.currentRegion != null) {
243         byte [] endKey = this.currentRegion.getEndKey();
244         if (endKey == null ||
245             Bytes.equals(endKey, HConstants.EMPTY_BYTE_ARRAY) ||
246             checkScanStopRow(endKey) ||
247             done) {
248           close();
249           if (LOG.isTraceEnabled()) {
250             LOG.trace("Finished " + this.currentRegion);
251           }
252           return false;
253         }
254         localStartKey = endKey;
255         if (LOG.isTraceEnabled()) {
256           LOG.trace("Finished " + this.currentRegion);
257         }
258       } else {
259         localStartKey = this.scan.getStartRow();
260       }
261 
262       if (LOG.isDebugEnabled() && this.currentRegion != null) {
263         // Only worth logging if NOT first region in scan.
264         LOG.debug("Advancing internal scanner to startKey at '" +
265           Bytes.toStringBinary(localStartKey) + "'");
266       }
267       try {
268         callable = getScannerCallable(localStartKey, nbRows);
269         // Open a scanner on the region server starting at the
270         // beginning of the region
271         this.caller.callWithRetries(callable);
272         this.currentRegion = callable.getHRegionInfo();
273         if (this.scanMetrics != null) {
274           this.scanMetrics.countOfRegions.incrementAndGet();
275         }
276       } catch (IOException e) {
277         close();
278         throw e;
279       }
280       return true;
281     }
282 
283     @InterfaceAudience.Private
284     protected ScannerCallable getScannerCallable(byte [] localStartKey,
285         int nbRows) {
286       scan.setStartRow(localStartKey);
287       ScannerCallable s = new ScannerCallable(getConnection(),
288         getTable(), scan, this.scanMetrics);
289       s.setCaching(nbRows);
290       return s;
291     }
292 
293     /**
294      * Publish the scan metrics. For now, we use scan.setAttribute to pass the metrics back to the
295      * application or TableInputFormat.Later, we could push it to other systems. We don't use metrics
296      * framework because it doesn't support multi-instances of the same metrics on the same machine;
297      * for scan/map reduce scenarios, we will have multiple scans running at the same time.
298      *
299      * By default, scan metrics are disabled; if the application wants to collect them, this behavior
300      * can be turned on by calling calling:
301      *
302      * scan.setAttribute(SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE))
303      */
304     protected void writeScanMetrics() {
305       if (this.scanMetrics == null || scanMetricsPublished) {
306         return;
307       }
308       MapReduceProtos.ScanMetrics pScanMetrics = ProtobufUtil.toScanMetrics(scanMetrics);
309       scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_DATA, pScanMetrics.toByteArray());
310       scanMetricsPublished = true;
311     }
312 
313     @Override
314     public Result next() throws IOException {
315       // If the scanner is closed and there's nothing left in the cache, next is a no-op.
316       if (cache.size() == 0 && this.closed) {
317         return null;
318       }
319       if (cache.size() == 0) {
320         Result [] values = null;
321         long remainingResultSize = maxScannerResultSize;
322         int countdown = this.caching;
323         // We need to reset it if it's a new callable that was created
324         // with a countdown in nextScanner
325         callable.setCaching(this.caching);
326         // This flag is set when we want to skip the result returned.  We do
327         // this when we reset scanner because it split under us.
328         boolean skipFirst = false;
329         boolean retryAfterOutOfOrderException  = true;
330         do {
331           try {
332             if (skipFirst) {
333               // Skip only the first row (which was the last row of the last
334               // already-processed batch).
335               callable.setCaching(1);
336               values = this.caller.callWithRetries(callable);
337               callable.setCaching(this.caching);
338               skipFirst = false;
339             }
340             // Server returns a null values if scanning is to stop.  Else,
341             // returns an empty array if scanning is to go on and we've just
342             // exhausted current region.
343             values = this.caller.callWithRetries(callable);
344             if (skipFirst && values != null && values.length == 1) {
345               skipFirst = false; // Already skipped, unset it before scanning again
346               values = this.caller.callWithRetries(callable);
347             }
348             retryAfterOutOfOrderException  = true;
349           } catch (DoNotRetryIOException e) {
350             // DNRIOEs are thrown to make us break out of retries.  Some types of DNRIOEs want us
351             // to reset the scanner and come back in again.
352             if (e instanceof UnknownScannerException) {
353               long timeout = lastNext + scannerTimeout;
354               // If we are over the timeout, throw this exception to the client wrapped in
355               // a ScannerTimeoutException. Else, it's because the region moved and we used the old
356               // id against the new region server; reset the scanner.
357               if (timeout < System.currentTimeMillis()) {
358                 long elapsed = System.currentTimeMillis() - lastNext;
359                 ScannerTimeoutException ex = new ScannerTimeoutException(
360                     elapsed + "ms passed since the last invocation, " +
361                         "timeout is currently set to " + scannerTimeout);
362                 ex.initCause(e);
363                 throw ex;
364               }
365             } else {
366               // If exception is any but the list below throw it back to the client; else setup
367               // the scanner and retry.
368               Throwable cause = e.getCause();
369               if ((cause != null && cause instanceof NotServingRegionException) ||
370                 (cause != null && cause instanceof RegionServerStoppedException) ||
371                 e instanceof OutOfOrderScannerNextException) {
372                 // Pass
373                 // It is easier writing the if loop test as list of what is allowed rather than
374                 // as a list of what is not allowed... so if in here, it means we do not throw.
375               } else {
376                 throw e;
377               }
378             }
379             // Else, its signal from depths of ScannerCallable that we need to reset the scanner.
380             if (this.lastResult != null) {
381               this.scan.setStartRow(this.lastResult.getRow());
382               // Skip first row returned.  We already let it out on previous
383               // invocation.
384               skipFirst = true;
385             }
386             if (e instanceof OutOfOrderScannerNextException) {
387               if (retryAfterOutOfOrderException) {
388                 retryAfterOutOfOrderException = false;
389               } else {
390                 // TODO: Why wrap this in a DNRIOE when it already is a DNRIOE?
391                 throw new DoNotRetryIOException("Failed after retry of " +
392                   "OutOfOrderScannerNextException: was there a rpc timeout?", e);
393               }
394             }
395             // Clear region.
396             this.currentRegion = null;
397             // Set this to zero so we don't try and do an rpc and close on remote server when
398             // the exception we got was UnknownScanner or the Server is going down.
399             callable = null;
400             // This continue will take us to while at end of loop where we will set up new scanner.
401             continue;
402           }
403           long currentTime = System.currentTimeMillis();
404           if (this.scanMetrics != null ) {
405             this.scanMetrics.sumOfMillisSecBetweenNexts.addAndGet(currentTime-lastNext);
406           }
407           lastNext = currentTime;
408           if (values != null && values.length > 0) {
409             for (Result rs : values) {
410               cache.add(rs);
411               for (Cell kv : rs.rawCells()) {
412                 // TODO make method in Cell or CellUtil
413                 remainingResultSize -= KeyValueUtil.ensureKeyValue(kv).heapSize();
414               }
415               countdown--;
416               this.lastResult = rs;
417             }
418           }
419           // Values == null means server-side filter has determined we must STOP
420         } while (remainingResultSize > 0 && countdown > 0 && nextScanner(countdown, values == null));
421       }
422 
423       if (cache.size() > 0) {
424         return cache.poll();
425       }
426 
427       // if we exhausted this scanner before calling close, write out the scan metrics
428       writeScanMetrics();
429       return null;
430     }
431 
432     /**
433      * Get <param>nbRows</param> rows.
434      * How many RPCs are made is determined by the {@link Scan#setCaching(int)}
435      * setting (or hbase.client.scanner.caching in hbase-site.xml).
436      * @param nbRows number of rows to return
437      * @return Between zero and <param>nbRows</param> RowResults.  Scan is done
438      * if returned array is of zero-length (We never return null).
439      * @throws IOException
440      */
441     @Override
442     public Result [] next(int nbRows) throws IOException {
443       // Collect values to be returned here
444       ArrayList<Result> resultSets = new ArrayList<Result>(nbRows);
445       for(int i = 0; i < nbRows; i++) {
446         Result next = next();
447         if (next != null) {
448           resultSets.add(next);
449         } else {
450           break;
451         }
452       }
453       return resultSets.toArray(new Result[resultSets.size()]);
454     }
455 
456     @Override
457     public void close() {
458       if (!scanMetricsPublished) writeScanMetrics();
459       if (callable != null) {
460         callable.setClose();
461         try {
462           this.caller.callWithRetries(callable);
463         } catch (IOException e) {
464           // We used to catch this error, interpret, and rethrow. However, we
465           // have since decided that it's not nice for a scanner's close to
466           // throw exceptions. Chances are it was just an UnknownScanner
467           // exception due to lease time out.
468         }
469         callable = null;
470       }
471       closed = true;
472     }
473 }