View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.client;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.LinkedList;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.conf.Configuration;
27  import org.apache.hadoop.hbase.DoNotRetryIOException;
28  import org.apache.hadoop.hbase.HConstants;
29  import org.apache.hadoop.hbase.HRegionInfo;
30  import org.apache.hadoop.hbase.KeyValue;
31  import org.apache.hadoop.hbase.NotServingRegionException;
32  import org.apache.hadoop.hbase.UnknownScannerException;
33  import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
34  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
35  import org.apache.hadoop.hbase.util.Bytes;
36  import org.apache.hadoop.io.DataOutputBuffer;
37  
38  /**
39   * Implements the scanner interface for the HBase client.
40   * If there are multiple regions in a table, this scanner will iterate
41   * through them all.
42   */
43  public class ClientScanner extends AbstractClientScanner {
44      private final Log LOG = LogFactory.getLog(this.getClass());
45      protected Scan scan;
46      protected boolean closed = false;
47      // Current region scanner is against.  Gets cleared if current region goes
48      // wonky: e.g. if it splits on us.
49      protected HRegionInfo currentRegion = null;
50      private ScannerCallable callable = null;
51      protected final LinkedList<Result> cache = new LinkedList<Result>();
52      protected final int caching;
53      protected long lastNext;
54      // Keep lastResult returned successfully in case we have to reset scanner.
55      protected Result lastResult = null;
56      protected ScanMetrics scanMetrics = null;
57      protected final long maxScannerResultSize;
58      private final HConnection connection;
59      private final byte[] tableName;
60      private final int scannerTimeout;
61      private boolean scanMetricsPublished = false;
62      
63      /**
64       * Create a new ClientScanner for the specified table. An HConnection will be
65       * retrieved using the passed Configuration.
66       * Note that the passed {@link Scan}'s start row maybe changed changed. 
67       * 
68       * @param conf The {@link Configuration} to use.
69       * @param scan {@link Scan} to use in this scanner
70       * @param tableName The table that we wish to scan
71       * @throws IOException
72       */
73      public ClientScanner(final Configuration conf, final Scan scan,
74          final byte[] tableName) throws IOException {
75        this(conf, scan, tableName, HConnectionManager.getConnection(conf));
76      }
77   
78      /**
79       * Create a new ClientScanner for the specified table
80       * Note that the passed {@link Scan}'s start row maybe changed changed. 
81       * 
82       * @param conf The {@link Configuration} to use.
83       * @param scan {@link Scan} to use in this scanner
84       * @param tableName The table that we wish to scan
85       * @param connection Connection identifying the cluster
86       * @throws IOException
87       */
88      public ClientScanner(final Configuration conf, final Scan scan,
89        final byte[] tableName, HConnection connection) throws IOException {
90        if (LOG.isDebugEnabled()) {
91          LOG.debug("Creating scanner over "
92              + Bytes.toString(tableName)
93              + " starting at key '" + Bytes.toStringBinary(scan.getStartRow()) + "'");
94        }
95        this.scan = scan;
96        this.tableName = tableName;
97        this.lastNext = System.currentTimeMillis();
98        this.connection = connection;
99        this.maxScannerResultSize = conf.getLong(
100           HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY,
101           HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE);
102       this.scannerTimeout = (int) conf.getLong(
103           HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY,
104           HConstants.DEFAULT_HBASE_REGIONSERVER_LEASE_PERIOD);
105 
106       // check if application wants to collect scan metrics
107       byte[] enableMetrics = scan.getAttribute(
108         Scan.SCAN_ATTRIBUTES_METRICS_ENABLE);
109       if (enableMetrics != null && Bytes.toBoolean(enableMetrics)) {
110         scanMetrics = new ScanMetrics();
111       }
112 
113       // Use the caching from the Scan.  If not set, use the default cache setting for this table.
114       if (this.scan.getCaching() > 0) {
115         this.caching = this.scan.getCaching();
116       } else {
117         this.caching = conf.getInt("hbase.client.scanner.caching", 1);
118       }
119 
120       // initialize the scanner
121       initializeScannerInConstruction();
122     }
123     
124     protected void initializeScannerInConstruction() throws IOException{
125       // initialize the scanner
126       nextScanner(this.caching, false);
127     }
128 
129     protected HConnection getConnection() {
130       return this.connection;
131     }
132 
133     protected byte[] getTableName() {
134       return this.tableName;
135     }
136 
137     protected Scan getScan() {
138       return scan;
139     }
140 
141     protected long getTimestamp() {
142       return lastNext;
143     }
144 
145     // returns true if the passed region endKey
146     protected boolean checkScanStopRow(final byte [] endKey) {
147       if (this.scan.getStopRow().length > 0) {
148         // there is a stop row, check to see if we are past it.
149         byte [] stopRow = scan.getStopRow();
150         int cmp = Bytes.compareTo(stopRow, 0, stopRow.length,
151           endKey, 0, endKey.length);
152         if (cmp <= 0) {
153           // stopRow <= endKey (endKey is equals to or larger than stopRow)
154           // This is a stop.
155           return true;
156         }
157       }
158       return false; //unlikely.
159     }
160 
161     /*
162      * Gets a scanner for the next region.  If this.currentRegion != null, then
163      * we will move to the endrow of this.currentRegion.  Else we will get
164      * scanner at the scan.getStartRow().  We will go no further, just tidy
165      * up outstanding scanners, if <code>currentRegion != null</code> and
166      * <code>done</code> is true.
167      * @param nbRows
168      * @param done Server-side says we're done scanning.
169      */
170     private boolean nextScanner(int nbRows, final boolean done)
171     throws IOException {
172       // Close the previous scanner if it's open
173       if (this.callable != null) {
174         this.callable.setClose();
175         callable.withRetries();
176         this.callable = null;
177       }
178 
179       // Where to start the next scanner
180       byte [] localStartKey;
181 
182       // if we're at end of table, close and return false to stop iterating
183       if (this.currentRegion != null) {
184         byte [] endKey = this.currentRegion.getEndKey();
185         if (endKey == null ||
186             Bytes.equals(endKey, HConstants.EMPTY_BYTE_ARRAY) ||
187             checkScanStopRow(endKey) ||
188             done) {
189           close();
190           if (LOG.isDebugEnabled()) {
191             LOG.debug("Finished with scanning at " + this.currentRegion);
192           }
193           return false;
194         }
195         localStartKey = endKey;
196         if (LOG.isDebugEnabled()) {
197           LOG.debug("Finished with region " + this.currentRegion);
198         }
199       } else {
200         localStartKey = this.scan.getStartRow();
201       }
202 
203       if (LOG.isDebugEnabled()) {
204         LOG.debug("Advancing internal scanner to startKey at '" +
205           Bytes.toStringBinary(localStartKey) + "'");
206       }
207       try {
208         callable = getScannerCallable(localStartKey, nbRows);
209         // Open a scanner on the region server starting at the
210         // beginning of the region
211         callable.withRetries();
212         this.currentRegion = callable.getHRegionInfo();
213         if (this.scanMetrics != null) {
214           this.scanMetrics.countOfRegions.inc();
215         }
216       } catch (IOException e) {
217         close();
218         throw e;
219       }
220       return true;
221     }
222 
223     protected ScannerCallable getScannerCallable(byte [] localStartKey,
224         int nbRows) {
225       scan.setStartRow(localStartKey);
226       ScannerCallable s = new ScannerCallable(getConnection(),
227         getTableName(), scan, this.scanMetrics);
228       s.setCaching(nbRows);
229       return s;
230     }
231 
232     /**
233      * Publish the scan metrics. For now, we use scan.setAttribute to pass the metrics back to the
234      * application or TableInputFormat.Later, we could push it to other systems. We don't use metrics
235      * framework because it doesn't support multi-instances of the same metrics on the same machine;
236      * for scan/map reduce scenarios, we will have multiple scans running at the same time.
237      *
238      * By default, scan metrics are disabled; if the application wants to collect them, this behavior
239      * can be turned on by calling calling:
240      *
241      * scan.setAttribute(SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE))
242      */
243     protected void writeScanMetrics() throws IOException {
244       if (this.scanMetrics == null || scanMetricsPublished) {
245         return;
246       }
247       final DataOutputBuffer d = new DataOutputBuffer();
248       scanMetrics.write(d);
249       scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_DATA, d.getData());
250       scanMetricsPublished = true;
251     }
252 
253     public Result next() throws IOException {
254       // If the scanner is closed and there's nothing left in the cache, next is a no-op.
255       if (cache.size() == 0 && this.closed) {
256         return null;
257       }
258       if (cache.size() == 0) {
259         Result [] values = null;
260         long remainingResultSize = maxScannerResultSize;
261         int countdown = this.caching;
262         // We need to reset it if it's a new callable that was created
263         // with a countdown in nextScanner
264         callable.setCaching(this.caching);
265         // This flag is set when we want to skip the result returned.  We do
266         // this when we reset scanner because it split under us.
267         boolean skipFirst = false;
268         do {
269           try {
270             if (skipFirst) {
271               // Skip only the first row (which was the last row of the last
272               // already-processed batch).
273               callable.setCaching(1);
274               values = callable.withRetries();
275               callable.setCaching(this.caching);
276               skipFirst = false;
277             }
278             // Server returns a null values if scanning is to stop.  Else,
279             // returns an empty array if scanning is to go on and we've just
280             // exhausted current region.
281             values = callable.withRetries();
282           } catch (DoNotRetryIOException e) {
283             if (e instanceof UnknownScannerException) {
284               long timeout = lastNext + scannerTimeout;
285               // If we are over the timeout, throw this exception to the client
286               // Else, it's because the region moved and we used the old id
287               // against the new region server; reset the scanner.
288               if (timeout < System.currentTimeMillis()) {
289                 long elapsed = System.currentTimeMillis() - lastNext;
290                 ScannerTimeoutException ex = new ScannerTimeoutException(
291                     elapsed + "ms passed since the last invocation, " +
292                         "timeout is currently set to " + scannerTimeout);
293                 ex.initCause(e);
294                 throw ex;
295               }
296             } else {
297               Throwable cause = e.getCause();
298               if (cause == null || (!(cause instanceof NotServingRegionException)
299                   && !(cause instanceof RegionServerStoppedException))) {
300                 throw e;
301               }
302             }
303             // Else, its signal from depths of ScannerCallable that we got an
304             // NSRE on a next and that we need to reset the scanner.
305             if (this.lastResult != null) {
306               this.scan.setStartRow(this.lastResult.getRow());
307               // Skip first row returned.  We already let it out on previous
308               // invocation.
309               skipFirst = true;
310             }
311             // Clear region
312             this.currentRegion = null;
313             continue;
314           }
315           long currentTime = System.currentTimeMillis();
316           if (this.scanMetrics != null ) {
317             this.scanMetrics.sumOfMillisSecBetweenNexts.inc(currentTime-lastNext);
318           }
319           lastNext = currentTime;
320           if (values != null && values.length > 0) {
321             for (Result rs : values) {
322               cache.add(rs);
323               for (KeyValue kv : rs.raw()) {
324                   remainingResultSize -= kv.heapSize();
325               }
326               countdown--;
327               this.lastResult = rs;
328             }
329           }
330           // Values == null means server-side filter has determined we must STOP
331         } while (remainingResultSize > 0 && countdown > 0 && nextScanner(countdown, values == null));
332       }
333 
334       if (cache.size() > 0) {
335         return cache.poll();
336       }
337 
338       // if we exhausted this scanner before calling close, write out the scan metrics
339       writeScanMetrics();
340       return null;
341     }
342 
343     /**
344      * Get <param>nbRows</param> rows.
345      * How many RPCs are made is determined by the {@link Scan#setCaching(int)}
346      * setting (or hbase.client.scanner.caching in hbase-site.xml).
347      * @param nbRows number of rows to return
348      * @return Between zero and <param>nbRows</param> RowResults.  Scan is done
349      * if returned array is of zero-length (We never return null).
350      * @throws IOException
351      */
352     public Result [] next(int nbRows) throws IOException {
353       // Collect values to be returned here
354       ArrayList<Result> resultSets = new ArrayList<Result>(nbRows);
355       for(int i = 0; i < nbRows; i++) {
356         Result next = next();
357         if (next != null) {
358           resultSets.add(next);
359         } else {
360           break;
361         }
362       }
363       return resultSets.toArray(new Result[resultSets.size()]);
364     }
365 
366     public void close() {
367       if (!scanMetricsPublished){
368         try {
369           writeScanMetrics();
370         } catch (IOException e) {
371         }
372       }
373       
374       if (callable != null) {
375         callable.setClose();
376         try {
377           callable.withRetries();
378         } catch (IOException e) {
379           // We used to catch this error, interpret, and rethrow. However, we
380           // have since decided that it's not nice for a scanner's close to
381           // throw exceptions. Chances are it was just an UnknownScanner
382           // exception due to lease time out.
383         }
384         callable = null;
385       }
386       closed = true;
387     }
388 }