/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.UnknownHostException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.RequestConverter;
import org.apache.hadoop.hbase.protobuf.ResponseConverter;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.DNS;

import com.google.protobuf.ServiceException;
import com.google.protobuf.TextFormat;

/**
 * Scanner operations such as create, next, etc.
 * Used by {@link ResultScanner}s made by {@link HTable}. Passed to a retrying caller such as
 * {@link RpcRetryingCaller} so failures are retried.
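 *
 * <p>A minimal usage sketch (hypothetical caller code, not part of this class; the retry
 * plumbing is assumed to come from {@link RpcRetryingCallerFactory}):
 * <pre>{@code
 * ScannerCallable callable =
 *     new ScannerCallable(connection, tableName, scan, scanMetrics, rpcControllerFactory);
 * callable.setCaching(caching);
 * RpcRetryingCaller<Result[]> caller = rpcCallerFactory.<Result[]> newCaller();
 * caller.callWithRetries(callable, scannerTimeout);                   // first call opens the scanner
 * Result[] batch = caller.callWithRetries(callable, scannerTimeout);  // later calls fetch rows
 * callable.setClose();
 * caller.callWithRetries(callable, scannerTimeout);                   // final call closes the scanner
 * }</pre>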
 */
@InterfaceAudience.Private
public class ScannerCallable extends RegionServerCallable<Result[]> {
  public static final String LOG_SCANNER_LATENCY_CUTOFF
    = "hbase.client.log.scanner.latency.cutoff";
  public static final String LOG_SCANNER_ACTIVITY = "hbase.client.log.scanner.activity";

  public static final Log LOG = LogFactory.getLog(ScannerCallable.class);
  protected long scannerId = -1L;
  protected boolean instantiated = false;
  protected boolean closed = false;
  private Scan scan;
  private int caching = 1;
  protected final ClusterConnection cConnection;
  protected ScanMetrics scanMetrics;
  private boolean logScannerActivity = false;
  private int logCutOffLatency = 1000;
  private static String myAddress;
  protected final int id;
  protected boolean serverHasMoreResultsContext;
  protected boolean serverHasMoreResults;
  static {
    try {
      myAddress = DNS.getDefaultHost("default", "default");
    } catch (UnknownHostException uhe) {
      LOG.error("cannot determine my address", uhe);
    }
  }

  // indicate if it is a remote server call
  protected boolean isRegionServerRemote = true;
  private long nextCallSeq = 0;
  protected RpcControllerFactory controllerFactory;

  /**
   * @param connection which connection
   * @param tableName table callable is on
   * @param scan the scan to execute
   * @param scanMetrics the ScanMetrics to use; if null, ScannerCallable won't collect metrics
   * @param rpcControllerFactory factory to use when creating
   *        {@link com.google.protobuf.RpcController}
   */
  public ScannerCallable (ClusterConnection connection, TableName tableName, Scan scan,
      ScanMetrics scanMetrics, RpcControllerFactory rpcControllerFactory) {
    this(connection, tableName, scan, scanMetrics, rpcControllerFactory, 0);
  }
  /**
   * @param connection which connection
   * @param tableName table callable is on
   * @param scan the scan to execute
   * @param scanMetrics the ScanMetrics to use; if null, ScannerCallable won't collect metrics
   * @param rpcControllerFactory factory to use when creating
   *        {@link com.google.protobuf.RpcController}
   * @param id the replicaId
   */
  public ScannerCallable (ClusterConnection connection, TableName tableName, Scan scan,
      ScanMetrics scanMetrics, RpcControllerFactory rpcControllerFactory, int id) {
    super(connection, tableName, scan.getStartRow());
    this.id = id;
    this.cConnection = connection;
    this.scan = scan;
    this.scanMetrics = scanMetrics;
    Configuration conf = connection.getConfiguration();
    logScannerActivity = conf.getBoolean(LOG_SCANNER_ACTIVITY, false);
    logCutOffLatency = conf.getInt(LOG_SCANNER_LATENCY_CUTOFF, 1000);
    this.controllerFactory = rpcControllerFactory;
  }

  /**
   * @param reload force reload of server location
   * @throws IOException
   */
  @Override
  public void prepare(boolean reload) throws IOException {
    if (Thread.interrupted()) {
      throw new InterruptedIOException();
    }
    RegionLocations rl = RpcRetryingCallerWithReadReplicas.getRegionLocations(!reload,
        id, getConnection(), getTableName(), getRow());
    location = id < rl.size() ? rl.getRegionLocation(id) : null;
    if (location == null || location.getServerName() == null) {
      // With this exception, there will be a retry. The location can be null for a replica
      //  when the table is created or after a split.
      throw new HBaseIOException("There is no location for replica id #" + id);
    }
    ServerName dest = location.getServerName();
    setStub(super.getConnection().getClient(dest));
    if (!instantiated || reload) {
      checkIfRegionServerIsRemote();
      instantiated = true;
    }

    // Track how often we retry.
    // HConnectionManager will call instantiateServer with reload==true
    // if and only if it is retrying.
    if (reload && this.scanMetrics != null) {
      this.scanMetrics.countOfRPCRetries.incrementAndGet();
      if (isRegionServerRemote) {
        this.scanMetrics.countOfRemoteRPCRetries.incrementAndGet();
      }
    }
  }

  /**
   * Compare the local machine's hostname with the region server's hostname
   * to decide whether the HBase client is connecting to a remote region server.
   */
  protected void checkIfRegionServerIsRemote() {
    if (getLocation().getHostname().equalsIgnoreCase(myAddress)) {
      isRegionServerRemote = false;
    } else {
      isRegionServerRemote = true;
    }
  }

  @Override
  public Result [] call(int callTimeout) throws IOException {
    if (Thread.interrupted()) {
      throw new InterruptedIOException();
    }
    if (closed) {
      if (scannerId != -1) {
        close();
      }
    } else {
      if (scannerId == -1L) {
        this.scannerId = openScanner();
      } else {
        Result [] rrs = null;
        ScanRequest request = null;
        try {
          incRPCcallsMetrics();
          request = RequestConverter.buildScanRequest(scannerId, caching, false, nextCallSeq);
          ScanResponse response = null;
          PayloadCarryingRpcController controller = controllerFactory.newController();
          controller.setPriority(getTableName());
          controller.setCallTimeout(callTimeout);
          try {
            response = getStub().scan(controller, request);
            // Client and RS maintain a nextCallSeq number during the scan. Every next() call
            // from client to server increments this number on both sides. The client passes it
            // along with the request, and on the RS side the incoming nextCallSeq is matched
            // against the server's own. In case of a timeout, the increment on the client side
            // should not happen. If the server had already finished fetching the next batch of
            // data, the nextCallSeq numbers will not match and the server will throw
            // OutOfOrderScannerNextException; the client then reopens the scanner with startrow
            // set to the last successfully retrieved row.
            // See HBASE-5974
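            // A concrete illustration of the sequence check described above (assumed numbers,
            // for exposition only):
            //   - client nextCallSeq=5, server nextCallSeq=5: next() succeeds, both advance to 6.
            //   - the next RPC times out after the server has already fetched and advanced to 7;
            //     the client stays at 6 and retries with 6, the server expects 7 and replies with
            //     OutOfOrderScannerNextException, so the client reopens the scanner instead of
            //     silently skipping the batch it never received.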
            nextCallSeq++;
            long timestamp = System.currentTimeMillis();
            // Results are returned via controller
            CellScanner cellScanner = controller.cellScanner();
            rrs = ResponseConverter.getResults(cellScanner, response);
            if (logScannerActivity) {
              long now = System.currentTimeMillis();
              if (now - timestamp > logCutOffLatency) {
                int rows = rrs == null ? 0 : rrs.length;
                LOG.info("Took " + (now-timestamp) + "ms to fetch "
                  + rows + " rows from scanner=" + scannerId);
              }
            }
            // moreResults is only used for the case where a filter exhausts all elements
            if (response.hasMoreResults() && !response.getMoreResults()) {
              scannerId = -1L;
              closed = true;
              // Implied that no results were returned back, either.
              return null;
            }
            // moreResultsInRegion explicitly defines when a RS may choose to terminate a batch due
            // to size or quantity of results in the response.
            if (response.hasMoreResultsInRegion()) {
              // Set what the RS said
              setHasMoreResultsContext(true);
              setServerHasMoreResults(response.getMoreResultsInRegion());
            } else {
              // Server didn't respond whether it has more results or not.
              setHasMoreResultsContext(false);
            }
          } catch (ServiceException se) {
            throw ProtobufUtil.getRemoteException(se);
          }
          updateResultsMetrics(rrs);
        } catch (IOException e) {
          if (logScannerActivity) {
            LOG.info("Got exception making request " + TextFormat.shortDebugString(request)
              + " to " + getLocation(), e);
          }
          IOException ioe = e;
          if (e instanceof RemoteException) {
            ioe = RemoteExceptionHandler.decodeRemoteException((RemoteException)e);
          }
          if (logScannerActivity && (ioe instanceof UnknownScannerException)) {
            try {
              HRegionLocation location =
                getConnection().relocateRegion(getTableName(), scan.getStartRow());
              LOG.info("Scanner=" + scannerId
                + " expired, current region location is " + location.toString());
            } catch (Throwable t) {
              LOG.info("Failed to relocate region", t);
            }
          }
          // The below conversion of exceptions into DoNotRetryExceptions is a little strange.
          // Why not just have these exceptions implement DNRIOE, you ask? Well, usually we want
          // ServerCallable#withRetries to just retry when it gets these exceptions. Here in
          // a scan, when doing a next in particular, we want to break out and get the scanner to
          // reset itself up again. Throwing a DNRIOE is how we signal this to happen (it's ugly,
          // yeah, and hard to follow, and in need of a refactor).
          if (ioe instanceof NotServingRegionException) {
            // Throw a DNRE so that we break out of cycle of calling NSRE
            // when what we need is to open scanner against new location.
            // Attach NSRE to signal client that it needs to re-setup scanner.
            if (this.scanMetrics != null) {
              this.scanMetrics.countOfNSRE.incrementAndGet();
            }
            throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
          } else if (ioe instanceof RegionServerStoppedException) {
            // Throw a DNRE so that we break out of cycle of the retries and instead go and
            // open scanner against new location.
            throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
          } else {
            // The outer layers will retry
            throw ioe;
          }
        }
        return rrs;
      }
    }
    return null;
  }

  private void incRPCcallsMetrics() {
    if (this.scanMetrics == null) {
      return;
    }
    this.scanMetrics.countOfRPCcalls.incrementAndGet();
    if (isRegionServerRemote) {
      this.scanMetrics.countOfRemoteRPCcalls.incrementAndGet();
    }
  }

  private void updateResultsMetrics(Result[] rrs) {
    if (this.scanMetrics == null || rrs == null || rrs.length == 0) {
      return;
    }
    long resultSize = 0;
    for (Result rr : rrs) {
      for (Cell cell : rr.rawCells()) {
        resultSize += CellUtil.estimatedSerializedSizeOf(cell);
      }
    }
    this.scanMetrics.countOfBytesInResults.addAndGet(resultSize);
    if (isRegionServerRemote) {
      this.scanMetrics.countOfBytesInRemoteResults.addAndGet(resultSize);
    }
  }

  private void close() {
    if (this.scannerId == -1L) {
      return;
    }
    try {
      incRPCcallsMetrics();
      ScanRequest request =
        RequestConverter.buildScanRequest(this.scannerId, 0, true);
      try {
        getStub().scan(null, request);
      } catch (ServiceException se) {
        throw ProtobufUtil.getRemoteException(se);
      }
    } catch (IOException e) {
      LOG.warn("Ignore, probably already closed", e);
    }
    this.scannerId = -1L;
  }

  protected long openScanner() throws IOException {
    incRPCcallsMetrics();
    ScanRequest request =
      RequestConverter.buildScanRequest(
        getLocation().getRegionInfo().getRegionName(),
        this.scan, 0, false);
    try {
      ScanResponse response = getStub().scan(null, request);
      long id = response.getScannerId();
      if (logScannerActivity) {
        LOG.info("Open scanner=" + id + " for scan=" + scan.toString()
          + " on region " + getLocation().toString());
      }
      return id;
    } catch (ServiceException se) {
      throw ProtobufUtil.getRemoteException(se);
    }
  }

  protected Scan getScan() {
    return scan;
  }

  /**
   * Call this when the next invocation of call should close the scanner
   */
  public void setClose() {
    this.closed = true;
  }

  /**
   * @return the HRegionInfo for the current region
   */
  @Override
  public HRegionInfo getHRegionInfo() {
    if (!instantiated) {
      return null;
    }
    return getLocation().getRegionInfo();
  }

  /**
   * Get the number of rows that will be fetched on the next call to next()
   * @return the number of rows for caching
   */
  public int getCaching() {
    return caching;
  }

  @Override
  public ClusterConnection getConnection() {
    return cConnection;
  }

  /**
   * Set the number of rows that will be fetched on the next call to next()
   * @param caching the number of rows for caching
   */
  public void setCaching(int caching) {
    this.caching = caching;
  }

  public ScannerCallable getScannerCallableForReplica(int id) {
    ScannerCallable s = new ScannerCallable(this.getConnection(), this.tableName,
        this.getScan(), this.scanMetrics, controllerFactory, id);
    s.setCaching(this.caching);
    return s;
  }

  /**
   * Should the client attempt to fetch more results from this region?
   * @return True if the client should attempt to fetch more results, false otherwise.
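   *
   * <p>For example, a caller might combine this with {@link #hasMoreResultsContext()} (an
   * assumed caller-side pattern, not code from this class):
   * <pre>{@code
   * if (callable.hasMoreResultsContext() && !callable.getServerHasMoreResults()) {
   *   // The region server said this region is exhausted; move on to the next region
   *   // instead of issuing another scan RPC against this one.
   * }
   * }</pre>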
   */
  protected boolean getServerHasMoreResults() {
    assert serverHasMoreResultsContext;
    return this.serverHasMoreResults;
  }

  protected void setServerHasMoreResults(boolean serverHasMoreResults) {
    this.serverHasMoreResults = serverHasMoreResults;
  }

  /**
   * Did the server respond with information about whether more results might exist?
   * Older server versions are not guaranteed to provide this information.
   * @return True if the server responded with information about more results.
   */
  protected boolean hasMoreResultsContext() {
    return serverHasMoreResultsContext;
  }

  protected void setHasMoreResultsContext(boolean serverHasMoreResultsContext) {
    this.serverHasMoreResultsContext = serverHasMoreResultsContext;
  }
}