View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.client;
21  
22  import java.io.IOException;
23  import java.io.InterruptedIOException;
24  import java.lang.reflect.UndeclaredThrowableException;
25  import java.net.SocketTimeoutException;
26  import java.util.ArrayList;
27  import java.util.List;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.classification.InterfaceAudience;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.hbase.DoNotRetryIOException;
34  import org.apache.hadoop.hbase.HConstants;
35  import org.apache.hadoop.hbase.ipc.RpcClient;
36  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
37  import org.apache.hadoop.hbase.util.ExceptionUtil;
38  import org.apache.hadoop.ipc.RemoteException;
39  
40  import com.google.protobuf.ServiceException;
41  
42  /**
43   * Runs an rpc'ing {@link RetryingCallable}. Sets into rpc client
44   * threadlocal outstanding timeouts as so we don't persist too much.
45   * Dynamic rather than static so can set the generic appropriately.
46   */
47  @InterfaceAudience.Private
48  @edu.umd.cs.findbugs.annotations.SuppressWarnings
49      (value = "IS2_INCONSISTENT_SYNC", justification = "na")
50  public class RpcRetryingCaller<T> {
51    static final Log LOG = LogFactory.getLog(RpcRetryingCaller.class);
52    /**
53     * Timeout for the call including retries
54     */
55    private int callTimeout;
56    /**
57     * When we started making calls.
58     */
59    private long globalStartTime;
60    /**
61     * Start and end times for a single call.
62     */
63    private final static int MIN_RPC_TIMEOUT = 2000;
64  
65    private final long pause;
66    private final int retries;
67  
68    public RpcRetryingCaller(Configuration conf) {
69      this.pause = conf.getLong(HConstants.HBASE_CLIENT_PAUSE,
70        HConstants.DEFAULT_HBASE_CLIENT_PAUSE);
71      this.retries =
72          conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
73            HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
74      this.callTimeout = conf.getInt(
75          HConstants.HBASE_CLIENT_OPERATION_TIMEOUT,
76          HConstants.DEFAULT_HBASE_CLIENT_OPERATION_TIMEOUT);
77    }
78  
79    private void beforeCall() {
80      int remaining = (int)(callTimeout -
81        (EnvironmentEdgeManager.currentTimeMillis() - this.globalStartTime));
82      if (remaining < MIN_RPC_TIMEOUT) {
83        // If there is no time left, we're trying anyway. It's too late.
84        // 0 means no timeout, and it's not the intent here. So we secure both cases by
85        // resetting to the minimum.
86        remaining = MIN_RPC_TIMEOUT;
87      }
88      RpcClient.setRpcTimeout(remaining);
89    }
90  
91    private void afterCall() {
92      RpcClient.resetRpcTimeout();
93    }
94  
95    public synchronized T callWithRetries(RetryingCallable<T> callable) throws IOException,
96        RuntimeException {
97      return callWithRetries(callable, HConstants.DEFAULT_HBASE_CLIENT_OPERATION_TIMEOUT);
98    }
99  
100   /**
101    * Retries if invocation fails.
102    * @param callTimeout Timeout for this call
103    * @param callable The {@link RetryingCallable} to run.
104    * @return an object of type T
105    * @throws IOException if a remote or network exception occurs
106    * @throws RuntimeException other unspecified error
107    */
108   @edu.umd.cs.findbugs.annotations.SuppressWarnings
109       (value = "SWL_SLEEP_WITH_LOCK_HELD", justification = "na")
110   public synchronized T callWithRetries(RetryingCallable<T> callable, int callTimeout)
111   throws IOException, RuntimeException {
112     this.callTimeout = callTimeout;
113     List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions =
114       new ArrayList<RetriesExhaustedException.ThrowableWithExtraContext>();
115     this.globalStartTime = EnvironmentEdgeManager.currentTimeMillis();
116     for (int tries = 0;; tries++) {
117       long expectedSleep = 0;
118       try {
119         beforeCall();
120         callable.prepare(tries != 0); // if called with false, check table status on ZK
121         return callable.call();
122       } catch (Throwable t) {
123         if (LOG.isTraceEnabled()) {
124           LOG.trace("Call exception, tries=" + tries + ", retries=" + retries + ", retryTime=" +
125               (EnvironmentEdgeManager.currentTimeMillis() - this.globalStartTime) + "ms", t);
126         }
127         // translateException throws exception when should not retry: i.e. when request is bad.
128         t = translateException(t);
129         callable.throwable(t, retries != 1);
130         RetriesExhaustedException.ThrowableWithExtraContext qt =
131             new RetriesExhaustedException.ThrowableWithExtraContext(t,
132                 EnvironmentEdgeManager.currentTimeMillis(), toString());
133         exceptions.add(qt);
134         ExceptionUtil.rethrowIfInterrupt(t);
135         if (tries >= retries - 1) {
136           throw new RetriesExhaustedException(tries, exceptions);
137         }
138         // If the server is dead, we need to wait a little before retrying, to give
139         //  a chance to the regions to be
140         // tries hasn't been bumped up yet so we use "tries + 1" to get right pause time
141         expectedSleep = callable.sleep(pause, tries + 1);
142 
143         // If, after the planned sleep, there won't be enough time left, we stop now.
144         long duration = singleCallDuration(expectedSleep);
145         if (duration > this.callTimeout) {
146           String msg = "callTimeout=" + this.callTimeout + ", callDuration=" + duration +
147               ": " + callable.getExceptionMessageAdditionalDetail();
148           throw (SocketTimeoutException)(new SocketTimeoutException(msg).initCause(t));
149         }
150       } finally {
151         afterCall();
152       }
153       try {
154         Thread.sleep(expectedSleep);
155       } catch (InterruptedException e) {
156         throw new InterruptedIOException("Interrupted after " + tries + " tries  on " + retries);
157       }
158     }
159   }
160 
161   /**
162    * @param expectedSleep
163    * @return Calculate how long a single call took
164    */
165   private long singleCallDuration(final long expectedSleep) {
166     return (EnvironmentEdgeManager.currentTimeMillis() - this.globalStartTime)
167       + MIN_RPC_TIMEOUT + expectedSleep;
168   }
169 
170   /**
171    * Call the server once only.
172    * {@link RetryingCallable} has a strange shape so we can do retrys.  Use this invocation if you
173    * want to do a single call only (A call to {@link RetryingCallable#call()} will not likely
174    * succeed).
175    * @return an object of type T
176    * @throws IOException if a remote or network exception occurs
177    * @throws RuntimeException other unspecified error
178    */
179   public T callWithoutRetries(RetryingCallable<T> callable)
180   throws IOException, RuntimeException {
181     // The code of this method should be shared with withRetries.
182     this.globalStartTime = EnvironmentEdgeManager.currentTimeMillis();
183     try {
184       beforeCall();
185       callable.prepare(false);
186       return callable.call();
187     } catch (Throwable t) {
188       Throwable t2 = translateException(t);
189       ExceptionUtil.rethrowIfInterrupt(t2);
190       // It would be nice to clear the location cache here.
191       if (t2 instanceof IOException) {
192         throw (IOException)t2;
193       } else {
194         throw new RuntimeException(t2);
195       }
196     } finally {
197       afterCall();
198     }
199   }
200 
201   /**
202    * Get the good or the remote exception if any, throws the DoNotRetryIOException.
203    * @param t the throwable to analyze
204    * @return the translated exception, if it's not a DoNotRetryIOException
205    * @throws DoNotRetryIOException - if we find it, we throw it instead of translating.
206    */
207   static Throwable translateException(Throwable t) throws DoNotRetryIOException {
208     if (t instanceof UndeclaredThrowableException) {
209       if (t.getCause() != null) {
210         t = t.getCause();
211       }
212     }
213     if (t instanceof RemoteException) {
214       t = ((RemoteException)t).unwrapRemoteException();
215     }
216     if (t instanceof ServiceException) {
217       ServiceException se = (ServiceException)t;
218       Throwable cause = se.getCause();
219       if (cause != null && cause instanceof DoNotRetryIOException) {
220         throw (DoNotRetryIOException)cause;
221       }
222       // Don't let ServiceException out; its rpc specific.
223       t = cause;
224       // t could be a RemoteException so go aaround again.
225       translateException(t);
226     } else if (t instanceof DoNotRetryIOException) {
227       throw (DoNotRetryIOException)t;
228     }
229     return t;
230   }
231 }