View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  
21  package org.apache.hadoop.hbase.client;
22  
23  
24  import com.google.protobuf.ServiceException;
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.hbase.DoNotRetryIOException;
29  import org.apache.hadoop.hbase.HBaseIOException;
30  import org.apache.hadoop.hbase.HRegionLocation;
31  import org.apache.hadoop.hbase.RegionLocations;
32  import org.apache.hadoop.hbase.ServerName;
33  import org.apache.hadoop.hbase.TableName;
34  import org.apache.hadoop.hbase.classification.InterfaceAudience;
35  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
36  import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
37  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
38  import org.apache.hadoop.hbase.protobuf.RequestConverter;
39  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
40  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
41  
42  import org.apache.htrace.Trace;
43  
44  import java.io.IOException;
45  import java.io.InterruptedIOException;
46  import java.util.Collections;
47  import java.util.List;
48  import java.util.concurrent.CancellationException;
49  import java.util.concurrent.ExecutionException;
50  import java.util.concurrent.Executor;
51  import java.util.concurrent.ExecutorService;
52  import java.util.concurrent.Future;
53  import java.util.concurrent.RunnableFuture;
54  import java.util.concurrent.TimeUnit;
55  import java.util.concurrent.TimeoutException;
56  
57  /**
58   * Caller that goes to replica if the primary region does not answer within a configurable
59   * timeout. If the timeout is reached, it calls all the secondary replicas, and returns
60   * the first answer. If the answer comes from one of the secondary replicas, it will
61   * be marked as stale.
62   */
63  @InterfaceAudience.Private
64  public class RpcRetryingCallerWithReadReplicas {
65    static final Log LOG = LogFactory.getLog(RpcRetryingCallerWithReadReplicas.class);
66  
67    protected final ExecutorService pool;
68    protected final ClusterConnection cConnection;
69    protected final Configuration conf;
70    protected final Get get;
71    protected final TableName tableName;
72    protected final int timeBeforeReplicas;
73    private final int callTimeout;
74    private final int retries;
75    private final RpcControllerFactory rpcControllerFactory;
76    private final RpcRetryingCallerFactory rpcRetryingCallerFactory;
77  
78    public RpcRetryingCallerWithReadReplicas(
79        RpcControllerFactory rpcControllerFactory, TableName tableName,
80        ClusterConnection cConnection, final Get get,
81        ExecutorService pool, int retries, int callTimeout,
82        int timeBeforeReplicas) {
83      this.rpcControllerFactory = rpcControllerFactory;
84      this.tableName = tableName;
85      this.cConnection = cConnection;
86      this.conf = cConnection.getConfiguration();
87      this.get = get;
88      this.pool = pool;
89      this.retries = retries;
90      this.callTimeout = callTimeout;
91      this.timeBeforeReplicas = timeBeforeReplicas;
92      this.rpcRetryingCallerFactory = new RpcRetryingCallerFactory(conf);
93    }
94  
95    /**
96     * A RegionServerCallable that takes into account the replicas, i.e.
97     * - the call can be on any replica
98     * - we need to stop retrying when the call is completed
99     * - we can be interrupted
100    */
101   class ReplicaRegionServerCallable extends RegionServerCallable<Result> {
102     final int id;
103     private final PayloadCarryingRpcController controller;
104 
105     public ReplicaRegionServerCallable(int id, HRegionLocation location) {
106       super(RpcRetryingCallerWithReadReplicas.this.cConnection,
107           RpcRetryingCallerWithReadReplicas.this.tableName, get.getRow());
108       this.id = id;
109       this.location = location;
110       this.controller = rpcControllerFactory.newController();
111       controller.setPriority(tableName);
112     }
113 
114     public void startCancel() {
115       controller.startCancel();
116     }
117 
118     /**
119      * Two responsibilities
120      * - if the call is already completed (by another replica) stops the retries.
121      * - set the location to the right region, depending on the replica.
122      */
123     @Override
124     public void prepare(final boolean reload) throws IOException {
125       if (controller.isCanceled()) return;
126 
127       if (Thread.interrupted()) {
128         throw new InterruptedIOException();
129       }
130 
131       if (reload || location == null) {
132         RegionLocations rl = getRegionLocations(false, id, cConnection, tableName, get.getRow());
133         location = id < rl.size() ? rl.getRegionLocation(id) : null;
134       }
135 
136       if (location == null || location.getServerName() == null) {
137         // With this exception, there will be a retry. The location can be null for a replica
138         //  when the table is created or after a split.
139         throw new HBaseIOException("There is no location for replica id #" + id);
140       }
141 
142       ServerName dest = location.getServerName();
143 
144       setStub(cConnection.getClient(dest));
145     }
146 
147     @Override
148     public Result call(int callTimeout) throws Exception {
149       if (controller.isCanceled()) return null;
150 
151       if (Thread.interrupted()) {
152         throw new InterruptedIOException();
153       }
154 
155       byte[] reg = location.getRegionInfo().getRegionName();
156 
157       ClientProtos.GetRequest request =
158           RequestConverter.buildGetRequest(reg, get);
159       controller.setCallTimeout(callTimeout);
160 
161       try {
162         ClientProtos.GetResponse response = getStub().get(controller, request);
163         if (response == null) {
164           return null;
165         }
166         return ProtobufUtil.toResult(response.getResult());
167       } catch (ServiceException se) {
168         throw ProtobufUtil.getRemoteException(se);
169       }
170     }
171   }
172 
173   /**
174    * Algo:
175    * - we put the query into the execution pool.
176    * - after x ms, if we don't have a result, we add the queries for the secondary replicas
177    * - we take the first answer
178    * - when done, we cancel what's left. Cancelling means:
179    * - removing from the pool if the actual call was not started
180    * - interrupting the call if it has started
181    * Client side, we need to take into account
182    * - a call is not executed immediately after being put into the pool
183    * - a call is a thread. Let's not multiply the number of thread by the number of replicas.
184    * Server side, if we can cancel when it's still in the handler pool, it's much better, as a call
185    * can take some i/o.
186    * <p/>
187    * Globally, the number of retries, timeout and so on still applies, but it's per replica,
188    * not global. We continue until all retries are done, or all timeouts are exceeded.
189    */
190   public synchronized Result call()
191       throws DoNotRetryIOException, InterruptedIOException, RetriesExhaustedException {
192     boolean isTargetReplicaSpecified = (get.getReplicaId() >= 0);
193 
194     RegionLocations rl = getRegionLocations(true, (isTargetReplicaSpecified ? get.getReplicaId()
195         : RegionReplicaUtil.DEFAULT_REPLICA_ID), cConnection, tableName, get.getRow());
196     ResultBoundedCompletionService cs = new ResultBoundedCompletionService(pool, rl.size());
197 
198     if(isTargetReplicaSpecified) {
199       addCallsForReplica(cs, rl, get.getReplicaId(), get.getReplicaId());
200     } else {
201       addCallsForReplica(cs, rl, 0, 0);
202       try {
203         // wait for the timeout to see whether the primary responds back
204         Future<Result> f = cs.poll(timeBeforeReplicas, TimeUnit.MICROSECONDS); // Yes, microseconds
205         if (f != null) {
206           return f.get(); //great we got a response
207         }
208       } catch (ExecutionException e) {
209         throwEnrichedException(e, retries);
210       } catch (CancellationException e) {
211         throw new InterruptedIOException();
212       } catch (InterruptedException e) {
213         throw new InterruptedIOException();
214       }
215 
216       // submit call for the all of the secondaries at once
217       addCallsForReplica(cs, rl, 1, rl.size() - 1);
218     }
219 
220     try {
221       try {
222         Future<Result> f = cs.take();
223         return f.get();
224       } catch (ExecutionException e) {
225         throwEnrichedException(e, retries);
226       }
227     } catch (CancellationException e) {
228       throw new InterruptedIOException();
229     } catch (InterruptedException e) {
230       throw new InterruptedIOException();
231     } finally {
232       // We get there because we were interrupted or because one or more of the
233       // calls succeeded or failed. In all case, we stop all our tasks.
234       cs.cancelAll();
235     }
236 
237     return null; // unreachable
238   }
239 
240   /**
241    * Extract the real exception from the ExecutionException, and throws what makes more
242    * sense.
243    */
244   static void throwEnrichedException(ExecutionException e, int retries)
245       throws RetriesExhaustedException, DoNotRetryIOException {
246     Throwable t = e.getCause();
247     assert t != null; // That's what ExecutionException is about: holding an exception
248 
249     if (t instanceof RetriesExhaustedException) {
250       throw (RetriesExhaustedException) t;
251     }
252 
253     if (t instanceof DoNotRetryIOException) {
254       throw (DoNotRetryIOException) t;
255     }
256 
257     RetriesExhaustedException.ThrowableWithExtraContext qt =
258         new RetriesExhaustedException.ThrowableWithExtraContext(t,
259             EnvironmentEdgeManager.currentTime(), null);
260 
261     List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions =
262         Collections.singletonList(qt);
263 
264     throw new RetriesExhaustedException(retries, exceptions);
265   }
266 
267   /**
268    * Creates the calls and submit them
269    *
270    * @param cs  - the completion service to use for submitting
271    * @param rl  - the region locations
272    * @param min - the id of the first replica, inclusive
273    * @param max - the id of the last replica, inclusive.
274    */
275   private void addCallsForReplica(ResultBoundedCompletionService cs,
276                                  RegionLocations rl, int min, int max) {
277     for (int id = min; id <= max; id++) {
278       HRegionLocation hrl = rl.getRegionLocation(id);
279       ReplicaRegionServerCallable callOnReplica = new ReplicaRegionServerCallable(id, hrl);
280       cs.submit(callOnReplica, callTimeout);
281     }
282   }
283 
284   static RegionLocations getRegionLocations(boolean useCache, int replicaId,
285                  ClusterConnection cConnection, TableName tableName, byte[] row)
286       throws RetriesExhaustedException, DoNotRetryIOException, InterruptedIOException {
287 
288     RegionLocations rl;
289     try {
290       if (!useCache) {
291         rl = cConnection.relocateRegion(tableName, row, replicaId);
292       } else {
293         rl = cConnection.locateRegion(tableName, row, useCache, true, replicaId);
294       }
295     } catch (DoNotRetryIOException e) {
296       throw e;
297     } catch (RetriesExhaustedException e) {
298       throw e;
299     } catch (InterruptedIOException e) {
300       throw e;
301     } catch (IOException e) {
302       throw new RetriesExhaustedException("Can't get the location", e);
303     }
304     if (rl == null) {
305       throw new RetriesExhaustedException("Can't get the locations");
306     }
307 
308     return rl;
309   }
310 
311 
312   /**
313    * A completion service for the RpcRetryingCallerFactory.
314    * Keeps the list of the futures, and allows to cancel them all.
315    * This means as well that it can be used for a small set of tasks only.
316    * <br>Implementation is not Thread safe.
317    */
318   public class ResultBoundedCompletionService {
319     private final Executor executor;
320     private final QueueingFuture[] tasks; // all the tasks
321     private volatile QueueingFuture completed = null;
322 
323     class QueueingFuture implements RunnableFuture<Result> {
324       private final ReplicaRegionServerCallable future;
325       private Result result = null;
326       private ExecutionException exeEx = null;
327       private volatile boolean canceled;
328       private final int callTimeout;
329       private final RpcRetryingCaller<Result> retryingCaller;
330 
331 
332       public QueueingFuture(ReplicaRegionServerCallable future, int callTimeout) {
333         this.future = future;
334         this.callTimeout = callTimeout;
335         this.retryingCaller = rpcRetryingCallerFactory.<Result>newCaller();
336       }
337 
338       @Override
339       public void run() {
340         try {
341           if (!canceled) {
342             result =
343                 rpcRetryingCallerFactory.<Result>newCaller().callWithRetries(future, callTimeout);
344           }
345         } catch (Throwable t) {
346           exeEx = new ExecutionException(t);
347         } finally {
348           if (!canceled && completed == null) {
349             completed = QueueingFuture.this;
350             synchronized (tasks) {
351               tasks.notify();
352             }
353           }
354         }
355       }
356 
357       @Override
358       public boolean cancel(boolean mayInterruptIfRunning) {
359         if (result != null || exeEx != null) return false;
360         retryingCaller.cancel();
361         future.startCancel();
362         canceled = true;
363         return true;
364       }
365 
366       @Override
367       public boolean isCancelled() {
368         return canceled;
369       }
370 
371       @Override
372       public boolean isDone() {
373         return result != null || exeEx != null;
374       }
375 
376       @Override
377       public Result get() throws InterruptedException, ExecutionException {
378         try {
379           return get(1000, TimeUnit.DAYS);
380         } catch (TimeoutException e) {
381           throw new RuntimeException("You did wait for 1000 days here?", e);
382         }
383       }
384 
385       @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="RCN_REDUNDANT_NULLCHECK_OF_NULL_VALUE",
386           justification="Is this an issue?")
387       @Override
388       public Result get(long timeout, TimeUnit unit)
389           throws InterruptedException, ExecutionException, TimeoutException {
390         synchronized (tasks) {
391           if (result != null) {
392             return result;
393           }
394           if (exeEx != null) {
395             throw exeEx;
396           }
397           unit.timedWait(tasks, timeout);
398         }
399         // Findbugs says this null check is redundant.  Will result be set across the wait above?
400         if (result != null) {
401           return result;
402         }
403         if (exeEx != null) {
404           throw exeEx;
405         }
406 
407         throw new TimeoutException("timeout=" + timeout + ", " + unit);
408       }
409     }
410 
411     public ResultBoundedCompletionService(Executor executor, int maxTasks) {
412       this.executor = executor;
413       this.tasks = new QueueingFuture[maxTasks];
414     }
415 
416 
417     public void submit(ReplicaRegionServerCallable task, int callTimeout) {
418       QueueingFuture newFuture = new QueueingFuture(task, callTimeout);
419       executor.execute(Trace.wrap(newFuture));
420       tasks[task.id] = newFuture;
421     }
422 
423     public QueueingFuture take() throws InterruptedException {
424       synchronized (tasks) {
425         while (completed == null) tasks.wait();
426       }
427       return completed;
428     }
429 
430     public QueueingFuture poll(long timeout, TimeUnit unit) throws InterruptedException {
431       synchronized (tasks) {
432         if (completed == null) unit.timedWait(tasks, timeout);
433       }
434       return completed;
435     }
436 
437     public void cancelAll() {
438       for (QueueingFuture future : tasks) {
439         if (future != null) future.cancel(true);
440       }
441     }
442   }
443 }