1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.client;
21  
22  import java.io.IOException;
23  import java.io.InterruptedIOException;
24  import java.util.ArrayList;
25  import java.util.Arrays;
26  import java.util.Collection;
27  import java.util.HashMap;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.ConcurrentHashMap;
32  import java.util.concurrent.ConcurrentMap;
33  import java.util.concurrent.ConcurrentSkipListMap;
34  import java.util.concurrent.ExecutorService;
35  import java.util.concurrent.RejectedExecutionException;
36  import java.util.concurrent.atomic.AtomicBoolean;
37  import java.util.concurrent.atomic.AtomicInteger;
38  import java.util.concurrent.atomic.AtomicLong;
39  
40  import org.apache.commons.logging.Log;
41  import org.apache.commons.logging.LogFactory;
42  import org.apache.hadoop.conf.Configuration;
43  import org.apache.hadoop.hbase.DoNotRetryIOException;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.HRegionLocation;
47  import org.apache.hadoop.hbase.ServerName;
48  import org.apache.hadoop.hbase.TableName;
49  import org.apache.hadoop.hbase.client.backoff.ServerStatistics;
50  import org.apache.hadoop.hbase.client.coprocessor.Batch;
51  import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
52  import org.apache.hadoop.hbase.util.Bytes;
53  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
54  import org.apache.hadoop.hbase.util.Pair;
55  import org.cloudera.htrace.Trace;
56  
57  import com.google.common.base.Preconditions;
58  
59  /**
60   * This class allows a continuous flow of requests. It's written to be compatible with a
61   * synchronous caller such as HTable.
62   * <p>
63   * The caller sends a buffer of operations by calling submit. This class extracts from this
64   * list the operations it can send, i.e. the operations whose regions are not considered busy.
65   * The process is asynchronous, i.e. it returns as soon as it has finished iterating over the
66   * list. If, and only if, the maximum number of concurrent tasks is reached, the call to
67   * submit will block.
68   * </p>
69   * <p>
70   * The class manages the retries internally.
71   * </p>
72   * <p>
73   * The class includes an error marker: it tells whether an operation has failed or not, and
74   * gives access to the exception details, i.e. the full list of throwables for each attempt.
75   * This marker exists to help backward compatibility in HTable. In most (new) cases, it should
76   * be managed by the callbacks.
77   * </p>
78   * <p>
79   * A callback is available, in order to: <list>
80   * <li>Get the result of the operation (failure or success)</li>
81   * <li>When an operation fails but could be retried, allow or disallow the retry</li>
82   * <li>When an operation fails for good (can't be retried or has already been retried the
83   * maximum number of times), register the error or not.</li>
84   * </list>
85   * <p>
86   * This class is not thread safe externally; only one thread should submit operations at a
87   * time. Internally, the class is thread safe enough to manage new submissions and results
88   * arising from older operations simultaneously.
89   * </p>
90   * <p>
91   * Internally, this class works with {@link Row}, which means it could theoretically be used
92   * for gets as well.
93   * </p>
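 * <p>
 * Illustrative usage sketch only: the connection, pool, callback, configuration and factories
 * are assumed to be created by the caller, and none of the variable names below are part of
 * this class.
 * <pre>
 *   AsyncProcess&lt;Object&gt; ap = new AsyncProcess&lt;Object&gt;(connection, tableName, pool,
 *       callback, conf, rpcCallerFactory, rpcControllerFactory);
 *   List&lt;Row&gt; buffer = ...;  // puts/deletes accumulated by the caller
 *   ap.submit(buffer, true);      // sends what it can; rows that cannot be sent stay in the list
 *   ap.waitUntilDone();           // blocks until all tasks, including retries, have finished
 *   if (ap.hasError()) {
 *     throw ap.getErrors();
 *   }
 * </pre>
 * </p>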
94   */
95  class AsyncProcess<CResult> {
96    private static final Log LOG = LogFactory.getLog(AsyncProcess.class);
97  
98    /**
99     * Configure the number of failures after which the client will start logging. A few
100    * failures are fine: the region moved, then was not opened, then was overloaded. We try to
101    * have an acceptable heuristic for the number of errors we don't log. 9 was chosen because
102    * we wait for 1s at this stage.
103    */
104   public static final String START_LOG_ERRORS_AFTER_COUNT_KEY =
105       "hbase.client.start.log.errors.counter";
106   public static final int DEFAULT_START_LOG_ERRORS_AFTER_COUNT = 9;
107 
108   protected static final AtomicLong COUNTER = new AtomicLong();
109   protected final long id;
110   private final int startLogErrorsCnt;
111   protected final HConnection hConnection;
112   protected final TableName tableName;
113   protected final ExecutorService pool;
114   protected final AsyncProcessCallback<CResult> callback;
115   protected final BatchErrors errors = new BatchErrors();
116   protected final AtomicBoolean hasError = new AtomicBoolean(false);
117   protected final AtomicLong tasksSent = new AtomicLong(0);
118   protected final AtomicLong tasksDone = new AtomicLong(0);
119   protected final AtomicLong retriesCnt = new AtomicLong(0);
120   protected final ConcurrentMap<byte[], AtomicInteger> taskCounterPerRegion =
121       new ConcurrentSkipListMap<byte[], AtomicInteger>(Bytes.BYTES_COMPARATOR);
122   protected final ConcurrentMap<ServerName, AtomicInteger> taskCounterPerServer =
123       new ConcurrentHashMap<ServerName, AtomicInteger>();
124   protected final int timeout;
125 
126   /**
127    * The number of tasks simultaneously executed on the cluster.
128    */
129   protected final int maxTotalConcurrentTasks;
130 
131   /**
132    * The number of tasks we run in parallel on a single region.
133    * With 1 (the default), we ensure that the ordering of the queries is respected: we don't
134    * start a set of operations on a region before the previous one is done. This also limits
135    * the pressure we put on the region server.
136    */
137   protected final int maxConcurrentTasksPerRegion;
138 
139   /**
140    * The number of tasks simultaneously executed on a single region server.
141    */
142   protected final int maxConcurrentTasksPerServer;
143   protected final long pause;
144   protected int numTries;
145   protected int serverTrackerTimeout;
146   protected RpcRetryingCallerFactory rpcCallerFactory;
147   private RpcControllerFactory rpcFactory;
148 
149 
150   /**
151    * This interface makes it possible to keep the contract of the previous synchronous API,
152    * which uses an array of objects to return the results.
153    * <p/>
154    * This interface allows the caller to specify the behavior on errors: <list>
155    * <li>If we have not yet reached the maximum number of retries, the user can nevertheless
156    * specify whether this specific operation should be retried or not.
157    * </li>
158    * <li>If an operation fails for good (i.e. is not retried or fails after all retries), the
159    * user can specify whether we should mark this AsyncProcess as in error or not.
160    * </li>
161    * </list>
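   * <p>
   * Minimal implementation sketch, purely illustrative: the {@code results} array is an
   * assumption of this example, not part of the interface.
   * <pre>
   *   new AsyncProcessCallback&lt;Object&gt;() {
   *     public void success(int originalIndex, byte[] region, Row row, Object result) {
   *       results[originalIndex] = result;
   *     }
   *     public boolean failure(int originalIndex, byte[] region, Row row, Throwable t) {
   *       results[originalIndex] = t;
   *       return true;   // store the error and mark the AsyncProcess as being in error
   *     }
   *     public boolean retriableFailure(int originalIndex, Row row, byte[] region, Throwable t) {
   *       return true;   // keep retrying until the retry count is exhausted
   *     }
   *   }
   * </pre>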
162    */
163   interface AsyncProcessCallback<CResult> {
164 
165     /**
166      * Called on success. originalIndex holds the index in the action list.
167      */
168     void success(int originalIndex, byte[] region, Row row, CResult result);
169 
170     /**
171      * Called on failure, if we don't retry (i.e. called once per failed operation).
172      *
173      * @return true if we should store the error and tag this async process as being in error.
174      *         false if the failure of this operation can be safely ignored, and does not
175      *         require the current process to stop before proceeding with the other operations
176      *         in the queue.
177      */
178     boolean failure(int originalIndex, byte[] region, Row row, Throwable t);
179 
180     /**
181      * Called on a failure we plan to retry. This allows the user to stop retrying. Will be
182      * called multiple times for a single action if it fails multiple times.
183      *
184      * @return true if we should retry, false otherwise.
185      */
186     boolean retriableFailure(int originalIndex, Row row, byte[] region, Throwable exception);
187   }
188 
189   private static class BatchErrors {
190     private final List<Throwable> throwables = new ArrayList<Throwable>();
191     private final List<Row> actions = new ArrayList<Row>();
192     private final List<String> addresses = new ArrayList<String>();
193 
194     public synchronized void add(Throwable ex, Row row, HRegionLocation location) {
195       if (row == null){
196         throw new IllegalArgumentException("row cannot be null. location=" + location);
197       }
198 
199       throwables.add(ex);
200       actions.add(row);
201       addresses.add(location != null ? location.getServerName().toString() : "null location");
202     }
203 
204     private synchronized RetriesExhaustedWithDetailsException makeException() {
205       return new RetriesExhaustedWithDetailsException(
206           new ArrayList<Throwable>(throwables),
207           new ArrayList<Row>(actions), new ArrayList<String>(addresses));
208     }
209 
210     public synchronized void clear() {
211       throwables.clear();
212       actions.clear();
213       addresses.clear();
214     }
215   }
216 
217   public AsyncProcess(HConnection hc, TableName tableName, ExecutorService pool,
218       AsyncProcessCallback<CResult> callback, Configuration conf,
219       RpcRetryingCallerFactory rpcCaller, RpcControllerFactory rpcFactory) {
220     if (hc == null){
221       throw new IllegalArgumentException("HConnection cannot be null.");
222     }
223 
224     this.hConnection = hc;
225     this.tableName = tableName;
226     this.pool = pool;
227     this.callback = callback;
228 
229     this.id = COUNTER.incrementAndGet();
230 
231     this.pause = conf.getLong(HConstants.HBASE_CLIENT_PAUSE,
232         HConstants.DEFAULT_HBASE_CLIENT_PAUSE);
233     this.numTries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
234         HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
235     this.timeout = conf.getInt(HConstants.HBASE_RPC_TIMEOUT_KEY,
236         HConstants.DEFAULT_HBASE_RPC_TIMEOUT);
237 
238 
239     this.maxTotalConcurrentTasks = conf.getInt(HConstants.HBASE_CLIENT_MAX_TOTAL_TASKS,
240       HConstants.DEFAULT_HBASE_CLIENT_MAX_TOTAL_TASKS);
241     this.maxConcurrentTasksPerServer = conf.getInt(HConstants.HBASE_CLIENT_MAX_PERSERVER_TASKS,
242           HConstants.DEFAULT_HBASE_CLIENT_MAX_PERSERVER_TASKS);
243     this.maxConcurrentTasksPerRegion = conf.getInt(HConstants.HBASE_CLIENT_MAX_PERREGION_TASKS,
244           HConstants.DEFAULT_HBASE_CLIENT_MAX_PERREGION_TASKS);
245 
246     this.startLogErrorsCnt =
247         conf.getInt(START_LOG_ERRORS_AFTER_COUNT_KEY, DEFAULT_START_LOG_ERRORS_AFTER_COUNT);
248 
249     if (this.maxTotalConcurrentTasks <= 0) {
250       throw new IllegalArgumentException("maxTotalConcurrentTasks=" + maxTotalConcurrentTasks);
251     }
252     if (this.maxConcurrentTasksPerServer <= 0) {
253       throw new IllegalArgumentException("maxConcurrentTasksPerServer=" +
254           maxConcurrentTasksPerServer);
255     }
256     if (this.maxConcurrentTasksPerRegion <= 0) {
257       throw new IllegalArgumentException("maxConcurrentTasksPerRegion=" +
258           maxConcurrentTasksPerRegion);
259     }
260 
261     // Server tracker allows us to do faster, and yet useful (hopefully), retries.
262     // However, if we retry too quickly, we might exhaust the retry count limit very fast.
263     // To avoid this, we are going to cheat for now (see HBASE-7659), and calculate maximum
264     // retry time if normal retries were used. Then we will retry until this time runs out.
265     // If we keep hitting one server, the net effect will be the incremental backoff, and
266     // essentially the same number of retries as planned. If we have to do faster retries,
267     // we will do more retries in aggregate, but the user will be none the wiser.
268     this.serverTrackerTimeout = 0;
269     for (int i = 0; i < this.numTries; ++i) {
270       serverTrackerTimeout += ConnectionUtils.getPauseTime(this.pause, i);
271     }
272 
273     this.rpcCallerFactory = rpcCaller;
274     Preconditions.checkNotNull(rpcFactory);
275     this.rpcFactory = rpcFactory;
276   }
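
  // Illustrative configuration sketch, not a recommendation: the values are arbitrary examples,
  // and only keys read by this constructor are used.
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setInt(HConstants.HBASE_CLIENT_MAX_TOTAL_TASKS, 200);    // tasks across the cluster
  //   conf.setInt(HConstants.HBASE_CLIENT_MAX_PERSERVER_TASKS, 5);  // tasks per region server
  //   conf.setInt(HConstants.HBASE_CLIENT_MAX_PERREGION_TASKS, 1);  // 1 keeps per-region ordering
  //   conf.setInt(AsyncProcess.START_LOG_ERRORS_AFTER_COUNT_KEY, 9);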
277 
278   /**
279    * Extract from the rows list what we can submit. The rows we cannot submit are kept in the
280    * list.
281    *
282    * @param rows the submitted rows. Modified by the method: we remove the rows we took.
283    * @param atLeastOne true if we should submit at least a subset.
284    */
285   public void submit(List<? extends Row> rows, boolean atLeastOne) throws InterruptedIOException {
286     submit(rows, atLeastOne, null);
287   }
288 
289   /**
290    * Extract from the rows list what we can submit. The rows we cannot submit are kept in the
291    * list.
292    *
293    * @param rows the submitted rows. Modified by the method: we remove the rows we took.
294    * @param atLeastOne true if we should submit at least a subset.
295    * @param batchCallback Batch callback. Only called on success.
296    */
297   public void submit(List<? extends Row> rows, boolean atLeastOne,
298       Batch.Callback<CResult> batchCallback) throws InterruptedIOException {
299     if (rows.isEmpty()) {
300       return;
301     }
302 
303     // This looks like we are keying by region but HRegionLocation has a comparator that compares
304     // on the server portion only (hostname + port) so this Map collects regions by server.
305     Map<HRegionLocation, MultiAction<Row>> actionsByServer =
306       new HashMap<HRegionLocation, MultiAction<Row>>();
307     List<Action<Row>> retainedActions = new ArrayList<Action<Row>>(rows.size());
308 
309     long currentTaskCnt = tasksDone.get();
310     boolean alreadyLooped = false;
311 
312     NonceGenerator ng = this.hConnection.getNonceGenerator();
313     do {
314       if (alreadyLooped){
315         // if, for whatever reason, we looped, we want to be sure that something has changed.
316         waitForNextTaskDone(currentTaskCnt);
317         currentTaskCnt = tasksDone.get();
318       } else {
319         alreadyLooped = true;
320       }
321 
322       // Wait until there is at least one slot for a new task.
323       waitForMaximumCurrentTasks(maxTotalConcurrentTasks - 1);
324 
325       // Remember the previous decisions about regions or region servers we put in the
326       //  final multi.
327       Map<Long, Boolean> regionIncluded = new HashMap<Long, Boolean>();
328       Map<ServerName, Boolean> serverIncluded = new HashMap<ServerName, Boolean>();
329 
330       int posInList = -1;
331       Iterator<? extends Row> it = rows.iterator();
332       while (it.hasNext()) {
333         Row r = it.next();
334         HRegionLocation loc = findDestLocation(r, posInList);
335 
336         if (loc == null) { // loc is null if there is an error such as meta not available.
337           it.remove();
338         } else if (canTakeOperation(loc, regionIncluded, serverIncluded)) {
339           Action<Row> action = new Action<Row>(r, ++posInList);
340           setNonce(ng, r, action);
341           retainedActions.add(action);
342           addAction(loc, action, actionsByServer, ng);
343           it.remove();
344         }
345       }
346     } while (retainedActions.isEmpty() && atLeastOne && !hasError());
347 
348     HConnectionManager.ServerErrorTracker errorsByServer = createServerErrorTracker();
349     sendMultiAction(retainedActions, actionsByServer, 1, errorsByServer, batchCallback);
350   }
351 
352   /**
353    * Group the actions per region server.
354    *
355    * @param loc - the destination. Must not be null.
356    * @param action - the action to add to the multiaction
357    * @param actionsByServer the multiaction per server
358    * @param ng Nonce generator, or null if no nonces are needed.
359    */
360   private void addAction(HRegionLocation loc, Action<Row> action, Map<HRegionLocation,
361       MultiAction<Row>> actionsByServer, NonceGenerator ng) {
362     final byte[] regionName = loc.getRegionInfo().getRegionName();
363     MultiAction<Row> multiAction = actionsByServer.get(loc);
364     if (multiAction == null) {
365       multiAction = new MultiAction<Row>();
366       actionsByServer.put(loc, multiAction);
367     }
368     if (action.hasNonce() && !multiAction.hasNonceGroup()) {
369       // TODO: this code executes for every (re)try, and calls getNonceGroup again
370       //       for the same action. It must return the same value across calls.
371       multiAction.setNonceGroup(ng.getNonceGroup());
372     }
373 
374     multiAction.add(regionName, action);
375   }
376 
377   /**
378    * Find the destination.
379    *
380    * @param row          the row
381    * @param posInList    the position in the list
382    * @return the destination. Null if we couldn't find it.
383    */
384   private HRegionLocation findDestLocation(Row row, int posInList) {
385     if (row == null) throw new IllegalArgumentException("#" + id + ", row cannot be null");
386     HRegionLocation loc = null;
387     IOException locationException = null;
388     try {
389       loc = hConnection.locateRegion(this.tableName, row.getRow());
390       if (loc == null) {
391         locationException = new IOException("#" + id + ", no location found, aborting submit for" +
392             " tableName=" + tableName +
393             " rowkey=" + Arrays.toString(row.getRow()));
394       }
395     } catch (IOException e) {
396       locationException = e;
397     }
398     if (locationException != null) {
399       // There are multiple retries in locateRegion already. No need to add new.
400       // We can't continue with this row, hence it's the last retry.
401       manageError(posInList, row, false, locationException, null);
402       return null;
403     }
404 
405     return loc;
406   }
407 
408   /**
409    * Check if we should send new operations to this region or region server.
410    * We take past decisions into account: if we have already accepted an
411    * operation on a given region, we accept all operations for this region.
412    *
413    * @param loc the region and the server name we want to use.
414    * @return true if we can take a new operation on this region/server, false otherwise.
415    */
416   protected boolean canTakeOperation(HRegionLocation loc,
417                                      Map<Long, Boolean> regionsIncluded,
418                                      Map<ServerName, Boolean> serversIncluded) {
419     long regionId = loc.getRegionInfo().getRegionId();
420     Boolean regionPrevious = regionsIncluded.get(regionId);
421 
422     if (regionPrevious != null) {
423       // We already know what to do with this region.
424       return regionPrevious;
425     }
426 
427     Boolean serverPrevious = serversIncluded.get(loc.getServerName());
428     if (Boolean.FALSE.equals(serverPrevious)) {
429       // It's a new region, on a region server that we have already excluded.
430       regionsIncluded.put(regionId, Boolean.FALSE);
431       return false;
432     }
433 
434     AtomicInteger regionCnt = taskCounterPerRegion.get(loc.getRegionInfo().getRegionName());
435     if (regionCnt != null && regionCnt.get() >= maxConcurrentTasksPerRegion) {
436       // Too many tasks on this region already.
437       regionsIncluded.put(regionId, Boolean.FALSE);
438       return false;
439     }
440 
441     if (serverPrevious == null) {
442       // The region is ok, but we need to decide for this region server.
443       int newServers = 0; // number of servers we're going to contact so far
444       for (Map.Entry<ServerName, Boolean> kv : serversIncluded.entrySet()) {
445         if (kv.getValue()) {
446           newServers++;
447         }
448       }
449 
450       // Do we have too many total tasks already?
451       boolean ok = (newServers + getCurrentTasksCount()) < maxTotalConcurrentTasks;
452 
453       if (ok) {
454         // If the total is fine, is it ok for this individual server?
455         AtomicInteger serverCnt = taskCounterPerServer.get(loc.getServerName());
456         ok = (serverCnt == null || serverCnt.get() < maxConcurrentTasksPerServer);
457       }
458 
459       if (!ok) {
460         regionsIncluded.put(regionId, Boolean.FALSE);
461         serversIncluded.put(loc.getServerName(), Boolean.FALSE);
462         return false;
463       }
464 
465       serversIncluded.put(loc.getServerName(), Boolean.TRUE);
466     } else {
467       assert serverPrevious.equals(Boolean.TRUE);
468     }
469 
470     regionsIncluded.put(regionId, Boolean.TRUE);
471 
472     return true;
473   }
474 
475   /**
476    * Submit immediately the list of rows, whatever the server status. Kept for backward
477    * compatibility: it allows use with the batch interface, which returns an array of objects.
478    *
479    * @param rows the list of rows.
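   * <p>
   * Illustrative sketch only; how errors are mapped back to the caller's object array is up to
   * the caller:
   * <pre>
   *   ap.submitAll(rows);   // every row is sent immediately, whatever the region/server load
   *   ap.waitUntilDone();
   *   if (ap.hasError()) {
   *     RetriesExhaustedWithDetailsException e = ap.getErrors();
   *     // inspect e to map the per-row throwables back to the returned object array
   *   }
   * </pre>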
480    */
481   public void submitAll(List<? extends Row> rows) {
482     List<Action<Row>> actions = new ArrayList<Action<Row>>(rows.size());
483 
484     // The position will be used by the processBatch to match the object array returned.
485     int posInList = -1;
486     NonceGenerator ng = this.hConnection.getNonceGenerator();
487     for (Row r : rows) {
488       posInList++;
489       if (r instanceof Put) {
490         Put put = (Put) r;
491         if (put.isEmpty()) {
492           throw new IllegalArgumentException("No columns to insert for #" + (posInList+1)+ " item");
493         }
494       }
495       Action<Row> action = new Action<Row>(r, posInList);
496       setNonce(ng, r, action);
497       actions.add(action);
498     }
499     HConnectionManager.ServerErrorTracker errorsByServer = createServerErrorTracker();
500     submit(actions, actions, 1, errorsByServer);
501   }
502 
503   private void setNonce(NonceGenerator ng, Row r, Action<Row> action) {
504     if (!(r instanceof Append) && !(r instanceof Increment)) return;
505     action.setNonce(ng.newNonce()); // Action handles NO_NONCE, so it's ok if ng is disabled.
506   }
507 
508 
509   /**
510    * Group a list of actions per region server, and send them. The created MultiActions are
511    * added to the inProgress list. Does not take into account the region/server load.
512    *
513    * @param initialActions - the full list of the actions in progress
514    * @param currentActions - the list of rows to submit
515    * @param numAttempt - the current attempt number (the first attempt is 1)
516    */
517   private void submit(List<Action<Row>> initialActions,
518                       List<Action<Row>> currentActions, int numAttempt,
519                       final HConnectionManager.ServerErrorTracker errorsByServer) {
520 
521     if (numAttempt > 1){
522       retriesCnt.incrementAndGet();
523     }
524 
525     // group per location => regions server
526     final Map<HRegionLocation, MultiAction<Row>> actionsByServer =
527         new HashMap<HRegionLocation, MultiAction<Row>>();
528 
529     NonceGenerator ng = this.hConnection.getNonceGenerator();
530     for (Action<Row> action : currentActions) {
531       HRegionLocation loc = findDestLocation(action.getAction(), action.getOriginalIndex());
532       if (loc != null) {
533         addAction(loc, action, actionsByServer, ng);
534       }
535     }
536 
537     if (!actionsByServer.isEmpty()) {
538       sendMultiAction(initialActions, actionsByServer, numAttempt, errorsByServer, null);
539     }
540   }
541 
542   /**
543    * Send a multi action structure to the servers, after a delay depending on the attempt
544    * number. Asynchronous.
545    *
546    * @param initialActions  the list of the actions, flat.
547    * @param actionsByServer the actions grouped by server
548    * @param numAttempt      the attempt number.
549    */
550   public void sendMultiAction(final List<Action<Row>> initialActions,
551                               Map<HRegionLocation, MultiAction<Row>> actionsByServer,
552                               final int numAttempt,
553                               final HConnectionManager.ServerErrorTracker errorsByServer,
554                               Batch.Callback<CResult> batchCallback) {
555     // Send the queries and add them to the inProgress list
556     // This iteration is by server (the HRegionLocation comparator is by server portion only).
557     for (Map.Entry<HRegionLocation, MultiAction<Row>> e : actionsByServer.entrySet()) {
558       HRegionLocation loc = e.getKey();
559       MultiAction<Row> multiAction = e.getValue();
560       Collection<? extends Runnable> runnables = getNewMultiActionRunnable(initialActions, loc,
561         multiAction, numAttempt, errorsByServer, batchCallback);
562       for (Runnable runnable: runnables) {
563         try {
564           incTaskCounters(multiAction.getRegions(), loc.getServerName());
565           this.pool.submit(runnable);
566         } catch (Throwable t) {
567           if (t instanceof RejectedExecutionException) {
568             // This should never happen. But as the pool is provided by the end user, let's secure
569             // this a little.
570             LOG.warn("#" + id + ", the task was rejected by the pool. This is unexpected."
571                 + " Server is " + loc.getServerName(), t);
572           } else {
573             // see #HBASE-14359 for more details
574             LOG.warn("Caught unexpected exception/error: ", t);
575           }
576           decTaskCounters(multiAction.getRegions(), loc.getServerName());
577           // We're likely to fail again, but this will increment the attempt counter, so it will
578           //  finish.
579           receiveGlobalFailure(initialActions, multiAction, loc, numAttempt, t, errorsByServer);
580         }
581       }
582     }
583   }
584 
585   private Runnable getNewSingleServerRunnable(
586       final List<Action<Row>> initialActions,
587       final HRegionLocation loc,
588       final MultiAction<Row> multiAction,
589       final int numAttempt,
590       final HConnectionManager.ServerErrorTracker errorsByServer,
591       final Batch.Callback<CResult> batchCallback) {
592     return new Runnable() {
593       @Override
594       public void run() {
595         MultiResponse res;
596         try {
597           MultiServerCallable<Row> callable = createCallable(loc, multiAction);
598           try {
599             res = createCaller(callable).callWithoutRetries(callable, timeout);
600           } catch (IOException e) {
601           // The service itself failed. It may be an error coming from the communication
602             //   layer, but, as well, a functional error raised by the server.
603             receiveGlobalFailure(initialActions, multiAction, loc, numAttempt, e,
604                 errorsByServer);
605             return;
606           } catch (Throwable t) {
607             // This should not happen. Let's log & retry anyway.
608             LOG.error("#" + id + ", Caught throwable while calling. This is unexpected." +
609                 " Retrying. Server is " + loc.getServerName() + ", tableName=" + tableName, t);
610             receiveGlobalFailure(initialActions, multiAction, loc, numAttempt, t,
611                 errorsByServer);
612             return;
613           }
614 
615           // Nominal case: we received an answer from the server, and it's not an exception.
616           receiveMultiAction(initialActions, multiAction, loc, res, numAttempt, errorsByServer,
617             batchCallback);
618 
619         } finally {
620           decTaskCounters(multiAction.getRegions(), loc.getServerName());
621         }
622       }
623     };
624   }
625 
626   private Collection<? extends Runnable> getNewMultiActionRunnable(
627       final List<Action<Row>> initialActions,
628       final HRegionLocation loc,
629       final MultiAction<Row> multiAction,
630       final int numAttempt,
631       final HConnectionManager.ServerErrorTracker errorsByServer,
632       final Batch.Callback<CResult> batchCallback) {
633     // no stats to manage, just do the standard action
634     if (AsyncProcess.this.hConnection.getStatisticsTracker() == null) {
635       if (hConnection.getConnectionMetrics() != null) {
636         hConnection.getConnectionMetrics().incrNormalRunners();
637       }
638       List<Runnable> toReturn = new ArrayList<Runnable>(1);
639       toReturn.add(Trace.wrap("AsyncProcess.sendMultiAction", 
640         getNewSingleServerRunnable(initialActions, loc, multiAction, numAttempt,
641           errorsByServer, batchCallback)));
642       return toReturn;
643     } else {
644       // group the actions by the amount of delay
645       Map<Long, DelayingRunner> actions = new HashMap<Long, DelayingRunner>(multiAction
646         .size());
647 
648       // split up the actions
649       for (Map.Entry<byte[], List<Action<Row>>> e : multiAction.actions.entrySet()) {
650         Long backoff = getBackoff(loc);
651         DelayingRunner runner = actions.get(backoff);
652         if (runner == null) {
653           actions.put(backoff, new DelayingRunner(backoff, e));
654         } else {
655           runner.add(e);
656         }
657       }
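      // Illustrative example with made-up numbers: if two regions hosted by this server currently
      // map to a 0 ms backoff and a third maps to 50 ms, the loop above builds two DelayingRunners:
      // one whose actions are sent immediately and one that sleeps roughly 50 ms before sending.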
658 
659       List<Runnable> toReturn = new ArrayList<Runnable>(actions.size());
660       for (DelayingRunner runner : actions.values()) {
661         String traceText = "AsyncProcess.sendMultiAction";
662         Runnable runnable = getNewSingleServerRunnable(initialActions, loc, runner.getActions(),
663           numAttempt, errorsByServer, batchCallback);
664         // use a delay runner only if we need to sleep for some time
665         if (runner.getSleepTime() > 0) {
666           runner.setRunner(runnable);
667           traceText = "AsyncProcess.clientBackoff.sendMultiAction";
668           runnable = runner;
669           if (hConnection.getConnectionMetrics() != null) {
670             hConnection.getConnectionMetrics().incrDelayRunners();
671             hConnection.getConnectionMetrics().updateDelayInterval(runner.getSleepTime());
672           }
673         } else {
674           if (hConnection.getConnectionMetrics() != null) {
675             hConnection.getConnectionMetrics().incrNormalRunners();
676           }
677         }
678         runnable = Trace.wrap(traceText, runnable);
679         toReturn.add(runnable);
680       }
681       return toReturn;
682     }
683   }
684 
685   /**
686    * @param location the location of the server and region we are going to write some data to
687    * @return the amount of time the client should wait before it submits a request to the
688    * specified server and region
689    * (based on the per-server statistics and the configured backoff policy)
690    */
691   private Long getBackoff(HRegionLocation location) {
692     ServerStatisticTracker tracker = AsyncProcess.this.hConnection.getStatisticsTracker();
693     ServerStatistics stats = tracker.getStats(location.getServerName());
694     return AsyncProcess.this.hConnection.getBackoffPolicy()
695       .getBackoffTime(location.getServerName(), location.getRegionInfo().getRegionName(),
696         stats);
697   }
698 
699   /**
700    * Create a callable. Isolated to be easily overridden in the tests.
701    */
702   protected MultiServerCallable<Row> createCallable(final HRegionLocation location,
703       final MultiAction<Row> multi) {
704     return new MultiServerCallable<Row>(hConnection, tableName, location, this.rpcFactory, multi);
705   }
706 
707   /**
708    * For tests.
709    * @param callable ignored here; kept so that tests can override this method.
710    * @return a caller.
711    */
712   protected RpcRetryingCaller<MultiResponse> createCaller(MultiServerCallable<Row> callable) {
713     return rpcCallerFactory.<MultiResponse> newCaller();
714   }
715 
716   /**
717    * Checks if we can retry and acts accordingly: logs, sets the error status, calls the callbacks.
718    *
719    * @param originalIndex the position in the list sent
720    * @param row           the row
721    * @param canRetry      if false, we won't retry whatever the settings.
722    * @param throwable     the throwable, if any (can be null)
723    * @param location      the location, if any (can be null)
724    * @return true if the action can be retried, false otherwise.
725    */
726   private boolean manageError(int originalIndex, Row row, boolean canRetry,
727                               Throwable throwable, HRegionLocation location) {
728     if (canRetry && throwable != null && throwable instanceof DoNotRetryIOException) {
729       canRetry = false;
730     }
731 
732     byte[] region = null;
733     if (canRetry && callback != null) {
734       region = location == null ? null : location.getRegionInfo().getEncodedNameAsBytes();
735       canRetry = callback.retriableFailure(originalIndex, row, region, throwable);
736     }
737 
738     if (!canRetry) {
739       if (callback != null) {
740         if (region == null && location != null) {
741           region = location.getRegionInfo().getEncodedNameAsBytes();
742         }
743         callback.failure(originalIndex, region, row, throwable);
744       }
745       errors.add(throwable, row, location);
746       this.hasError.set(true);
747     }
748 
749     return canRetry;
750   }
751 
752   /**
753    * Resubmit all the actions from this multiaction after a failure.
754    *
755    * @param initialActions the full initial action list
756    * @param rsActions  the actions still to do from the initial list
757    * @param location   the destination
758    * @param numAttempt the number of attempts so far
759    * @param t the throwable (if any) that caused the resubmit
760    */
761   private void receiveGlobalFailure(List<Action<Row>> initialActions, MultiAction<Row> rsActions,
762                                     HRegionLocation location, int numAttempt, Throwable t,
763                                     HConnectionManager.ServerErrorTracker errorsByServer) {
764     // Do not use the exception for updating cache because it might be coming from
765     // any of the regions in the MultiAction.
766     hConnection.updateCachedLocations(tableName,
767       rsActions.actions.values().iterator().next().get(0).getAction().getRow(), null, location);
768     errorsByServer.reportServerError(location);
769     boolean canRetry = errorsByServer.canRetryMore(numAttempt);
770 
771     List<Action<Row>> toReplay = new ArrayList<Action<Row>>(initialActions.size());
772     for (Map.Entry<byte[], List<Action<Row>>> e : rsActions.actions.entrySet()) {
773       for (Action<Row> action : e.getValue()) {
774         if (manageError(action.getOriginalIndex(), action.getAction(), canRetry, t, location)) {
775           toReplay.add(action);
776         }
777       }
778     }
779 
780     logAndResubmit(initialActions, location, toReplay, numAttempt, rsActions.size(),
781         t, errorsByServer);
782   }
783 
784   /**
785    * Log as much info as possible and, if there is something to replay, submit it again after
786    *  a backoff sleep.
787    */
788   private void logAndResubmit(List<Action<Row>> initialActions, HRegionLocation oldLocation,
789                               List<Action<Row>> toReplay, int numAttempt, int failureCount,
790                               Throwable throwable,
791                               HConnectionManager.ServerErrorTracker errorsByServer) {
792     if (toReplay.isEmpty()) {
793       // it's either a success or a last failure
794       if (failureCount != 0) {
795         // We have a failure but nothing to retry. We're done, it's a final failure.
796         LOG.warn(createLog(numAttempt, failureCount, toReplay.size(),
797             oldLocation.getServerName(), throwable, -1, false,
798             errorsByServer.getStartTrackingTime()));
799       } else if (numAttempt > startLogErrorsCnt + 1) {
800         // The operation was successful, but needed several attempts. Let's log this.
801         LOG.info(createLog(numAttempt, failureCount, 0,
802             oldLocation.getServerName(), throwable, -1, false,
803             errorsByServer.getStartTrackingTime()));
804       }
805       return;
806     }
807 
808     // We have something to replay. We're going to sleep a little before.
809 
810     // We have two contradicting needs here:
811     //  1) We want to get the new location after having slept, as it may change.
812     //  2) We want to take into account the location when calculating the sleep time.
813     // It should be possible to have some heuristics to take the right decision. Short term,
814     //  we go for one.
815     long backOffTime = errorsByServer.calculateBackoffTime(oldLocation, pause);
816 
817     if (numAttempt > startLogErrorsCnt) {
818       // We use this value to have some logs when we have multiple failures, but not too many
819       //  logs, as errors are to be expected when a region moves, splits and so on
820       LOG.info(createLog(numAttempt, failureCount, toReplay.size(),
821           oldLocation.getServerName(), throwable, backOffTime, true,
822           errorsByServer.getStartTrackingTime()));
823     }
824 
825     try {
826       Thread.sleep(backOffTime);
827     } catch (InterruptedException e) {
828       LOG.warn("#" + id + ", not sent: " + toReplay.size() + " operations, " + oldLocation, e);
829       Thread.currentThread().interrupt();
830       return;
831     }
832 
833     submit(initialActions, toReplay, numAttempt + 1, errorsByServer);
834   }
835 
836   /**
837    * Called when we receive the result of a server query.
838    *
839    * @param initialActions - the whole action list
840    * @param multiAction    - the multiAction we sent
841    * @param location       - the location. It's used as a server name.
842    * @param responses      - the response, if any
843    * @param numAttempt     - the attempt
844    */
845   private void receiveMultiAction(List<Action<Row>> initialActions, MultiAction<Row> multiAction,
846                                   HRegionLocation location,
847                                   MultiResponse responses, int numAttempt,
848                                   HConnectionManager.ServerErrorTracker errorsByServer,
849                                   Batch.Callback<CResult> batchCallback) {
850     assert responses != null;
851 
852     // Success or partial success
853     // Analyze detailed results. We can still have individual failures to be redone.
854     // two specific throwables are managed:
855     //  - DoNotRetryIOException: we do not retry that action, but we continue for the others
856     //  - RegionMovedException: we update the cache with the new region location
857 
858     List<Action<Row>> toReplay = new ArrayList<Action<Row>>();
859     Throwable throwable = null;
860     int failureCount = 0;
861     boolean canRetry = true;
862 
863     for (Map.Entry<byte[], List<Pair<Integer, Object>>> resultsForRS :
864         responses.getResults().entrySet()) {
865 
866       boolean regionFailureRegistered = false;
867       for (Pair<Integer, Object> regionResult : resultsForRS.getValue()) {
868         Object result = regionResult.getSecond();
869 
870         // Failure: retry if it makes sense, else update the error lists
871         if (result == null || result instanceof Throwable) {
872           throwable = (Throwable) result;
873           Action<Row> correspondingAction = initialActions.get(regionResult.getFirst());
874           Row row = correspondingAction.getAction();
875           failureCount++;
876           if (!regionFailureRegistered) { // We're doing this once per location.
877             regionFailureRegistered = true;
878             // The location here is used as a server name.
879             hConnection.updateCachedLocations(this.tableName, row.getRow(), result, location);
880             if (failureCount == 1) {
881               errorsByServer.reportServerError(location);
882               canRetry = errorsByServer.canRetryMore(numAttempt);
883             }
884           }
885 
886           if (manageError(correspondingAction.getOriginalIndex(), row, canRetry,
887               throwable, location)) {
888             toReplay.add(correspondingAction);
889           }
890         } else { // success
891 
892           if (AsyncProcess.this.hConnection.getConnectionMetrics() != null) {
893             AsyncProcess.this.hConnection.getConnectionMetrics().
894               updateServerStats(location.getServerName(),
895                 location.getRegionInfo().getRegionName(), result);
896           }
897 
898           if (callback != null || batchCallback != null) {
899             int index = regionResult.getFirst();
900             Action<Row> correspondingAction = initialActions.get(index);
901             Row row = correspondingAction.getAction();
902             if (callback != null) {
903               //noinspection unchecked
904               this.callback.success(index, resultsForRS.getKey(), row, (CResult) result);
905             }
906             if (batchCallback != null) {
907               batchCallback.update(resultsForRS.getKey(), row.getRow(), (CResult) result);
908             }
909           }
910         }
911       }
912     }
913 
914     // The failures global to a region. We will use the multiAction we sent previously to find
915     //   the actions to replay.
916 
917     for (Map.Entry<byte[], Throwable> throwableEntry : responses.getExceptions().entrySet()) {
918       throwable = throwableEntry.getValue();
919       byte[] region = throwableEntry.getKey();
920       List<Action<Row>> actions = multiAction.actions.get(region);
921       if (actions == null || actions.isEmpty()) {
922         throw new IllegalStateException("Wrong response for the region: " +
923             HRegionInfo.encodeRegionName(region));
924       }
925 
926       if (failureCount == 0) {
927         errorsByServer.reportServerError(location);
928         canRetry = errorsByServer.canRetryMore(numAttempt);
929       }
930       hConnection.updateCachedLocations(this.tableName, actions.get(0).getAction().getRow(),
931           throwable, location);
932       failureCount += actions.size();
933 
934       for (Action<Row> action : actions) {
935         Row row = action.getAction();
936         if (manageError(action.getOriginalIndex(), row, canRetry, throwable, location)) {
937           toReplay.add(action);
938         }
939       }
940     }
941 
942     logAndResubmit(initialActions, location, toReplay, numAttempt, failureCount,
943         throwable, errorsByServer);
944   }
945 
946   private String createLog(int numAttempt, int failureCount, int replaySize, ServerName sn,
947                            Throwable error, long backOffTime, boolean willRetry, String startTime){
948     StringBuilder sb = new StringBuilder();
949 
950     sb.append("#").append(id).append(", table=").append(tableName).
951         append(", attempt=").append(numAttempt).append("/").append(numTries).append(" ");
952 
953     if (failureCount > 0 || error != null){
954       sb.append("failed ").append(failureCount).append(" ops").append(", last exception: ").
955           append(error == null ? "null" : error);
956     } else {
957       sb.append("SUCCEEDED");
958     }
959 
960     sb.append(" on ").append(sn);
961 
962     sb.append(", tracking started ").append(startTime);
963 
964     if (willRetry) {
965       sb.append(", retrying after ").append(backOffTime).append(" ms").
966           append(", replay ").append(replaySize).append(" ops.");
967     } else if (failureCount > 0) {
968       sb.append(" - FAILED, NOT RETRYING ANYMORE");
969     }
970 
971     return sb.toString();
972   }
973 
974   /**
975    * Waits for another task to finish.
976    * @param currentNumberOfTask - the number of tasks finished when calling the method.
977    */
978   protected void waitForNextTaskDone(long currentNumberOfTask) throws InterruptedIOException {
979     synchronized (this.tasksDone) {
980       while (currentNumberOfTask == tasksDone.get()) {
981         try {
982           this.tasksDone.wait(100);
983         } catch (InterruptedException e) {
984           throw new InterruptedIOException("#" + id + ", interrupted." +
985               " currentNumberOfTask=" + currentNumberOfTask +
986               ",  tableName=" + tableName + ", tasksDone=" + tasksDone.get());
987         }
988       }
989     }
990   }
991 
992   /**
993    * Wait until this AsyncProcess has no more than max tasks in progress.
994    */
995   private void waitForMaximumCurrentTasks(int max) throws InterruptedIOException {
996     long lastLog = EnvironmentEdgeManager.currentTimeMillis();
997     long currentTasksDone = this.tasksDone.get();
998 
999     while ((tasksSent.get() - currentTasksDone) > max) {
1000       long now = EnvironmentEdgeManager.currentTimeMillis();
1001       if (now > lastLog + 10000) {
1002         lastLog = now;
1003         LOG.info("#" + id + ", waiting for some tasks to finish. Expected max="
1004             + max + ", tasksSent=" + tasksSent.get() + ", tasksDone=" + tasksDone.get() +
1005             ", currentTasksDone=" + currentTasksDone + ", retries=" + retriesCnt.get() +
1006             " hasError=" + hasError() + ", tableName=" + tableName);
1007       }
1008       waitForNextTaskDone(currentTasksDone);
1009       currentTasksDone = this.tasksDone.get();
1010     }
1011   }
1012 
1013  private long getCurrentTasksCount() {
1014    return tasksSent.get() - tasksDone.get();
1015   }
1016 
1017   /**
1018    * Wait until all tasks are executed, successfully or not.
1019    */
1020   public void waitUntilDone() throws InterruptedIOException {
1021     waitForMaximumCurrentTasks(0);
1022   }
1023 
1024 
1025   public boolean hasError() {
1026     return hasError.get();
1027   }
1028 
1029   public List<? extends Row> getFailedOperations() {
1030     return errors.actions;
1031   }
1032 
1033   /**
1034   * Clear the error stacks. Should be called only when there are no actions in progress.
1035    */
1036   public void clearErrors() {
1037     errors.clear();
1038     hasError.set(false);
1039   }
1040 
1041   public RetriesExhaustedWithDetailsException getErrors() {
1042     return errors.makeException();
1043   }
1044 
1045   /**
1046   * Increment the task counters for a given set of regions. MT safe.
1047    */
1048   protected void incTaskCounters(Collection<byte[]> regions, ServerName sn) {
1049     tasksSent.incrementAndGet();
1050 
1051     AtomicInteger serverCnt = taskCounterPerServer.get(sn);
1052     if (serverCnt == null) {
1053       taskCounterPerServer.putIfAbsent(sn, new AtomicInteger());
1054       serverCnt = taskCounterPerServer.get(sn);
1055     }
1056     serverCnt.incrementAndGet();
1057 
1058     for (byte[] regBytes : regions) {
1059       AtomicInteger regionCnt = taskCounterPerRegion.get(regBytes);
1060       if (regionCnt == null) {
1061         regionCnt = new AtomicInteger();
1062         AtomicInteger oldCnt = taskCounterPerRegion.putIfAbsent(regBytes, regionCnt);
1063         if (oldCnt != null) {
1064           regionCnt = oldCnt;
1065         }
1066       }
1067       regionCnt.incrementAndGet();
1068     }
1069   }
1070 
1071   /**
1072    * Decrements the counters for a given region and the region server. MT Safe.
1073    */
1074   protected void decTaskCounters(Collection<byte[]> regions, ServerName sn) {
1075     for (byte[] regBytes : regions) {
1076       AtomicInteger regionCnt = taskCounterPerRegion.get(regBytes);
1077       regionCnt.decrementAndGet();
1078     }
1079 
1080     taskCounterPerServer.get(sn).decrementAndGet();
1081 
1082     tasksDone.incrementAndGet();
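    // Wake up any thread blocked in waitForNextTaskDone() (and, through it,
    // waitForMaximumCurrentTasks()) now that tasksDone has been incremented.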
1083     synchronized (tasksDone) {
1084       tasksDone.notifyAll();
1085     }
1086   }
1087 
1088   /**
1089    * Creates the server error tracker to use inside process.
1090    * Currently, to preserve the main assumption about current retries, and to work well with
1091    * the retry-limit-based calculation, the calculation is local per Process object.
1092    * We may benefit from connection-wide tracking of server errors.
1093   * @return the ServerErrorTracker to use; this implementation always returns a new tracker
1094    */
1095   protected HConnectionManager.ServerErrorTracker createServerErrorTracker() {
1096     return new HConnectionManager.ServerErrorTracker(this.serverTrackerTimeout, this.numTries);
1097   }
1098 }