View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.procedure;
19  
20  import java.io.IOException;
21  import java.util.Collection;
22  import java.util.HashSet;
23  import java.util.List;
24  import java.util.Set;
25  import java.util.concurrent.ConcurrentMap;
26  import java.util.concurrent.ExecutorService;
27  import java.util.concurrent.Future;
28  import java.util.concurrent.RejectedExecutionException;
29  import java.util.concurrent.SynchronousQueue;
30  import java.util.concurrent.ThreadPoolExecutor;
31  import java.util.concurrent.TimeUnit;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.classification.InterfaceAudience;
36  import org.apache.hadoop.classification.InterfaceStability;
37  import org.apache.hadoop.hbase.DaemonThreadFactory;
38  import org.apache.hadoop.hbase.errorhandling.ForeignException;
39  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
40  
41  import com.google.common.collect.MapMaker;
42  
43  /**
44   * This is the master side of a distributed complex procedure execution.
45   * <p>
46   * The {@link Procedure} is generic and subclassing or customization shouldn't be
47   * necessary -- any customization should happen just in {@link Subprocedure}s.
48   */
49  @InterfaceAudience.Public
50  @InterfaceStability.Evolving
51  public class ProcedureCoordinator {
52    private static final Log LOG = LogFactory.getLog(ProcedureCoordinator.class);
53  
54    final static long KEEP_ALIVE_MILLIS_DEFAULT = 5000;
55    final static long TIMEOUT_MILLIS_DEFAULT = 60000;
56    final static long WAKE_MILLIS_DEFAULT = 500;
57  
58    private final ProcedureCoordinatorRpcs rpcs;
59    private final ExecutorService pool;
60    private final long wakeTimeMillis;
61    private final long timeoutMillis;
62  
63    // Running procedure table.  Maps procedure name to running procedure reference
64    private final ConcurrentMap<String, Procedure> procedures =
65        new MapMaker().concurrencyLevel(4).weakValues().makeMap();
66  
67    /**
68     * Create and start a ProcedureCoordinator.
69     *
70     * The rpc object registers the ProcedureCoordinator and starts any threads in this
71     * constructor.
72     *
73     * @param rpcs
74     * @param pool Used for executing procedures.
75     */
76    public ProcedureCoordinator(ProcedureCoordinatorRpcs rpcs, ThreadPoolExecutor pool) {
77      this(rpcs, pool, TIMEOUT_MILLIS_DEFAULT, WAKE_MILLIS_DEFAULT);
78    }
79  
80    /**
81     * Create and start a ProcedureCoordinator.
82     *
83     * The rpc object registers the ProcedureCoordinator and starts any threads in
84     * this constructor.
85     *
86     * @param rpcs
87     * @param pool Used for executing procedures.
88     * @param timeoutMillis
89     */
90    public ProcedureCoordinator(ProcedureCoordinatorRpcs rpcs, ThreadPoolExecutor pool,
91        long timeoutMillis, long wakeTimeMillis) {
92      this.timeoutMillis = timeoutMillis;
93      this.wakeTimeMillis = wakeTimeMillis;
94      this.rpcs = rpcs;
95      this.pool = pool;
96      this.rpcs.start(this);
97    }
98  
99    /**
100    * Default thread pool for the procedure
101    *
102    * @param coordName
103    * @param opThreads the maximum number of threads to allow in the pool
104    */
105   public static ThreadPoolExecutor defaultPool(String coordName, int opThreads) {
106     return defaultPool(coordName, opThreads, KEEP_ALIVE_MILLIS_DEFAULT);
107   }
108 
109   /**
110    * Default thread pool for the procedure
111    *
112    * @param coordName
113    * @param opThreads the maximum number of threads to allow in the pool
114    * @param keepAliveMillis the maximum time (ms) that excess idle threads will wait for new tasks
115    */
116   public static ThreadPoolExecutor defaultPool(String coordName, int opThreads,
117       long keepAliveMillis) {
118     return new ThreadPoolExecutor(1, opThreads, keepAliveMillis, TimeUnit.MILLISECONDS,
119         new SynchronousQueue<Runnable>(),
120         new DaemonThreadFactory("(" + coordName + ")-proc-coordinator-pool"));
121   }
122 
123   /**
124    * Shutdown the thread pools and release rpc resources
125    * @throws IOException
126    */
127   public void close() throws IOException {
128     // have to use shutdown now to break any latch waiting
129     pool.shutdownNow();
130     rpcs.close();
131   }
132 
133   /**
134    * Submit an procedure to kick off its dependent subprocedures.
135    * @param proc Procedure to execute
136    * @return <tt>true</tt> if the procedure was started correctly, <tt>false</tt> if the
137    *         procedure or any subprocedures could not be started.  Failure could be due to
138    *         submitting a procedure multiple times (or one with the same name), or some sort
139    *         of IO problem.  On errors, the procedure's monitor holds a reference to the exception
140    *         that caused the failure.
141    */
142   boolean submitProcedure(Procedure proc) {
143     // if the submitted procedure was null, then we don't want to run it
144     if (proc == null) {
145       return false;
146     }
147     String procName = proc.getName();
148 
149     // make sure we aren't already running a procedure of that name
150     synchronized (procedures) {
151       Procedure oldProc = procedures.get(procName);
152       if (oldProc != null) {
153         // procedures are always eventually completed on both successful and failed execution
154         if (oldProc.completedLatch.getCount() != 0) {
155           LOG.warn("Procedure " + procName + " currently running.  Rejecting new request");
156           return false;
157         }
158         LOG.debug("Procedure " + procName + " was in running list but was completed.  Accepting new attempt.");
159         procedures.remove(procName);
160       }
161     }
162 
163     // kick off the procedure's execution in a separate thread
164     Future<Void> f = null;
165     try {
166       synchronized (procedures) {
167         this.procedures.put(procName, proc);
168         f = this.pool.submit(proc);
169       }
170       return true;
171     } catch (RejectedExecutionException e) {
172       LOG.warn("Procedure " + procName + " rejected by execution pool.  Propagating error and " +
173           "cancelling operation.", e);
174       // Remove the procedure from the list since is not started
175       this.procedures.remove(procName);
176       // the thread pool is full and we can't run the procedure
177       proc.receive(new ForeignException(procName, e));
178 
179       // cancel procedure proactively
180       if (f != null) {
181         f.cancel(true);
182       }
183     }
184     return false;
185   }
186 
187   /**
188    * The connection to the rest of the procedure group (members and coordinator) has been
189    * broken/lost/failed. This should fail any interested procedures, but not attempt to notify other
190    * members since we cannot reach them anymore.
191    * @param message description of the error
192    * @param cause the actual cause of the failure
193    */
194   void rpcConnectionFailure(final String message, final IOException cause) {
195     Collection<Procedure> toNotify = procedures.values();
196 
197     for (Procedure proc : toNotify) {
198       if (proc == null) {
199         continue;
200       }
201       // notify the elements, if they aren't null
202       proc.receive(new ForeignException(proc.getName(), cause));
203     }
204   }
205 
206   /**
207    * Abort the procedure with the given name
208    * @param procName name of the procedure to abort
209    * @param reason serialized information about the abort
210    */
211   public void abortProcedure(String procName, ForeignException reason) {
212     // if we know about the Procedure, notify it
213     synchronized(procedures) {
214       Procedure proc = procedures.get(procName);
215       if (proc == null) {
216         return;
217       }
218       proc.receive(reason);
219     }
220   }
221 
222   /**
223    * Exposed for hooking with unit tests.
224    * @param procName
225    * @param procArgs
226    * @param expectedMembers
227    * @return
228    */
229   Procedure createProcedure(ForeignExceptionDispatcher fed, String procName, byte[] procArgs,
230       List<String> expectedMembers) {
231     // build the procedure
232     return new Procedure(this, fed, wakeTimeMillis, timeoutMillis,
233         procName, procArgs, expectedMembers);
234   }
235 
236   /**
237    * Kick off the named procedure
238    * @param procName name of the procedure to start
239    * @param procArgs arguments for the procedure
240    * @param expectedMembers expected members to start
241    * @return handle to the running procedure, if it was started correctly, <tt>null</tt> otherwise
242    * @throws RejectedExecutionException if there are no more available threads to run the procedure
243    */
244   public Procedure startProcedure(ForeignExceptionDispatcher fed, String procName, byte[] procArgs,
245       List<String> expectedMembers) throws RejectedExecutionException {
246     Procedure proc = createProcedure(fed, procName, procArgs, expectedMembers);
247     if (!this.submitProcedure(proc)) {
248       LOG.error("Failed to submit procedure '" + procName + "'");
249       return null;
250     }
251     return proc;
252   }
253 
254   /**
255    * Notification that the procedure had the specified member acquired its part of the barrier
256    * via {@link Subprocedure#acquireBarrier()}.
257    * @param procName name of the procedure that acquired
258    * @param member name of the member that acquired
259    */
260   void memberAcquiredBarrier(String procName, final String member) {
261     Procedure proc = procedures.get(procName);
262     if (proc == null) {
263       LOG.warn("Member '"+ member +"' is trying to acquire an unknown procedure '"+ procName +"'");
264       return;
265     }
266 
267     proc.barrierAcquiredByMember(member);
268   }
269 
270   /**
271    * Notification that the procedure had another member finished executing its in-barrier subproc
272    * via {@link Subprocedure#insideBarrier()}.
273    * @param procName name of the subprocedure that finished
274    * @param member name of the member that executed and released its barrier
275    */
276   void memberFinishedBarrier(String procName, final String member) {
277     Procedure proc = procedures.get(procName);
278     if (proc == null) {
279       LOG.warn("Member '"+ member +"' is trying to release an unknown procedure '"+ procName +"'");
280       return;
281     }
282     proc.barrierReleasedByMember(member);
283   }
284 
285   /**
286    * @return the rpcs implementation for all current procedures
287    */
288   ProcedureCoordinatorRpcs getRpcs() {
289     return rpcs;
290   }
291 
292   /**
293    * Returns the procedure.  This Procedure is a live instance so should not be modified but can
294    * be inspected.
295    * @param name Name of the procedure
296    * @return Procedure or null if not present any more
297    */
298   public Procedure getProcedure(String name) {
299     return procedures.get(name);
300   }
301 
302   /**
303    * @return Return set of all procedure names.
304    */
305   public Set<String> getProcedureNames() {
306     return new HashSet<String>(procedures.keySet());
307   }
308 }