View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.ipc;
22  
23  import java.io.BufferedInputStream;
24  import java.io.BufferedOutputStream;
25  import java.io.DataInputStream;
26  import java.io.DataOutputStream;
27  import java.io.FilterInputStream;
28  import java.io.IOException;
29  import java.io.InputStream;
30  import java.net.ConnectException;
31  import java.net.InetSocketAddress;
32  import java.net.Socket;
33  import java.net.SocketException;
34  import java.net.SocketTimeoutException;
35  import java.net.UnknownHostException;
36  import java.util.Iterator;
37  import java.util.LinkedList;
38  import java.util.Map.Entry;
39  import java.util.concurrent.ConcurrentSkipListMap;
40  import java.util.concurrent.atomic.AtomicBoolean;
41  import java.util.concurrent.atomic.AtomicLong;
42  
43  import javax.net.SocketFactory;
44  
45  import org.apache.commons.logging.Log;
46  import org.apache.commons.logging.LogFactory;
47  import org.apache.hadoop.conf.Configuration;
48  import org.apache.hadoop.hbase.HConstants;
49  import org.apache.hadoop.hbase.security.User;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
52  import org.apache.hadoop.hbase.util.Pair;
53  import org.apache.hadoop.hbase.util.PoolMap;
54  import org.apache.hadoop.hbase.util.PoolMap.PoolType;
55  import org.apache.hadoop.io.DataOutputBuffer;
56  import org.apache.hadoop.io.IOUtils;
57  import org.apache.hadoop.io.Writable;
58  import org.apache.hadoop.io.WritableUtils;
59  import org.apache.hadoop.ipc.RemoteException;
60  import org.apache.hadoop.net.NetUtils;
61  import org.apache.hadoop.util.ReflectionUtils;
62  
63  /** A client for an IPC service.  IPC calls take a single {@link Writable} as a
64   * parameter, and return a {@link Writable} as their value.  A service runs on
65   * a port and is defined by a parameter class and a value class.
66   *
67   * <p>This is the org.apache.hadoop.ipc.Client renamed as HBaseClient and
68   * moved into this package so that it can access package-private methods.
69   *
70   * @see HBaseServer
71   */
72  public class HBaseClient {
73  
74    private static final Log LOG = LogFactory
75        .getLog("org.apache.hadoop.ipc.HBaseClient");
76    protected final PoolMap<ConnectionId, Connection> connections;
77  
78    protected final Class<? extends Writable> valueClass;   // class of call values
79    protected int counter;                            // counter for call ids
80    protected final AtomicBoolean running = new AtomicBoolean(true); // if client runs
81    final protected Configuration conf;
82    final protected int maxIdleTime; // connections will be culled if it was idle for
83                             // maxIdleTime microsecs
84    final protected int maxRetries; //the max. no. of retries for socket connections
85    final protected long failureSleep; // Time to sleep before retry on failure.
86    protected final boolean tcpNoDelay; // if T then disable Nagle's Algorithm
87    protected final boolean tcpKeepAlive; // if T then use keepalives
88    protected int pingInterval; // how often sends ping to the server in msecs
89    protected int socketTimeout; // socket timeout
90    protected FailedServers failedServers;
91  
92    protected final SocketFactory socketFactory;           // how to create sockets
93    protected String clusterId;
94  
95    final private static String PING_INTERVAL_NAME = "ipc.ping.interval";
96    final private static String SOCKET_TIMEOUT = "ipc.socket.timeout";
97    final static int DEFAULT_PING_INTERVAL = 60000;  // 1 min
98    final static int DEFAULT_SOCKET_TIMEOUT = 20000; // 20 seconds
99    final static int PING_CALL_ID = -1;
100 
101   public final static String FAILED_SERVER_EXPIRY_KEY = "hbase.ipc.client.failed.servers.expiry";
102   public final static int FAILED_SERVER_EXPIRY_DEFAULT = 2000;
103 
104   /**
105    * A class to manage a list of servers that failed recently.
106    */
107   static class FailedServers {
108     private final LinkedList<Pair<Long, String>> failedServers = new
109         LinkedList<Pair<Long, String>>();
110     private final int recheckServersTimeout;
111 
112     FailedServers(Configuration conf) {
113       this.recheckServersTimeout = conf.getInt(
114           FAILED_SERVER_EXPIRY_KEY, FAILED_SERVER_EXPIRY_DEFAULT);
115     }
116 
117     /**
118      * Add an address to the list of the failed servers list.
119      */
120     public synchronized void addToFailedServers(InetSocketAddress address) {
121       final long expiry = EnvironmentEdgeManager.currentTimeMillis() + recheckServersTimeout;
122       failedServers.addFirst(new Pair<Long, String>(expiry, address.toString()));
123     }
124 
125     /**
126      * Check if the server should be considered as bad. Clean the old entries of the list.
127      *
128      * @return true if the server is in the failed servers list
129      */
130     public synchronized boolean isFailedServer(final InetSocketAddress address) {
131       if (failedServers.isEmpty()) {
132         return false;
133       }
134 
135       final String lookup = address.toString();
136       final long now = EnvironmentEdgeManager.currentTimeMillis();
137 
138       // iterate, looking for the search entry and cleaning expired entries
139       Iterator<Pair<Long, String>> it = failedServers.iterator();
140       while (it.hasNext()) {
141         Pair<Long, String> cur = it.next();
142         if (cur.getFirst() < now) {
143           it.remove();
144         } else {
145           if (lookup.equals(cur.getSecond())) {
146             return true;
147           }
148         }
149       }
150 
151       return false;
152     }
153 
154   }
155 
156   public static class FailedServerException extends IOException {
157     public FailedServerException(String s) {
158       super(s);
159     }
160   }
161 
162 
163   /**
164    * set the ping interval value in configuration
165    *
166    * @param conf Configuration
167    * @param pingInterval the ping interval
168    */
169   public static void setPingInterval(Configuration conf, int pingInterval) {
170     conf.setInt(PING_INTERVAL_NAME, pingInterval);
171   }
172 
173   /**
174    * Get the ping interval from configuration;
175    * If not set in the configuration, return the default value.
176    *
177    * @param conf Configuration
178    * @return the ping interval
179    */
180   static int getPingInterval(Configuration conf) {
181     return conf.getInt(PING_INTERVAL_NAME, DEFAULT_PING_INTERVAL);
182   }
183 
184   /**
185    * Set the socket timeout
186    * @param conf Configuration
187    * @param socketTimeout the socket timeout
188    */
189   public static void setSocketTimeout(Configuration conf, int socketTimeout) {
190     conf.setInt(SOCKET_TIMEOUT, socketTimeout);
191   }
192 
193   /**
194    * @return the socket timeout
195    */
196   static int getSocketTimeout(Configuration conf) {
197     return conf.getInt(SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT);
198   }
199 
200   /** A call waiting for a value. */
201   protected class Call {
202     final int id;                                       // call id
203     final Writable param;                               // parameter
204     Writable value;                               // value, null if error
205     IOException error;                            // exception, null if value
206     boolean done;                                 // true when call is done
207     long startTime;
208 
209     protected Call(Writable param) {
210       this.param = param;
211       this.startTime = System.currentTimeMillis();
212       synchronized (HBaseClient.this) {
213         this.id = counter++;
214       }
215     }
216 
217     /** Indicate when the call is complete and the
218      * value or error are available.  Notifies by default.  */
219     protected synchronized void callComplete() {
220       this.done = true;
221       notify();                                 // notify caller
222     }
223 
224     /** Set the exception when there is an error.
225      * Notify the caller the call is done.
226      *
227      * @param error exception thrown by the call; either local or remote
228      */
229     public synchronized void setException(IOException error) {
230       this.error = error;
231       callComplete();
232     }
233 
234     /** Set the return value when there is no error.
235      * Notify the caller the call is done.
236      *
237      * @param value return value of the call.
238      */
239     public synchronized void setValue(Writable value) {
240       this.value = value;
241       callComplete();
242     }
243 
244     public long getStartTime() {
245       return this.startTime;
246     }
247   }
248 
249   /**
250    * Creates a connection. Can be overridden by a subclass for testing.
251    *
252    * @param remoteId - the ConnectionId to use for the connection creation.
253    */
254   protected Connection createConnection(ConnectionId remoteId) throws IOException {
255     return new Connection(remoteId);
256   }
257 
258   /** Thread that reads responses and notifies callers.  Each connection owns a
259    * socket connected to a remote address.  Calls are multiplexed through this
260    * socket: responses may be delivered out of order. */
261   protected class Connection extends Thread {
262     private ConnectionHeader header;              // connection header
263     protected ConnectionId remoteId;
264     protected Socket socket = null;                 // connected socket
265     protected DataInputStream in;
266     protected DataOutputStream out;
267 
268     // currently active calls
269     protected final ConcurrentSkipListMap<Integer, Call> calls = new ConcurrentSkipListMap<Integer, Call>();
270     protected final AtomicLong lastActivity = new AtomicLong();// last I/O activity time
271     protected final AtomicBoolean shouldCloseConnection = new AtomicBoolean();  // indicate if the connection is closed
272     protected IOException closeException; // close reason
273 
274     Connection(ConnectionId remoteId) throws IOException {
275       if (remoteId.getAddress().isUnresolved()) {
276         throw new UnknownHostException("unknown host: " +
277                                        remoteId.getAddress().getHostName());
278       }
279       this.remoteId = remoteId;
280       User ticket = remoteId.getTicket();
281       Class<? extends VersionedProtocol> protocol = remoteId.getProtocol();
282 
283       header = new ConnectionHeader(
284           protocol == null ? null : protocol.getName(), ticket);
285 
286       this.setName("IPC Client (" + socketFactory.hashCode() +") connection to " +
287         remoteId.getAddress().toString() +
288         ((ticket==null)?" from an unknown user": (" from " + ticket.getName())));
289       this.setDaemon(true);
290     }
291 
292     /** Update lastActivity with the current time. */
293     protected void touch() {
294       lastActivity.set(System.currentTimeMillis());
295     }
296 
297     /**
298      * Add a call to this connection's call queue and notify
299      * a listener; synchronized. If the connection is dead, the call is not added, and the
300      * caller is notified.
301      * This function can return a connection that is already marked as 'shouldCloseConnection'
302      *  It is up to the user code to check this status.
303      * @param call to add
304      */
305     protected synchronized void addCall(Call call) {
306       // If the connection is about to close, we manage this as if the call was already added
307       //  to the connection calls list. If not, the connection creations are serialized, as
308       //  mentioned in HBASE-6364
309       if (this.shouldCloseConnection.get()) {
310         if (this.closeException == null) {
311           call.setException(new IOException(
312               "Call " + call.id + " not added as the connection " + remoteId + " is closing"));
313         } else {
314           call.setException(this.closeException);
315         }
316         synchronized (call) {
317           call.notifyAll();
318         }
319       } else {
320         calls.put(call.id, call);
321         notify();
322       }
323     }
324 
325     /** This class sends a ping to the remote side when timeout on
326      * reading. If no failure is detected, it retries until at least
327      * a byte is read.
328      */
329     protected class PingInputStream extends FilterInputStream {
330       /* constructor */
331       protected PingInputStream(InputStream in) {
332         super(in);
333       }
334 
335       /* Process timeout exception
336        * if the connection is not going to be closed, send a ping.
337        * otherwise, throw the timeout exception.
338        */
339       private void handleTimeout(SocketTimeoutException e) throws IOException {
340         if (shouldCloseConnection.get() || !running.get() ||
341             remoteId.rpcTimeout > 0) {
342           throw e;
343         }
344         sendPing();
345       }
346 
347       /** Read a byte from the stream.
348        * Send a ping if timeout on read. Retries if no failure is detected
349        * until a byte is read.
350        * @throws IOException for any IO problem other than socket timeout
351        */
352       @Override
353       public int read() throws IOException {
354         do {
355           try {
356             return super.read();
357           } catch (SocketTimeoutException e) {
358             handleTimeout(e);
359           }
360         } while (true);
361       }
362 
363       /** Read bytes into a buffer starting from offset <code>off</code>
364        * Send a ping if timeout on read. Retries if no failure is detected
365        * until a byte is read.
366        *
367        * @return the total number of bytes read; -1 if the connection is closed.
368        */
369       @Override
370       public int read(byte[] buf, int off, int len) throws IOException {
371         do {
372           try {
373             return super.read(buf, off, len);
374           } catch (SocketTimeoutException e) {
375             handleTimeout(e);
376           }
377         } while (true);
378       }
379     }
380 
381     protected synchronized void setupConnection() throws IOException {
382       short ioFailures = 0;
383       short timeoutFailures = 0;
384       while (true) {
385         try {
386           this.socket = socketFactory.createSocket();
387           this.socket.setTcpNoDelay(tcpNoDelay);
388           this.socket.setKeepAlive(tcpKeepAlive);
389           // connection time out is 20s
390           NetUtils.connect(this.socket, remoteId.getAddress(),
391               getSocketTimeout(conf));
392           if (remoteId.rpcTimeout > 0) {
393             pingInterval = remoteId.rpcTimeout; // overwrite pingInterval
394           }
395           this.socket.setSoTimeout(pingInterval);
396           return;
397         } catch (SocketTimeoutException toe) {
398           /* The max number of retries is 45,
399            * which amounts to 20s*45 = 15 minutes retries.
400            */
401           handleConnectionFailure(timeoutFailures++, maxRetries, toe);
402         } catch (IOException ie) {
403           handleConnectionFailure(ioFailures++, maxRetries, ie);
404         }
405       }
406     }
407 
408     /** Connect to the server and set up the I/O streams. It then sends
409      * a header to the server and starts
410      * the connection thread that waits for responses.
411      * @throws java.io.IOException e
412      */
413     protected synchronized void setupIOstreams()
414         throws IOException, InterruptedException {
415 
416       if (socket != null || shouldCloseConnection.get()) {
417         return;
418       }
419 
420       if (failedServers.isFailedServer(remoteId.getAddress())) {
421         if (LOG.isDebugEnabled()) {
422           LOG.debug("Not trying to connect to " + remoteId.getAddress() +
423               " this server is in the failed servers list");
424         }
425         IOException e = new FailedServerException(
426             "This server is in the failed servers list: " + remoteId.getAddress());
427         markClosed(e);
428         close();
429         throw e;
430       }
431 
432       try {
433         if (LOG.isDebugEnabled()) {
434           LOG.debug("Connecting to "+remoteId);
435         }
436         setupConnection();
437         this.in = new DataInputStream(new BufferedInputStream
438             (new PingInputStream(NetUtils.getInputStream(socket))));
439         this.out = new DataOutputStream
440             (new BufferedOutputStream(NetUtils.getOutputStream(socket)));
441         writeHeader();
442 
443         // update last activity time
444         touch();
445 
446         // start the receiver thread after the socket connection has been set up
447         start();
448       } catch (IOException e) {
449         failedServers.addToFailedServers(remoteId.address);
450         markClosed(e);
451         close();
452 
453         throw e;
454       }
455     }
456 
457     protected void closeConnection() {
458       // close the current connection
459       if (socket != null) {
460         try {
461           socket.close();
462         } catch (IOException e) {
463           LOG.warn("Not able to close a socket", e);
464         }
465       }
466       // set socket to null so that the next call to setupIOstreams
467       // can start the process of connect all over again.
468       socket = null;
469     }
470 
471     /**
472      *  Handle connection failures
473      *
474      * If the current number of retries is equal to the max number of retries,
475      * stop retrying and throw the exception; Otherwise backoff N seconds and
476      * try connecting again.
477      *
478      * This Method is only called from inside setupIOstreams(), which is
479      * synchronized. Hence the sleep is synchronized; the locks will be retained.
480      *
481      * @param curRetries current number of retries
482      * @param maxRetries max number of retries allowed
483      * @param ioe failure reason
484      * @throws IOException if max number of retries is reached
485      */
486     private void handleConnectionFailure(
487         int curRetries, int maxRetries, IOException ioe) throws IOException {
488 
489       closeConnection();
490 
491       // throw the exception if the maximum number of retries is reached
492       if (curRetries >= maxRetries) {
493         throw ioe;
494       }
495 
496       // otherwise back off and retry
497       try {
498         Thread.sleep(failureSleep);
499       } catch (InterruptedException ignored) {}
500 
501       LOG.info("Retrying connect to server: " + remoteId.getAddress() +
502         " after sleeping " + failureSleep + "ms. Already tried " + curRetries +
503         " time(s).");
504     }
505 
506     /* Write the header for each connection
507      * Out is not synchronized because only the first thread does this.
508      */
509     private void writeHeader() throws IOException {
510       out.write(HBaseServer.HEADER.array());
511       out.write(HBaseServer.CURRENT_VERSION);
512       //When there are more fields we can have ConnectionHeader Writable.
513       DataOutputBuffer buf = new DataOutputBuffer();
514       header.write(buf);
515 
516       int bufLen = buf.getLength();
517       out.writeInt(bufLen);
518       out.write(buf.getData(), 0, bufLen);
519     }
520 
521     /* wait till someone signals us to start reading RPC response or
522      * it is idle too long, it is marked as to be closed,
523      * or the client is marked as not running.
524      *
525      * Return true if it is time to read a response; false otherwise.
526      */
527     @SuppressWarnings({"ThrowableInstanceNeverThrown"})
528     protected synchronized boolean waitForWork() {
529       if (calls.isEmpty() && !shouldCloseConnection.get()  && running.get())  {
530         long timeout = maxIdleTime-
531               (System.currentTimeMillis()-lastActivity.get());
532         if (timeout>0) {
533           try {
534             wait(timeout);
535           } catch (InterruptedException ignored) {}
536         }
537       }
538 
539       if (!calls.isEmpty() && !shouldCloseConnection.get() && running.get()) {
540         return true;
541       } else if (shouldCloseConnection.get()) {
542         return false;
543       } else if (calls.isEmpty()) { // idle connection closed or stopped
544         markClosed(null);
545         return false;
546       } else { // get stopped but there are still pending requests
547         markClosed((IOException)new IOException().initCause(
548             new InterruptedException()));
549         return false;
550       }
551     }
552 
553     public InetSocketAddress getRemoteAddress() {
554       return remoteId.getAddress();
555     }
556 
557     /* Send a ping to the server if the time elapsed
558      * since last I/O activity is equal to or greater than the ping interval
559      */
560     protected synchronized void sendPing() throws IOException {
561       long curTime = System.currentTimeMillis();
562       if ( curTime - lastActivity.get() >= pingInterval) {
563         lastActivity.set(curTime);
564         //noinspection SynchronizeOnNonFinalField
565         synchronized (this.out) {
566           out.writeInt(PING_CALL_ID);
567           out.flush();
568         }
569       }
570     }
571 
572     @Override
573     public void run() {
574       if (LOG.isDebugEnabled())
575         LOG.debug(getName() + ": starting, having connections "
576             + connections.size());
577 
578       try {
579         while (waitForWork()) {//wait here for work - read or close connection
580           receiveResponse();
581         }
582       } catch (Throwable t) {
583         LOG.warn("Unexpected exception receiving call responses", t);
584         markClosed(new IOException("Unexpected exception receiving call responses", t));
585       }
586 
587       close();
588 
589       if (LOG.isDebugEnabled())
590         LOG.debug(getName() + ": stopped, remaining connections "
591             + connections.size());
592     }
593 
594     /* Initiates a call by sending the parameter to the remote server.
595      * Note: this is not called from the Connection thread, but by other
596      * threads.
597      */
598     protected void sendParam(Call call) {
599       if (shouldCloseConnection.get()) {
600         return;
601       }
602 
603       // For serializing the data to be written.
604 
605       final DataOutputBuffer d = new DataOutputBuffer();
606       try {
607         if (LOG.isDebugEnabled())
608           LOG.debug(getName() + " sending #" + call.id);
609 
610         d.writeInt(0xdeadbeef); // placeholder for data length
611         d.writeInt(call.id);
612         call.param.write(d);
613         byte[] data = d.getData();
614         int dataLength = d.getLength();
615         // fill in the placeholder
616         Bytes.putInt(data, 0, dataLength - 4);
617         //noinspection SynchronizeOnNonFinalField
618         synchronized (this.out) { // FindBugs IS2_INCONSISTENT_SYNC
619           out.write(data, 0, dataLength);
620           out.flush();
621         }
622       } catch(IOException e) {
623         markClosed(e);
624       } finally {
625         //the buffer is just an in-memory buffer, but it is still polite to
626         // close early
627         IOUtils.closeStream(d);
628       }
629     }
630 
631     /* Receive a response.
632      * Because only one receiver, so no synchronization on in.
633      */
634     protected void receiveResponse() {
635       if (shouldCloseConnection.get()) {
636         return;
637       }
638       touch();
639 
640       try {
641         // See HBaseServer.Call.setResponse for where we write out the response.
642         // It writes the call.id (int), a flag byte, then optionally the length
643         // of the response (int) followed by data.
644 
645         // Read the call id.
646         int id = in.readInt();
647 
648         if (LOG.isDebugEnabled())
649           LOG.debug(getName() + " got value #" + id);
650         Call call = calls.get(id);
651 
652         // Read the flag byte
653         byte flag = in.readByte();
654         boolean isError = ResponseFlag.isError(flag);
655         if (ResponseFlag.isLength(flag)) {
656           // Currently length if present is unused.
657           in.readInt();
658         }
659         int state = in.readInt(); // Read the state.  Currently unused.
660         if (isError) {
661           if (call != null) {
662             //noinspection ThrowableInstanceNeverThrown
663             call.setException(new RemoteException(WritableUtils.readString(in),
664                 WritableUtils.readString(in)));
665           }
666         } else {
667           Writable value = ReflectionUtils.newInstance(valueClass, conf);
668           value.readFields(in);                 // read value
669           // it's possible that this call may have been cleaned up due to a RPC
670           // timeout, so check if it still exists before setting the value.
671           if (call != null) {
672             call.setValue(value);
673           }
674         }
675         calls.remove(id);
676       } catch (IOException e) {
677         if (e instanceof SocketTimeoutException && remoteId.rpcTimeout > 0) {
678           // Clean up open calls but don't treat this as a fatal condition,
679           // since we expect certain responses to not make it by the specified
680           // {@link ConnectionId#rpcTimeout}.
681           closeException = e;
682         } else {
683           // Since the server did not respond within the default ping interval
684           // time, treat this as a fatal condition and close this connection
685           markClosed(e);
686         }
687       } finally {
688         if (remoteId.rpcTimeout > 0) {
689           cleanupCalls(remoteId.rpcTimeout);
690         }
691       }
692     }
693 
694     protected synchronized void markClosed(IOException e) {
695       if (shouldCloseConnection.compareAndSet(false, true)) {
696         closeException = e;
697         notifyAll();
698       }
699     }
700 
701     /** Close the connection. */
702     protected synchronized void close() {
703       if (!shouldCloseConnection.get()) {
704         LOG.error("The connection is not in the closed state");
705         return;
706       }
707 
708       // release the resources
709       // first thing to do;take the connection out of the connection list
710       synchronized (connections) {
711         connections.remove(remoteId, this);
712       }
713 
714       // close the streams and therefore the socket
715       IOUtils.closeStream(out);
716       IOUtils.closeStream(in);
717 
718       // clean up all calls
719       if (closeException == null) {
720         if (!calls.isEmpty()) {
721           LOG.warn(
722               "A connection is closed for no cause and calls are not empty");
723 
724           // clean up calls anyway
725           closeException = new IOException("Unexpected closed connection");
726           cleanupCalls();
727         }
728       } else {
729         // log the info
730         if (LOG.isDebugEnabled()) {
731           LOG.debug("closing ipc connection to " + remoteId.address + ": " +
732               closeException.getMessage(),closeException);
733         }
734 
735         // cleanup calls
736         cleanupCalls();
737       }
738       if (LOG.isDebugEnabled())
739         LOG.debug(getName() + ": closed");
740     }
741 
742     /* Cleanup all calls and mark them as done */
743     protected void cleanupCalls() {
744       cleanupCalls(0);
745     }
746 
747     protected void cleanupCalls(long rpcTimeout) {
748       Iterator<Entry<Integer, Call>> itor = calls.entrySet().iterator();
749       while (itor.hasNext()) {
750         Call c = itor.next().getValue();
751         long waitTime = System.currentTimeMillis() - c.getStartTime();
752         if (waitTime >= rpcTimeout) {
753           if (this.closeException == null) {
754             // There may be no exception in the case that there are many calls
755             // being multiplexed over this connection and these are succeeding
756             // fine while this Call object is taking a long time to finish
757             // over on the server; e.g. I just asked the regionserver to bulk
758             // open 3k regions or its a big fat multiput into a heavily-loaded
759             // server (Perhaps this only happens at the extremes?)
760             this.closeException = new CallTimeoutException("Call id=" + c.id +
761               ", waitTime=" + waitTime + ", rpcTimetout=" + rpcTimeout);
762           }
763           c.setException(this.closeException);
764           synchronized (c) {
765             c.notifyAll();
766           }
767           itor.remove();
768         } else {
769           break;
770         }
771       }
772       try {
773         if (!calls.isEmpty()) {
774           Call firstCall = calls.get(calls.firstKey());
775           long maxWaitTime = System.currentTimeMillis() - firstCall.getStartTime();
776           if (maxWaitTime < rpcTimeout) {
777             rpcTimeout -= maxWaitTime;
778           }
779         }
780         if (!shouldCloseConnection.get()) {
781           closeException = null;
782           if (socket != null) {
783             socket.setSoTimeout((int) rpcTimeout);
784           }
785         }
786       } catch (SocketException e) {
787         LOG.debug("Couldn't lower timeout, which may result in longer than expected calls");
788       }
789     }
790   }
791 
792   /**
793    * Client-side call timeout
794    */
795   public static class CallTimeoutException extends IOException {
796     public CallTimeoutException(final String msg) {
797       super(msg);
798     }
799   }
800 
801   /** Call implementation used for parallel calls. */
802   protected class ParallelCall extends Call {
803     private final ParallelResults results;
804     protected final int index;
805 
806     public ParallelCall(Writable param, ParallelResults results, int index) {
807       super(param);
808       this.results = results;
809       this.index = index;
810     }
811 
812     /** Deliver result to result collector. */
813     @Override
814     protected void callComplete() {
815       results.callComplete(this);
816     }
817   }
818 
819   /** Result collector for parallel calls. */
820   protected static class ParallelResults {
821     protected final Writable[] values;
822     protected int size;
823     protected int count;
824 
825     public ParallelResults(int size) {
826       this.values = new Writable[size];
827       this.size = size;
828     }
829 
830     /*
831      * Collect a result.
832      */
833     synchronized void callComplete(ParallelCall call) {
834       // FindBugs IS2_INCONSISTENT_SYNC
835       values[call.index] = call.value;            // store the value
836       count++;                                    // count it
837       if (count == size)                          // if all values are in
838         notify();                                 // then notify waiting caller
839     }
840   }
841 
/**
 * Construct an IPC client whose values are of the given {@link Writable}
 * class.  Connection settings are read from <code>conf</code>; the key and
 * the fallback used when the key is absent are visible at each read below.
 *
 * @param valueClass value class
 * @param conf configuration
 * @param factory socket factory
 */
public HBaseClient(Class<? extends Writable> valueClass, Configuration conf,
    SocketFactory factory) {
  this.valueClass = valueClass;
  this.maxIdleTime =
    conf.getInt("hbase.ipc.client.connection.maxidletime", 10000); //10s
  this.maxRetries = conf.getInt("hbase.ipc.client.connect.max.retries", 0);
  this.failureSleep = conf.getInt("hbase.client.pause", 1000);
  this.tcpNoDelay = conf.getBoolean("hbase.ipc.client.tcpnodelay", false);
  this.tcpKeepAlive = conf.getBoolean("hbase.ipc.client.tcpkeepalive", true);
  this.pingInterval = getPingInterval(conf);
  if (LOG.isDebugEnabled()) {
    // keep a space before the number so the value is readable in the log
    LOG.debug("The ping interval is " + this.pingInterval + "ms.");
  }
  this.conf = conf;
  this.socketFactory = factory;
  this.clusterId = conf.get(HConstants.CLUSTER_ID, "default");
  // the connection pool's type and size also come from the configuration
  this.connections = new PoolMap<ConnectionId, Connection>(
      getPoolType(conf), getPoolSize(conf));
  this.failedServers = new FailedServers(conf);
}
869 
/**
 * Construct an IPC client that uses the socket factory returned by
 * <code>NetUtils.getDefaultSocketFactory(conf)</code>.
 *
 * @param valueClass value class
 * @param conf configuration
 */
public HBaseClient(Class<? extends Writable> valueClass,
                   Configuration conf) {
  this(
      valueClass,
      conf,
      NetUtils.getDefaultSocketFactory(conf));
}
878 
879   /**
880    * Return the pool type specified in the configuration, which must be set to
881    * either {@link PoolType#RoundRobin} or {@link PoolType#ThreadLocal},
882    * otherwise default to the former.
883    *
884    * For applications with many user threads, use a small round-robin pool. For
885    * applications with few user threads, you may want to try using a
886    * thread-local pool. In any case, the number of {@link HBaseClient} instances
887    * should not exceed the operating system's hard limit on the number of
888    * connections.
889    *
890    * @param config configuration
891    * @return either a {@link PoolType#RoundRobin} or
892    *         {@link PoolType#ThreadLocal}
893    */
894   protected static PoolType getPoolType(Configuration config) {
895     return PoolType.valueOf(config.get(HConstants.HBASE_CLIENT_IPC_POOL_TYPE),
896         PoolType.RoundRobin, PoolType.ThreadLocal);
897   }
898 
899   /**
900    * Return the pool size specified in the configuration, which is applicable only if
901    * the pool type is {@link PoolType#RoundRobin}.
902    *
903    * @param config
904    * @return the maximum pool size
905    */
906   protected static int getPoolSize(Configuration config) {
907     return config.getInt(HConstants.HBASE_CLIENT_IPC_POOL_SIZE, 1);
908   }
909 
910   /** Return the socket factory of this client
911    *
912    * @return this client's socket factory
913    */
914   SocketFactory getSocketFactory() {
915     return socketFactory;
916   }
917 
918   /** Stop all threads related to this client.  No further calls may be made
919    * using this client. */
920   public void stop() {
921     if (LOG.isDebugEnabled()) {
922       LOG.debug("Stopping client");
923     }
924 
925     if (!running.compareAndSet(true, false)) {
926       return;
927     }
928 
929     // wake up all connections
930     synchronized (connections) {
931       for (Connection conn : connections.values()) {
932         conn.interrupt();
933       }
934     }
935 
936     // wait until all connections are closed
937     while (!connections.isEmpty()) {
938       try {
939         Thread.sleep(100);
940       } catch (InterruptedException ignored) {
941       }
942     }
943   }
944 
945   /** Make a call, passing <code>param</code>, to the IPC server running at
946    * <code>address</code>, returning the value.  Throws exceptions if there are
947    * network problems or if the remote code threw an exception.
948    * @param param writable parameter
949    * @param address network address
950    * @return Writable
951    * @throws IOException e
952    */
953   public Writable call(Writable param, InetSocketAddress address)
954   throws IOException, InterruptedException {
955       return call(param, address, null, 0);
956   }
957 
958   public Writable call(Writable param, InetSocketAddress addr,
959                        User ticket, int rpcTimeout)
960                        throws IOException, InterruptedException {
961     return call(param, addr, null, ticket, rpcTimeout);
962   }
963 
964   /** Make a call, passing <code>param</code>, to the IPC server running at
965    * <code>address</code> which is servicing the <code>protocol</code> protocol,
966    * with the <code>ticket</code> credentials, returning the value.
967    * Throws exceptions if there are network problems or if the remote code
968    * threw an exception. */
969   public Writable call(Writable param, InetSocketAddress addr,
970                        Class<? extends VersionedProtocol> protocol,
971                        User ticket, int rpcTimeout)
972       throws InterruptedException, IOException {
973     Call call = new Call(param);
974     Connection connection = getConnection(addr, protocol, ticket, rpcTimeout, call);
975     connection.sendParam(call);                 // send the parameter
976     boolean interrupted = false;
977     //noinspection SynchronizationOnLocalVariableOrMethodParameter
978     synchronized (call) {
979       while (!call.done) {
980         try {
981           call.wait();                           // wait for the result
982         } catch (InterruptedException ignored) {
983           // save the fact that we were interrupted
984           interrupted = true;
985         }
986       }
987 
988       if (interrupted) {
989         // set the interrupt flag now that we are done waiting
990         Thread.currentThread().interrupt();
991       }
992 
993       if (call.error != null) {
994         if (call.error instanceof RemoteException) {
995           call.error.fillInStackTrace();
996           throw call.error;
997         }
998         // local exception
999         throw wrapException(addr, call.error);
1000       }
1001       return call.value;
1002     }
1003   }
1004 
1005   /**
1006    * Take an IOException and the address we were trying to connect to
1007    * and return an IOException with the input exception as the cause.
1008    * The new exception provides the stack trace of the place where
1009    * the exception is thrown and some extra diagnostics information.
1010    * If the exception is ConnectException or SocketTimeoutException,
1011    * return a new one of the same type; Otherwise return an IOException.
1012    *
1013    * @param addr target address
1014    * @param exception the relevant exception
1015    * @return an exception to throw
1016    */
1017   @SuppressWarnings({"ThrowableInstanceNeverThrown"})
1018   protected IOException wrapException(InetSocketAddress addr,
1019                                          IOException exception) {
1020     if (exception instanceof ConnectException) {
1021       //connection refused; include the host:port in the error
1022       return (ConnectException)new ConnectException(
1023            "Call to " + addr + " failed on connection exception: " + exception)
1024                     .initCause(exception);
1025     } else if (exception instanceof SocketTimeoutException) {
1026       return (SocketTimeoutException)new SocketTimeoutException(
1027            "Call to " + addr + " failed on socket timeout exception: "
1028                       + exception).initCause(exception);
1029     } else {
1030       return (IOException)new IOException(
1031            "Call to " + addr + " failed on local exception: " + exception)
1032                                  .initCause(exception);
1033 
1034     }
1035   }
1036 
1037   /** Makes a set of calls in parallel.  Each parameter is sent to the
1038    * corresponding address.  When all values are available, or have timed out
1039    * or errored, the collected results are returned in an array.  The array
1040    * contains nulls for calls that timed out or errored.
1041    * @param params writable parameters
1042    * @param addresses socket addresses
1043    * @return  Writable[]
1044    * @throws IOException e
1045    * @deprecated Use {@link #call(Writable[], InetSocketAddress[], Class, User)} instead
1046    */
1047   @Deprecated
1048   public Writable[] call(Writable[] params, InetSocketAddress[] addresses)
1049     throws IOException, InterruptedException {
1050     return call(params, addresses, null, null);
1051   }
1052 
1053   /** Makes a set of calls in parallel.  Each parameter is sent to the
1054    * corresponding address.  When all values are available, or have timed out
1055    * or errored, the collected results are returned in an array.  The array
1056    * contains nulls for calls that timed out or errored.  */
1057   public Writable[] call(Writable[] params, InetSocketAddress[] addresses,
1058                          Class<? extends VersionedProtocol> protocol,
1059                          User ticket)
1060       throws IOException, InterruptedException {
1061     if (addresses.length == 0) return new Writable[0];
1062 
1063     ParallelResults results = new ParallelResults(params.length);
1064     // TODO this synchronization block doesnt make any sense, we should possibly fix it
1065     //noinspection SynchronizationOnLocalVariableOrMethodParameter
1066     synchronized (results) {
1067       for (int i = 0; i < params.length; i++) {
1068         ParallelCall call = new ParallelCall(params[i], results, i);
1069         try {
1070           Connection connection =
1071               getConnection(addresses[i], protocol, ticket, 0, call);
1072           connection.sendParam(call);             // send each parameter
1073         } catch (IOException e) {
1074           // log errors
1075           LOG.info("Calling "+addresses[i]+" caught: " +
1076                    e.getMessage(),e);
1077           results.size--;                         //  wait for one fewer result
1078         }
1079       }
1080       while (results.count != results.size) {
1081         try {
1082           results.wait();                    // wait for all results
1083         } catch (InterruptedException ignored) {}
1084       }
1085 
1086       return results.values;
1087     }
1088   }
1089 
1090   /* Get a connection from the pool, or create a new one and add it to the
1091    * pool.  Connections to a given host/port are reused. */
1092   protected Connection getConnection(InetSocketAddress addr,
1093                                    Class<? extends VersionedProtocol> protocol,
1094                                    User ticket,
1095                                    int rpcTimeout,
1096                                    Call call)
1097                                    throws IOException, InterruptedException {
1098     if (!running.get()) {
1099       // the client is stopped
1100       throw new IOException("The client is stopped");
1101     }
1102     Connection connection;
1103     /* we could avoid this allocation for each RPC by having a
1104      * connectionsId object and with set() method. We need to manage the
1105      * refs for keys in HashMap properly. For now its ok.
1106      */
1107     ConnectionId remoteId = new ConnectionId(addr, protocol, ticket, rpcTimeout);
1108     synchronized (connections) {
1109       connection = connections.get(remoteId);
1110       if (connection == null) {
1111         connection = createConnection(remoteId);
1112         connections.put(remoteId, connection);
1113       }
1114     }
1115     connection.addCall(call);
1116 
1117     //we don't invoke the method below inside "synchronized (connections)"
1118     //block above. The reason for that is if the server happens to be slow,
1119     //it will take longer to establish a connection and that will slow the
1120     //entire system down.
1121     //Moreover, if the connection is currently created, there will be many threads
1122     // waiting here; as setupIOstreams is synchronized. If the connection fails with a
1123     // timeout, they will all fail simultaneously. This is checked in setupIOstreams.
1124     connection.setupIOstreams();
1125     return connection;
1126   }
1127 
1128   /**
1129    * This class holds the address and the user ticket. The client connections
1130    * to servers are uniquely identified by <remoteAddress, ticket>
1131    */
1132   protected static class ConnectionId {
1133     final InetSocketAddress address;
1134     final User ticket;
1135     final int rpcTimeout;
1136     Class<? extends VersionedProtocol> protocol;
1137     private static final int PRIME = 16777619;
1138 
1139     ConnectionId(InetSocketAddress address,
1140         Class<? extends VersionedProtocol> protocol,
1141         User ticket,
1142         int rpcTimeout) {
1143       this.protocol = protocol;
1144       this.address = address;
1145       this.ticket = ticket;
1146       this.rpcTimeout = rpcTimeout;
1147     }
1148 
1149     InetSocketAddress getAddress() {
1150       return address;
1151     }
1152 
1153     Class<? extends VersionedProtocol> getProtocol() {
1154       return protocol;
1155     }
1156 
1157     User getTicket() {
1158       return ticket;
1159     }
1160 
1161     @Override
1162     public boolean equals(Object obj) {
1163      if (obj instanceof ConnectionId) {
1164        ConnectionId id = (ConnectionId) obj;
1165        return address.equals(id.address) && protocol == id.protocol &&
1166               ((ticket != null && ticket.equals(id.ticket)) ||
1167                (ticket == id.ticket)) && rpcTimeout == id.rpcTimeout;
1168      }
1169      return false;
1170     }
1171 
1172     @Override  // simply use the default Object#hashcode() ?
1173     public int hashCode() {
1174       return (address.hashCode() + PRIME * (
1175                   PRIME * System.identityHashCode(protocol) ^
1176              (ticket == null ? 0 : ticket.hashCode()) )) ^ rpcTimeout;
1177     }
1178   }
1179 
1180   /**
1181    * @return the clusterId
1182    */
1183   public String getClusterId() {
1184     return clusterId;
1185   }
1186 }