View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.client;
20  
21  import java.io.Closeable;
22  import java.io.IOException;
23  import java.lang.reflect.Constructor;
24  import java.lang.reflect.UndeclaredThrowableException;
25  import java.net.SocketException;
26  import java.util.ArrayList;
27  import java.util.HashMap;
28  import java.util.HashSet;
29  import java.util.LinkedHashMap;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.Map.Entry;
33  import java.util.NavigableMap;
34  import java.util.Set;
35  import java.util.concurrent.ConcurrentHashMap;
36  import java.util.concurrent.ConcurrentMap;
37  import java.util.concurrent.CopyOnWriteArraySet;
38  import java.util.concurrent.ExecutorService;
39  import java.util.concurrent.SynchronousQueue;
40  import java.util.concurrent.ThreadPoolExecutor;
41  import java.util.concurrent.TimeUnit;
42  import java.util.concurrent.atomic.AtomicBoolean;
43  import java.util.concurrent.atomic.AtomicInteger;
44  
45  import org.apache.commons.logging.Log;
46  import org.apache.commons.logging.LogFactory;
47  import org.apache.hadoop.classification.InterfaceAudience;
48  import org.apache.hadoop.classification.InterfaceStability;
49  import org.apache.hadoop.conf.Configuration;
50  import org.apache.hadoop.hbase.Chore;
51  import org.apache.hadoop.hbase.TableName;
52  import org.apache.hadoop.hbase.DoNotRetryIOException;
53  import org.apache.hadoop.hbase.HBaseConfiguration;
54  import org.apache.hadoop.hbase.HConstants;
55  import org.apache.hadoop.hbase.HRegionInfo;
56  import org.apache.hadoop.hbase.HRegionLocation;
57  import org.apache.hadoop.hbase.HTableDescriptor;
58  import org.apache.hadoop.hbase.KeyValue;
59  import org.apache.hadoop.hbase.MasterNotRunningException;
60  import org.apache.hadoop.hbase.ServerName;
61  import org.apache.hadoop.hbase.Stoppable;
62  import org.apache.hadoop.hbase.TableNotFoundException;
63  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
64  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
65  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
66  import org.apache.hadoop.hbase.client.coprocessor.Batch;
67  import org.apache.hadoop.hbase.exceptions.RegionMovedException;
68  import org.apache.hadoop.hbase.exceptions.RegionOpeningException;
69  import org.apache.hadoop.hbase.ipc.RpcClient;
70  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
71  import org.apache.hadoop.hbase.protobuf.RequestConverter;
72  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
73  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ClientService;
74  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceRequest;
75  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceResponse;
76  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyNamespaceResponse;
77  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyNamespaceRequest;
78  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CreateNamespaceResponse;
79  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CreateNamespaceRequest;
80  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteNamespaceResponse;
81  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteNamespaceRequest;
82  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.GetNamespaceDescriptorResponse;
83  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.GetNamespaceDescriptorRequest;
84  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos
85      .ListNamespaceDescriptorsResponse;
86  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos
87      .ListNamespaceDescriptorsRequest;
88  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos
89      .ListTableDescriptorsByNamespaceResponse;
90  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos
91      .ListTableDescriptorsByNamespaceRequest;
92  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos
93      .ListTableNamesByNamespaceResponse;
94  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos
95      .ListTableNamesByNamespaceRequest;
96  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AddColumnRequest;
97  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AddColumnResponse;
98  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AssignRegionRequest;
99  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AssignRegionResponse;
100 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.BalanceRequest;
101 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.BalanceResponse;
102 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CatalogScanRequest;
103 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CatalogScanResponse;
104 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CreateTableRequest;
105 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CreateTableResponse;
106 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteColumnRequest;
107 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteColumnResponse;
108 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteSnapshotRequest;
109 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteSnapshotResponse;
110 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteTableRequest;
111 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteTableResponse;
112 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DisableTableRequest;
113 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DisableTableResponse;
114 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DispatchMergingRegionsRequest;
115 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DispatchMergingRegionsResponse;
116 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableCatalogJanitorRequest;
117 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableCatalogJanitorResponse;
118 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableTableRequest;
119 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableTableResponse;
120 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsCatalogJanitorEnabledRequest;
121 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsCatalogJanitorEnabledResponse;
122 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsRestoreSnapshotDoneRequest;
123 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsRestoreSnapshotDoneResponse;
124 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsSnapshotDoneRequest;
125 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsSnapshotDoneResponse;
126 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ListSnapshotRequest;
127 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ListSnapshotResponse;
128 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.MasterAdminService;
129 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyColumnRequest;
130 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyColumnResponse;
131 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyTableRequest;
132 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyTableResponse;
133 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.MoveRegionRequest;
134 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.MoveRegionResponse;
135 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.OfflineRegionRequest;
136 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.OfflineRegionResponse;
137 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.RestoreSnapshotRequest;
138 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.RestoreSnapshotResponse;
139 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.SetBalancerRunningRequest;
140 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.SetBalancerRunningResponse;
141 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ShutdownRequest;
142 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ShutdownResponse;
143 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.StopMasterRequest;
144 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.StopMasterResponse;
145 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.TakeSnapshotRequest;
146 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.TakeSnapshotResponse;
147 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.UnassignRegionRequest;
148 import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.UnassignRegionResponse;
149 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetClusterStatusRequest;
150 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetClusterStatusResponse;
151 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetSchemaAlterStatusRequest;
152 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetSchemaAlterStatusResponse;
153 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetTableDescriptorsRequest;
154 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetTableDescriptorsResponse;
155 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetTableNamesRequest;
156 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetTableNamesResponse;
157 import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.MasterMonitorService;
158 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos;
159 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningRequest;
160 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningResponse;
161 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
162 import org.apache.hadoop.hbase.security.User;
163 import org.apache.hadoop.hbase.util.Bytes;
164 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
165 import org.apache.hadoop.hbase.util.SoftValueSortedMap;
166 import org.apache.hadoop.hbase.util.Threads;
167 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
168 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
169 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
170 import org.apache.hadoop.ipc.RemoteException;
171 import org.apache.zookeeper.KeeperException;
172 
173 import com.google.protobuf.BlockingRpcChannel;
174 import com.google.protobuf.RpcController;
175 import com.google.protobuf.ServiceException;
176 
177 /**
178  * A non-instantiable class that manages creation of {@link HConnection}s.
179  * <p>The simplest way to use this class is by using {@link #createConnection(Configuration)}.
180  * This creates a new {@link HConnection} that is managed by the caller.
181  * From this {@link HConnection} {@link HTableInterface} implementations are retrieved 
182  * with {@link HConnection#getTable(byte[])}. Example:
183  * <pre>
184  * {@code
185  * HConnection connection = HConnectionManager.createConnection(config);
186  * HTableInterface table = connection.getTable("table1");
187  * // use the table as needed, for a single operation and a single thread
188  * table.close();
189  * connection.close();
190  * }
191  * </pre>
192  * <p>The following logic and API will be removed in the future:
193  * <p>This class has a static Map of {@link HConnection} instances keyed by
194  * {@link Configuration}; all invocations of {@link #getConnection(Configuration)}
195  * that pass the same {@link Configuration} instance will be returned the same
196  * {@link  HConnection} instance (Adding properties to a Configuration
197  * instance does not change its object identity; for more on how this is done see
198  * {@link HConnectionKey}).  Sharing {@link HConnection}
199  * instances is usually what you want; all clients of the {@link HConnection}
200  * instances share the HConnections' cache of Region locations rather than each
201  * having to discover for itself the location of meta, etc.  It makes
202  * sense for the likes of the pool of HTables class {@link HTablePool}, for
203  * instance (If concerned that a single {@link HConnection} is insufficient
204  * for sharing amongst clients in, say, a heavily multithreaded environment,
205  * in practice it has not proven to be an issue.  Besides, {@link HConnection} is
206  * implemented atop Hadoop RPC and as of this writing, Hadoop RPC does a
207  * connection per cluster-member, exclusively).
208  *
209  * <p>But sharing connections makes clean up of {@link HConnection} instances a little awkward.
210  * Currently, clients cleanup by calling {@link #deleteConnection(Configuration)}. This will
211  * shutdown the zookeeper connection the HConnection was using and clean up all
212  * HConnection resources as well as stopping proxies to servers out on the
213  * cluster. Not running the cleanup will not end the world; it'll
214  * just stall the closeup some and spew some zookeeper connection failed
215  * messages into the log.  Running the cleanup on a {@link HConnection} that is
216  * subsequently used by another will cause breakage so be careful running
217  * cleanup.
218  * <p>To create a {@link HConnection} that is not shared by others, you can
219  * create a new {@link Configuration} instance, pass this new instance to
220  * {@link #getConnection(Configuration)}, and then when done, close it up by
221  * doing something like the following:
222  * <pre>
223  * {@code
224  * Configuration newConfig = new Configuration(originalConf);
225  * HConnection connection = HConnectionManager.getConnection(newConfig);
226  * // Use the connection to your hearts' delight and then when done...
227  * HConnectionManager.deleteConnection(newConfig, true);
228  * }
229  * </pre>
230  * <p>Cleanup used to be done inside a shutdown hook.  On startup we'd
231  * register a shutdown hook that called {@link #deleteAllConnections()}
232  * on its way out but the order in which shutdown hooks run is not defined so
233  * were problematic for clients of HConnection that wanted to register their
234  * own shutdown hooks so we removed ours though this shifts the onus for
235  * cleanup to the client.
236  */
237 @SuppressWarnings("serial")
238 @InterfaceAudience.Public
239 @InterfaceStability.Evolving
240 public class HConnectionManager {
241   static final Log LOG = LogFactory.getLog(HConnectionManager.class);
242 
243   public static final String RETRIES_BY_SERVER_KEY = "hbase.client.retries.by.server";
244 
245   // An LRU Map of HConnectionKey -> HConnection (TableServer).  All
246   // access must be synchronized.  This map is not private because tests
247   // need to be able to tinker with it.
248   static final Map<HConnectionKey, HConnectionImplementation> CONNECTION_INSTANCES;
249 
250   public static final int MAX_CACHED_CONNECTION_INSTANCES;
251 
252   static {
253     // We set instances to one more than the value specified for {@link
254     // HConstants#ZOOKEEPER_MAX_CLIENT_CNXNS}. By default, the zk default max
255     // connections to the ensemble from the one client is 30, so in that case we
256     // should run into zk issues before the LRU hit this value of 31.
257     MAX_CACHED_CONNECTION_INSTANCES = HBaseConfiguration.create().getInt(
258       HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS, HConstants.DEFAULT_ZOOKEPER_MAX_CLIENT_CNXNS) + 1;
259     CONNECTION_INSTANCES = new LinkedHashMap<HConnectionKey, HConnectionImplementation>(
260         (int) (MAX_CACHED_CONNECTION_INSTANCES / 0.75F) + 1, 0.75F, true) {
261       @Override
262       protected boolean removeEldestEntry(
263           Map.Entry<HConnectionKey, HConnectionImplementation> eldest) {
264          return size() > MAX_CACHED_CONNECTION_INSTANCES;
265        }
266     };
267   }
268 
269   /*
270    * Non-instantiable.
271    */
272   private HConnectionManager() {
273     super();
274   }
275 
276   /**
277    * Get the connection that goes with the passed <code>conf</code> configuration instance.
278    * If no current connection exists, method creates a new connection and keys it using
279    * connection-specific properties from the passed {@link Configuration}; see
280    * {@link HConnectionKey}.
281    * @param conf configuration
282    * @return HConnection object for <code>conf</code>
283    * @throws ZooKeeperConnectionException
284    */
285   @Deprecated
286   @SuppressWarnings("resource")
287   public static HConnection getConnection(final Configuration conf)
288   throws IOException {
289     HConnectionKey connectionKey = new HConnectionKey(conf);
290     synchronized (CONNECTION_INSTANCES) {
291       HConnectionImplementation connection = CONNECTION_INSTANCES.get(connectionKey);
292       if (connection == null) {
293         connection = (HConnectionImplementation)createConnection(conf, true);
294         CONNECTION_INSTANCES.put(connectionKey, connection);
295       } else if (connection.isClosed()) {
296         HConnectionManager.deleteConnection(connectionKey, true);
297         connection = (HConnectionImplementation)createConnection(conf, true);
298         CONNECTION_INSTANCES.put(connectionKey, connection);
299       }
300       connection.incCount();
301       return connection;
302     }
303   }
304 
305   /**
306    * Create a new HConnection instance using the passed <code>conf</code> instance.
307    * <p>Note: This bypasses the usual HConnection life cycle management done by
308    * {@link #getConnection(Configuration)}. The caller is responsible for
309    * calling {@link HConnection#close()} on the returned connection instance.
310    *
311    * This is the recommended way to create HConnections.
312    * {@code
313    * HConnection connection = HConnectionManager.createConnection(conf);
314    * HTableInterface table = connection.getTable("mytable");
315    * table.get(...);
316    * ...
317    * table.close();
318    * connection.close();
319    * }
320    *
321    * @param conf configuration
322    * @return HConnection object for <code>conf</code>
323    * @throws ZooKeeperConnectionException
324    */
325   public static HConnection createConnection(Configuration conf)
326   throws IOException {
327     return createConnection(conf, false, null);
328   }
329 
330   /**
331    * Create a new HConnection instance using the passed <code>conf</code> instance.
332    * <p>Note: This bypasses the usual HConnection life cycle management done by
333    * {@link #getConnection(Configuration)}. The caller is responsible for
334    * calling {@link HConnection#close()} on the returned connection instance.
335    * This is the recommended way to create HConnections.
336    * {@code
337    * ExecutorService pool = ...;
338    * HConnection connection = HConnectionManager.createConnection(conf, pool);
339    * HTableInterface table = connection.getTable("mytable");
340    * table.get(...);
341    * ...
342    * table.close();
343    * connection.close();
344    * }
345    * @param conf configuration
346    * @param pool the thread pool to use for batch operation in HTables used via this HConnection
347    * @return HConnection object for <code>conf</code>
348    * @throws ZooKeeperConnectionException
349    */
350   public static HConnection createConnection(Configuration conf, ExecutorService pool)
351   throws IOException {
352     return createConnection(conf, false, pool);
353   }
354 
355   @Deprecated
356   static HConnection createConnection(final Configuration conf, final boolean managed)
357       throws IOException {
358     return createConnection(conf, managed, null);
359   }
360 
361   @Deprecated
362   static HConnection createConnection(final Configuration conf, final boolean managed, final ExecutorService pool)
363   throws IOException {
364     String className = conf.get("hbase.client.connection.impl",
365       HConnectionManager.HConnectionImplementation.class.getName());
366     Class<?> clazz = null;
367     try {
368       clazz = Class.forName(className);
369     } catch (ClassNotFoundException e) {
370       throw new IOException(e);
371     }
372     try {
373       // Default HCM#HCI is not accessible; make it so before invoking.
374       Constructor<?> constructor =
375         clazz.getDeclaredConstructor(Configuration.class, boolean.class, ExecutorService.class);
376       constructor.setAccessible(true);
377       return (HConnection) constructor.newInstance(conf, managed, pool);
378     } catch (Exception e) {
379       throw new IOException(e);
380     }
381   }
382 
383   /**
384    * Delete connection information for the instance specified by passed configuration.
385    * If there are no more references to the designated connection connection, this method will
386    * then close connection to the zookeeper ensemble and let go of all associated resources.
387    *
388    * @param conf configuration whose identity is used to find {@link HConnection} instance.
389    * @deprecated
390    */
391   public static void deleteConnection(Configuration conf) {
392     deleteConnection(new HConnectionKey(conf), false);
393   }
394 
395   /**
396    * Cleanup a known stale connection.
397    * This will then close connection to the zookeeper ensemble and let go of all resources.
398    *
399    * @param connection
400    * @deprecated
401    */
402   public static void deleteStaleConnection(HConnection connection) {
403     deleteConnection(connection, true);
404   }
405 
406   /**
407    * Delete information for all connections. Close or not the connection, depending on the
408    *  staleConnection boolean and the ref count. By default, you should use it with
409    *  staleConnection to true.
410    * @deprecated
411    */
412   public static void deleteAllConnections(boolean staleConnection) {
413     synchronized (CONNECTION_INSTANCES) {
414       Set<HConnectionKey> connectionKeys = new HashSet<HConnectionKey>();
415       connectionKeys.addAll(CONNECTION_INSTANCES.keySet());
416       for (HConnectionKey connectionKey : connectionKeys) {
417         deleteConnection(connectionKey, staleConnection);
418       }
419       CONNECTION_INSTANCES.clear();
420     }
421   }
422 
423   /**
424    * Delete information for all connections..
425    * @deprecated kept for backward compatibility, but the behavior is broken. HBASE-8983
426    */
427   @Deprecated
428   public static void deleteAllConnections() {
429     deleteAllConnections(false);
430   }
431 
432 
433   @Deprecated
434   private static void deleteConnection(HConnection connection, boolean staleConnection) {
435     synchronized (CONNECTION_INSTANCES) {
436       for (Entry<HConnectionKey, HConnectionImplementation> e: CONNECTION_INSTANCES.entrySet()) {
437         if (e.getValue() == connection) {
438           deleteConnection(e.getKey(), staleConnection);
439           break;
440         }
441       }
442     }
443   }
444 
445   @Deprecated
446   private static void deleteConnection(HConnectionKey connectionKey, boolean staleConnection) {
447     synchronized (CONNECTION_INSTANCES) {
448       HConnectionImplementation connection = CONNECTION_INSTANCES.get(connectionKey);
449       if (connection != null) {
450         connection.decCount();
451         if (connection.isZeroReference() || staleConnection) {
452           CONNECTION_INSTANCES.remove(connectionKey);
453           connection.internalClose();
454         }
455       } else {
456         LOG.error("Connection not found in the list, can't delete it "+
457           "(connection key=" + connectionKey + "). May be the key was modified?", new Exception());
458       }
459     }
460   }
461 
462   /**
463    * It is provided for unit test cases which verify the behavior of region
464    * location cache prefetch.
465    * @return Number of cached regions for the table.
466    * @throws ZooKeeperConnectionException
467    */
468   static int getCachedRegionCount(Configuration conf, final TableName tableName)
469   throws IOException {
470     return execute(new HConnectable<Integer>(conf) {
471       @Override
472       public Integer connect(HConnection connection) {
473         return ((HConnectionImplementation)connection).getNumberOfCachedRegionLocations(tableName);
474       }
475     });
476   }
477 
478   /**
479    * It's provided for unit test cases which verify the behavior of region
480    * location cache prefetch.
481    * @return true if the region where the table and row reside is cached.
482    * @throws ZooKeeperConnectionException
483    */
484   static boolean isRegionCached(Configuration conf,
485                                 final TableName tableName,
486                                 final byte[] row)
487   throws IOException {
488     return execute(new HConnectable<Boolean>(conf) {
489       @Override
490       public Boolean connect(HConnection connection) {
491         return ((HConnectionImplementation) connection).isRegionCached(tableName, row);
492       }
493     });
494   }
495 
496   /**
497    * This convenience method invokes the given {@link HConnectable#connect}
498    * implementation using a {@link HConnection} instance that lasts just for the
499    * duration of the invocation.
500    *
501    * @param <T> the return type of the connect method
502    * @param connectable the {@link HConnectable} instance
503    * @return the value returned by the connect method
504    * @throws IOException
505    */
506   public static <T> T execute(HConnectable<T> connectable) throws IOException {
507     if (connectable == null || connectable.conf == null) {
508       return null;
509     }
510     Configuration conf = connectable.conf;
511     HConnection connection = HConnectionManager.getConnection(conf);
512     boolean connectSucceeded = false;
513     try {
514       T returnValue = connectable.connect(connection);
515       connectSucceeded = true;
516       return returnValue;
517     } finally {
518       try {
519         connection.close();
520       } catch (Exception e) {
521         if (connectSucceeded) {
522           throw new IOException("The connection to " + connection
523               + " could not be deleted.", e);
524         }
525       }
526     }
527   }
528 
529   /** Encapsulates connection to zookeeper and regionservers.*/
530   @edu.umd.cs.findbugs.annotations.SuppressWarnings(
531       value="AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION",
532       justification="Access to the conncurrent hash map is under a lock so should be fine.")
533   static class HConnectionImplementation implements HConnection, Closeable {
534     static final Log LOG = LogFactory.getLog(HConnectionImplementation.class);
535     private final long pause;               // HConstants.HBASE_CLIENT_PAUSE, read in the constructor
536     private final int numTries;             // HConstants.HBASE_CLIENT_RETRIES_NUMBER
537     final int rpcTimeout;                   // HConstants.HBASE_RPC_TIMEOUT_KEY; package-private
538     private final int prefetchRegionLimit;  // HConstants.HBASE_CLIENT_PREFETCH_LIMIT
539 
540     private volatile boolean closed;        // set to false by the constructor
541     private volatile boolean aborted;
542 
543     // package protected for the tests; only assigned when a status listener class is configured
544     ClusterStatusListener clusterStatusListener;
545 
546     private final Object userRegionLock = new Object();
547 
548     // We have a single lock for master & zk to prevent deadlocks. Having
549     //  one lock for ZK and one lock for master is not possible:
550     //  When creating a connection to master, we need a connection to ZK to get
551     //  its address. But another thread could have taken the ZK lock, and could
552     //  be waiting for the master lock => deadlock.
553     private final Object masterAndZKLock = new Object();
554 
555     private long keepZooKeeperWatcherAliveUntil = Long.MAX_VALUE;
556     private final DelayedClosing delayedClosing =
557       DelayedClosing.createAndStart(this);  // runs as a field initializer, before the constructor body
558 
559     // thread executor shared by all HTableInterface instances created
560     // by this connection; taken from the constructor's pool argument, which may be null
561     private volatile ExecutorService batchPool = null;
562     private volatile boolean cleanupPool = false;
563 
564     private final Configuration conf;
565 
566     // Client rpc instance; created in the constructor once the cluster id has been retrieved.
567     private RpcClient rpcClient;
568 
569     /**
570       * Map of table to table {@link HRegionLocation}s.
571       */
572     private final Map<TableName, SoftValueSortedMap<byte[], HRegionLocation>>
573         cachedRegionLocations =
574       new HashMap<TableName, SoftValueSortedMap<byte[], HRegionLocation>>();
575 
576     // The presence of a server in the map implies it's likely that there is an
577     // entry in cachedRegionLocations that map to this server; but the absence
578     // of a server in this map guarantees that there is no entry in cache that
579     // maps to the absent server.
580     // The access to this attribute must be protected by a lock on cachedRegionLocations
581     private final Set<ServerName> cachedServers = new HashSet<ServerName>();
582 
583     // region cache prefetch is enabled by default. this set contains all
584     // tables whose region cache prefetch are disabled.
585     private final Set<Integer> regionCachePrefetchDisabledTables =
586       new CopyOnWriteArraySet<Integer>();  // NOTE(review): Integer elements, presumably a hash of the table name -- confirm
587 
588     private int refCount;  // NOTE(review): presumably driven by incCount()/decCount(), which HConnectionManager calls -- confirm
589 
590     // indicates whether this connection's life cycle is managed (by us)
591     private boolean managed;  // assigned in the three-argument constructor
592 
593     /**
594      * Cluster registry of basic info such as clusterid and meta region location.
595      */
596      Registry registry;
597 
598      HConnectionImplementation(Configuration conf, boolean managed) throws IOException {
599        this(conf, managed, null);
600      }
601      
602     /**
603      * constructor
604      * @param conf Configuration object
605      * @param managed If true, does not do full shutdown on close; i.e. cleanup of connection
606      * to zk and shutdown of all services; we just close down the resources this connection was
607      * responsible for and decrement usage counters.  It is up to the caller to do the full
608      * cleanup.  It is set when we want have connection sharing going on -- reuse of zk connection,
609      * and cached region locations, established regionserver connections, etc.  When connections
610      * are shared, we have reference counting going on and will only do full cleanup when no more
611      * users of an HConnectionImplementation instance.
612      */
613     HConnectionImplementation(Configuration conf, boolean managed, ExecutorService pool) throws IOException {
614       this(conf);
615       this.batchPool = pool;
616       this.managed = managed;
617       this.registry = setupRegistry();
618       retrieveClusterId();
619 
620       this.rpcClient = new RpcClient(this.conf, this.clusterId);
621 
622       // Do we publish the status?
623       Class<? extends ClusterStatusListener.Listener> listenerClass =
624           conf.getClass(ClusterStatusListener.STATUS_LISTENER_CLASS,
625               ClusterStatusListener.DEFAULT_STATUS_LISTENER_CLASS,
626               ClusterStatusListener.Listener.class);
627 
628       if (listenerClass != null) {
629         clusterStatusListener = new ClusterStatusListener(
630             new ClusterStatusListener.DeadServerHandler() {
631               @Override
632               public void newDead(ServerName sn) {
633                 clearCaches(sn);
634                 rpcClient.cancelConnections(sn.getHostname(), sn.getPort(),
635                     new SocketException(sn.getServerName() + " is dead: closing its connection."));
636               }
637             }, conf, listenerClass);
638       }
639     }
640 
641 
642     /**
643      * For tests.
644      */
645     protected HConnectionImplementation(Configuration conf) {
646       this.conf = conf;
647       this.closed = false;
648       this.pause = conf.getLong(HConstants.HBASE_CLIENT_PAUSE,
649           HConstants.DEFAULT_HBASE_CLIENT_PAUSE);
650       this.numTries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
651           HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
652       this.rpcTimeout = conf.getInt(
653           HConstants.HBASE_RPC_TIMEOUT_KEY,
654           HConstants.DEFAULT_HBASE_RPC_TIMEOUT);
655       this.prefetchRegionLimit = conf.getInt(
656           HConstants.HBASE_CLIENT_PREFETCH_LIMIT,
657           HConstants.DEFAULT_HBASE_CLIENT_PREFETCH_LIMIT);
658     }
659  
660     @Override
661     public HTableInterface getTable(String tableName) throws IOException {
662       return getTable(TableName.valueOf(tableName));
663     }
664 
665     @Override
666     public HTableInterface getTable(byte[] tableName) throws IOException {
667       return getTable(TableName.valueOf(tableName));
668     }
669 
670     @Override
671     public HTableInterface getTable(TableName tableName) throws IOException {
672       return getTable(tableName, getBatchPool());
673     }
674 
675     @Override
676     public HTableInterface getTable(String tableName, ExecutorService pool) throws IOException {
677       return getTable(TableName.valueOf(tableName), pool);
678     }
679 
680     @Override
681     public HTableInterface getTable(byte[] tableName, ExecutorService pool) throws IOException {
682       return getTable(TableName.valueOf(tableName), pool);
683     }
684 
685     @Override
686     public HTableInterface getTable(TableName tableName, ExecutorService pool) throws IOException {
687       if (managed) {
688         throw new IOException("The connection has to be unmanaged.");
689       }
690       return new HTable(tableName, this, pool);
691     }
692 
693     private ExecutorService getBatchPool() {
694       if (batchPool == null) {
695         // shared HTable thread executor not yet initialized
696         synchronized (this) {
697           if (batchPool == null) {
698             int maxThreads = conf.getInt("hbase.hconnection.threads.max",
699                 Integer.MAX_VALUE);
700             if (maxThreads == 0) {
701               maxThreads = Runtime.getRuntime().availableProcessors();
702             }
703             long keepAliveTime = conf.getLong(
704                 "hbase.hconnection.threads.keepalivetime", 60);
705             this.batchPool = new ThreadPoolExecutor(
706                 Runtime.getRuntime().availableProcessors(),
707                 maxThreads,
708                 keepAliveTime,
709                 TimeUnit.SECONDS,
710                 new SynchronousQueue<Runnable>(),
711                 Threads.newDaemonThreadFactory("hbase-connection-shared-executor"));
712             ((ThreadPoolExecutor) this.batchPool)
713             .allowCoreThreadTimeOut(true);
714           }
715           this.cleanupPool = true;
716         }
717       }
718       return this.batchPool;
719     }
720 
721     protected ExecutorService getCurrentBatchPool() {
722       return batchPool;
723     }
724 
725     private void shutdownBatchPool() {
726       if (this.cleanupPool && this.batchPool != null && !this.batchPool.isShutdown()) {
727         this.batchPool.shutdown();
728         try {
729           if (!this.batchPool.awaitTermination(10, TimeUnit.SECONDS)) {
730             this.batchPool.shutdownNow();
731           }
732         } catch (InterruptedException e) {
733           this.batchPool.shutdownNow();
734         }
735       }
736     }
737 
738     /**
739      * @return The cluster registry implementation to use.
740      * @throws IOException
741      */
742     private Registry setupRegistry() throws IOException {
743       String registryClass = this.conf.get("hbase.client.registry.impl",
744         ZooKeeperRegistry.class.getName());
745       Registry registry = null;
746       try {
747         registry = (Registry)Class.forName(registryClass).newInstance();
748       } catch (Throwable t) {
749         throw new IOException(t);
750       }
751       registry.init(this);
752       return registry;
753     }
754 
755     /**
756      * For tests only.
757      * @param rpcClient Client we should use instead.
758      * @return Previous rpcClient
759      */
760     RpcClient setRpcClient(final RpcClient rpcClient) {
761       RpcClient oldRpcClient = this.rpcClient;
762       this.rpcClient = rpcClient;
763       return oldRpcClient;
764     }
765 
766     /**
767      * An identifier that will remain the same for a given connection.
768      * @return
769      */
770     public String toString(){
771       return "hconnection-0x" + Integer.toHexString(hashCode());
772     }
773 
774     protected String clusterId = null;
775 
776     void retrieveClusterId() {
777       if (clusterId != null) return;
778       this.clusterId = this.registry.getClusterId();
779       if (clusterId == null) {
780         clusterId = HConstants.CLUSTER_ID_DEFAULT;
781         LOG.debug("clusterid came back null, using default " + clusterId);
782       }
783     }
784 
785     @Override
786     public Configuration getConfiguration() {
787       return this.conf;
788     }
789 
790     private void checkIfBaseNodeAvailable(ZooKeeperWatcher zkw)
791       throws MasterNotRunningException {
792       String errorMsg;
793       try {
794         if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) {
795           errorMsg = "The node " + zkw.baseZNode+" is not in ZooKeeper. "
796             + "It should have been written by the master. "
797             + "Check the value configured in 'zookeeper.znode.parent'. "
798             + "There could be a mismatch with the one configured in the master.";
799           LOG.error(errorMsg);
800           throw new MasterNotRunningException(errorMsg);
801         }
802       } catch (KeeperException e) {
803         errorMsg = "Can't get connection to ZooKeeper: " + e.getMessage();
804         LOG.error(errorMsg);
805         throw new MasterNotRunningException(errorMsg, e);
806       }
807     }
808 
809     /**
810      * @return true if the master is running, throws an exception otherwise
811      * @throws MasterNotRunningException - if the master is not running
812      * @throws ZooKeeperConnectionException
813      */
814     @Override
815     public boolean isMasterRunning()
816     throws MasterNotRunningException, ZooKeeperConnectionException {
817       // When getting the master connection, we check it's running,
818       // so if there is no exception, it means we've been able to get a
819       // connection on a running master
820       MasterMonitorKeepAliveConnection m = getKeepAliveMasterMonitorService();
821       try {
822         m.close();
823       } catch (IOException e) {
824         throw new MasterNotRunningException("Failed close", e);
825       }
826       return true;
827     }
828 
829     @Override
830     public HRegionLocation getRegionLocation(final TableName tableName,
831         final byte [] row, boolean reload)
832     throws IOException {
833       return reload? relocateRegion(tableName, row): locateRegion(tableName, row);
834     }
835 
836     @Override
837     public HRegionLocation getRegionLocation(final byte[] tableName,
838         final byte [] row, boolean reload)
839     throws IOException {
840       return getRegionLocation(TableName.valueOf(tableName), row, reload);
841     }
842 
843     @Override
844     public boolean isTableEnabled(TableName tableName) throws IOException {
845       return this.registry.isTableOnlineState(tableName, true);
846     }
847 
848     @Override
849     public boolean isTableEnabled(byte[] tableName) throws IOException {
850       return isTableEnabled(TableName.valueOf(tableName));
851     }
852 
853     @Override
854     public boolean isTableDisabled(TableName tableName) throws IOException {
855       return this.registry.isTableOnlineState(tableName, false);
856     }
857 
858     @Override
859     public boolean isTableDisabled(byte[] tableName) throws IOException {
860       return isTableDisabled(TableName.valueOf(tableName));
861     }
862 
863     @Override
864     public boolean isTableAvailable(final TableName tableName) throws IOException {
865       final AtomicBoolean available = new AtomicBoolean(true);
866       final AtomicInteger regionCount = new AtomicInteger(0);
867       MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
868         @Override
869         public boolean processRow(Result row) throws IOException {
870           HRegionInfo info = MetaScanner.getHRegionInfo(row);
871           if (info != null) {
872             if (tableName.equals(info.getTableName())) {
873               ServerName server = HRegionInfo.getServerName(row);
874               if (server == null) {
875                 available.set(false);
876                 return false;
877               }
878               regionCount.incrementAndGet();
879             } else if (tableName.compareTo(
880                 info.getTableName()) < 0) {
881               // Return if we are done with the current table
882               return false;
883             }
884           }
885           return true;
886         }
887       };
888       MetaScanner.metaScan(conf, this, visitor, tableName);
889       return available.get() && (regionCount.get() > 0);
890     }
891 
892     @Override
893     public boolean isTableAvailable(final byte[] tableName) throws IOException {
894       return isTableAvailable(TableName.valueOf(tableName));
895     }
896 
897     @Override
898     public boolean isTableAvailable(final TableName tableName, final byte[][] splitKeys)
899         throws IOException {
900       final AtomicBoolean available = new AtomicBoolean(true);
901       final AtomicInteger regionCount = new AtomicInteger(0);
902       MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
903         @Override
904         public boolean processRow(Result row) throws IOException {
905           HRegionInfo info = MetaScanner.getHRegionInfo(row);
906           if (info != null) {
907             if (tableName.equals(info.getTableName())) {
908               ServerName server = HRegionInfo.getServerName(row);
909               if (server == null) {
910                 available.set(false);
911                 return false;
912               }
913               if (!Bytes.equals(info.getStartKey(), HConstants.EMPTY_BYTE_ARRAY)) {
914                 for (byte[] splitKey : splitKeys) {
915                   // Just check if the splitkey is available
916                   if (Bytes.equals(info.getStartKey(), splitKey)) {
917                     regionCount.incrementAndGet();
918                     break;
919                   }
920                 }
921               } else {
922                 // Always empty start row should be counted
923                 regionCount.incrementAndGet();
924               }
925             } else if (tableName.compareTo(info.getTableName()) < 0) {
926               // Return if we are done with the current table
927               return false;
928             }
929           }
930           return true;
931         }
932       };
933       MetaScanner.metaScan(conf, this, visitor, tableName);
934       // +1 needs to be added so that the empty start row is also taken into account
935       return available.get() && (regionCount.get() == splitKeys.length + 1);
936     }
937 
938     @Override
939     public boolean isTableAvailable(final byte[] tableName, final byte[][] splitKeys)
940         throws IOException {
941       return isTableAvailable(TableName.valueOf(tableName), splitKeys);
942     }
943 
944     @Override
945     public HRegionLocation locateRegion(final byte[] regionName) throws IOException {
946       return locateRegion(HRegionInfo.getTableName(regionName),
947           HRegionInfo.getStartKey(regionName), false, true);
948     }
949 
950     @Override
951     public boolean isDeadServer(ServerName sn) {
952       if (clusterStatusListener == null) {
953         return false;
954       } else {
955         return clusterStatusListener.isDeadServer(sn);
956       }
957     }
958 
959     @Override
960     public List<HRegionLocation> locateRegions(final TableName tableName)
961     throws IOException {
962       return locateRegions (tableName, false, true);
963     }
964 
965     @Override
966     public List<HRegionLocation> locateRegions(final byte[] tableName)
967     throws IOException {
968       return locateRegions(TableName.valueOf(tableName));
969     }
970 
971     @Override
972     public List<HRegionLocation> locateRegions(final TableName tableName,
973         final boolean useCache, final boolean offlined) throws IOException {
974       NavigableMap<HRegionInfo, ServerName> regions = MetaScanner.allTableRegions(conf, this,
975           tableName, offlined);
976       final List<HRegionLocation> locations = new ArrayList<HRegionLocation>();
977       for (HRegionInfo regionInfo : regions.keySet()) {
978         locations.add(locateRegion(tableName, regionInfo.getStartKey(), useCache, true));
979       }
980       return locations;
981     }
982 
983     @Override
984     public List<HRegionLocation> locateRegions(final byte[] tableName,
985        final boolean useCache, final boolean offlined) throws IOException {
986       return locateRegions(TableName.valueOf(tableName), useCache, offlined);
987     }
988 
989     @Override
990     public HRegionLocation locateRegion(final TableName tableName,
991         final byte [] row)
992     throws IOException{
993       return locateRegion(tableName, row, true, true);
994     }
995 
996     @Override
997     public HRegionLocation locateRegion(final byte[] tableName,
998         final byte [] row)
999     throws IOException{
1000       return locateRegion(TableName.valueOf(tableName), row);
1001     }
1002 
1003     @Override
1004     public HRegionLocation relocateRegion(final TableName tableName,
1005         final byte [] row) throws IOException{
1006       // Since this is an explicit request not to use any caching, finding
1007       // disabled tables should not be desirable.  This will ensure that an exception is thrown when
1008       // the first time a disabled table is interacted with.
1009       if (isTableDisabled(tableName)) {
1010         throw new DoNotRetryIOException(tableName.getNameAsString() + " is disabled.");
1011       }
1012 
1013       return locateRegion(tableName, row, false, true);
1014     }
1015 
1016     @Override
1017     public HRegionLocation relocateRegion(final byte[] tableName,
1018         final byte [] row) throws IOException {
1019       return relocateRegion(TableName.valueOf(tableName), row);
1020     }
1021 
1022 
1023     private HRegionLocation locateRegion(final TableName tableName,
1024       final byte [] row, boolean useCache, boolean retry)
1025     throws IOException {
1026       if (this.closed) throw new IOException(toString() + " closed");
1027       if (tableName== null || tableName.getName().length == 0) {
1028         throw new IllegalArgumentException(
1029             "table name cannot be null or zero length");
1030       }
1031 
1032       if (tableName.equals(TableName.META_TABLE_NAME)) {
1033         return this.registry.getMetaRegionLocation();
1034       } else {
1035         // Region not in the cache - have to go to the meta RS
1036         return locateRegionInMeta(TableName.META_TABLE_NAME, tableName, row,
1037           useCache, userRegionLock, retry);
1038       }
1039     }
1040 
1041     /*
1042      * Search .META. for the HRegionLocation info that contains the table and
1043      * row we're seeking. It will prefetch certain number of regions info and
1044      * save them to the global region cache.
1045      */
1046     private void prefetchRegionCache(final TableName tableName,
1047         final byte[] row) {
1048       // Implement a new visitor for MetaScanner, and use it to walk through
1049       // the .META.
1050       MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
1051         public boolean processRow(Result result) throws IOException {
1052           try {
1053             HRegionInfo regionInfo = MetaScanner.getHRegionInfo(result);
1054             if (regionInfo == null) {
1055               return true;
1056             }
1057 
1058             // possible we got a region of a different table...
1059             if (!regionInfo.getTableName().equals(tableName)) {
1060               return false; // stop scanning
1061             }
1062             if (regionInfo.isOffline()) {
1063               // don't cache offline regions
1064               return true;
1065             }
1066 
1067             ServerName serverName = HRegionInfo.getServerName(result);
1068             if (serverName == null) {
1069               return true; // don't cache it
1070             }
1071             // instantiate the location
1072             long seqNum = HRegionInfo.getSeqNumDuringOpen(result);
1073             HRegionLocation loc = new HRegionLocation(regionInfo, serverName, seqNum);
1074             // cache this meta entry
1075             cacheLocation(tableName, null, loc);
1076             return true;
1077           } catch (RuntimeException e) {
1078             throw new IOException(e);
1079           }
1080         }
1081       };
1082       try {
1083         // pre-fetch certain number of regions info at region cache.
1084         MetaScanner.metaScan(conf, this, visitor, tableName, row,
1085             this.prefetchRegionLimit, TableName.META_TABLE_NAME);
1086       } catch (IOException e) {
1087         LOG.warn("Encountered problems when prefetch META table: ", e);
1088       }
1089     }
1090 
1091     /*
1092       * Search the .META. table for the HRegionLocation
1093       * info that contains the table and row we're seeking.
1094       */
1095     private HRegionLocation locateRegionInMeta(final TableName parentTable,
1096       final TableName tableName, final byte [] row, boolean useCache,
1097       Object regionLockObject, boolean retry)
1098     throws IOException {
1099       HRegionLocation location;
1100       // If we are supposed to be using the cache, look in the cache to see if
1101       // we already have the region.
1102       if (useCache) {
1103         location = getCachedLocation(tableName, row);
1104         if (location != null) {
1105           return location;
1106         }
1107       }
1108       int localNumRetries = retry ? numTries : 1;
1109       // build the key of the meta region we should be looking for.
1110       // the extra 9's on the end are necessary to allow "exact" matches
1111       // without knowing the precise region names.
1112       byte [] metaKey = HRegionInfo.createRegionName(tableName, row,
1113         HConstants.NINES, false);
1114       for (int tries = 0; true; tries++) {
1115         if (tries >= localNumRetries) {
1116           throw new NoServerForRegionException("Unable to find region for "
1117             + Bytes.toStringBinary(row) + " after " + numTries + " tries.");
1118         }
1119 
1120         HRegionLocation metaLocation = null;
1121         try {
1122           // locate the meta region
1123           metaLocation = locateRegion(parentTable, metaKey, true, false);
1124           // If null still, go around again.
1125           if (metaLocation == null) continue;
1126           ClientService.BlockingInterface service = getClient(metaLocation.getServerName());
1127 
1128           Result regionInfoRow;
1129           // This block guards against two threads trying to load the meta
1130           // region at the same time. The first will load the meta region and
1131           // the second will use the value that the first one found.
1132           synchronized (regionLockObject) {
1133             // Check the cache again for a hit in case some other thread made the
1134             // same query while we were waiting on the lock. 
1135             if (useCache) {
1136               location = getCachedLocation(tableName, row);
1137               if (location != null) {
1138                 return location;
1139               }
1140               // If the parent table is META, we may want to pre-fetch some
1141               // region info into the global region cache for this table.
1142               if (parentTable.equals(TableName.META_TABLE_NAME)
1143                   && (getRegionCachePrefetch(tableName))) {
1144                 prefetchRegionCache(tableName, row);
1145               }
1146               location = getCachedLocation(tableName, row);
1147               if (location != null) {
1148                 return location;
1149               }
1150             } else {
1151               // If we are not supposed to be using the cache, delete any existing cached location
1152               // so it won't interfere.
1153               forceDeleteCachedLocation(tableName, row);
1154             }
1155             // Query the meta region for the location of the meta region
1156             regionInfoRow = ProtobufUtil.getRowOrBefore(service,
1157               metaLocation.getRegionInfo().getRegionName(), metaKey,
1158               HConstants.CATALOG_FAMILY);
1159           }
1160           if (regionInfoRow == null) {
1161             throw new TableNotFoundException(tableName);
1162           }
1163 
1164           // convert the row result into the HRegionLocation we need!
1165           HRegionInfo regionInfo = MetaScanner.getHRegionInfo(regionInfoRow);
1166           if (regionInfo == null) {
1167             throw new IOException("HRegionInfo was null or empty in " +
1168               parentTable + ", row=" + regionInfoRow);
1169           }
1170 
1171           // possible we got a region of a different table...
1172           if (!regionInfo.getTableName().equals(tableName)) {
1173             throw new TableNotFoundException(
1174                   "Table '" + tableName + "' was not found, got: " +
1175                   regionInfo.getTableName() + ".");
1176           }
1177           if (regionInfo.isSplit()) {
1178             throw new RegionOfflineException("the only available region for" +
1179               " the required row is a split parent," +
1180               " the daughters should be online soon: " +
1181               regionInfo.getRegionNameAsString());
1182           }
1183           if (regionInfo.isOffline()) {
1184             throw new RegionOfflineException("the region is offline, could" +
1185               " be caused by a disable table call: " +
1186               regionInfo.getRegionNameAsString());
1187           }
1188 
1189           ServerName serverName = HRegionInfo.getServerName(regionInfoRow);
1190           if (serverName == null) {
1191             throw new NoServerForRegionException("No server address listed " +
1192               "in " + parentTable + " for region " +
1193               regionInfo.getRegionNameAsString() + " containing row " +
1194               Bytes.toStringBinary(row));
1195           }
1196 
1197           if (isDeadServer(serverName)){
1198             throw new RegionServerStoppedException(".META. says the region "+
1199                 regionInfo.getRegionNameAsString()+" is managed by the server " + serverName +
1200                 ", but it is dead.");
1201           }
1202 
1203           // Instantiate the location
1204           location = new HRegionLocation(regionInfo, serverName,
1205             HRegionInfo.getSeqNumDuringOpen(regionInfoRow));
1206           cacheLocation(tableName, null, location);
1207           return location;
1208         } catch (TableNotFoundException e) {
1209           // if we got this error, probably means the table just plain doesn't
1210           // exist. rethrow the error immediately. this should always be coming
1211           // from the HTable constructor.
1212           throw e;
1213         } catch (IOException e) {
1214           if (e instanceof RemoteException) {
1215             e = ((RemoteException)e).unwrapRemoteException();
1216           }
1217           if (tries < numTries - 1) {
1218             if (LOG.isDebugEnabled()) {
1219               LOG.debug("locateRegionInMeta parentTable=" +
1220                 parentTable + ", metaLocation=" +
1221                 ((metaLocation == null)? "null": "{" + metaLocation + "}") +
1222                 ", attempt=" + tries + " of " +
1223                 this.numTries + " failed; retrying after sleep of " +
1224                 ConnectionUtils.getPauseTime(this.pause, tries) + " because: " + e.getMessage());
1225             }
1226           } else {
1227             throw e;
1228           }
1229           // Only relocate the parent region if necessary
1230           if(!(e instanceof RegionOfflineException ||
1231               e instanceof NoServerForRegionException)) {
1232             relocateRegion(parentTable, metaKey);
1233           }
1234         }
1235         try{
1236           Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries));
1237         } catch (InterruptedException e) {
1238           Thread.currentThread().interrupt();
1239           throw new IOException("Giving up trying to location region in " +
1240             "meta: thread is interrupted.");
1241         }
1242       }
1243     }
1244 
1245     /*
1246      * Search the cache for a location that fits our table and row key.
1247      * Return null if no suitable region is located. TODO: synchronization note
1248      *
1249      * <p>TODO: This method during writing consumes 15% of CPU doing lookup
1250      * into the Soft Reference SortedMap.  Improve.
1251      *
1252      * @param tableName
1253      * @param row
1254      * @return Null or region location found in cache.
1255      */
1256     HRegionLocation getCachedLocation(final TableName tableName,
1257         final byte [] row) {
1258       SoftValueSortedMap<byte[], HRegionLocation> tableLocations =
1259         getTableLocations(tableName);
1260 
1261       // start to examine the cache. we can only do cache actions
1262       // if there's something in the cache for this table.
1263       if (tableLocations.isEmpty()) {
1264         return null;
1265       }
1266 
1267       HRegionLocation possibleRegion = tableLocations.get(row);
1268       if (possibleRegion != null) {
1269         return possibleRegion;
1270       }
1271 
1272       possibleRegion = tableLocations.lowerValueByKey(row);
1273       if (possibleRegion == null) {
1274         return null;
1275       }
1276 
1277       // make sure that the end key is greater than the row we're looking
1278       // for, otherwise the row actually belongs in the next region, not
1279       // this one. the exception case is when the endkey is
1280       // HConstants.EMPTY_END_ROW, signifying that the region we're
1281       // checking is actually the last region in the table.
1282       byte[] endKey = possibleRegion.getRegionInfo().getEndKey();
1283       if (Bytes.equals(endKey, HConstants.EMPTY_END_ROW) ||
1284           KeyValue.getRowComparator(tableName).compareRows(
1285               endKey, 0, endKey.length, row, 0, row.length) > 0) {
1286         return possibleRegion;
1287       }
1288 
1289       // Passed all the way through, so we got nothing - complete cache miss
1290       return null;
1291     }
1292 
1293     /**
1294      * Delete a cached location, no matter what it is. Called when we were told to not use cache.
1295      * @param tableName tableName
1296      * @param row
1297      */
1298     void forceDeleteCachedLocation(final TableName tableName, final byte [] row) {
1299       HRegionLocation rl = null;
1300       synchronized (this.cachedRegionLocations) {
1301         Map<byte[], HRegionLocation> tableLocations = getTableLocations(tableName);
1302         // start to examine the cache. we can only do cache actions
1303         // if there's something in the cache for this table.
1304         if (!tableLocations.isEmpty()) {
1305           rl = getCachedLocation(tableName, row);
1306           if (rl != null) {
1307             tableLocations.remove(rl.getRegionInfo().getStartKey());
1308           }
1309         }
1310       }
1311       if ((rl != null) && LOG.isDebugEnabled()) {
1312         LOG.debug("Removed " + rl.getHostname() + ":" + rl.getPort()
1313           + " as a location of " + rl.getRegionInfo().getRegionNameAsString() +
1314           " for tableName=" + tableName + " from cache");
1315       }
1316     }
1317 
1318     /*
1319      * Delete all cached entries of a table that maps to a specific location.
1320      */
1321     @Override
1322     public void clearCaches(final ServerName serverName){
1323       boolean deletedSomething = false;
1324       synchronized (this.cachedRegionLocations) {
1325         if (!cachedServers.contains(serverName)) {
1326           return;
1327         }
1328         for (Map<byte[], HRegionLocation> tableLocations :
1329             cachedRegionLocations.values()) {
1330           for (Entry<byte[], HRegionLocation> e : tableLocations.entrySet()) {
1331             HRegionLocation value = e.getValue();
1332             if (value != null
1333                 && serverName.equals(value.getServerName())) {
1334               tableLocations.remove(e.getKey());
1335               deletedSomething = true;
1336             }
1337           }
1338         }
1339         cachedServers.remove(serverName);
1340       }
1341       if (deletedSomething && LOG.isDebugEnabled()) {
1342         LOG.debug("Removed all cached region locations that map to " + serverName);
1343       }
1344     }
1345 
1346     /*
1347      * @param tableName
1348      * @return Map of cached locations for passed <code>tableName</code>
1349      */
1350     private SoftValueSortedMap<byte[], HRegionLocation> getTableLocations(
1351         final TableName tableName) {
1352       // find the map of cached locations for this table
1353       SoftValueSortedMap<byte[], HRegionLocation> result;
1354       synchronized (this.cachedRegionLocations) {
1355         result = this.cachedRegionLocations.get(tableName);
1356         // if tableLocations for this table isn't built yet, make one
1357         if (result == null) {
1358           result = new SoftValueSortedMap<byte[], HRegionLocation>(Bytes.BYTES_COMPARATOR);
1359           this.cachedRegionLocations.put(tableName, result);
1360         }
1361       }
1362       return result;
1363     }
1364 
1365     @Override
1366     public void clearRegionCache() {
1367       synchronized(this.cachedRegionLocations) {
1368         this.cachedRegionLocations.clear();
1369         this.cachedServers.clear();
1370       }
1371     }
1372 
1373     @Override
1374     public void clearRegionCache(final TableName tableName) {
1375       synchronized (this.cachedRegionLocations) {
1376         this.cachedRegionLocations.remove(tableName);
1377       }
1378     }
1379 
1380     @Override
1381     public void clearRegionCache(final byte[] tableName) {
1382       clearRegionCache(TableName.valueOf(tableName));
1383     }
1384 
1385     /**
1386      * Put a newly discovered HRegionLocation into the cache.
1387      * @param tableName The table name.
1388      * @param source the source of the new location, if it's not coming from meta
1389      * @param location the new location
1390      */
1391     private void cacheLocation(final TableName tableName, final HRegionLocation source,
1392         final HRegionLocation location) {
1393       boolean isFromMeta = (source == null);
1394       byte [] startKey = location.getRegionInfo().getStartKey();
1395       Map<byte[], HRegionLocation> tableLocations =
1396         getTableLocations(tableName);
1397       boolean isNewCacheEntry = false;
1398       boolean isStaleUpdate = false;
1399       HRegionLocation oldLocation = null;
1400       synchronized (this.cachedRegionLocations) {
1401         cachedServers.add(location.getServerName());
1402         oldLocation = tableLocations.get(startKey);
1403         isNewCacheEntry = (oldLocation == null);
1404         // If the server in cache sends us a redirect, assume it's always valid.
1405         if (!isNewCacheEntry && !oldLocation.equals(source)) {
1406           long newLocationSeqNum = location.getSeqNum();
1407           // Meta record is stale - some (probably the same) server has closed the region
1408           // with later seqNum and told us about the new location.
1409           boolean isStaleMetaRecord = isFromMeta && (oldLocation.getSeqNum() > newLocationSeqNum);
1410           // Same as above for redirect. However, in this case, if the number is equal to previous
1411           // record, the most common case is that first the region was closed with seqNum, and then
1412           // opened with the same seqNum; hence we will ignore the redirect.
1413           // There are so many corner cases with various combinations of opens and closes that
1414           // an additional counter on top of seqNum would be necessary to handle them all.
1415           boolean isStaleRedirect = !isFromMeta && (oldLocation.getSeqNum() >= newLocationSeqNum);
1416           isStaleUpdate = (isStaleMetaRecord || isStaleRedirect);
1417         }
1418         if (!isStaleUpdate) {
1419           tableLocations.put(startKey, location);
1420         }
1421       }
1422       if (isNewCacheEntry) {
1423         if (LOG.isTraceEnabled()) {
1424           LOG.trace("Cached location for " +
1425             location.getRegionInfo().getRegionNameAsString() +
1426             " is " + location.getHostnamePort());
1427         }
1428       } else if (isStaleUpdate && !location.equals(oldLocation)) {
1429         if (LOG.isTraceEnabled()) {
1430           LOG.trace("Ignoring stale location update for "
1431             + location.getRegionInfo().getRegionNameAsString() + ": "
1432             + location.getHostnamePort() + " at " + location.getSeqNum() + "; local "
1433             + oldLocation.getHostnamePort() + " at " + oldLocation.getSeqNum());
1434         }
1435       }
1436     }
1437 
    // Map keyed by service name + regionserver to service stub implementation.
    // Keys have the form produced by getStubKey(serviceName, hostAndPort); values are
    // untyped because callers cast to the service interface they asked for.
    private final ConcurrentHashMap<String, Object> stubs =
      new ConcurrentHashMap<String, Object>();
    // Map of locks used creating service stubs per regionserver.
    // Each stub key is mapped to itself (putIfAbsent(key, key)); the stored String is the
    // monitor that callers synchronize on while looking up or creating that stub.
    private final ConcurrentHashMap<String, String> connectionLock =
      new ConcurrentHashMap<String, String>();
1444 
1445     /**
1446      * Maintains current state of MasterService instance.
1447      */
1448     static abstract class MasterServiceState {
1449       HConnection connection;
1450       int userCount;
1451       long keepAliveUntil = Long.MAX_VALUE;
1452 
1453       MasterServiceState (final HConnection connection) {
1454         super();
1455         this.connection = connection;
1456       }
1457 
1458       abstract Object getStub();
1459       abstract void clearStub();
1460       abstract boolean isMasterRunning() throws ServiceException;
1461     }
1462 
1463     /**
1464      * State of the MasterAdminService connection/setup.
1465      */
1466     static class MasterAdminServiceState extends MasterServiceState {
1467       MasterAdminService.BlockingInterface stub;
1468       MasterAdminServiceState(final HConnection connection) {
1469         super(connection);
1470       }
1471 
1472       @Override
1473       public String toString() {
1474         return "MasterAdminService";
1475       }
1476 
1477       @Override
1478       Object getStub() {
1479         return this.stub;
1480       }
1481 
1482       @Override
1483       void clearStub() {
1484         this.stub = null;
1485       }
1486 
1487       @Override
1488       boolean isMasterRunning() throws ServiceException {
1489         MasterProtos.IsMasterRunningResponse response =
1490           this.stub.isMasterRunning(null, RequestConverter.buildIsMasterRunningRequest());
1491         return response != null? response.getIsMasterRunning(): false;
1492       }
1493     }
1494 
1495     /**
1496      * State of the MasterMonitorService connection/setup.
1497      */
1498     static class MasterMonitorServiceState extends MasterServiceState {
1499       MasterMonitorService.BlockingInterface stub;
1500       MasterMonitorServiceState(final HConnection connection) {
1501         super(connection);
1502       }
1503 
1504       @Override
1505       public String toString() {
1506         return "MasterMonitorService";
1507       }
1508 
1509       @Override
1510       Object getStub() {
1511         return this.stub;
1512       }
1513 
1514       @Override
1515       void clearStub() {
1516         this.stub = null;
1517       }
1518 
1519       @Override
1520       boolean isMasterRunning() throws ServiceException {
1521         MasterProtos.IsMasterRunningResponse response =
1522           this.stub.isMasterRunning(null, RequestConverter.buildIsMasterRunningRequest());
1523         return response != null? response.getIsMasterRunning(): false;
1524       }
1525     }
1526 
1527     /**
1528      * Makes a client-side stub for master services. Sub-class to specialize.
1529      * Depends on hosting class so not static.  Exists so we avoid duplicating a bunch of code
1530      * when setting up the MasterMonitorService and MasterAdminService.
1531      */
1532     abstract class StubMaker {
1533       /**
1534        * Returns the name of the service stub being created.
1535        */
1536       protected abstract String getServiceName();
1537 
1538       /**
1539        * Make stub and cache it internal so can be used later doing the isMasterRunning call.
1540        * @param channel
1541        */
1542       protected abstract Object makeStub(final BlockingRpcChannel channel);
1543 
1544       /**
1545        * Once setup, check it works by doing isMasterRunning check.
1546        * @throws ServiceException
1547        */
1548       protected abstract void isMasterRunning() throws ServiceException;
1549 
1550       /**
1551        * Create a stub. Try once only.  It is not typed because there is no common type to
1552        * protobuf services nor their interfaces.  Let the caller do appropriate casting.
1553        * @return A stub for master services.
1554        * @throws IOException
1555        * @throws KeeperException
1556        * @throws ServiceException
1557        */
1558       private Object makeStubNoRetries() throws IOException, KeeperException, ServiceException {
1559         ZooKeeperKeepAliveConnection zkw;
1560         try {
1561           zkw = getKeepAliveZooKeeperWatcher();
1562         } catch (IOException e) {
1563           throw new ZooKeeperConnectionException("Can't connect to ZooKeeper", e);
1564         }
1565         try {
1566           checkIfBaseNodeAvailable(zkw);
1567           ServerName sn = MasterAddressTracker.getMasterAddress(zkw);
1568           if (sn == null) {
1569             String msg = "ZooKeeper available but no active master location found";
1570             LOG.info(msg);
1571             throw new MasterNotRunningException(msg);
1572           }
1573           if (isDeadServer(sn)) {
1574             throw new MasterNotRunningException(sn + " is dead.");
1575           }
1576           // Use the security info interface name as our stub key
1577           String key = getStubKey(getServiceName(), sn.getHostAndPort());
1578           connectionLock.putIfAbsent(key, key);
1579           Object stub = null;
1580           synchronized (connectionLock.get(key)) {
1581             stub = stubs.get(key);
1582             if (stub == null) {
1583               BlockingRpcChannel channel = rpcClient.createBlockingRpcChannel(sn,
1584                   User.getCurrent(), rpcTimeout);
1585               stub = makeStub(channel);
1586               isMasterRunning();
1587               stubs.put(key, stub);
1588             }
1589           }
1590           return stub;
1591         } finally {
1592           zkw.close();
1593         }
1594       }
1595 
1596       /**
1597        * Create a stub against the master.  Retry if necessary.
1598        * @return A stub to do <code>intf</code> against the master
1599        * @throws MasterNotRunningException
1600        */
1601       @edu.umd.cs.findbugs.annotations.SuppressWarnings (value="SWL_SLEEP_WITH_LOCK_HELD")
1602       Object makeStub() throws MasterNotRunningException {
1603         // The lock must be at the beginning to prevent multiple master creations
1604         //  (and leaks) in a multithread context
1605         synchronized (masterAndZKLock) {
1606           Exception exceptionCaught = null;
1607           Object stub = null;
1608           int tries = 0;
1609           while (!closed && stub == null) {
1610             tries++;
1611             try {
1612               stub = makeStubNoRetries();
1613             } catch (IOException e) {
1614               exceptionCaught = e;
1615             } catch (KeeperException e) {
1616               exceptionCaught = e;
1617             } catch (ServiceException e) {
1618               exceptionCaught = e;
1619             }
1620 
1621             if (exceptionCaught != null)
1622               // It failed. If it's not the last try, we're going to wait a little
1623               if (tries < numTries) {
1624                 // tries at this point is 1 or more; decrement to start from 0.
1625                 long pauseTime = ConnectionUtils.getPauseTime(pause, tries - 1);
1626                 LOG.info("getMaster attempt " + tries + " of " + numTries +
1627                     " failed; retrying after sleep of " + pauseTime + ", exception=" +
1628                   exceptionCaught);
1629 
1630                 try {
1631                   Thread.sleep(pauseTime);
1632                 } catch (InterruptedException e) {
1633                   Thread.currentThread().interrupt();
1634                   throw new RuntimeException(
1635                       "Thread was interrupted while trying to connect to master.", e);
1636                 }
1637               } else {
1638                 // Enough tries, we stop now
1639                 LOG.info("getMaster attempt " + tries + " of " + numTries +
1640                     " failed; no more retrying.", exceptionCaught);
1641                 throw new MasterNotRunningException(exceptionCaught);
1642               }
1643           }
1644 
1645           if (stub == null) {
1646             // implies this.closed true
1647             throw new MasterNotRunningException("Connection was closed while trying to get master");
1648           }
1649           return stub;
1650         }
1651       }
1652     }
1653 
1654     /**
1655      * Class to make a MasterMonitorService stub.
1656      */
1657     class MasterMonitorServiceStubMaker extends StubMaker {
1658       private MasterMonitorService.BlockingInterface stub;
1659       @Override
1660       protected String getServiceName() {
1661         return MasterMonitorService.getDescriptor().getName();
1662       }
1663 
1664       @Override
1665       @edu.umd.cs.findbugs.annotations.SuppressWarnings("SWL_SLEEP_WITH_LOCK_HELD")
1666       MasterMonitorService.BlockingInterface makeStub() throws MasterNotRunningException {
1667         return (MasterMonitorService.BlockingInterface)super.makeStub();
1668       }
1669 
1670       @Override
1671       protected Object makeStub(BlockingRpcChannel channel) {
1672         this.stub = MasterMonitorService.newBlockingStub(channel);
1673         return this.stub;
1674       }
1675 
1676       @Override
1677       protected void isMasterRunning() throws ServiceException {
1678         this.stub.isMasterRunning(null, RequestConverter.buildIsMasterRunningRequest());
1679       }
1680     }
1681 
1682     /**
1683      * Class to make a MasterAdminService stub.
1684      */
1685     class MasterAdminServiceStubMaker extends StubMaker {
1686       private MasterAdminService.BlockingInterface stub;
1687 
1688       @Override
1689       protected String getServiceName() {
1690         return MasterAdminService.getDescriptor().getName();
1691       }
1692 
1693       @Override
1694       @edu.umd.cs.findbugs.annotations.SuppressWarnings("SWL_SLEEP_WITH_LOCK_HELD")
1695       MasterAdminService.BlockingInterface makeStub() throws MasterNotRunningException {
1696         return (MasterAdminService.BlockingInterface)super.makeStub();
1697       }
1698 
1699       @Override
1700       protected Object makeStub(BlockingRpcChannel channel) {
1701         this.stub = MasterAdminService.newBlockingStub(channel);
1702         return this.stub;
1703       }
1704 
1705       @Override
1706       protected void isMasterRunning() throws ServiceException {
1707         this.stub.isMasterRunning(null, RequestConverter.buildIsMasterRunningRequest());
1708       }
1709     };
1710 
1711     @Override
1712     public AdminService.BlockingInterface getAdmin(final ServerName serverName)
1713         throws IOException {
1714       return getAdmin(serverName, false);
1715     }
1716 
1717     @Override
1718     // Nothing is done w/ the 'master' parameter.  It is ignored.
1719     public AdminService.BlockingInterface getAdmin(final ServerName serverName,
1720       final boolean master)
1721     throws IOException {
1722       if (isDeadServer(serverName)) {
1723         throw new RegionServerStoppedException(serverName + " is dead.");
1724       }
1725       String key = getStubKey(AdminService.BlockingInterface.class.getName(),
1726         serverName.getHostAndPort());
1727       this.connectionLock.putIfAbsent(key, key);
1728       AdminService.BlockingInterface stub = null;
1729       synchronized (this.connectionLock.get(key)) {
1730         stub = (AdminService.BlockingInterface)this.stubs.get(key);
1731         if (stub == null) {
1732           BlockingRpcChannel channel = this.rpcClient.createBlockingRpcChannel(serverName,
1733             User.getCurrent(), this.rpcTimeout);
1734           stub = AdminService.newBlockingStub(channel);
1735           this.stubs.put(key, stub);
1736         }
1737       }
1738       return stub;
1739     }
1740 
1741     @Override
1742     public ClientService.BlockingInterface getClient(final ServerName sn)
1743     throws IOException {
1744       if (isDeadServer(sn)) {
1745         throw new RegionServerStoppedException(sn + " is dead.");
1746       }
1747       String key = getStubKey(ClientService.BlockingInterface.class.getName(), sn.getHostAndPort());
1748       this.connectionLock.putIfAbsent(key, key);
1749       ClientService.BlockingInterface stub = null;
1750       synchronized (this.connectionLock.get(key)) {
1751         stub = (ClientService.BlockingInterface)this.stubs.get(key);
1752         if (stub == null) {
1753           BlockingRpcChannel channel = this.rpcClient.createBlockingRpcChannel(sn,
1754             User.getCurrent(), this.rpcTimeout);
1755           stub = ClientService.newBlockingStub(channel);
1756           // In old days, after getting stub/proxy, we'd make a call.  We are not doing that here.
1757           // Just fail on first actual call rather than in here on setup.
1758           this.stubs.put(key, stub);
1759         }
1760       }
1761       return stub;
1762     }
1763 
1764     static String getStubKey(final String serviceName, final String rsHostnamePort) {
1765       return serviceName + "@" + rsHostnamePort;
1766     }
1767 
    // Shared ZooKeeper watcher; created on demand by getKeepAliveZooKeeperWatcher() and
    // nulled again by closeZooKeeperWatcher(). Accessed under masterAndZKLock there.
    private ZooKeeperKeepAliveConnection keepAliveZookeeper;
    // Number of callers currently holding the shared watcher: incremented on each get,
    // decremented on release, zeroed when the watcher is closed.
    private int keepAliveZookeeperUserCount;
    // When false, the DelayedClosing chore leaves the shared watcher open.
    private boolean canCloseZKW = true;

    // keepAlive time, in ms. No reason to make it configurable.
    // (5 minutes; added to the current time when the last user releases the watcher.)
    private static final long keepAlive = 5 * 60 * 1000;
1774 
1775     /**
1776      * Retrieve a shared ZooKeeperWatcher. You must close it it once you've have finished with it.
1777      * @return The shared instance. Never returns null.
1778      */
1779     ZooKeeperKeepAliveConnection getKeepAliveZooKeeperWatcher()
1780       throws IOException {
1781       synchronized (masterAndZKLock) {
1782         if (keepAliveZookeeper == null) {
1783           if (this.closed) {
1784             throw new IOException(toString() + " closed");
1785           }
1786           // We don't check that our link to ZooKeeper is still valid
1787           // But there is a retry mechanism in the ZooKeeperWatcher itself
1788           keepAliveZookeeper = new ZooKeeperKeepAliveConnection(conf, this.toString(), this);
1789         }
1790         keepAliveZookeeperUserCount++;
1791         keepZooKeeperWatcherAliveUntil = Long.MAX_VALUE;
1792         return keepAliveZookeeper;
1793       }
1794     }
1795 
1796     void releaseZooKeeperWatcher(final ZooKeeperWatcher zkw) {
1797       if (zkw == null){
1798         return;
1799       }
1800       synchronized (masterAndZKLock) {
1801         --keepAliveZookeeperUserCount;
1802         if (keepAliveZookeeperUserCount <= 0 ){
1803           keepZooKeeperWatcherAliveUntil = System.currentTimeMillis() + keepAlive;
1804         }
1805       }
1806     }
1807 
1808     /**
1809      * Creates a Chore thread to check the connections to master & zookeeper
1810      *  and close them when they reach their closing time (
1811      *  {@link MasterServiceState#keepAliveUntil} and
1812      *  {@link #keepZooKeeperWatcherAliveUntil}). Keep alive time is
1813      *  managed by the release functions and the variable {@link #keepAlive}
1814      */
1815     private static class DelayedClosing extends Chore implements Stoppable {
1816       private HConnectionImplementation hci;
1817       Stoppable stoppable;
1818 
1819       private DelayedClosing(
1820         HConnectionImplementation hci, Stoppable stoppable){
1821         super(
1822           "ZooKeeperWatcher and Master delayed closing for connection "+hci,
1823           60*1000, // We check every minutes
1824           stoppable);
1825         this.hci = hci;
1826         this.stoppable = stoppable;
1827       }
1828 
1829       static DelayedClosing createAndStart(HConnectionImplementation hci){
1830         Stoppable stoppable = new Stoppable() {
1831               private volatile boolean isStopped = false;
1832               @Override public void stop(String why) { isStopped = true;}
1833               @Override public boolean isStopped() {return isStopped;}
1834             };
1835 
1836         return new DelayedClosing(hci, stoppable);
1837       }
1838 
1839       protected void closeMasterProtocol(MasterServiceState protocolState) {
1840         if (System.currentTimeMillis() > protocolState.keepAliveUntil) {
1841           hci.closeMasterService(protocolState);
1842           protocolState.keepAliveUntil = Long.MAX_VALUE;
1843         }
1844       }
1845 
1846       @Override
1847       protected void chore() {
1848         synchronized (hci.masterAndZKLock) {
1849           if (hci.canCloseZKW) {
1850             if (System.currentTimeMillis() >
1851               hci.keepZooKeeperWatcherAliveUntil) {
1852 
1853               hci.closeZooKeeperWatcher();
1854               hci.keepZooKeeperWatcherAliveUntil = Long.MAX_VALUE;
1855             }
1856           }
1857           closeMasterProtocol(hci.adminMasterServiceState);
1858           closeMasterProtocol(hci.monitorMasterServiceState);
1859         }
1860       }
1861 
1862       @Override
1863       public void stop(String why) {
1864         stoppable.stop(why);
1865       }
1866 
1867       @Override
1868       public boolean isStopped() {
1869         return stoppable.isStopped();
1870       }
1871     }
1872 
1873     private void closeZooKeeperWatcher() {
1874       synchronized (masterAndZKLock) {
1875         if (keepAliveZookeeper != null) {
1876           LOG.info("Closing zookeeper sessionid=0x" +
1877             Long.toHexString(
1878               keepAliveZookeeper.getRecoverableZooKeeper().getSessionId()));
1879           keepAliveZookeeper.internalClose();
1880           keepAliveZookeeper = null;
1881         }
1882         keepAliveZookeeperUserCount = 0;
1883       }
1884     }
1885 
    // Per-connection state (stub, user count, keep-alive deadline) of the two master
    // services; both are inspected by the DelayedClosing chore.
    final MasterAdminServiceState adminMasterServiceState = new MasterAdminServiceState(this);
    final MasterMonitorServiceState monitorMasterServiceState =
      new MasterMonitorServiceState(this);
1889 
1890     @Override
1891     public MasterAdminService.BlockingInterface getMasterAdmin() throws MasterNotRunningException {
1892       return getKeepAliveMasterAdminService();
1893     }
1894 
1895     @Override
1896     public MasterMonitorService.BlockingInterface getMasterMonitor()
1897     throws MasterNotRunningException {
1898       return getKeepAliveMasterMonitorService();
1899     }
1900 
1901     private void resetMasterServiceState(final MasterServiceState mss) {
1902       mss.userCount++;
1903       mss.keepAliveUntil = Long.MAX_VALUE;
1904     }
1905 
    /**
     * Returns a keep-alive handle on the shared MasterAdminService stub.
     * <p>Under <code>masterAndZKLock</code>, a new stub is built through
     * {@link MasterAdminServiceStubMaker} when
     * <code>isKeepAliveMasterConnectedAndRunning</code> returns false for the admin state;
     * the state is then passed to <code>resetMasterServiceState</code>. The returned object
     * forwards every RPC unchanged to the stub captured at call time; its
     * <code>close()</code> passes the state to <code>release</code>.
     * @return a delegating connection that adds <code>close()</code> to the service interface
     * @throws MasterNotRunningException if a stub had to be created and could not be
     */
    @Override
    public MasterAdminKeepAliveConnection getKeepAliveMasterAdminService()
    throws MasterNotRunningException {
      synchronized (masterAndZKLock) {
        if (!isKeepAliveMasterConnectedAndRunning(this.adminMasterServiceState)) {
          MasterAdminServiceStubMaker stubMaker = new MasterAdminServiceStubMaker();
          this.adminMasterServiceState.stub = stubMaker.makeStub();
        }
        resetMasterServiceState(this.adminMasterServiceState);
      }
      // Ugly delegation just so we can add in a Close method.
      // NOTE(review): the stub field is read here after masterAndZKLock has been released.
      final MasterAdminService.BlockingInterface stub = this.adminMasterServiceState.stub;
      return new MasterAdminKeepAliveConnection() {
        // State handed to release() when this wrapper is closed.
        MasterAdminServiceState mss = adminMasterServiceState;
        @Override
        public AddColumnResponse addColumn(RpcController controller,
            AddColumnRequest request) throws ServiceException {
          return stub.addColumn(controller, request);
        }

        @Override
        public DeleteColumnResponse deleteColumn(RpcController controller,
            DeleteColumnRequest request) throws ServiceException {
          return stub.deleteColumn(controller, request);
        }

        @Override
        public ModifyColumnResponse modifyColumn(RpcController controller,
            ModifyColumnRequest request) throws ServiceException {
          return stub.modifyColumn(controller, request);
        }

        @Override
        public MoveRegionResponse moveRegion(RpcController controller,
            MoveRegionRequest request) throws ServiceException {
          return stub.moveRegion(controller, request);
        }

        @Override
        public DispatchMergingRegionsResponse dispatchMergingRegions(
            RpcController controller, DispatchMergingRegionsRequest request)
            throws ServiceException {
          return stub.dispatchMergingRegions(controller, request);
        }

        @Override
        public AssignRegionResponse assignRegion(RpcController controller,
            AssignRegionRequest request) throws ServiceException {
          return stub.assignRegion(controller, request);
        }

        @Override
        public UnassignRegionResponse unassignRegion(RpcController controller,
            UnassignRegionRequest request) throws ServiceException {
          return stub.unassignRegion(controller, request);
        }

        @Override
        public OfflineRegionResponse offlineRegion(RpcController controller,
            OfflineRegionRequest request) throws ServiceException {
          return stub.offlineRegion(controller, request);
        }

        @Override
        public DeleteTableResponse deleteTable(RpcController controller,
            DeleteTableRequest request) throws ServiceException {
          return stub.deleteTable(controller, request);
        }

        @Override
        public EnableTableResponse enableTable(RpcController controller,
            EnableTableRequest request) throws ServiceException {
          return stub.enableTable(controller, request);
        }

        @Override
        public DisableTableResponse disableTable(RpcController controller,
            DisableTableRequest request) throws ServiceException {
          return stub.disableTable(controller, request);
        }

        @Override
        public ModifyTableResponse modifyTable(RpcController controller,
            ModifyTableRequest request) throws ServiceException {
          return stub.modifyTable(controller, request);
        }

        @Override
        public CreateTableResponse createTable(RpcController controller,
            CreateTableRequest request) throws ServiceException {
          return stub.createTable(controller, request);
        }

        @Override
        public ShutdownResponse shutdown(RpcController controller,
            ShutdownRequest request) throws ServiceException {
          return stub.shutdown(controller, request);
        }

        @Override
        public StopMasterResponse stopMaster(RpcController controller,
            StopMasterRequest request) throws ServiceException {
          return stub.stopMaster(controller, request);
        }

        @Override
        public BalanceResponse balance(RpcController controller,
            BalanceRequest request) throws ServiceException {
          return stub.balance(controller, request);
        }

        @Override
        public SetBalancerRunningResponse setBalancerRunning(
            RpcController controller, SetBalancerRunningRequest request)
            throws ServiceException {
          return stub.setBalancerRunning(controller, request);
        }

        @Override
        public CatalogScanResponse runCatalogScan(RpcController controller,
            CatalogScanRequest request) throws ServiceException {
          return stub.runCatalogScan(controller, request);
        }

        @Override
        public EnableCatalogJanitorResponse enableCatalogJanitor(
            RpcController controller, EnableCatalogJanitorRequest request)
            throws ServiceException {
          return stub.enableCatalogJanitor(controller, request);
        }

        @Override
        public IsCatalogJanitorEnabledResponse isCatalogJanitorEnabled(
            RpcController controller, IsCatalogJanitorEnabledRequest request)
            throws ServiceException {
          return stub.isCatalogJanitorEnabled(controller, request);
        }

        @Override
        public CoprocessorServiceResponse execMasterService(
            RpcController controller, CoprocessorServiceRequest request)
            throws ServiceException {
          return stub.execMasterService(controller, request);
        }

        @Override
        public TakeSnapshotResponse snapshot(RpcController controller,
            TakeSnapshotRequest request) throws ServiceException {
          return stub.snapshot(controller, request);
        }

        @Override
        public ListSnapshotResponse getCompletedSnapshots(
            RpcController controller, ListSnapshotRequest request)
            throws ServiceException {
          return stub.getCompletedSnapshots(controller, request);
        }

        @Override
        public DeleteSnapshotResponse deleteSnapshot(RpcController controller,
            DeleteSnapshotRequest request) throws ServiceException {
          return stub.deleteSnapshot(controller, request);
        }

        @Override
        public IsSnapshotDoneResponse isSnapshotDone(RpcController controller,
            IsSnapshotDoneRequest request) throws ServiceException {
          return stub.isSnapshotDone(controller, request);
        }

        @Override
        public RestoreSnapshotResponse restoreSnapshot(
            RpcController controller, RestoreSnapshotRequest request)
            throws ServiceException {
          return stub.restoreSnapshot(controller, request);
        }

        @Override
        public IsRestoreSnapshotDoneResponse isRestoreSnapshotDone(
            RpcController controller, IsRestoreSnapshotDoneRequest request)
            throws ServiceException {
          return stub.isRestoreSnapshotDone(controller, request);
        }

        @Override
        public IsMasterRunningResponse isMasterRunning(
            RpcController controller, IsMasterRunningRequest request)
            throws ServiceException {
          return stub.isMasterRunning(controller, request);
        }

        @Override
        public ModifyNamespaceResponse modifyNamespace(RpcController controller, ModifyNamespaceRequest request) throws ServiceException {
          return stub.modifyNamespace(controller, request);
        }

        @Override
        public CreateNamespaceResponse createNamespace(RpcController controller, CreateNamespaceRequest request) throws ServiceException {
          return stub.createNamespace(controller, request);
        }

        @Override
        public DeleteNamespaceResponse deleteNamespace(RpcController controller, DeleteNamespaceRequest request) throws ServiceException {
          return stub.deleteNamespace(controller, request);
        }

        @Override
        public GetNamespaceDescriptorResponse getNamespaceDescriptor(RpcController controller, GetNamespaceDescriptorRequest request) throws ServiceException {
          return stub.getNamespaceDescriptor(controller, request);
        }

        @Override
        public ListNamespaceDescriptorsResponse listNamespaceDescriptors(RpcController controller, ListNamespaceDescriptorsRequest request) throws ServiceException {
          return stub.listNamespaceDescriptors(controller, request);
        }

        @Override
        public ListTableDescriptorsByNamespaceResponse listTableDescriptorsByNamespace(RpcController controller, ListTableDescriptorsByNamespaceRequest request) throws ServiceException {
          return stub.listTableDescriptorsByNamespace(controller, request);
        }

        @Override
        public ListTableNamesByNamespaceResponse listTableNamesByNamespace(RpcController controller,
              ListTableNamesByNamespaceRequest request) throws ServiceException {
          return stub.listTableNamesByNamespace(controller, request);
        }

        // The one non-delegating method: hands the shared state back via release().
        @Override
        public void close() {
          release(this.mss);
        }
      };
    }
2139 
2140     private static void release(MasterServiceState mss) {
2141       if (mss != null && mss.connection != null) {
2142         ((HConnectionImplementation)mss.connection).releaseMaster(mss);
2143       }
2144     }
2145 
2146     @Override
2147     public MasterMonitorKeepAliveConnection getKeepAliveMasterMonitorService()
2148     throws MasterNotRunningException {
2149       synchronized (masterAndZKLock) {
2150         if (!isKeepAliveMasterConnectedAndRunning(this.monitorMasterServiceState)) {
2151           MasterMonitorServiceStubMaker stubMaker = new MasterMonitorServiceStubMaker();
2152           this.monitorMasterServiceState.stub = stubMaker.makeStub();
2153         }
2154         resetMasterServiceState(this.monitorMasterServiceState);
2155       }
2156       // Ugly delegation just so can implement close
2157       final MasterMonitorService.BlockingInterface stub = this.monitorMasterServiceState.stub;
2158       return new MasterMonitorKeepAliveConnection() {
2159         final MasterMonitorServiceState mss = monitorMasterServiceState;
2160         @Override
2161         public GetSchemaAlterStatusResponse getSchemaAlterStatus(
2162             RpcController controller, GetSchemaAlterStatusRequest request)
2163             throws ServiceException {
2164           return stub.getSchemaAlterStatus(controller, request);
2165         }
2166 
2167         @Override
2168         public GetTableDescriptorsResponse getTableDescriptors(
2169             RpcController controller, GetTableDescriptorsRequest request)
2170             throws ServiceException {
2171           return stub.getTableDescriptors(controller, request);
2172         }
2173 
2174         @Override
2175         public GetTableNamesResponse getTableNames(
2176             RpcController controller, GetTableNamesRequest request)
2177             throws ServiceException {
2178           return stub.getTableNames(controller, request);
2179         }
2180 
2181         @Override
2182         public GetClusterStatusResponse getClusterStatus(
2183             RpcController controller, GetClusterStatusRequest request)
2184             throws ServiceException {
2185           return stub.getClusterStatus(controller, request);
2186         }
2187 
2188         @Override
2189         public IsMasterRunningResponse isMasterRunning(
2190             RpcController controller, IsMasterRunningRequest request)
2191             throws ServiceException {
2192           return stub.isMasterRunning(controller, request);
2193         }
2194 
2195         @Override
2196         public void close() throws IOException {
2197           release(this.mss);
2198         }
2199       };
2200     }
2201 
2202     private boolean isKeepAliveMasterConnectedAndRunning(MasterServiceState mss) {
2203       if (mss.getStub() == null){
2204         return false;
2205       }
2206       try {
2207         return mss.isMasterRunning();
2208       } catch (UndeclaredThrowableException e) {
2209         // It's somehow messy, but we can receive exceptions such as
2210         //  java.net.ConnectException but they're not declared. So we catch it...
2211         LOG.info("Master connection is not running anymore", e.getUndeclaredThrowable());
2212         return false;
2213       } catch (ServiceException se) {
2214         LOG.warn("Checking master connection", se);
2215         return false;
2216       }
2217     }
2218 
2219     void releaseMaster(MasterServiceState mss) {
2220       if (mss.getStub() == null) return;
2221       synchronized (masterAndZKLock) {
2222         --mss.userCount;
2223         if (mss.userCount <= 0) {
2224           mss.keepAliveUntil = System.currentTimeMillis() + keepAlive;
2225         }
2226       }
2227     }
2228 
2229     private void closeMasterService(MasterServiceState mss) {
2230       if (mss.getStub() != null) {
2231         LOG.info("Closing master protocol: " + mss);
2232         mss.clearStub();
2233       }
2234       mss.userCount = 0;
2235     }
2236 
2237     /**
2238      * Immediate close of the shared master. Can be by the delayed close or when closing the
2239      * connection itself.
2240      */
2241     private void closeMaster() {
2242       synchronized (masterAndZKLock) {
2243         closeMasterService(adminMasterServiceState);
2244         closeMasterService(monitorMasterServiceState);
2245       }
2246     }
2247 
2248     void updateCachedLocation(HRegionInfo hri, HRegionLocation source,
2249                               ServerName serverName, long seqNum) {
2250       HRegionLocation newHrl = new HRegionLocation(hri, serverName, seqNum);
2251       synchronized (this.cachedRegionLocations) {
2252         cacheLocation(hri.getTableName(), source, newHrl);
2253       }
2254     }
2255 
2256    /**
2257     * Deletes the cached location of the region if necessary, based on some error from source.
2258     * @param hri The region in question.
2259     * @param source The source of the error that prompts us to invalidate cache.
2260     */
2261     void deleteCachedLocation(HRegionInfo hri, HRegionLocation source) {
2262       boolean isStaleDelete = false;
2263       HRegionLocation oldLocation;
2264       synchronized (this.cachedRegionLocations) {
2265         Map<byte[], HRegionLocation> tableLocations =
2266           getTableLocations(hri.getTableName());
2267         oldLocation = tableLocations.get(hri.getStartKey());
2268         if (oldLocation != null) {
2269            // Do not delete the cache entry if it's not for the same server that gave us the error.
2270           isStaleDelete = (source != null) && !oldLocation.equals(source);
2271           if (!isStaleDelete) {
2272             tableLocations.remove(hri.getStartKey());
2273           }
2274         }
2275       }
2276     }
2277 
2278     @Override
2279     public void deleteCachedRegionLocation(final HRegionLocation location) {
2280       if (location == null) {
2281         return;
2282       }
2283       synchronized (this.cachedRegionLocations) {
2284         TableName tableName = location.getRegionInfo().getTableName();
2285         Map<byte[], HRegionLocation> tableLocations =
2286             getTableLocations(tableName);
2287         if (!tableLocations.isEmpty()) {
2288           // Delete if there's something in the cache for this region.
2289           HRegionLocation removedLocation =
2290           tableLocations.remove(location.getRegionInfo().getStartKey());
2291           if (LOG.isDebugEnabled() && removedLocation != null) {
2292             LOG.debug("Removed " +
2293                 location.getRegionInfo().getRegionNameAsString() +
2294                 " for tableName=" + tableName +
2295                 " from cache");
2296           }
2297         }
2298       }
2299     }
2300 
2301     /**
2302      * Update the location with the new value (if the exception is a RegionMovedException)
2303      * or delete it from the cache.
2304      * @param exception an object (to simplify user code) on which we will try to find a nested
2305      *                  or wrapped or both RegionMovedException
2306      * @param source server that is the source of the location update.
2307      */
2308     @Override
2309     public void updateCachedLocations(final TableName tableName, byte[] rowkey,
2310       final Object exception, final HRegionLocation source) {
2311       if (rowkey == null || tableName == null) {
2312         LOG.warn("Coding error, see method javadoc. row=" + (rowkey == null ? "null" : rowkey) +
2313             ", tableName=" + (tableName == null ? "null" : tableName));
2314         return;
2315       }
2316 
2317       // Is it something we have already updated?
2318       final HRegionLocation oldLocation = getCachedLocation(tableName, rowkey);
2319       if (oldLocation == null) {
2320         // There is no such location in the cache => it's been removed already => nothing to do
2321         return;
2322       }
2323 
2324       HRegionInfo regionInfo = oldLocation.getRegionInfo();
2325       final RegionMovedException rme = RegionMovedException.find(exception);
2326       if (rme != null) {
2327         if (LOG.isTraceEnabled()){
2328           LOG.trace("Region " + regionInfo.getRegionNameAsString() + " moved to " +
2329             rme.getHostname() + ":" + rme.getPort() + " according to " + source.getHostnamePort());
2330         }
2331         updateCachedLocation(
2332             regionInfo, source, rme.getServerName(), rme.getLocationSeqNum());
2333       } else if (RegionOpeningException.find(exception) != null) {
2334         if (LOG.isTraceEnabled()) {
2335           LOG.trace("Region " + regionInfo.getRegionNameAsString() + " is being opened on "
2336               + source.getHostnamePort() + "; not deleting the cache entry");
2337         }
2338       } else {
2339         deleteCachedLocation(regionInfo, source);
2340       }
2341     }
2342 
2343     @Override
2344     public void updateCachedLocations(final byte[] tableName, byte[] rowkey,
2345       final Object exception, final HRegionLocation source) {
2346       updateCachedLocations(TableName.valueOf(tableName), rowkey, exception, source);
2347     }
2348 
2349     @Override
2350     @Deprecated
2351     public void processBatch(List<? extends Row> list,
2352         final TableName tableName,
2353         ExecutorService pool,
2354         Object[] results) throws IOException, InterruptedException {
2355       // This belongs in HTable!!! Not in here.  St.Ack
2356 
2357       // results must be the same size as list
2358       if (results.length != list.size()) {
2359         throw new IllegalArgumentException(
2360           "argument results must be the same size as argument list");
2361       }
2362       processBatchCallback(list, tableName, pool, results, null);
2363     }
2364 
2365     @Override
2366     @Deprecated
2367     public void processBatch(List<? extends Row> list,
2368         final byte[] tableName,
2369         ExecutorService pool,
2370         Object[] results) throws IOException, InterruptedException {
2371       processBatch(list, TableName.valueOf(tableName), pool, results);
2372     }
2373 
2374     /**
2375      * Send the queries in parallel on the different region servers. Retries on failures.
2376      * If the method returns it means that there is no error, and the 'results' array will
2377      * contain no exception. On error, an exception is thrown, and the 'results' array will
2378      * contain results and exceptions.
2379      * @deprecated since 0.96 - Use {@link HTable#processBatchCallback} instead
2380      */
2381     @Override
2382     @Deprecated
2383     public <R> void processBatchCallback(
2384       List<? extends Row> list,
2385       TableName tableName,
2386       ExecutorService pool,
2387       Object[] results,
2388       Batch.Callback<R> callback)
2389       throws IOException, InterruptedException {
2390 
2391       // To fulfill the original contract, we have a special callback. This callback
2392       //  will set the results in the Object array.
2393       ObjectResultFiller<R> cb = new ObjectResultFiller<R>(results, callback);
2394       AsyncProcess<?> asyncProcess = createAsyncProcess(tableName, pool, cb, conf);
2395 
2396       // We're doing a submit all. This way, the originalIndex will match the initial list.
2397       asyncProcess.submitAll(list);
2398       asyncProcess.waitUntilDone();
2399 
2400       if (asyncProcess.hasError()) {
2401         throw asyncProcess.getErrors();
2402       }
2403     }
2404 
2405     @Override
2406     @Deprecated
2407     public <R> void processBatchCallback(
2408       List<? extends Row> list,
2409       byte[] tableName,
2410       ExecutorService pool,
2411       Object[] results,
2412       Batch.Callback<R> callback)
2413       throws IOException, InterruptedException {
2414       processBatchCallback(list, TableName.valueOf(tableName), pool, results, callback);
2415     }
2416 
2417     // For tests.
2418     protected <R> AsyncProcess createAsyncProcess(TableName tableName, ExecutorService pool,
2419            AsyncProcess.AsyncProcessCallback<R> callback, Configuration conf) {
2420       return new AsyncProcess<R>(this, tableName, pool, callback, conf,
2421           RpcRetryingCallerFactory.instantiate(conf));
2422     }
2423 
2424 
2425     /**
2426      * Fill the result array for the interfaces using it.
2427      */
2428     private static class ObjectResultFiller<Res>
2429         implements AsyncProcess.AsyncProcessCallback<Res> {
2430 
2431       private final Object[] results;
2432       private Batch.Callback<Res> callback;
2433 
2434       ObjectResultFiller(Object[] results, Batch.Callback<Res> callback) {
2435         this.results = results;
2436         this.callback = callback;
2437       }
2438 
2439       @Override
2440       public void success(int pos, byte[] region, Row row, Res result) {
2441         assert pos < results.length;
2442         results[pos] = result;
2443         if (callback != null) {
2444           callback.update(region, row.getRow(), result);
2445         }
2446       }
2447 
2448       @Override
2449       public boolean failure(int pos, byte[] region, Row row, Throwable t) {
2450         assert pos < results.length;
2451         results[pos] = t;
2452         //Batch.Callback<Res> was not called on failure in 0.94. We keep this.
2453         return true; // we want to have this failure in the failures list.
2454       }
2455 
2456       @Override
2457       public boolean retriableFailure(int originalIndex, Row row, byte[] region,
2458                                       Throwable exception) {
2459         return true; // we retry
2460       }
2461     }
2462 
2463     /*
2464      * Return the number of cached region for a table. It will only be called
2465      * from a unit test.
2466      */
2467     int getNumberOfCachedRegionLocations(final TableName tableName) {
2468       synchronized (this.cachedRegionLocations) {
2469         Map<byte[], HRegionLocation> tableLocs = this.cachedRegionLocations.get(tableName);
2470         if (tableLocs == null) {
2471           return 0;
2472         }
2473         return tableLocs.values().size();
2474       }
2475     }
2476 
2477     /**
2478      * Check the region cache to see whether a region is cached yet or not.
2479      * Called by unit tests.
2480      * @param tableName tableName
2481      * @param row row
2482      * @return Region cached or not.
2483      */
2484     boolean isRegionCached(TableName tableName, final byte[] row) {
2485       HRegionLocation location = getCachedLocation(tableName, row);
2486       return location != null;
2487     }
2488 
2489     @Override
2490     public void setRegionCachePrefetch(final TableName tableName,
2491         final boolean enable) {
2492       if (!enable) {
2493         regionCachePrefetchDisabledTables.add(Bytes.mapKey(tableName.getName()));
2494       }
2495       else {
2496         regionCachePrefetchDisabledTables.remove(Bytes.mapKey(tableName.getName()));
2497       }
2498     }
2499 
2500     @Override
2501     public void setRegionCachePrefetch(final byte[] tableName,
2502         final boolean enable) {
2503       setRegionCachePrefetch(TableName.valueOf(tableName), enable);
2504     }
2505 
2506     @Override
2507     public boolean getRegionCachePrefetch(TableName tableName) {
2508       return !regionCachePrefetchDisabledTables.contains(Bytes.mapKey(tableName.getName()));
2509     }
2510 
2511     @Override
2512     public boolean getRegionCachePrefetch(byte[] tableName) {
2513       return getRegionCachePrefetch(TableName.valueOf(tableName));
2514     }
2515 
2516     @Override
2517     public void abort(final String msg, Throwable t) {
2518       if (t instanceof KeeperException.SessionExpiredException
2519         && keepAliveZookeeper != null) {
2520         synchronized (masterAndZKLock) {
2521           if (keepAliveZookeeper != null) {
2522             LOG.warn("This client just lost it's session with ZooKeeper," +
2523               " closing it." +
2524               " It will be recreated next time someone needs it", t);
2525             closeZooKeeperWatcher();
2526           }
2527         }
2528       } else {
2529         if (t != null) {
2530           LOG.fatal(msg, t);
2531         } else {
2532           LOG.fatal(msg);
2533         }
2534         this.aborted = true;
2535         close();
2536         this.closed = true;
2537       }
2538     }
2539 
2540     @Override
2541     public boolean isClosed() {
2542       return this.closed;
2543     }
2544 
2545     @Override
2546     public boolean isAborted(){
2547       return this.aborted;
2548     }
2549 
2550     @Override
2551     public int getCurrentNrHRS() throws IOException {
2552       return this.registry.getCurrentNrHRS();
2553     }
2554 
2555     /**
2556      * Increment this client's reference count.
2557      */
2558     void incCount() {
2559       ++refCount;
2560     }
2561 
2562     /**
2563      * Decrement this client's reference count.
2564      */
2565     void decCount() {
2566       if (refCount > 0) {
2567         --refCount;
2568       }
2569     }
2570 
2571     /**
2572      * Return if this client has no reference
2573      *
2574      * @return true if this client has no reference; false otherwise
2575      */
2576     boolean isZeroReference() {
2577       return refCount == 0;
2578     }
2579 
2580     void internalClose() {
2581       if (this.closed) {
2582         return;
2583       }
2584       delayedClosing.stop("Closing connection");
2585       closeMaster();
2586       shutdownBatchPool();
2587       this.closed = true;
2588       closeZooKeeperWatcher();
2589       this.stubs.clear();
2590       if (clusterStatusListener != null) {
2591         clusterStatusListener.close();
2592       }
2593     }
2594 
2595     @Override
2596     public void close() {
2597       if (managed) {
2598         if (aborted) {
2599           HConnectionManager.deleteStaleConnection(this);
2600         } else {
2601           HConnectionManager.deleteConnection(this, false);
2602         }
2603       } else {
2604         internalClose();
2605       }
2606     }
2607 
2608     /**
2609      * Close the connection for good, regardless of what the current value of
2610      * {@link #refCount} is. Ideally, {@link #refCount} should be zero at this
2611      * point, which would be the case if all of its consumers close the
2612      * connection. However, on the off chance that someone is unable to close
2613      * the connection, perhaps because it bailed out prematurely, the method
2614      * below will ensure that this {@link HConnection} instance is cleaned up.
2615      * Caveat: The JVM may take an unknown amount of time to call finalize on an
2616      * unreachable object, so our hope is that every consumer cleans up after
2617      * itself, like any good citizen.
2618      */
2619     @Override
2620     protected void finalize() throws Throwable {
2621       super.finalize();
2622       // Pretend as if we are about to release the last remaining reference
2623       refCount = 1;
2624       close();
2625     }
2626 
2627     @Override
2628     public HTableDescriptor[] listTables() throws IOException {
2629       MasterMonitorKeepAliveConnection master = getKeepAliveMasterMonitorService();
2630       try {
2631         GetTableDescriptorsRequest req =
2632           RequestConverter.buildGetTableDescriptorsRequest((List<TableName>)null);
2633         return ProtobufUtil.getHTableDescriptorArray(master.getTableDescriptors(null, req));
2634       } catch (ServiceException se) {
2635         throw ProtobufUtil.getRemoteException(se);
2636       } finally {
2637         master.close();
2638       }
2639     }
2640 
2641     @Override
2642     public String[] getTableNames() throws IOException {
2643       TableName[] tableNames = listTableNames();
2644       String result[] = new String[tableNames.length];
2645       for (int i = 0; i < tableNames.length; i++) {
2646         result[i] = tableNames[i].getNameAsString();
2647       }
2648       return result;
2649     }
2650 
2651     @Override
2652     public TableName[] listTableNames() throws IOException {
2653       MasterMonitorKeepAliveConnection master = getKeepAliveMasterMonitorService();
2654       try {
2655         return ProtobufUtil.getTableNameArray(master.getTableNames(null,
2656             GetTableNamesRequest.newBuilder().build())
2657           .getTableNamesList());
2658       } catch (ServiceException se) {
2659         throw ProtobufUtil.getRemoteException(se);
2660       } finally {
2661         master.close();
2662       }
2663     }
2664 
2665     @Override
2666     public HTableDescriptor[] getHTableDescriptorsByTableName(
2667         List<TableName> tableNames) throws IOException {
2668       if (tableNames == null || tableNames.isEmpty()) return new HTableDescriptor[0];
2669       MasterMonitorKeepAliveConnection master = getKeepAliveMasterMonitorService();
2670       try {
2671         GetTableDescriptorsRequest req =
2672           RequestConverter.buildGetTableDescriptorsRequest(tableNames);
2673         return ProtobufUtil.getHTableDescriptorArray(master.getTableDescriptors(null, req));
2674       } catch (ServiceException se) {
2675         throw ProtobufUtil.getRemoteException(se);
2676       } finally {
2677         master.close();
2678       }
2679     }
2680 
2681     @Override
2682     public HTableDescriptor[] getHTableDescriptors(
2683         List<String> names) throws IOException {
2684       List<TableName> tableNames = new ArrayList(names.size());
2685       for(String name : names) {
2686         tableNames.add(TableName.valueOf(name));
2687       }
2688 
2689       return getHTableDescriptorsByTableName(tableNames);
2690     }
2691 
2692     /**
2693      * Connects to the master to get the table descriptor.
2694      * @param tableName table name
2695      * @return
2696      * @throws IOException if the connection to master fails or if the table
2697      *  is not found.
2698      */
2699     @Override
2700     public HTableDescriptor getHTableDescriptor(final TableName tableName)
2701     throws IOException {
2702       if (tableName == null) return null;
2703       if (tableName.equals(TableName.META_TABLE_NAME)) {
2704         return HTableDescriptor.META_TABLEDESC;
2705       }
2706       MasterMonitorKeepAliveConnection master = getKeepAliveMasterMonitorService();
2707       GetTableDescriptorsResponse htds;
2708       try {
2709         GetTableDescriptorsRequest req =
2710           RequestConverter.buildGetTableDescriptorsRequest(tableName);
2711         htds = master.getTableDescriptors(null, req);
2712       } catch (ServiceException se) {
2713         throw ProtobufUtil.getRemoteException(se);
2714       } finally {
2715         master.close();
2716       }
2717       if (!htds.getTableSchemaList().isEmpty()) {
2718         return HTableDescriptor.convert(htds.getTableSchemaList().get(0));
2719       }
2720       throw new TableNotFoundException(tableName.getNameAsString());
2721     }
2722 
2723     @Override
2724     public HTableDescriptor getHTableDescriptor(final byte[] tableName)
2725     throws IOException {
2726       return getHTableDescriptor(TableName.valueOf(tableName));
2727     }
2728   }
2729 
2730   /**
2731    * The record of errors for servers.
2732    */
2733   static class ServerErrorTracker {
2734     // We need a concurrent map here, as we could have multiple threads updating it in parallel.
2735     private final ConcurrentMap<HRegionLocation, ServerErrors> errorsByServer =
2736         new ConcurrentHashMap<HRegionLocation, ServerErrors>();
2737     private long canRetryUntil = 0;
2738 
2739     public ServerErrorTracker(long timeout) {
2740       LOG.trace("Server tracker timeout is " + timeout + "ms");
2741       this.canRetryUntil = EnvironmentEdgeManager.currentTimeMillis() + timeout;
2742     }
2743 
2744     boolean canRetryMore() {
2745       return EnvironmentEdgeManager.currentTimeMillis() < this.canRetryUntil;
2746     }
2747 
2748     /**
2749      * Calculates the back-off time for a retrying request to a particular server.
2750      *
2751      * @param server    The server in question.
2752      * @param basePause The default hci pause.
2753      * @return The time to wait before sending next request.
2754      */
2755     long calculateBackoffTime(HRegionLocation server, long basePause) {
2756       long result = 0;
2757       ServerErrors errorStats = errorsByServer.get(server);
2758       if (errorStats != null) {
2759         result = ConnectionUtils.getPauseTime(basePause, errorStats.retries);
2760         // Adjust by the time we already waited since last talking to this server.
2761         long now = EnvironmentEdgeManager.currentTimeMillis();
2762         long timeSinceLastError = now - errorStats.getLastErrorTime();
2763         if (timeSinceLastError > 0) {
2764           result = Math.max(0, result - timeSinceLastError);
2765         }
2766         // Finally, see if the backoff time overshoots the timeout.
2767         if (result > 0 && (now + result > this.canRetryUntil)) {
2768           result = Math.max(0, this.canRetryUntil - now);
2769         }
2770       }
2771       return result;
2772     }
2773 
2774     /**
2775      * Reports that there was an error on the server to do whatever bean-counting necessary.
2776      *
2777      * @param server The server in question.
2778      */
2779     void reportServerError(HRegionLocation server) {
2780       ServerErrors errors = errorsByServer.get(server);
2781       if (errors != null) {
2782         errors.addError();
2783       } else {
2784         errorsByServer.put(server, new ServerErrors());
2785       }
2786     }
2787 
2788     /**
2789      * The record of errors for a server.
2790      */
2791     private static class ServerErrors {
2792       public long lastErrorTime;
2793       public int retries;
2794 
2795       public ServerErrors() {
2796         this.lastErrorTime = EnvironmentEdgeManager.currentTimeMillis();
2797         this.retries = 0;
2798       }
2799 
2800       public void addError() {
2801         this.lastErrorTime = EnvironmentEdgeManager.currentTimeMillis();
2802         ++this.retries;
2803       }
2804 
2805       public long getLastErrorTime() {
2806         return this.lastErrorTime;
2807       }
2808     }
2809   }
2810 
2811   /**
2812    * Set the number of retries to use serverside when trying to communicate
2813    * with another server over {@link HConnection}.  Used updating catalog
2814    * tables, etc.  Call this method before we create any Connections.
2815    * @param c The Configuration instance to set the retries into.
2816    * @param log Used to log what we set in here.
2817    */
2818   public static void setServerSideHConnectionRetries(final Configuration c, final String sn,
2819       final Log log) {
2820     int hcRetries = c.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
2821       HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
2822     // Go big.  Multiply by 10.  If we can't get to meta after this many retries
2823     // then something seriously wrong.
2824     int serversideMultiplier = c.getInt("hbase.client.serverside.retries.multiplier", 10);
2825     int retries = hcRetries * serversideMultiplier;
2826     c.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, retries);
2827     log.debug(sn + " HConnection server-to-server retries=" + retries);
2828   }
2829 }