View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.client;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNull;
25  import static org.junit.Assert.assertTrue;
26  import static org.junit.Assert.fail;
27  
28  import java.io.IOException;
29  import java.lang.reflect.Field;
30  import java.lang.reflect.Modifier;
31  import java.net.SocketTimeoutException;
32  import java.util.ArrayList;
33  import java.util.HashMap;
34  import java.util.List;
35  import java.util.Map;
36  import java.util.Random;
37  import java.util.concurrent.ExecutorService;
38  import java.util.concurrent.SynchronousQueue;
39  import java.util.concurrent.ThreadPoolExecutor;
40  import java.util.concurrent.TimeUnit;
41  import java.util.concurrent.atomic.AtomicBoolean;
42  import java.util.concurrent.atomic.AtomicInteger;
43  import java.util.concurrent.atomic.AtomicLong;
44  import java.util.concurrent.atomic.AtomicReference;
45  
46  import org.apache.commons.logging.Log;
47  import org.apache.commons.logging.LogFactory;
48  import org.apache.hadoop.conf.Configuration;
49  import org.apache.hadoop.hbase.Cell;
50  import org.apache.hadoop.hbase.HBaseConfiguration;
51  import org.apache.hadoop.hbase.HBaseTestingUtility;
52  import org.apache.hadoop.hbase.HConstants;
53  import org.apache.hadoop.hbase.HRegionLocation;
54  import org.apache.hadoop.hbase.HTableDescriptor;
55  import org.apache.hadoop.hbase.testclassification.MediumTests;
56  import org.apache.hadoop.hbase.RegionLocations;
57  import org.apache.hadoop.hbase.ServerName;
58  import org.apache.hadoop.hbase.TableName;
59  import org.apache.hadoop.hbase.Waiter;
60  import org.apache.hadoop.hbase.client.ConnectionManager.HConnectionImplementation;
61  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
62  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
63  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
64  import org.apache.hadoop.hbase.exceptions.DeserializationException;
65  import org.apache.hadoop.hbase.exceptions.RegionMovedException;
66  import org.apache.hadoop.hbase.filter.Filter;
67  import org.apache.hadoop.hbase.filter.FilterBase;
68  import org.apache.hadoop.hbase.ipc.RpcClient;
69  import org.apache.hadoop.hbase.master.HMaster;
70  import org.apache.hadoop.hbase.regionserver.HRegion;
71  import org.apache.hadoop.hbase.regionserver.HRegionServer;
72  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
73  import org.apache.hadoop.hbase.util.Bytes;
74  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
75  import org.apache.hadoop.hbase.util.JVMClusterUtil;
76  import org.apache.hadoop.hbase.util.ManualEnvironmentEdge;
77  import org.apache.hadoop.hbase.util.Threads;
78  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
79  import org.junit.AfterClass;
80  import org.junit.Assert;
81  import org.junit.BeforeClass;
82  import org.junit.Ignore;
83  import org.junit.Test;
84  import org.junit.experimental.categories.Category;
85  
86  import com.google.common.collect.Lists;
87  
88  /**
89   * This class is for testing HBaseConnectionManager features
90   */
91  @Category(MediumTests.class)
92  public class TestHCM {
93    private static final Log LOG = LogFactory.getLog(TestHCM.class);
94    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
95    private static final TableName TABLE_NAME =
96        TableName.valueOf("test");
97    private static final TableName TABLE_NAME1 =
98        TableName.valueOf("test1");
99    private static final TableName TABLE_NAME2 =
100       TableName.valueOf("test2");
101   private static final TableName TABLE_NAME3 =
102       TableName.valueOf("test3");
103   private static final TableName TABLE_NAME4 =
104       TableName.valueOf("test4");
105   private static final byte[] FAM_NAM = Bytes.toBytes("f");
106   private static final byte[] ROW = Bytes.toBytes("bbb");
107   private static final byte[] ROW_X = Bytes.toBytes("xxx");
108   private static Random _randy = new Random();
109 
110 /**
111 * This copro sleeps 20 second. The first call it fails. The second time, it works.
112 */
113   public static class SleepAndFailFirstTime extends BaseRegionObserver {
114     static final AtomicLong ct = new AtomicLong(0);
115 
116     public SleepAndFailFirstTime() {
117     }
118 
119     @Override
120     public void preGetOp(final ObserverContext<RegionCoprocessorEnvironment> e,
121               final Get get, final List<Cell> results) throws IOException {
122       Threads.sleep(20000);
123       if (ct.incrementAndGet() == 1){
124         throw new IOException("first call I fail");
125       }
126     }
127   }
128 
129   @BeforeClass
130   public static void setUpBeforeClass() throws Exception {
131     TEST_UTIL.getConfiguration().setBoolean(HConstants.STATUS_PUBLISHED, true);
132     TEST_UTIL.startMiniCluster(2);
133   }
134 
135   @AfterClass public static void tearDownAfterClass() throws Exception {
136     TEST_UTIL.shutdownMiniCluster();
137   }
138 
139 
140   private static int getHConnectionManagerCacheSize(){
141     return HConnectionTestingUtility.getConnectionCount();
142   }
143 
144   @Test
145   public void testClusterConnection() throws IOException {
146     ThreadPoolExecutor otherPool = new ThreadPoolExecutor(1, 1,
147         5, TimeUnit.SECONDS,
148         new SynchronousQueue<Runnable>(),
149         Threads.newDaemonThreadFactory("test-hcm"));
150 
151     HConnection con1 = HConnectionManager.createConnection(TEST_UTIL.getConfiguration());
152     HConnection con2 = HConnectionManager.createConnection(TEST_UTIL.getConfiguration(), otherPool);
153     // make sure the internally created ExecutorService is the one passed
154     assertTrue(otherPool == ((HConnectionImplementation)con2).getCurrentBatchPool());
155 
156     String tableName = "testClusterConnection";
157     TEST_UTIL.createTable(tableName.getBytes(), FAM_NAM).close();
158     HTable t = (HTable)con1.getTable(tableName, otherPool);
159     // make sure passing a pool to the getTable does not trigger creation of an internal pool
160     assertNull("Internal Thread pool should be null", ((HConnectionImplementation)con1).getCurrentBatchPool());
161     // table should use the pool passed
162     assertTrue(otherPool == t.getPool());
163     t.close();
164 
165     t = (HTable)con2.getTable(tableName);
166     // table should use the connectin's internal pool
167     assertTrue(otherPool == t.getPool());
168     t.close();
169 
170     t = (HTable)con2.getTable(Bytes.toBytes(tableName));
171     // try other API too
172     assertTrue(otherPool == t.getPool());
173     t.close();
174 
175     t = (HTable)con2.getTable(TableName.valueOf(tableName));
176     // try other API too
177     assertTrue(otherPool == t.getPool());
178     t.close();
179 
180     t = (HTable)con1.getTable(tableName);
181     ExecutorService pool = ((HConnectionImplementation)con1).getCurrentBatchPool();
182     // make sure an internal pool was created
183     assertNotNull("An internal Thread pool should have been created", pool);
184     // and that the table is using it
185     assertTrue(t.getPool() == pool);
186     t.close();
187 
188     t = (HTable)con1.getTable(tableName);
189     // still using the *same* internal pool
190     assertTrue(t.getPool() == pool);
191     t.close();
192 
193     con1.close();
194     // if the pool was created on demand it should be closed upon connection close
195     assertTrue(pool.isShutdown());
196 
197     con2.close();
198     // if the pool is passed, it is not closed
199     assertFalse(otherPool.isShutdown());
200     otherPool.shutdownNow();
201   }
202 
203   /**
204    * Naive test to check that HConnection#getAdmin returns a properly constructed HBaseAdmin object
205    * @throws IOException Unable to construct admin
206    */
207   @Test
208   public void testAdminFactory() throws IOException {
209     Connection con1 = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration());
210     Admin admin = con1.getAdmin();
211     assertTrue(admin.getConnection() == con1);
212     assertTrue(admin.getConfiguration() == TEST_UTIL.getConfiguration());
213     con1.close();
214   }
215 
216   // Fails too often!  Needs work.  HBASE-12558
217   @Ignore @Test(expected = RegionServerStoppedException.class)
218   public void testClusterStatus() throws Exception {
219 
220     TableName tn =
221         TableName.valueOf("testClusterStatus");
222     byte[] cf = "cf".getBytes();
223     byte[] rk = "rk1".getBytes();
224 
225     JVMClusterUtil.RegionServerThread rs = TEST_UTIL.getHBaseCluster().startRegionServer();
226     rs.waitForServerOnline();
227     final ServerName sn = rs.getRegionServer().getServerName();
228 
229     HTable t = TEST_UTIL.createTable(tn, cf);
230     TEST_UTIL.waitTableAvailable(tn);
231 
232     while(TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
233         getRegionStates().isRegionsInTransition()){
234       Thread.sleep(1);
235     }
236     final HConnectionImplementation hci =  (HConnectionImplementation)t.getConnection();
237     while (t.getRegionLocation(rk).getPort() != sn.getPort()){
238       TEST_UTIL.getHBaseAdmin().move(t.getRegionLocation(rk).getRegionInfo().
239           getEncodedNameAsBytes(), Bytes.toBytes(sn.toString()));
240       while(TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
241           getRegionStates().isRegionsInTransition()){
242         Thread.sleep(1);
243       }
244       hci.clearRegionCache(tn);
245     }
246     Assert.assertNotNull(hci.clusterStatusListener);
247     TEST_UTIL.assertRegionOnServer(t.getRegionLocation(rk).getRegionInfo(), sn, 20000);
248 
249     Put p1 = new Put(rk);
250     p1.add(cf, "qual".getBytes(), "val".getBytes());
251     t.put(p1);
252 
253     rs.getRegionServer().abort("I'm dead");
254 
255     // We want the status to be updated. That's a least 10 second
256     TEST_UTIL.waitFor(40000, 1000, true, new Waiter.Predicate<Exception>() {
257       @Override
258       public boolean evaluate() throws Exception {
259         return TEST_UTIL.getHBaseCluster().getMaster().getServerManager().
260             getDeadServers().isDeadServer(sn);
261       }
262     });
263 
264     TEST_UTIL.waitFor(40000, 1000, true, new Waiter.Predicate<Exception>() {
265       @Override
266       public boolean evaluate() throws Exception {
267         return hci.clusterStatusListener.isDeadServer(sn);
268       }
269     });
270 
271     t.close();
272     hci.getClient(sn);  // will throw an exception: RegionServerStoppedException
273   }
274 
275   /**
276    * Test that we can handle connection close: it will trigger a retry, but the calls will
277    *  finish.
278    */
279   @Test
280   public void testConnectionCloseAllowsInterrupt() throws Exception {
281     testConnectionClose(true);
282   }
283 
284   @Test
285   public void testConnectionNotAllowsInterrupt() throws Exception {
286     testConnectionClose(false);
287   }
288 
289   /**
290    * Test that an operation can fail if we read the global operation timeout, even if the
291    * individual timeout is fine. We do that with:
292    * - client side: an operation timeout of 30 seconds
293    * - server side: we sleep 20 second at each attempt. The first work fails, the second one
294    * succeeds. But the client won't wait that much, because 20 + 20 > 30, so the client
295    * timeouted when the server answers.
296    */
297   @Test
298   public void testOperationTimeout() throws Exception {
299     HTableDescriptor hdt = TEST_UTIL.createTableDescriptor("HCM-testOperationTimeout");
300     hdt.addCoprocessor(SleepAndFailFirstTime.class.getName());
301     HTable table = TEST_UTIL.createTable(hdt, new byte[][]{FAM_NAM}, TEST_UTIL.getConfiguration());
302 
303     // Check that it works if the timeout is big enough
304     table.setOperationTimeout(120 * 1000);
305     table.get(new Get(FAM_NAM));
306 
307     // Resetting and retrying. Will fail this time, not enough time for the second try
308     SleepAndFailFirstTime.ct.set(0);
309     try {
310       table.setOperationTimeout(30 * 1000);
311       table.get(new Get(FAM_NAM));
312       Assert.fail("We expect an exception here");
313     } catch (SocketTimeoutException e) {
314       // The client has a CallTimeout class, but it's not shared.We're not very clean today,
315       //  in the general case you can expect the call to stop, but the exception may vary.
316       // In this test however, we're sure that it will be a socket timeout.
317       LOG.info("We received an exception, as expected ", e);
318     } catch (IOException e) {
319       Assert.fail("Wrong exception:" + e.getMessage());
320     } finally {
321       table.close();
322     }
323   }
324 
325 
326   private void testConnectionClose(boolean allowsInterrupt) throws Exception {
327     TableName tableName = TableName.valueOf("HCM-testConnectionClose" + allowsInterrupt);
328     TEST_UTIL.createTable(tableName, FAM_NAM).close();
329 
330     boolean previousBalance = TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
331 
332     Configuration c2 = new Configuration(TEST_UTIL.getConfiguration());
333     // We want to work on a separate connection.
334     c2.set(HConstants.HBASE_CLIENT_INSTANCE_ID, String.valueOf(-1));
335     c2.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 100); // retry a lot
336     c2.setInt(HConstants.HBASE_CLIENT_PAUSE, 0); // don't wait between retries.
337     c2.setInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, 0); // Server do not really expire
338     c2.setBoolean(RpcClient.SPECIFIC_WRITE_THREAD, allowsInterrupt);
339 
340     final HTable table = new HTable(c2, tableName);
341 
342     Put put = new Put(ROW);
343     put.add(FAM_NAM, ROW, ROW);
344     table.put(put);
345 
346     // 4 steps: ready=0; doGets=1; mustStop=2; stopped=3
347     final AtomicInteger step = new AtomicInteger(0);
348 
349     final AtomicReference<Throwable> failed = new AtomicReference<Throwable>(null);
350     Thread t = new Thread("testConnectionCloseThread") {
351       @Override
352       public void run() {
353         int done = 0;
354         try {
355           step.set(1);
356           while (step.get() == 1) {
357             Get get = new Get(ROW);
358             table.get(get);
359             done++;
360             if (done % 100 == 0)
361               LOG.info("done=" + done);
362           }
363         } catch (Throwable t) {
364           failed.set(t);
365           LOG.error(t);
366         }
367         step.set(3);
368       }
369     };
370     t.start();
371     TEST_UTIL.waitFor(20000, new Waiter.Predicate<Exception>() {
372       @Override
373       public boolean evaluate() throws Exception {
374         return step.get() == 1;
375       }
376     });
377 
378     ServerName sn = table.getRegionLocation(ROW).getServerName();
379     ConnectionManager.HConnectionImplementation conn =
380         (ConnectionManager.HConnectionImplementation) table.getConnection();
381     RpcClient rpcClient = conn.getRpcClient();
382 
383     LOG.info("Going to cancel connections. connection=" + conn.toString() + ", sn=" + sn);
384     for (int i = 0; i < 5000; i++) {
385       rpcClient.cancelConnections(sn);
386       Thread.sleep(5);
387     }
388 
389     step.compareAndSet(1, 2);
390     // The test may fail here if the thread doing the gets is stuck. The way to find
391     //  out what's happening is to look for the thread named 'testConnectionCloseThread'
392     TEST_UTIL.waitFor(40000, new Waiter.Predicate<Exception>() {
393       @Override
394       public boolean evaluate() throws Exception {
395         return step.get() == 3;
396       }
397     });
398 
399     table.close();
400     Assert.assertTrue("Unexpected exception is " + failed.get(), failed.get() == null);
401     TEST_UTIL.getHBaseAdmin().setBalancerRunning(previousBalance, true);
402   }
403 
404   /**
405    * Test that connection can become idle without breaking everything.
406    */
407   @Test
408   public void testConnectionIdle() throws Exception {
409     TableName tableName = TableName.valueOf("HCM-testConnectionIdle");
410     TEST_UTIL.createTable(tableName, FAM_NAM).close();
411     int idleTime =  20000;
412     boolean previousBalance = TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
413 
414     Configuration c2 = new Configuration(TEST_UTIL.getConfiguration());
415     // We want to work on a separate connection.
416     c2.set(HConstants.HBASE_CLIENT_INSTANCE_ID, String.valueOf(-1));
417     c2.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); // Don't retry: retry = test failed
418     c2.setInt(RpcClient.IDLE_TIME, idleTime);
419 
420     final Table table = new HTable(c2, tableName);
421 
422     Put put = new Put(ROW);
423     put.add(FAM_NAM, ROW, ROW);
424     table.put(put);
425 
426     ManualEnvironmentEdge mee = new ManualEnvironmentEdge();
427     mee.setValue(System.currentTimeMillis());
428     EnvironmentEdgeManager.injectEdge(mee);
429     LOG.info("first get");
430     table.get(new Get(ROW));
431 
432     LOG.info("first get - changing the time & sleeping");
433     mee.incValue(idleTime + 1000);
434     Thread.sleep(1500); // we need to wait a little for the connection to be seen as idle.
435                         // 1500 = sleep time in RpcClient#waitForWork + a margin
436 
437     LOG.info("second get - connection has been marked idle in the middle");
438     // To check that the connection actually became idle would need to read some private
439     //  fields of RpcClient.
440     table.get(new Get(ROW));
441     mee.incValue(idleTime + 1000);
442 
443     LOG.info("third get - connection is idle, but the reader doesn't know yet");
444     // We're testing here a special case:
445     //  time limit reached BUT connection not yet reclaimed AND a new call.
446     //  in this situation, we don't close the connection, instead we use it immediately.
447     // If we're very unlucky we can have a race condition in the test: the connection is already
448     //  under closing when we do the get, so we have an exception, and we don't retry as the
449     //  retry number is 1. The probability is very very low, and seems acceptable for now. It's
450     //  a test issue only.
451     table.get(new Get(ROW));
452 
453     LOG.info("we're done - time will change back");
454 
455     table.close();
456     EnvironmentEdgeManager.reset();
457     TEST_UTIL.getHBaseAdmin().setBalancerRunning(previousBalance, true);
458   }
459 
460     /**
461      * Test that the connection to the dead server is cut immediately when we receive the
462      *  notification.
463      * @throws Exception
464      */
465   @Test
466   public void testConnectionCut() throws Exception {
467 
468     TableName tableName = TableName.valueOf("HCM-testConnectionCut");
469 
470     TEST_UTIL.createTable(tableName, FAM_NAM).close();
471     boolean previousBalance = TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
472 
473     Configuration c2 = new Configuration(TEST_UTIL.getConfiguration());
474     // We want to work on a separate connection.
475     c2.set(HConstants.HBASE_CLIENT_INSTANCE_ID, String.valueOf(-1));
476     c2.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
477     c2.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 30 * 1000);
478 
479     HTable table = new HTable(c2, tableName);
480 
481     Put p = new Put(FAM_NAM);
482     p.add(FAM_NAM, FAM_NAM, FAM_NAM);
483     table.put(p);
484 
485     final HConnectionImplementation hci =  (HConnectionImplementation)table.getConnection();
486     final HRegionLocation loc = table.getRegionLocation(FAM_NAM);
487 
488     Get get = new Get(FAM_NAM);
489     Assert.assertNotNull(table.get(get));
490 
491     get = new Get(FAM_NAM);
492     get.setFilter(new BlockingFilter());
493 
494     // This thread will mark the server as dead while we're waiting during a get.
495     Thread t = new Thread() {
496       @Override
497       public void run() {
498         synchronized (syncBlockingFilter) {
499           try {
500             syncBlockingFilter.wait();
501           } catch (InterruptedException e) {
502             throw new RuntimeException(e);
503           }
504         }
505         hci.clusterStatusListener.deadServerHandler.newDead(loc.getServerName());
506       }
507     };
508 
509     t.start();
510     try {
511       table.get(get);
512       Assert.fail();
513     } catch (IOException expected) {
514       LOG.debug("Received: " + expected);
515       Assert.assertFalse(expected instanceof SocketTimeoutException);
516       Assert.assertFalse(syncBlockingFilter.get());
517     } finally {
518       syncBlockingFilter.set(true);
519       t.join();
520       HConnectionManager.getConnection(c2).close();
521       TEST_UTIL.getHBaseAdmin().setBalancerRunning(previousBalance, true);
522     }
523 
524     table.close();
525   }
526 
527   protected static final AtomicBoolean syncBlockingFilter = new AtomicBoolean(false);
528 
529   public static class BlockingFilter extends FilterBase {
530     @Override
531     public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException {
532       int i = 0;
533       while (i++ < 1000 && !syncBlockingFilter.get()) {
534         synchronized (syncBlockingFilter) {
535           syncBlockingFilter.notifyAll();
536         }
537         Threads.sleep(100);
538       }
539       syncBlockingFilter.set(true);
540       return false;
541     }
542     @Override
543     public ReturnCode filterKeyValue(Cell ignored) throws IOException {
544       return ReturnCode.INCLUDE;
545     }
546 
547     public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException{
548       return new BlockingFilter();
549     }
550   }
551 
552   @Test
553   public void abortingHConnectionRemovesItselfFromHCM() throws Exception {
554     // Save off current HConnections
555     Map<HConnectionKey, HConnectionImplementation> oldHBaseInstances =
556         new HashMap<HConnectionKey, HConnectionImplementation>();
557     oldHBaseInstances.putAll(ConnectionManager.CONNECTION_INSTANCES);
558 
559     ConnectionManager.CONNECTION_INSTANCES.clear();
560 
561     try {
562       HConnection connection = HConnectionManager.getConnection(TEST_UTIL.getConfiguration());
563       connection.abort("test abortingHConnectionRemovesItselfFromHCM", new Exception(
564           "test abortingHConnectionRemovesItselfFromHCM"));
565       Assert.assertNotSame(connection,
566         HConnectionManager.getConnection(TEST_UTIL.getConfiguration()));
567     } finally {
568       // Put original HConnections back
569       ConnectionManager.CONNECTION_INSTANCES.clear();
570       ConnectionManager.CONNECTION_INSTANCES.putAll(oldHBaseInstances);
571     }
572   }
573 
574   /**
575    * Test that when we delete a location using the first row of a region
576    * that we really delete it.
577    * @throws Exception
578    */
579   @Test
580   public void testRegionCaching() throws Exception{
581     TEST_UTIL.createTable(TABLE_NAME, FAM_NAM).close();
582     Configuration conf =  new Configuration(TEST_UTIL.getConfiguration());
583     conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
584     HTable table = new HTable(conf, TABLE_NAME);
585 
586     TEST_UTIL.createMultiRegions(table, FAM_NAM);
587     TEST_UTIL.waitUntilAllRegionsAssigned(table.getName());
588     Put put = new Put(ROW);
589     put.add(FAM_NAM, ROW, ROW);
590     table.put(put);
591     ConnectionManager.HConnectionImplementation conn =
592       (ConnectionManager.HConnectionImplementation)table.getConnection();
593 
594     assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
595 
596     final int nextPort = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation().getPort() + 1;
597     HRegionLocation loc = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation();
598     conn.updateCachedLocation(loc.getRegionInfo(), loc.getServerName(),
599         ServerName.valueOf("127.0.0.1", nextPort,
600         HConstants.LATEST_TIMESTAMP), HConstants.LATEST_TIMESTAMP);
601     Assert.assertEquals(conn.getCachedLocation(TABLE_NAME, ROW)
602       .getRegionLocation().getPort(), nextPort);
603 
604     conn.clearRegionCache(TABLE_NAME, ROW.clone());
605     RegionLocations rl = conn.getCachedLocation(TABLE_NAME, ROW);
606     assertNull("What is this location?? " + rl, rl);
607 
608     // We're now going to move the region and check that it works for the client
609     // First a new put to add the location in the cache
610     conn.clearRegionCache(TABLE_NAME);
611     Assert.assertEquals(0, conn.getNumberOfCachedRegionLocations(TABLE_NAME));
612     Put put2 = new Put(ROW);
613     put2.add(FAM_NAM, ROW, ROW);
614     table.put(put2);
615     assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
616     assertNotNull(conn.getCachedLocation(TableName.valueOf(TABLE_NAME.getName()), ROW.clone()));
617 
618     TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, false);
619     HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
620 
621     // We can wait for all regions to be online, that makes log reading easier when debugging
622     while (master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
623       Thread.sleep(1);
624     }
625 
626     // Now moving the region to the second server
627     HRegionLocation toMove = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation();
628     byte[] regionName = toMove.getRegionInfo().getRegionName();
629     byte[] encodedRegionNameBytes = toMove.getRegionInfo().getEncodedNameAsBytes();
630 
631     // Choose the other server.
632     int curServerId = TEST_UTIL.getHBaseCluster().getServerWith(regionName);
633     int destServerId = (curServerId == 0 ? 1 : 0);
634 
635     HRegionServer curServer = TEST_UTIL.getHBaseCluster().getRegionServer(curServerId);
636     HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(destServerId);
637 
638     ServerName destServerName = destServer.getServerName();
639 
640     // Check that we are in the expected state
641     Assert.assertTrue(curServer != destServer);
642     Assert.assertFalse(curServer.getServerName().equals(destServer.getServerName()));
643     Assert.assertFalse( toMove.getPort() == destServerName.getPort());
644     Assert.assertNotNull(curServer.getOnlineRegion(regionName));
645     Assert.assertNull(destServer.getOnlineRegion(regionName));
646     Assert.assertFalse(TEST_UTIL.getMiniHBaseCluster().getMaster().
647         getAssignmentManager().getRegionStates().isRegionsInTransition());
648 
649     // Moving. It's possible that we don't have all the regions online at this point, so
650     //  the test must depends only on the region we're looking at.
651     LOG.info("Move starting region="+toMove.getRegionInfo().getRegionNameAsString());
652     TEST_UTIL.getHBaseAdmin().move(
653       toMove.getRegionInfo().getEncodedNameAsBytes(),
654       destServerName.getServerName().getBytes()
655     );
656 
657     while (destServer.getOnlineRegion(regionName) == null ||
658         destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
659         curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
660         master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
661       // wait for the move to be finished
662       Thread.sleep(1);
663     }
664 
665     LOG.info("Move finished for region="+toMove.getRegionInfo().getRegionNameAsString());
666 
667     // Check our new state.
668     Assert.assertNull(curServer.getOnlineRegion(regionName));
669     Assert.assertNotNull(destServer.getOnlineRegion(regionName));
670     Assert.assertFalse(destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
671     Assert.assertFalse(curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
672 
673 
674     // Cache was NOT updated and points to the wrong server
675     Assert.assertFalse(
676         conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation()
677           .getPort() == destServerName.getPort());
678 
679     // This part relies on a number of tries equals to 1.
680     // We do a put and expect the cache to be updated, even if we don't retry
681     LOG.info("Put starting");
682     Put put3 = new Put(ROW);
683     put3.add(FAM_NAM, ROW, ROW);
684     try {
685       table.put(put3);
686       Assert.fail("Unreachable point");
687     } catch (RetriesExhaustedWithDetailsException e){
688       LOG.info("Put done, exception caught: " + e.getClass());
689       Assert.assertEquals(1, e.getNumExceptions());
690       Assert.assertEquals(1, e.getCauses().size());
691       Assert.assertArrayEquals(e.getRow(0).getRow(), ROW);
692 
693       // Check that we unserialized the exception as expected
694       Throwable cause = ConnectionManager.findException(e.getCause(0));
695       Assert.assertNotNull(cause);
696       Assert.assertTrue(cause instanceof RegionMovedException);
697     }
698     Assert.assertNotNull("Cached connection is null", conn.getCachedLocation(TABLE_NAME, ROW));
699     Assert.assertEquals(
700         "Previous server was " + curServer.getServerName().getHostAndPort(),
701         destServerName.getPort(),
702         conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation().getPort());
703 
704     Assert.assertFalse(destServer.getRegionsInTransitionInRS()
705       .containsKey(encodedRegionNameBytes));
706     Assert.assertFalse(curServer.getRegionsInTransitionInRS()
707       .containsKey(encodedRegionNameBytes));
708 
709     // We move it back to do another test with a scan
710     LOG.info("Move starting region=" + toMove.getRegionInfo().getRegionNameAsString());
711     TEST_UTIL.getHBaseAdmin().move(
712       toMove.getRegionInfo().getEncodedNameAsBytes(),
713       curServer.getServerName().getServerName().getBytes()
714     );
715 
716     while (curServer.getOnlineRegion(regionName) == null ||
717         destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
718         curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
719         master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
720       // wait for the move to be finished
721       Thread.sleep(1);
722     }
723 
724     // Check our new state.
725     Assert.assertNotNull(curServer.getOnlineRegion(regionName));
726     Assert.assertNull(destServer.getOnlineRegion(regionName));
727     LOG.info("Move finished for region=" + toMove.getRegionInfo().getRegionNameAsString());
728 
729     // Cache was NOT updated and points to the wrong server
730     Assert.assertFalse(conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation().getPort() ==
731       curServer.getServerName().getPort());
732 
733     Scan sc = new Scan();
734     sc.setStopRow(ROW);
735     sc.setStartRow(ROW);
736 
737     // The scanner takes the max retries from the connection configuration, not the table as
738     // the put.
739     TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
740 
741     try {
742       ResultScanner rs = table.getScanner(sc);
743       while (rs.next() != null) {
744       }
745       Assert.fail("Unreachable point");
746     } catch (RetriesExhaustedException e) {
747       LOG.info("Scan done, expected exception caught: " + e.getClass());
748     }
749 
750     // Cache is updated with the right value.
751     Assert.assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
752     Assert.assertEquals(
753       "Previous server was "+destServer.getServerName().getHostAndPort(),
754       curServer.getServerName().getPort(),
755       conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation().getPort());
756 
757     TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
758         HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
759     table.close();
760   }
761 
762   /**
763    * Test that Connection or Pool are not closed when managed externally
764    * @throws Exception
765    */
766   @Test
767   public void testConnectionManagement() throws Exception{
768     Table table0 = TEST_UTIL.createTable(TABLE_NAME1, FAM_NAM);
769     Connection conn = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration());
770     HTable table = (HTable) conn.getTable(TABLE_NAME1);
771     table.close();
772     assertFalse(conn.isClosed());
773     assertFalse(table.getPool().isShutdown());
774     table = (HTable) conn.getTable(TABLE_NAME1);
775     table.close();
776     assertFalse(table.getPool().isShutdown());
777     conn.close();
778     assertTrue(table.getPool().isShutdown());
779     table0.close();
780   }
781 
782   /**
783    * Test that stale cache updates don't override newer cached values.
784    */
785   @Test(timeout = 60000)
786   public void testCacheSeqNums() throws Exception{
787     HTable table = TEST_UTIL.createTable(TABLE_NAME2, FAM_NAM);
788     TEST_UTIL.createMultiRegions(table, FAM_NAM);
789     Put put = new Put(ROW);
790     put.add(FAM_NAM, ROW, ROW);
791     table.put(put);
792     ConnectionManager.HConnectionImplementation conn =
793       (ConnectionManager.HConnectionImplementation)table.getConnection();
794 
795     HRegionLocation location = conn.getCachedLocation(TABLE_NAME2, ROW).getRegionLocation();
796     assertNotNull(location);
797 
798     ServerName anySource = ServerName.valueOf(location.getHostname(), location.getPort() - 1, 0L);
799 
800     // Same server as already in cache reporting - overwrites any value despite seqNum.
801     int nextPort = location.getPort() + 1;
802     conn.updateCachedLocation(location.getRegionInfo(), location.getServerName(),
803         ServerName.valueOf("127.0.0.1", nextPort, 0), location.getSeqNum() - 1);
804     location = conn.getCachedLocation(TABLE_NAME2, ROW).getRegionLocation();
805     Assert.assertEquals(nextPort, location.getPort());
806 
807     // No source specified - same.
808     nextPort = location.getPort() + 1;
809     conn.updateCachedLocation(location.getRegionInfo(), location.getServerName(),
810         ServerName.valueOf("127.0.0.1", nextPort, 0), location.getSeqNum() - 1);
811     location = conn.getCachedLocation(TABLE_NAME2, ROW).getRegionLocation();
812     Assert.assertEquals(nextPort, location.getPort());
813 
814     // Higher seqNum - overwrites lower seqNum.
815     nextPort = location.getPort() + 1;
816     conn.updateCachedLocation(location.getRegionInfo(), anySource,
817         ServerName.valueOf("127.0.0.1", nextPort, 0), location.getSeqNum() + 1);
818     location = conn.getCachedLocation(TABLE_NAME2, ROW).getRegionLocation();
819     Assert.assertEquals(nextPort, location.getPort());
820 
821     // Lower seqNum - does not overwrite higher seqNum.
822     nextPort = location.getPort() + 1;
823     conn.updateCachedLocation(location.getRegionInfo(), anySource,
824         ServerName.valueOf("127.0.0.1", nextPort, 0), location.getSeqNum() - 1);
825     location = conn.getCachedLocation(TABLE_NAME2, ROW).getRegionLocation();
826     Assert.assertEquals(nextPort - 1, location.getPort());
827     table.close();
828   }
829 
830   /**
831    * Make sure that {@link Configuration} instances that are essentially the
832    * same map to the same {@link HConnection} instance.
833    */
834   @Test
835   public void testConnectionSameness() throws Exception {
836     Connection previousConnection = null;
837     for (int i = 0; i < 2; i++) {
838       // set random key to differentiate the connection from previous ones
839       Configuration configuration = TEST_UTIL.getConfiguration();
840       configuration.set("some_key", String.valueOf(_randy.nextInt()));
841       LOG.info("The hash code of the current configuration is: "
842           + configuration.hashCode());
843       Connection currentConnection = HConnectionManager
844           .getConnection(configuration);
845       if (previousConnection != null) {
846         assertTrue(
847             "Did not get the same connection even though its key didn't change",
848             previousConnection == currentConnection);
849       }
850       previousConnection = currentConnection;
851       // change the configuration, so that it is no longer reachable from the
852       // client's perspective. However, since its part of the LRU doubly linked
853       // list, it will eventually get thrown out, at which time it should also
854       // close the corresponding {@link HConnection}.
855       configuration.set("other_key", String.valueOf(_randy.nextInt()));
856     }
857   }
858 
859   /**
860    * Makes sure that there is no leaking of
861    * {@link ConnectionManager.HConnectionImplementation} in the {@link HConnectionManager}
862    * class.
863    * @deprecated Tests deprecated functionality.  Remove in 1.0.
864    */
865   @Deprecated
866   @Test
867   public void testConnectionUniqueness() throws Exception {
868     int zkmaxconnections = TEST_UTIL.getConfiguration().
869       getInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS,
870           HConstants.DEFAULT_ZOOKEPER_MAX_CLIENT_CNXNS);
871     // Test up to a max that is < the maximum number of zk connections.  If we
872     // go above zk connections, we just fall into cycle where we are failing
873     // to set up a session and test runs for a long time.
874     int maxConnections = Math.min(zkmaxconnections - 1, 20);
875     List<HConnection> connections = new ArrayList<HConnection>(maxConnections);
876     Connection previousConnection = null;
877     try {
878       for (int i = 0; i < maxConnections; i++) {
879         // set random key to differentiate the connection from previous ones
880         Configuration configuration = new Configuration(TEST_UTIL.getConfiguration());
881         configuration.set("some_key", String.valueOf(_randy.nextInt()));
882         configuration.set(HConstants.HBASE_CLIENT_INSTANCE_ID,
883             String.valueOf(_randy.nextInt()));
884         LOG.info("The hash code of the current configuration is: "
885             + configuration.hashCode());
886         HConnection currentConnection =
887           HConnectionManager.getConnection(configuration);
888         if (previousConnection != null) {
889           assertTrue("Got the same connection even though its key changed!",
890               previousConnection != currentConnection);
891         }
892         // change the configuration, so that it is no longer reachable from the
893         // client's perspective. However, since its part of the LRU doubly linked
894         // list, it will eventually get thrown out, at which time it should also
895         // close the corresponding {@link HConnection}.
896         configuration.set("other_key", String.valueOf(_randy.nextInt()));
897 
898         previousConnection = currentConnection;
899         LOG.info("The current HConnectionManager#HBASE_INSTANCES cache size is: "
900             + getHConnectionManagerCacheSize());
901         Thread.sleep(50);
902         connections.add(currentConnection);
903       }
904     } finally {
905       for (Connection c: connections) {
906         // Clean up connections made so we don't interfere w/ subsequent tests.
907         HConnectionManager.deleteConnection(c.getConfiguration());
908       }
909     }
910   }
911 
912   @Test
913   public void testClosing() throws Exception {
914     Configuration configuration =
915       new Configuration(TEST_UTIL.getConfiguration());
916     configuration.set(HConstants.HBASE_CLIENT_INSTANCE_ID,
917         String.valueOf(_randy.nextInt()));
918 
919     Connection c1 = ConnectionFactory.createConnection(configuration);
920     // We create two connections with the same key.
921     Connection c2 = ConnectionFactory.createConnection(configuration);
922 
923     Connection c3 = HConnectionManager.getConnection(configuration);
924     Connection c4 = HConnectionManager.getConnection(configuration);
925     assertTrue(c3 == c4);
926 
927     c1.close();
928     assertTrue(c1.isClosed());
929     assertFalse(c2.isClosed());
930     assertFalse(c3.isClosed());
931 
932     c3.close();
933     // still a reference left
934     assertFalse(c3.isClosed());
935     c3.close();
936     assertTrue(c3.isClosed());
937     // c3 was removed from the cache
938     Connection c5 = HConnectionManager.getConnection(configuration);
939     assertTrue(c5 != c3);
940 
941     assertFalse(c2.isClosed());
942     c2.close();
943     assertTrue(c2.isClosed());
944     c5.close();
945     assertTrue(c5.isClosed());
946   }
947 
948   /**
949    * Trivial test to verify that nobody messes with
950    * {@link HConnectionManager#createConnection(Configuration)}
951    */
952   @Test
953   public void testCreateConnection() throws Exception {
954     Configuration configuration = TEST_UTIL.getConfiguration();
955     Connection c1 = ConnectionFactory.createConnection(configuration);
956     Connection c2 = ConnectionFactory.createConnection(configuration);
957     // created from the same configuration, yet they are different
958     assertTrue(c1 != c2);
959     assertTrue(c1.getConfiguration() == c2.getConfiguration());
960     // make sure these were not cached
961     Connection c3 = HConnectionManager.getConnection(configuration);
962     assertTrue(c1 != c3);
963     assertTrue(c2 != c3);
964   }
965 
966 
967   /**
968    * This test checks that one can connect to the cluster with only the
969    *  ZooKeeper quorum set. Other stuff like master address will be read
970    *  from ZK by the client.
971    */
972   @Test(timeout = 60000)
973   public void testConnection() throws Exception{
974     // We create an empty config and add the ZK address.
975     Configuration c = new Configuration();
976     c.set(HConstants.ZOOKEEPER_QUORUM,
977       TEST_UTIL.getConfiguration().get(HConstants.ZOOKEEPER_QUORUM));
978     c.set(HConstants.ZOOKEEPER_CLIENT_PORT ,
979       TEST_UTIL.getConfiguration().get(HConstants.ZOOKEEPER_CLIENT_PORT));
980 
981     // This should be enough to connect
982     HConnection conn = HConnectionManager.getConnection(c);
983     assertTrue( conn.isMasterRunning() );
984     conn.close();
985   }
986 
987   private int setNumTries(HConnectionImplementation hci, int newVal) throws Exception {
988     Field numTries = hci.getClass().getDeclaredField("numTries");
989     numTries.setAccessible(true);
990     Field modifiersField = Field.class.getDeclaredField("modifiers");
991     modifiersField.setAccessible(true);
992     modifiersField.setInt(numTries, numTries.getModifiers() & ~Modifier.FINAL);
993     final int prevNumRetriesVal = (Integer)numTries.get(hci);
994     numTries.set(hci, newVal);
995 
996     return prevNumRetriesVal;
997   }
998 
999   @Test (timeout=30000)
1000   public void testMulti() throws Exception {
1001     HTable table = TEST_UTIL.createTable(TABLE_NAME3, FAM_NAM);
1002      try {
1003        TEST_UTIL.createMultiRegions(table, FAM_NAM);
1004        ConnectionManager.HConnectionImplementation conn =
1005            ( ConnectionManager.HConnectionImplementation)table.getConnection();
1006 
1007        // We're now going to move the region and check that it works for the client
1008        // First a new put to add the location in the cache
1009        conn.clearRegionCache(TABLE_NAME3);
1010        Assert.assertEquals(0, conn.getNumberOfCachedRegionLocations(TABLE_NAME3));
1011 
1012        TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, false);
1013        HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
1014 
1015        // We can wait for all regions to be online, that makes log reading easier when debugging
1016        while (master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
1017          Thread.sleep(1);
1018        }
1019 
1020        Put put = new Put(ROW_X);
1021        put.add(FAM_NAM, ROW_X, ROW_X);
1022        table.put(put);
1023 
1024        // Now moving the region to the second server
1025        HRegionLocation toMove = conn.getCachedLocation(TABLE_NAME3, ROW_X).getRegionLocation();
1026        byte[] regionName = toMove.getRegionInfo().getRegionName();
1027        byte[] encodedRegionNameBytes = toMove.getRegionInfo().getEncodedNameAsBytes();
1028 
1029        // Choose the other server.
1030        int curServerId = TEST_UTIL.getHBaseCluster().getServerWith(regionName);
1031        int destServerId = (curServerId == 0 ? 1 : 0);
1032 
1033        HRegionServer curServer = TEST_UTIL.getHBaseCluster().getRegionServer(curServerId);
1034        HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(destServerId);
1035 
1036        ServerName destServerName = destServer.getServerName();
1037 
1038        //find another row in the cur server that is less than ROW_X
1039        List<HRegion> regions = curServer.getOnlineRegions(TABLE_NAME3);
1040        byte[] otherRow = null;
1041        for (HRegion region : regions) {
1042          if (!region.getRegionInfo().getEncodedName().equals(toMove.getRegionInfo().getEncodedName())
1043              && Bytes.BYTES_COMPARATOR.compare(region.getRegionInfo().getStartKey(), ROW_X) < 0) {
1044            otherRow = region.getRegionInfo().getStartKey();
1045            break;
1046          }
1047        }
1048        assertNotNull(otherRow);
1049        // If empty row, set it to first row.-f
1050        if (otherRow.length <= 0) otherRow = Bytes.toBytes("aaa");
1051        Put put2 = new Put(otherRow);
1052        put2.add(FAM_NAM, otherRow, otherRow);
1053        table.put(put2); //cache put2's location
1054 
1055        // Check that we are in the expected state
1056        Assert.assertTrue(curServer != destServer);
1057        Assert.assertNotEquals(curServer.getServerName(), destServer.getServerName());
1058        Assert.assertNotEquals(toMove.getPort(), destServerName.getPort());
1059        Assert.assertNotNull(curServer.getOnlineRegion(regionName));
1060        Assert.assertNull(destServer.getOnlineRegion(regionName));
1061        Assert.assertFalse(TEST_UTIL.getMiniHBaseCluster().getMaster().
1062            getAssignmentManager().getRegionStates().isRegionsInTransition());
1063 
1064        // Moving. It's possible that we don't have all the regions online at this point, so
1065        //  the test must depends only on the region we're looking at.
1066        LOG.info("Move starting region="+toMove.getRegionInfo().getRegionNameAsString());
1067        TEST_UTIL.getHBaseAdmin().move(
1068            toMove.getRegionInfo().getEncodedNameAsBytes(),
1069            destServerName.getServerName().getBytes()
1070            );
1071 
1072        while (destServer.getOnlineRegion(regionName) == null ||
1073            destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
1074            curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
1075            master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
1076          // wait for the move to be finished
1077          Thread.sleep(1);
1078         }
1079 
1080        LOG.info("Move finished for region="+toMove.getRegionInfo().getRegionNameAsString());
1081 
1082        // Check our new state.
1083        Assert.assertNull(curServer.getOnlineRegion(regionName));
1084        Assert.assertNotNull(destServer.getOnlineRegion(regionName));
1085        Assert.assertFalse(destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
1086        Assert.assertFalse(curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
1087 
1088 
1089        // Cache was NOT updated and points to the wrong server
1090        Assert.assertFalse(
1091            conn.getCachedLocation(TABLE_NAME3, ROW_X).getRegionLocation()
1092             .getPort() == destServerName.getPort());
1093 
1094        // Hijack the number of retry to fail after 2 tries
1095        final int prevNumRetriesVal = setNumTries(conn, 2);
1096 
1097        Put put3 = new Put(ROW_X);
1098        put3.add(FAM_NAM, ROW_X, ROW_X);
1099        Put put4 = new Put(otherRow);
1100        put4.add(FAM_NAM, otherRow, otherRow);
1101 
1102        // do multi
1103        table.batch(Lists.newArrayList(put4, put3)); // first should be a valid row,
1104        // second we get RegionMovedException.
1105 
1106        setNumTries(conn, prevNumRetriesVal);
1107      } finally {
1108        table.close();
1109      }
1110   }
1111 
1112   @Ignore ("Test presumes RETRY_BACKOFF will never change; it has") @Test
1113   public void testErrorBackoffTimeCalculation() throws Exception {
1114     // TODO: This test would seem to presume hardcoded RETRY_BACKOFF which it should not.
1115     final long ANY_PAUSE = 100;
1116     ServerName location = ServerName.valueOf("127.0.0.1", 1, 0);
1117     ServerName diffLocation = ServerName.valueOf("127.0.0.1", 2, 0);
1118 
1119     ManualEnvironmentEdge timeMachine = new ManualEnvironmentEdge();
1120     EnvironmentEdgeManager.injectEdge(timeMachine);
1121     try {
1122       long timeBase = timeMachine.currentTime();
1123       long largeAmountOfTime = ANY_PAUSE * 1000;
1124       ConnectionManager.ServerErrorTracker tracker =
1125           new ConnectionManager.ServerErrorTracker(largeAmountOfTime, 100);
1126 
1127       // The default backoff is 0.
1128       assertEquals(0, tracker.calculateBackoffTime(location, ANY_PAUSE));
1129 
1130       // Check some backoff values from HConstants sequence.
1131       tracker.reportServerError(location);
1132       assertEqualsWithJitter(ANY_PAUSE, tracker.calculateBackoffTime(location, ANY_PAUSE));
1133       tracker.reportServerError(location);
1134       tracker.reportServerError(location);
1135       tracker.reportServerError(location);
1136       assertEqualsWithJitter(ANY_PAUSE * 5, tracker.calculateBackoffTime(location, ANY_PAUSE));
1137 
1138       // All of this shouldn't affect backoff for different location.
1139       assertEquals(0, tracker.calculateBackoffTime(diffLocation, ANY_PAUSE));
1140       tracker.reportServerError(diffLocation);
1141       assertEqualsWithJitter(ANY_PAUSE, tracker.calculateBackoffTime(diffLocation, ANY_PAUSE));
1142 
1143       // Check with different base.
1144       assertEqualsWithJitter(ANY_PAUSE * 10,
1145           tracker.calculateBackoffTime(location, ANY_PAUSE * 2));
1146 
1147       // See that time from last error is taken into account. Time shift is applied after jitter,
1148       // so pass the original expected backoff as the base for jitter.
1149       long timeShift = (long)(ANY_PAUSE * 0.5);
1150       timeMachine.setValue(timeBase + timeShift);
1151       assertEqualsWithJitter((ANY_PAUSE * 5) - timeShift,
1152         tracker.calculateBackoffTime(location, ANY_PAUSE), ANY_PAUSE * 2);
1153 
1154       // However we should not go into negative.
1155       timeMachine.setValue(timeBase + ANY_PAUSE * 100);
1156       assertEquals(0, tracker.calculateBackoffTime(location, ANY_PAUSE));
1157 
1158       // We also should not go over the boundary; last retry would be on it.
1159       long timeLeft = (long)(ANY_PAUSE * 0.5);
1160       timeMachine.setValue(timeBase + largeAmountOfTime - timeLeft);
1161       assertTrue(tracker.canRetryMore(1));
1162       tracker.reportServerError(location);
1163       assertEquals(timeLeft, tracker.calculateBackoffTime(location, ANY_PAUSE));
1164       timeMachine.setValue(timeBase + largeAmountOfTime);
1165       assertFalse(tracker.canRetryMore(1));
1166     } finally {
1167       EnvironmentEdgeManager.reset();
1168     }
1169   }
1170 
1171   private static void assertEqualsWithJitter(long expected, long actual) {
1172     assertEqualsWithJitter(expected, actual, expected);
1173   }
1174 
1175   private static void assertEqualsWithJitter(long expected, long actual, long jitterBase) {
1176     assertTrue("Value not within jitter: " + expected + " vs " + actual,
1177         Math.abs(actual - expected) <= (0.01f * jitterBase));
1178   }
1179 
1180   /**
1181    * Tests that a destroyed connection does not have a live zookeeper.
1182    * Below is timing based.  We put up a connection to a table and then close the connection while
1183    * having a background thread running that is forcing close of the connection to try and
1184    * provoke a close catastrophe; we are hoping for a car crash so we can see if we are leaking
1185    * zk connections.
1186    * @throws Exception
1187    */
1188   @Ignore ("Flakey test: See HBASE-8996")@Test
1189   public void testDeleteForZKConnLeak() throws Exception {
1190     TEST_UTIL.createTable(TABLE_NAME4, FAM_NAM);
1191     final Configuration config = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
1192     config.setInt("zookeeper.recovery.retry", 1);
1193     config.setInt("zookeeper.recovery.retry.intervalmill", 1000);
1194     config.setInt("hbase.rpc.timeout", 2000);
1195     config.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
1196 
1197     ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 10,
1198       5, TimeUnit.SECONDS,
1199       new SynchronousQueue<Runnable>(),
1200       Threads.newDaemonThreadFactory("test-hcm-delete"));
1201 
1202     pool.submit(new Runnable() {
1203       @Override
1204       public void run() {
1205         while (!Thread.interrupted()) {
1206           try {
1207             HConnection conn = HConnectionManager.getConnection(config);
1208             LOG.info("Connection " + conn);
1209             HConnectionManager.deleteStaleConnection(conn);
1210             LOG.info("Connection closed " + conn);
1211             // TODO: This sleep time should be less than the time that it takes to open and close
1212             // a table.  Ideally we would do a few runs first to measure.  For now this is
1213             // timing based; hopefully we hit the bad condition.
1214             Threads.sleep(10);
1215           } catch (Exception e) {
1216           }
1217         }
1218       }
1219     });
1220 
1221     // Use connection multiple times.
1222     for (int i = 0; i < 30; i++) {
1223       Connection c1 = null;
1224       try {
1225         c1 = ConnectionManager.getConnectionInternal(config);
1226         LOG.info("HTable connection " + i + " " + c1);
1227         Table table = new HTable(config, TABLE_NAME4, pool);
1228         table.close();
1229         LOG.info("HTable connection " + i + " closed " + c1);
1230       } catch (Exception e) {
1231         LOG.info("We actually want this to happen!!!!  So we can see if we are leaking zk", e);
1232       } finally {
1233         if (c1 != null) {
1234           if (c1.isClosed()) {
1235             // cannot use getZooKeeper as method instantiates watcher if null
1236             Field zkwField = c1.getClass().getDeclaredField("keepAliveZookeeper");
1237             zkwField.setAccessible(true);
1238             Object watcher = zkwField.get(c1);
1239 
1240             if (watcher != null) {
1241               if (((ZooKeeperWatcher)watcher).getRecoverableZooKeeper().getState().isAlive()) {
1242                 // non-synchronized access to watcher; sleep and check again in case zk connection
1243                 // hasn't been cleaned up yet.
1244                 Thread.sleep(1000);
1245                 if (((ZooKeeperWatcher) watcher).getRecoverableZooKeeper().getState().isAlive()) {
1246                   pool.shutdownNow();
1247                   fail("Live zookeeper in closed connection");
1248                 }
1249               }
1250             }
1251           }
1252           c1.close();
1253         }
1254       }
1255     }
1256     pool.shutdownNow();
1257   }
1258 
1259   @Test(timeout = 60000)
1260   public void testConnectionRideOverClusterRestart() throws IOException, InterruptedException {
1261     Configuration config = new Configuration(TEST_UTIL.getConfiguration());
1262 
1263     TableName tableName = TableName.valueOf("testConnectionRideOverClusterRestart");
1264     TEST_UTIL.createTable(tableName.getName(), new byte[][] {FAM_NAM}, config).close();
1265 
1266     Connection connection = ConnectionFactory.createConnection(config);
1267     Table table = connection.getTable(tableName);
1268 
1269     // this will cache the meta location and table's region location
1270     table.get(new Get(Bytes.toBytes("foo")));
1271 
1272     // restart HBase
1273     TEST_UTIL.shutdownMiniHBaseCluster();
1274     TEST_UTIL.restartHBaseCluster(2);
1275     // this should be able to discover new locations for meta and table's region
1276     table.get(new Get(Bytes.toBytes("foo")));
1277     TEST_UTIL.deleteTable(tableName);
1278     table.close();
1279     connection.close();
1280   }
1281 }
1282