View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.client;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNull;
25  import static org.junit.Assert.assertTrue;
26  import static org.junit.Assert.fail;
27  
28  import java.io.IOException;
29  import java.lang.reflect.Field;
30  import java.lang.reflect.Modifier;
31  import java.net.SocketTimeoutException;
32  import java.util.ArrayList;
33  import java.util.HashMap;
34  import java.util.List;
35  import java.util.Map;
36  import java.util.Random;
37  import java.util.concurrent.ExecutorService;
38  import java.util.concurrent.SynchronousQueue;
39  import java.util.concurrent.ThreadPoolExecutor;
40  import java.util.concurrent.TimeUnit;
41  import java.util.concurrent.atomic.AtomicBoolean;
42  import java.util.concurrent.atomic.AtomicInteger;
43  import java.util.concurrent.atomic.AtomicLong;
44  import java.util.concurrent.atomic.AtomicReference;
45  
46  import org.apache.commons.logging.Log;
47  import org.apache.commons.logging.LogFactory;
48  import org.apache.hadoop.conf.Configuration;
49  import org.apache.hadoop.hbase.Cell;
50  import org.apache.hadoop.hbase.HBaseConfiguration;
51  import org.apache.hadoop.hbase.HBaseTestingUtility;
52  import org.apache.hadoop.hbase.HConstants;
53  import org.apache.hadoop.hbase.HRegionLocation;
54  import org.apache.hadoop.hbase.HTableDescriptor;
55  import org.apache.hadoop.hbase.testclassification.MediumTests;
56  import org.apache.hadoop.hbase.RegionLocations;
57  import org.apache.hadoop.hbase.ServerName;
58  import org.apache.hadoop.hbase.TableName;
59  import org.apache.hadoop.hbase.Waiter;
60  import org.apache.hadoop.hbase.client.ConnectionManager.HConnectionImplementation;
61  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
62  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
63  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
64  import org.apache.hadoop.hbase.exceptions.DeserializationException;
65  import org.apache.hadoop.hbase.exceptions.RegionMovedException;
66  import org.apache.hadoop.hbase.filter.Filter;
67  import org.apache.hadoop.hbase.filter.FilterBase;
68  import org.apache.hadoop.hbase.ipc.RpcClient;
69  import org.apache.hadoop.hbase.master.HMaster;
70  import org.apache.hadoop.hbase.regionserver.HRegion;
71  import org.apache.hadoop.hbase.regionserver.HRegionServer;
72  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
73  import org.apache.hadoop.hbase.util.Bytes;
74  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
75  import org.apache.hadoop.hbase.util.JVMClusterUtil;
76  import org.apache.hadoop.hbase.util.ManualEnvironmentEdge;
77  import org.apache.hadoop.hbase.util.Threads;
78  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
79  import org.jboss.netty.util.internal.DetectionUtil;
80  import org.junit.AfterClass;
81  import org.junit.Assert;
82  import org.junit.BeforeClass;
83  import org.junit.Ignore;
84  import org.junit.Test;
85  import org.junit.experimental.categories.Category;
86  
87  import com.google.common.collect.Lists;
88  
89  /**
90   * This class is for testing HBaseConnectionManager features
91   */
92  @Category(MediumTests.class)
93  public class TestHCM {
94    private static final Log LOG = LogFactory.getLog(TestHCM.class);
95    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
96    private static final TableName TABLE_NAME =
97        TableName.valueOf("test");
98    private static final TableName TABLE_NAME1 =
99        TableName.valueOf("test1");
100   private static final TableName TABLE_NAME2 =
101       TableName.valueOf("test2");
102   private static final TableName TABLE_NAME3 =
103       TableName.valueOf("test3");
104   private static final TableName TABLE_NAME4 =
105       TableName.valueOf("test4");
106   private static final byte[] FAM_NAM = Bytes.toBytes("f");
107   private static final byte[] ROW = Bytes.toBytes("bbb");
108   private static final byte[] ROW_X = Bytes.toBytes("xxx");
109   private static Random _randy = new Random();
110 
111   private static boolean isJavaOk = DetectionUtil.javaVersion() > 6;
112 
113 /**
114 * This copro sleeps 20 second. The first call it fails. The second time, it works.
115 */
116   public static class SleepAndFailFirstTime extends BaseRegionObserver {
117     static final AtomicLong ct = new AtomicLong(0);
118 
119     public SleepAndFailFirstTime() {
120     }
121 
122     @Override
123     public void preGetOp(final ObserverContext<RegionCoprocessorEnvironment> e,
124               final Get get, final List<Cell> results) throws IOException {
125       Threads.sleep(20000);
126       if (ct.incrementAndGet() == 1){
127         throw new IOException("first call I fail");
128       }
129     }
130   }
131 
132   @BeforeClass
133   public static void setUpBeforeClass() throws Exception {
134     TEST_UTIL.getConfiguration().setBoolean(HConstants.STATUS_PUBLISHED,
135       HConstants.STATUS_PUBLISHED_DEFAULT);
136     if (isJavaOk) {
137       TEST_UTIL.getConfiguration().setBoolean(HConstants.STATUS_PUBLISHED, true);
138     }
139     TEST_UTIL.startMiniCluster(2);
140   }
141 
142   @AfterClass public static void tearDownAfterClass() throws Exception {
143     TEST_UTIL.shutdownMiniCluster();
144   }
145 
146 
147   private static int getHConnectionManagerCacheSize(){
148     return HConnectionTestingUtility.getConnectionCount();
149   }
150 
151   @Test
152   public void testClusterConnection() throws IOException {
153     ThreadPoolExecutor otherPool = new ThreadPoolExecutor(1, 1,
154         5, TimeUnit.SECONDS,
155         new SynchronousQueue<Runnable>(),
156         Threads.newDaemonThreadFactory("test-hcm"));
157 
158     HConnection con1 = HConnectionManager.createConnection(TEST_UTIL.getConfiguration());
159     HConnection con2 = HConnectionManager.createConnection(TEST_UTIL.getConfiguration(), otherPool);
160     // make sure the internally created ExecutorService is the one passed
161     assertTrue(otherPool == ((HConnectionImplementation)con2).getCurrentBatchPool());
162 
163     String tableName = "testClusterConnection";
164     TEST_UTIL.createTable(tableName.getBytes(), FAM_NAM).close();
165     HTable t = (HTable)con1.getTable(tableName, otherPool);
166     // make sure passing a pool to the getTable does not trigger creation of an internal pool
167     assertNull("Internal Thread pool should be null", ((HConnectionImplementation)con1).getCurrentBatchPool());
168     // table should use the pool passed
169     assertTrue(otherPool == t.getPool());
170     t.close();
171 
172     t = (HTable)con2.getTable(tableName);
173     // table should use the connectin's internal pool
174     assertTrue(otherPool == t.getPool());
175     t.close();
176 
177     t = (HTable)con2.getTable(Bytes.toBytes(tableName));
178     // try other API too
179     assertTrue(otherPool == t.getPool());
180     t.close();
181 
182     t = (HTable)con2.getTable(TableName.valueOf(tableName));
183     // try other API too
184     assertTrue(otherPool == t.getPool());
185     t.close();
186 
187     t = (HTable)con1.getTable(tableName);
188     ExecutorService pool = ((HConnectionImplementation)con1).getCurrentBatchPool();
189     // make sure an internal pool was created
190     assertNotNull("An internal Thread pool should have been created", pool);
191     // and that the table is using it
192     assertTrue(t.getPool() == pool);
193     t.close();
194 
195     t = (HTable)con1.getTable(tableName);
196     // still using the *same* internal pool
197     assertTrue(t.getPool() == pool);
198     t.close();
199 
200     con1.close();
201     // if the pool was created on demand it should be closed upon connection close
202     assertTrue(pool.isShutdown());
203 
204     con2.close();
205     // if the pool is passed, it is not closed
206     assertFalse(otherPool.isShutdown());
207     otherPool.shutdownNow();
208   }
209 
210   /**
211    * Naive test to check that HConnection#getAdmin returns a properly constructed HBaseAdmin object
212    * @throws IOException Unable to construct admin
213    */
214   @Test
215   public void testAdminFactory() throws IOException {
216     Connection con1 = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration());
217     Admin admin = con1.getAdmin();
218     assertTrue(admin.getConnection() == con1);
219     assertTrue(admin.getConfiguration() == TEST_UTIL.getConfiguration());
220     con1.close();
221   }
222 
223   // Fails too often!  Needs work.  HBASE-12558
224   @Ignore @Test(expected = RegionServerStoppedException.class)
225   public void testClusterStatus() throws Exception {
226     if (!isJavaOk){
227       // This test requires jdk 1.7+
228       throw new RegionServerStoppedException("as expected by the test...");
229     }
230 
231     TableName tn =
232         TableName.valueOf("testClusterStatus");
233     byte[] cf = "cf".getBytes();
234     byte[] rk = "rk1".getBytes();
235 
236     JVMClusterUtil.RegionServerThread rs = TEST_UTIL.getHBaseCluster().startRegionServer();
237     rs.waitForServerOnline();
238     final ServerName sn = rs.getRegionServer().getServerName();
239 
240     HTable t = TEST_UTIL.createTable(tn, cf);
241     TEST_UTIL.waitTableAvailable(tn);
242 
243     while(TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
244         getRegionStates().isRegionsInTransition()){
245       Thread.sleep(1);
246     }
247     final HConnectionImplementation hci =  (HConnectionImplementation)t.getConnection();
248     while (t.getRegionLocation(rk).getPort() != sn.getPort()){
249       TEST_UTIL.getHBaseAdmin().move(t.getRegionLocation(rk).getRegionInfo().
250           getEncodedNameAsBytes(), Bytes.toBytes(sn.toString()));
251       while(TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
252           getRegionStates().isRegionsInTransition()){
253         Thread.sleep(1);
254       }
255       hci.clearRegionCache(tn);
256     }
257     Assert.assertNotNull(hci.clusterStatusListener);
258     TEST_UTIL.assertRegionOnServer(t.getRegionLocation(rk).getRegionInfo(), sn, 20000);
259 
260     Put p1 = new Put(rk);
261     p1.add(cf, "qual".getBytes(), "val".getBytes());
262     t.put(p1);
263 
264     rs.getRegionServer().abort("I'm dead");
265 
266     // We want the status to be updated. That's a least 10 second
267     TEST_UTIL.waitFor(40000, 1000, true, new Waiter.Predicate<Exception>() {
268       @Override
269       public boolean evaluate() throws Exception {
270         return TEST_UTIL.getHBaseCluster().getMaster().getServerManager().
271             getDeadServers().isDeadServer(sn);
272       }
273     });
274 
275     TEST_UTIL.waitFor(40000, 1000, true, new Waiter.Predicate<Exception>() {
276       @Override
277       public boolean evaluate() throws Exception {
278         return hci.clusterStatusListener.isDeadServer(sn);
279       }
280     });
281 
282     t.close();
283     hci.getClient(sn);  // will throw an exception: RegionServerStoppedException
284   }
285 
286   /**
287    * Test that we can handle connection close: it will trigger a retry, but the calls will
288    *  finish.
289    */
290   @Test
291   public void testConnectionCloseAllowsInterrupt() throws Exception {
292     testConnectionClose(true);
293   }
294 
295   @Test
296   public void testConnectionNotAllowsInterrupt() throws Exception {
297     testConnectionClose(false);
298   }
299 
300   /**
301    * Test that an operation can fail if we read the global operation timeout, even if the
302    * individual timeout is fine. We do that with:
303    * - client side: an operation timeout of 30 seconds
304    * - server side: we sleep 20 second at each attempt. The first work fails, the second one
305    * succeeds. But the client won't wait that much, because 20 + 20 > 30, so the client
306    * timeouted when the server answers.
307    */
308   @Test
309   public void testOperationTimeout() throws Exception {
310     HTableDescriptor hdt = TEST_UTIL.createTableDescriptor("HCM-testOperationTimeout");
311     hdt.addCoprocessor(SleepAndFailFirstTime.class.getName());
312     HTable table = TEST_UTIL.createTable(hdt, new byte[][]{FAM_NAM}, TEST_UTIL.getConfiguration());
313 
314     // Check that it works if the timeout is big enough
315     table.setOperationTimeout(120 * 1000);
316     table.get(new Get(FAM_NAM));
317 
318     // Resetting and retrying. Will fail this time, not enough time for the second try
319     SleepAndFailFirstTime.ct.set(0);
320     try {
321       table.setOperationTimeout(30 * 1000);
322       table.get(new Get(FAM_NAM));
323       Assert.fail("We expect an exception here");
324     } catch (SocketTimeoutException e) {
325       // The client has a CallTimeout class, but it's not shared.We're not very clean today,
326       //  in the general case you can expect the call to stop, but the exception may vary.
327       // In this test however, we're sure that it will be a socket timeout.
328       LOG.info("We received an exception, as expected ", e);
329     } catch (IOException e) {
330       Assert.fail("Wrong exception:" + e.getMessage());
331     } finally {
332       table.close();
333     }
334   }
335 
336 
337   private void testConnectionClose(boolean allowsInterrupt) throws Exception {
338     TableName tableName = TableName.valueOf("HCM-testConnectionClose" + allowsInterrupt);
339     TEST_UTIL.createTable(tableName, FAM_NAM).close();
340 
341     boolean previousBalance = TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
342 
343     Configuration c2 = new Configuration(TEST_UTIL.getConfiguration());
344     // We want to work on a separate connection.
345     c2.set(HConstants.HBASE_CLIENT_INSTANCE_ID, String.valueOf(-1));
346     c2.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 100); // retry a lot
347     c2.setInt(HConstants.HBASE_CLIENT_PAUSE, 0); // don't wait between retries.
348     c2.setInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, 0); // Server do not really expire
349     c2.setBoolean(RpcClient.SPECIFIC_WRITE_THREAD, allowsInterrupt);
350 
351     final HTable table = new HTable(c2, tableName);
352 
353     Put put = new Put(ROW);
354     put.add(FAM_NAM, ROW, ROW);
355     table.put(put);
356 
357     // 4 steps: ready=0; doGets=1; mustStop=2; stopped=3
358     final AtomicInteger step = new AtomicInteger(0);
359 
360     final AtomicReference<Throwable> failed = new AtomicReference<Throwable>(null);
361     Thread t = new Thread("testConnectionCloseThread") {
362       @Override
363       public void run() {
364         int done = 0;
365         try {
366           step.set(1);
367           while (step.get() == 1) {
368             Get get = new Get(ROW);
369             table.get(get);
370             done++;
371             if (done % 100 == 0)
372               LOG.info("done=" + done);
373           }
374         } catch (Throwable t) {
375           failed.set(t);
376           LOG.error(t);
377         }
378         step.set(3);
379       }
380     };
381     t.start();
382     TEST_UTIL.waitFor(20000, new Waiter.Predicate<Exception>() {
383       @Override
384       public boolean evaluate() throws Exception {
385         return step.get() == 1;
386       }
387     });
388 
389     ServerName sn = table.getRegionLocation(ROW).getServerName();
390     ConnectionManager.HConnectionImplementation conn =
391         (ConnectionManager.HConnectionImplementation) table.getConnection();
392     RpcClient rpcClient = conn.getRpcClient();
393 
394     LOG.info("Going to cancel connections. connection=" + conn.toString() + ", sn=" + sn);
395     for (int i = 0; i < 5000; i++) {
396       rpcClient.cancelConnections(sn);
397       Thread.sleep(5);
398     }
399 
400     step.compareAndSet(1, 2);
401     // The test may fail here if the thread doing the gets is stuck. The way to find
402     //  out what's happening is to look for the thread named 'testConnectionCloseThread'
403     TEST_UTIL.waitFor(40000, new Waiter.Predicate<Exception>() {
404       @Override
405       public boolean evaluate() throws Exception {
406         return step.get() == 3;
407       }
408     });
409 
410     table.close();
411     Assert.assertTrue("Unexpected exception is " + failed.get(), failed.get() == null);
412     TEST_UTIL.getHBaseAdmin().setBalancerRunning(previousBalance, true);
413   }
414 
415   /**
416    * Test that connection can become idle without breaking everything.
417    */
418   @Test
419   public void testConnectionIdle() throws Exception {
420     TableName tableName = TableName.valueOf("HCM-testConnectionIdle");
421     TEST_UTIL.createTable(tableName, FAM_NAM).close();
422     int idleTime =  20000;
423     boolean previousBalance = TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
424 
425     Configuration c2 = new Configuration(TEST_UTIL.getConfiguration());
426     // We want to work on a separate connection.
427     c2.set(HConstants.HBASE_CLIENT_INSTANCE_ID, String.valueOf(-1));
428     c2.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); // Don't retry: retry = test failed
429     c2.setInt(RpcClient.IDLE_TIME, idleTime);
430 
431     final Table table = new HTable(c2, tableName);
432 
433     Put put = new Put(ROW);
434     put.add(FAM_NAM, ROW, ROW);
435     table.put(put);
436 
437     ManualEnvironmentEdge mee = new ManualEnvironmentEdge();
438     mee.setValue(System.currentTimeMillis());
439     EnvironmentEdgeManager.injectEdge(mee);
440     LOG.info("first get");
441     table.get(new Get(ROW));
442 
443     LOG.info("first get - changing the time & sleeping");
444     mee.incValue(idleTime + 1000);
445     Thread.sleep(1500); // we need to wait a little for the connection to be seen as idle.
446                         // 1500 = sleep time in RpcClient#waitForWork + a margin
447 
448     LOG.info("second get - connection has been marked idle in the middle");
449     // To check that the connection actually became idle would need to read some private
450     //  fields of RpcClient.
451     table.get(new Get(ROW));
452     mee.incValue(idleTime + 1000);
453 
454     LOG.info("third get - connection is idle, but the reader doesn't know yet");
455     // We're testing here a special case:
456     //  time limit reached BUT connection not yet reclaimed AND a new call.
457     //  in this situation, we don't close the connection, instead we use it immediately.
458     // If we're very unlucky we can have a race condition in the test: the connection is already
459     //  under closing when we do the get, so we have an exception, and we don't retry as the
460     //  retry number is 1. The probability is very very low, and seems acceptable for now. It's
461     //  a test issue only.
462     table.get(new Get(ROW));
463 
464     LOG.info("we're done - time will change back");
465 
466     table.close();
467     EnvironmentEdgeManager.reset();
468     TEST_UTIL.getHBaseAdmin().setBalancerRunning(previousBalance, true);
469   }
470 
471     /**
472      * Test that the connection to the dead server is cut immediately when we receive the
473      *  notification.
474      * @throws Exception
475      */
476   @Test
477   public void testConnectionCut() throws Exception {
478     if (!isJavaOk){
479       // This test requires jdk 1.7+
480       return;
481     }
482 
483     TableName tableName = TableName.valueOf("HCM-testConnectionCut");
484 
485     TEST_UTIL.createTable(tableName, FAM_NAM).close();
486     boolean previousBalance = TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
487 
488     Configuration c2 = new Configuration(TEST_UTIL.getConfiguration());
489     // We want to work on a separate connection.
490     c2.set(HConstants.HBASE_CLIENT_INSTANCE_ID, String.valueOf(-1));
491     c2.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
492     c2.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 30 * 1000);
493 
494     HTable table = new HTable(c2, tableName);
495 
496     Put p = new Put(FAM_NAM);
497     p.add(FAM_NAM, FAM_NAM, FAM_NAM);
498     table.put(p);
499 
500     final HConnectionImplementation hci =  (HConnectionImplementation)table.getConnection();
501     final HRegionLocation loc = table.getRegionLocation(FAM_NAM);
502 
503     Get get = new Get(FAM_NAM);
504     Assert.assertNotNull(table.get(get));
505 
506     get = new Get(FAM_NAM);
507     get.setFilter(new BlockingFilter());
508 
509     // This thread will mark the server as dead while we're waiting during a get.
510     Thread t = new Thread() {
511       @Override
512       public void run() {
513         synchronized (syncBlockingFilter) {
514           try {
515             syncBlockingFilter.wait();
516           } catch (InterruptedException e) {
517             throw new RuntimeException(e);
518           }
519         }
520         hci.clusterStatusListener.deadServerHandler.newDead(loc.getServerName());
521       }
522     };
523 
524     t.start();
525     try {
526       table.get(get);
527       Assert.fail();
528     } catch (IOException expected) {
529       LOG.debug("Received: " + expected);
530       Assert.assertFalse(expected instanceof SocketTimeoutException);
531       Assert.assertFalse(syncBlockingFilter.get());
532     } finally {
533       syncBlockingFilter.set(true);
534       t.join();
535       HConnectionManager.getConnection(c2).close();
536       TEST_UTIL.getHBaseAdmin().setBalancerRunning(previousBalance, true);
537     }
538 
539     table.close();
540   }
541 
542   protected static final AtomicBoolean syncBlockingFilter = new AtomicBoolean(false);
543 
544   public static class BlockingFilter extends FilterBase {
545     @Override
546     public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException {
547       int i = 0;
548       while (i++ < 1000 && !syncBlockingFilter.get()) {
549         synchronized (syncBlockingFilter) {
550           syncBlockingFilter.notifyAll();
551         }
552         Threads.sleep(100);
553       }
554       syncBlockingFilter.set(true);
555       return false;
556     }
557     @Override
558     public ReturnCode filterKeyValue(Cell ignored) throws IOException {
559       return ReturnCode.INCLUDE;
560     }
561 
562     public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException{
563       return new BlockingFilter();
564     }
565   }
566 
567   @Test
568   public void abortingHConnectionRemovesItselfFromHCM() throws Exception {
569     // Save off current HConnections
570     Map<HConnectionKey, HConnectionImplementation> oldHBaseInstances =
571         new HashMap<HConnectionKey, HConnectionImplementation>();
572     oldHBaseInstances.putAll(ConnectionManager.CONNECTION_INSTANCES);
573 
574     ConnectionManager.CONNECTION_INSTANCES.clear();
575 
576     try {
577       HConnection connection = HConnectionManager.getConnection(TEST_UTIL.getConfiguration());
578       connection.abort("test abortingHConnectionRemovesItselfFromHCM", new Exception(
579           "test abortingHConnectionRemovesItselfFromHCM"));
580       Assert.assertNotSame(connection,
581         HConnectionManager.getConnection(TEST_UTIL.getConfiguration()));
582     } finally {
583       // Put original HConnections back
584       ConnectionManager.CONNECTION_INSTANCES.clear();
585       ConnectionManager.CONNECTION_INSTANCES.putAll(oldHBaseInstances);
586     }
587   }
588 
589   /**
590    * Test that when we delete a location using the first row of a region
591    * that we really delete it.
592    * @throws Exception
593    */
594   @Test
595   public void testRegionCaching() throws Exception{
596     TEST_UTIL.createTable(TABLE_NAME, FAM_NAM).close();
597     Configuration conf =  new Configuration(TEST_UTIL.getConfiguration());
598     conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
599     HTable table = new HTable(conf, TABLE_NAME);
600 
601     TEST_UTIL.createMultiRegions(table, FAM_NAM);
602     TEST_UTIL.waitUntilAllRegionsAssigned(table.getName());
603     Put put = new Put(ROW);
604     put.add(FAM_NAM, ROW, ROW);
605     table.put(put);
606     ConnectionManager.HConnectionImplementation conn =
607       (ConnectionManager.HConnectionImplementation)table.getConnection();
608 
609     assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
610 
611     final int nextPort = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation().getPort() + 1;
612     HRegionLocation loc = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation();
613     conn.updateCachedLocation(loc.getRegionInfo(), loc.getServerName(),
614         ServerName.valueOf("127.0.0.1", nextPort,
615         HConstants.LATEST_TIMESTAMP), HConstants.LATEST_TIMESTAMP);
616     Assert.assertEquals(conn.getCachedLocation(TABLE_NAME, ROW)
617       .getRegionLocation().getPort(), nextPort);
618 
619     conn.clearRegionCache(TABLE_NAME, ROW.clone());
620     RegionLocations rl = conn.getCachedLocation(TABLE_NAME, ROW);
621     assertNull("What is this location?? " + rl, rl);
622 
623     // We're now going to move the region and check that it works for the client
624     // First a new put to add the location in the cache
625     conn.clearRegionCache(TABLE_NAME);
626     Assert.assertEquals(0, conn.getNumberOfCachedRegionLocations(TABLE_NAME));
627     Put put2 = new Put(ROW);
628     put2.add(FAM_NAM, ROW, ROW);
629     table.put(put2);
630     assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
631     assertNotNull(conn.getCachedLocation(TableName.valueOf(TABLE_NAME.getName()), ROW.clone()));
632 
633     TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, false);
634     HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
635 
636     // We can wait for all regions to be online, that makes log reading easier when debugging
637     while (master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
638       Thread.sleep(1);
639     }
640 
641     // Now moving the region to the second server
642     HRegionLocation toMove = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation();
643     byte[] regionName = toMove.getRegionInfo().getRegionName();
644     byte[] encodedRegionNameBytes = toMove.getRegionInfo().getEncodedNameAsBytes();
645 
646     // Choose the other server.
647     int curServerId = TEST_UTIL.getHBaseCluster().getServerWith(regionName);
648     int destServerId = (curServerId == 0 ? 1 : 0);
649 
650     HRegionServer curServer = TEST_UTIL.getHBaseCluster().getRegionServer(curServerId);
651     HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(destServerId);
652 
653     ServerName destServerName = destServer.getServerName();
654 
655     // Check that we are in the expected state
656     Assert.assertTrue(curServer != destServer);
657     Assert.assertFalse(curServer.getServerName().equals(destServer.getServerName()));
658     Assert.assertFalse( toMove.getPort() == destServerName.getPort());
659     Assert.assertNotNull(curServer.getOnlineRegion(regionName));
660     Assert.assertNull(destServer.getOnlineRegion(regionName));
661     Assert.assertFalse(TEST_UTIL.getMiniHBaseCluster().getMaster().
662         getAssignmentManager().getRegionStates().isRegionsInTransition());
663 
664     // Moving. It's possible that we don't have all the regions online at this point, so
665     //  the test must depends only on the region we're looking at.
666     LOG.info("Move starting region="+toMove.getRegionInfo().getRegionNameAsString());
667     TEST_UTIL.getHBaseAdmin().move(
668       toMove.getRegionInfo().getEncodedNameAsBytes(),
669       destServerName.getServerName().getBytes()
670     );
671 
672     while (destServer.getOnlineRegion(regionName) == null ||
673         destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
674         curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
675         master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
676       // wait for the move to be finished
677       Thread.sleep(1);
678     }
679 
680     LOG.info("Move finished for region="+toMove.getRegionInfo().getRegionNameAsString());
681 
682     // Check our new state.
683     Assert.assertNull(curServer.getOnlineRegion(regionName));
684     Assert.assertNotNull(destServer.getOnlineRegion(regionName));
685     Assert.assertFalse(destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
686     Assert.assertFalse(curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
687 
688 
689     // Cache was NOT updated and points to the wrong server
690     Assert.assertFalse(
691         conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation()
692           .getPort() == destServerName.getPort());
693 
694     // This part relies on a number of tries equals to 1.
695     // We do a put and expect the cache to be updated, even if we don't retry
696     LOG.info("Put starting");
697     Put put3 = new Put(ROW);
698     put3.add(FAM_NAM, ROW, ROW);
699     try {
700       table.put(put3);
701       Assert.fail("Unreachable point");
702     } catch (RetriesExhaustedWithDetailsException e){
703       LOG.info("Put done, exception caught: " + e.getClass());
704       Assert.assertEquals(1, e.getNumExceptions());
705       Assert.assertEquals(1, e.getCauses().size());
706       Assert.assertArrayEquals(e.getRow(0).getRow(), ROW);
707 
708       // Check that we unserialized the exception as expected
709       Throwable cause = ConnectionManager.findException(e.getCause(0));
710       Assert.assertNotNull(cause);
711       Assert.assertTrue(cause instanceof RegionMovedException);
712     }
713     Assert.assertNotNull("Cached connection is null", conn.getCachedLocation(TABLE_NAME, ROW));
714     Assert.assertEquals(
715         "Previous server was " + curServer.getServerName().getHostAndPort(),
716         destServerName.getPort(),
717         conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation().getPort());
718 
719     Assert.assertFalse(destServer.getRegionsInTransitionInRS()
720       .containsKey(encodedRegionNameBytes));
721     Assert.assertFalse(curServer.getRegionsInTransitionInRS()
722       .containsKey(encodedRegionNameBytes));
723 
724     // We move it back to do another test with a scan
725     LOG.info("Move starting region=" + toMove.getRegionInfo().getRegionNameAsString());
726     TEST_UTIL.getHBaseAdmin().move(
727       toMove.getRegionInfo().getEncodedNameAsBytes(),
728       curServer.getServerName().getServerName().getBytes()
729     );
730 
731     while (curServer.getOnlineRegion(regionName) == null ||
732         destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
733         curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
734         master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
735       // wait for the move to be finished
736       Thread.sleep(1);
737     }
738 
739     // Check our new state.
740     Assert.assertNotNull(curServer.getOnlineRegion(regionName));
741     Assert.assertNull(destServer.getOnlineRegion(regionName));
742     LOG.info("Move finished for region=" + toMove.getRegionInfo().getRegionNameAsString());
743 
744     // Cache was NOT updated and points to the wrong server
745     Assert.assertFalse(conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation().getPort() ==
746       curServer.getServerName().getPort());
747 
748     Scan sc = new Scan();
749     sc.setStopRow(ROW);
750     sc.setStartRow(ROW);
751 
752     // The scanner takes the max retries from the connection configuration, not the table as
753     // the put.
754     TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
755 
756     try {
757       ResultScanner rs = table.getScanner(sc);
758       while (rs.next() != null) {
759       }
760       Assert.fail("Unreachable point");
761     } catch (RetriesExhaustedException e) {
762       LOG.info("Scan done, expected exception caught: " + e.getClass());
763     }
764 
765     // Cache is updated with the right value.
766     Assert.assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
767     Assert.assertEquals(
768       "Previous server was "+destServer.getServerName().getHostAndPort(),
769       curServer.getServerName().getPort(),
770       conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation().getPort());
771 
772     TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
773         HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
774     table.close();
775   }
776 
777   /**
778    * Test that Connection or Pool are not closed when managed externally
779    * @throws Exception
780    */
781   @Test
782   public void testConnectionManagement() throws Exception{
783     Table table0 = TEST_UTIL.createTable(TABLE_NAME1, FAM_NAM);
784     Connection conn = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration());
785     HTable table = (HTable) conn.getTable(TABLE_NAME1);
786     table.close();
787     assertFalse(conn.isClosed());
788     assertFalse(table.getPool().isShutdown());
789     table = (HTable) conn.getTable(TABLE_NAME1);
790     table.close();
791     assertFalse(table.getPool().isShutdown());
792     conn.close();
793     assertTrue(table.getPool().isShutdown());
794     table0.close();
795   }
796 
797   /**
798    * Test that stale cache updates don't override newer cached values.
799    */
800   @Test(timeout = 60000)
801   public void testCacheSeqNums() throws Exception{
802     HTable table = TEST_UTIL.createTable(TABLE_NAME2, FAM_NAM);
803     TEST_UTIL.createMultiRegions(table, FAM_NAM);
804     Put put = new Put(ROW);
805     put.add(FAM_NAM, ROW, ROW);
806     table.put(put);
807     ConnectionManager.HConnectionImplementation conn =
808       (ConnectionManager.HConnectionImplementation)table.getConnection();
809 
810     HRegionLocation location = conn.getCachedLocation(TABLE_NAME2, ROW).getRegionLocation();
811     assertNotNull(location);
812 
813     ServerName anySource = ServerName.valueOf(location.getHostname(), location.getPort() - 1, 0L);
814 
815     // Same server as already in cache reporting - overwrites any value despite seqNum.
816     int nextPort = location.getPort() + 1;
817     conn.updateCachedLocation(location.getRegionInfo(), location.getServerName(),
818         ServerName.valueOf("127.0.0.1", nextPort, 0), location.getSeqNum() - 1);
819     location = conn.getCachedLocation(TABLE_NAME2, ROW).getRegionLocation();
820     Assert.assertEquals(nextPort, location.getPort());
821 
822     // No source specified - same.
823     nextPort = location.getPort() + 1;
824     conn.updateCachedLocation(location.getRegionInfo(), location.getServerName(),
825         ServerName.valueOf("127.0.0.1", nextPort, 0), location.getSeqNum() - 1);
826     location = conn.getCachedLocation(TABLE_NAME2, ROW).getRegionLocation();
827     Assert.assertEquals(nextPort, location.getPort());
828 
829     // Higher seqNum - overwrites lower seqNum.
830     nextPort = location.getPort() + 1;
831     conn.updateCachedLocation(location.getRegionInfo(), anySource,
832         ServerName.valueOf("127.0.0.1", nextPort, 0), location.getSeqNum() + 1);
833     location = conn.getCachedLocation(TABLE_NAME2, ROW).getRegionLocation();
834     Assert.assertEquals(nextPort, location.getPort());
835 
836     // Lower seqNum - does not overwrite higher seqNum.
837     nextPort = location.getPort() + 1;
838     conn.updateCachedLocation(location.getRegionInfo(), anySource,
839         ServerName.valueOf("127.0.0.1", nextPort, 0), location.getSeqNum() - 1);
840     location = conn.getCachedLocation(TABLE_NAME2, ROW).getRegionLocation();
841     Assert.assertEquals(nextPort - 1, location.getPort());
842     table.close();
843   }
844 
845   /**
846    * Make sure that {@link Configuration} instances that are essentially the
847    * same map to the same {@link HConnection} instance.
848    */
849   @Test
850   public void testConnectionSameness() throws Exception {
851     Connection previousConnection = null;
852     for (int i = 0; i < 2; i++) {
853       // set random key to differentiate the connection from previous ones
854       Configuration configuration = TEST_UTIL.getConfiguration();
855       configuration.set("some_key", String.valueOf(_randy.nextInt()));
856       LOG.info("The hash code of the current configuration is: "
857           + configuration.hashCode());
858       Connection currentConnection = HConnectionManager
859           .getConnection(configuration);
860       if (previousConnection != null) {
861         assertTrue(
862             "Did not get the same connection even though its key didn't change",
863             previousConnection == currentConnection);
864       }
865       previousConnection = currentConnection;
866       // change the configuration, so that it is no longer reachable from the
867       // client's perspective. However, since its part of the LRU doubly linked
868       // list, it will eventually get thrown out, at which time it should also
869       // close the corresponding {@link HConnection}.
870       configuration.set("other_key", String.valueOf(_randy.nextInt()));
871     }
872   }
873 
874   /**
875    * Makes sure that there is no leaking of
876    * {@link ConnectionManager.HConnectionImplementation} in the {@link HConnectionManager}
877    * class.
878    * @deprecated Tests deprecated functionality.  Remove in 1.0.
879    */
880   @Deprecated
881   @Test
882   public void testConnectionUniqueness() throws Exception {
883     int zkmaxconnections = TEST_UTIL.getConfiguration().
884       getInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS,
885           HConstants.DEFAULT_ZOOKEPER_MAX_CLIENT_CNXNS);
886     // Test up to a max that is < the maximum number of zk connections.  If we
887     // go above zk connections, we just fall into cycle where we are failing
888     // to set up a session and test runs for a long time.
889     int maxConnections = Math.min(zkmaxconnections - 1, 20);
890     List<HConnection> connections = new ArrayList<HConnection>(maxConnections);
891     Connection previousConnection = null;
892     try {
893       for (int i = 0; i < maxConnections; i++) {
894         // set random key to differentiate the connection from previous ones
895         Configuration configuration = new Configuration(TEST_UTIL.getConfiguration());
896         configuration.set("some_key", String.valueOf(_randy.nextInt()));
897         configuration.set(HConstants.HBASE_CLIENT_INSTANCE_ID,
898             String.valueOf(_randy.nextInt()));
899         LOG.info("The hash code of the current configuration is: "
900             + configuration.hashCode());
901         HConnection currentConnection =
902           HConnectionManager.getConnection(configuration);
903         if (previousConnection != null) {
904           assertTrue("Got the same connection even though its key changed!",
905               previousConnection != currentConnection);
906         }
907         // change the configuration, so that it is no longer reachable from the
908         // client's perspective. However, since its part of the LRU doubly linked
909         // list, it will eventually get thrown out, at which time it should also
910         // close the corresponding {@link HConnection}.
911         configuration.set("other_key", String.valueOf(_randy.nextInt()));
912 
913         previousConnection = currentConnection;
914         LOG.info("The current HConnectionManager#HBASE_INSTANCES cache size is: "
915             + getHConnectionManagerCacheSize());
916         Thread.sleep(50);
917         connections.add(currentConnection);
918       }
919     } finally {
920       for (Connection c: connections) {
921         // Clean up connections made so we don't interfere w/ subsequent tests.
922         HConnectionManager.deleteConnection(c.getConfiguration());
923       }
924     }
925   }
926 
927   @Test
928   public void testClosing() throws Exception {
929     Configuration configuration =
930       new Configuration(TEST_UTIL.getConfiguration());
931     configuration.set(HConstants.HBASE_CLIENT_INSTANCE_ID,
932         String.valueOf(_randy.nextInt()));
933 
934     Connection c1 = ConnectionFactory.createConnection(configuration);
935     // We create two connections with the same key.
936     Connection c2 = ConnectionFactory.createConnection(configuration);
937 
938     Connection c3 = HConnectionManager.getConnection(configuration);
939     Connection c4 = HConnectionManager.getConnection(configuration);
940     assertTrue(c3 == c4);
941 
942     c1.close();
943     assertTrue(c1.isClosed());
944     assertFalse(c2.isClosed());
945     assertFalse(c3.isClosed());
946 
947     c3.close();
948     // still a reference left
949     assertFalse(c3.isClosed());
950     c3.close();
951     assertTrue(c3.isClosed());
952     // c3 was removed from the cache
953     Connection c5 = HConnectionManager.getConnection(configuration);
954     assertTrue(c5 != c3);
955 
956     assertFalse(c2.isClosed());
957     c2.close();
958     assertTrue(c2.isClosed());
959     c5.close();
960     assertTrue(c5.isClosed());
961   }
962 
963   /**
964    * Trivial test to verify that nobody messes with
965    * {@link HConnectionManager#createConnection(Configuration)}
966    */
967   @Test
968   public void testCreateConnection() throws Exception {
969     Configuration configuration = TEST_UTIL.getConfiguration();
970     Connection c1 = ConnectionFactory.createConnection(configuration);
971     Connection c2 = ConnectionFactory.createConnection(configuration);
972     // created from the same configuration, yet they are different
973     assertTrue(c1 != c2);
974     assertTrue(c1.getConfiguration() == c2.getConfiguration());
975     // make sure these were not cached
976     Connection c3 = HConnectionManager.getConnection(configuration);
977     assertTrue(c1 != c3);
978     assertTrue(c2 != c3);
979   }
980 
981 
982   /**
983    * This test checks that one can connect to the cluster with only the
984    *  ZooKeeper quorum set. Other stuff like master address will be read
985    *  from ZK by the client.
986    */
987   @Test(timeout = 60000)
988   public void testConnection() throws Exception{
989     // We create an empty config and add the ZK address.
990     Configuration c = new Configuration();
991     c.set(HConstants.ZOOKEEPER_QUORUM,
992       TEST_UTIL.getConfiguration().get(HConstants.ZOOKEEPER_QUORUM));
993     c.set(HConstants.ZOOKEEPER_CLIENT_PORT ,
994       TEST_UTIL.getConfiguration().get(HConstants.ZOOKEEPER_CLIENT_PORT));
995 
996     // This should be enough to connect
997     HConnection conn = HConnectionManager.getConnection(c);
998     assertTrue( conn.isMasterRunning() );
999     conn.close();
1000   }
1001 
1002   private int setNumTries(HConnectionImplementation hci, int newVal) throws Exception {
1003     Field numTries = hci.getClass().getDeclaredField("numTries");
1004     numTries.setAccessible(true);
1005     Field modifiersField = Field.class.getDeclaredField("modifiers");
1006     modifiersField.setAccessible(true);
1007     modifiersField.setInt(numTries, numTries.getModifiers() & ~Modifier.FINAL);
1008     final int prevNumRetriesVal = (Integer)numTries.get(hci);
1009     numTries.set(hci, newVal);
1010 
1011     return prevNumRetriesVal;
1012   }
1013 
1014   @Test (timeout=30000)
1015   public void testMulti() throws Exception {
1016     HTable table = TEST_UTIL.createTable(TABLE_NAME3, FAM_NAM);
1017      try {
1018        TEST_UTIL.createMultiRegions(table, FAM_NAM);
1019        ConnectionManager.HConnectionImplementation conn =
1020            ( ConnectionManager.HConnectionImplementation)table.getConnection();
1021 
1022        // We're now going to move the region and check that it works for the client
1023        // First a new put to add the location in the cache
1024        conn.clearRegionCache(TABLE_NAME3);
1025        Assert.assertEquals(0, conn.getNumberOfCachedRegionLocations(TABLE_NAME3));
1026 
1027        TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, false);
1028        HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
1029 
1030        // We can wait for all regions to be online, that makes log reading easier when debugging
1031        while (master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
1032          Thread.sleep(1);
1033        }
1034 
1035        Put put = new Put(ROW_X);
1036        put.add(FAM_NAM, ROW_X, ROW_X);
1037        table.put(put);
1038 
1039        // Now moving the region to the second server
1040        HRegionLocation toMove = conn.getCachedLocation(TABLE_NAME3, ROW_X).getRegionLocation();
1041        byte[] regionName = toMove.getRegionInfo().getRegionName();
1042        byte[] encodedRegionNameBytes = toMove.getRegionInfo().getEncodedNameAsBytes();
1043 
1044        // Choose the other server.
1045        int curServerId = TEST_UTIL.getHBaseCluster().getServerWith(regionName);
1046        int destServerId = (curServerId == 0 ? 1 : 0);
1047 
1048        HRegionServer curServer = TEST_UTIL.getHBaseCluster().getRegionServer(curServerId);
1049        HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(destServerId);
1050 
1051        ServerName destServerName = destServer.getServerName();
1052 
1053        //find another row in the cur server that is less than ROW_X
1054        List<HRegion> regions = curServer.getOnlineRegions(TABLE_NAME3);
1055        byte[] otherRow = null;
1056        for (HRegion region : regions) {
1057          if (!region.getRegionInfo().getEncodedName().equals(toMove.getRegionInfo().getEncodedName())
1058              && Bytes.BYTES_COMPARATOR.compare(region.getRegionInfo().getStartKey(), ROW_X) < 0) {
1059            otherRow = region.getRegionInfo().getStartKey();
1060            break;
1061          }
1062        }
1063        assertNotNull(otherRow);
1064        // If empty row, set it to first row.-f
1065        if (otherRow.length <= 0) otherRow = Bytes.toBytes("aaa");
1066        Put put2 = new Put(otherRow);
1067        put2.add(FAM_NAM, otherRow, otherRow);
1068        table.put(put2); //cache put2's location
1069 
1070        // Check that we are in the expected state
1071        Assert.assertTrue(curServer != destServer);
1072        Assert.assertNotEquals(curServer.getServerName(), destServer.getServerName());
1073        Assert.assertNotEquals(toMove.getPort(), destServerName.getPort());
1074        Assert.assertNotNull(curServer.getOnlineRegion(regionName));
1075        Assert.assertNull(destServer.getOnlineRegion(regionName));
1076        Assert.assertFalse(TEST_UTIL.getMiniHBaseCluster().getMaster().
1077            getAssignmentManager().getRegionStates().isRegionsInTransition());
1078 
1079        // Moving. It's possible that we don't have all the regions online at this point, so
1080        //  the test must depends only on the region we're looking at.
1081        LOG.info("Move starting region="+toMove.getRegionInfo().getRegionNameAsString());
1082        TEST_UTIL.getHBaseAdmin().move(
1083            toMove.getRegionInfo().getEncodedNameAsBytes(),
1084            destServerName.getServerName().getBytes()
1085            );
1086 
1087        while (destServer.getOnlineRegion(regionName) == null ||
1088            destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
1089            curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
1090            master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
1091          // wait for the move to be finished
1092          Thread.sleep(1);
1093         }
1094 
1095        LOG.info("Move finished for region="+toMove.getRegionInfo().getRegionNameAsString());
1096 
1097        // Check our new state.
1098        Assert.assertNull(curServer.getOnlineRegion(regionName));
1099        Assert.assertNotNull(destServer.getOnlineRegion(regionName));
1100        Assert.assertFalse(destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
1101        Assert.assertFalse(curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
1102 
1103 
1104        // Cache was NOT updated and points to the wrong server
1105        Assert.assertFalse(
1106            conn.getCachedLocation(TABLE_NAME3, ROW_X).getRegionLocation()
1107             .getPort() == destServerName.getPort());
1108 
1109        // Hijack the number of retry to fail after 2 tries
1110        final int prevNumRetriesVal = setNumTries(conn, 2);
1111 
1112        Put put3 = new Put(ROW_X);
1113        put3.add(FAM_NAM, ROW_X, ROW_X);
1114        Put put4 = new Put(otherRow);
1115        put4.add(FAM_NAM, otherRow, otherRow);
1116 
1117        // do multi
1118        table.batch(Lists.newArrayList(put4, put3)); // first should be a valid row,
1119        // second we get RegionMovedException.
1120 
1121        setNumTries(conn, prevNumRetriesVal);
1122      } finally {
1123        table.close();
1124      }
1125   }
1126 
1127   @Ignore ("Test presumes RETRY_BACKOFF will never change; it has") @Test
1128   public void testErrorBackoffTimeCalculation() throws Exception {
1129     // TODO: This test would seem to presume hardcoded RETRY_BACKOFF which it should not.
1130     final long ANY_PAUSE = 100;
1131     ServerName location = ServerName.valueOf("127.0.0.1", 1, 0);
1132     ServerName diffLocation = ServerName.valueOf("127.0.0.1", 2, 0);
1133 
1134     ManualEnvironmentEdge timeMachine = new ManualEnvironmentEdge();
1135     EnvironmentEdgeManager.injectEdge(timeMachine);
1136     try {
1137       long timeBase = timeMachine.currentTime();
1138       long largeAmountOfTime = ANY_PAUSE * 1000;
1139       ConnectionManager.ServerErrorTracker tracker =
1140           new ConnectionManager.ServerErrorTracker(largeAmountOfTime, 100);
1141 
1142       // The default backoff is 0.
1143       assertEquals(0, tracker.calculateBackoffTime(location, ANY_PAUSE));
1144 
1145       // Check some backoff values from HConstants sequence.
1146       tracker.reportServerError(location);
1147       assertEqualsWithJitter(ANY_PAUSE, tracker.calculateBackoffTime(location, ANY_PAUSE));
1148       tracker.reportServerError(location);
1149       tracker.reportServerError(location);
1150       tracker.reportServerError(location);
1151       assertEqualsWithJitter(ANY_PAUSE * 5, tracker.calculateBackoffTime(location, ANY_PAUSE));
1152 
1153       // All of this shouldn't affect backoff for different location.
1154       assertEquals(0, tracker.calculateBackoffTime(diffLocation, ANY_PAUSE));
1155       tracker.reportServerError(diffLocation);
1156       assertEqualsWithJitter(ANY_PAUSE, tracker.calculateBackoffTime(diffLocation, ANY_PAUSE));
1157 
1158       // Check with different base.
1159       assertEqualsWithJitter(ANY_PAUSE * 10,
1160           tracker.calculateBackoffTime(location, ANY_PAUSE * 2));
1161 
1162       // See that time from last error is taken into account. Time shift is applied after jitter,
1163       // so pass the original expected backoff as the base for jitter.
1164       long timeShift = (long)(ANY_PAUSE * 0.5);
1165       timeMachine.setValue(timeBase + timeShift);
1166       assertEqualsWithJitter((ANY_PAUSE * 5) - timeShift,
1167         tracker.calculateBackoffTime(location, ANY_PAUSE), ANY_PAUSE * 2);
1168 
1169       // However we should not go into negative.
1170       timeMachine.setValue(timeBase + ANY_PAUSE * 100);
1171       assertEquals(0, tracker.calculateBackoffTime(location, ANY_PAUSE));
1172 
1173       // We also should not go over the boundary; last retry would be on it.
1174       long timeLeft = (long)(ANY_PAUSE * 0.5);
1175       timeMachine.setValue(timeBase + largeAmountOfTime - timeLeft);
1176       assertTrue(tracker.canRetryMore(1));
1177       tracker.reportServerError(location);
1178       assertEquals(timeLeft, tracker.calculateBackoffTime(location, ANY_PAUSE));
1179       timeMachine.setValue(timeBase + largeAmountOfTime);
1180       assertFalse(tracker.canRetryMore(1));
1181     } finally {
1182       EnvironmentEdgeManager.reset();
1183     }
1184   }
1185 
1186   private static void assertEqualsWithJitter(long expected, long actual) {
1187     assertEqualsWithJitter(expected, actual, expected);
1188   }
1189 
1190   private static void assertEqualsWithJitter(long expected, long actual, long jitterBase) {
1191     assertTrue("Value not within jitter: " + expected + " vs " + actual,
1192         Math.abs(actual - expected) <= (0.01f * jitterBase));
1193   }
1194 
1195   /**
1196    * Tests that a destroyed connection does not have a live zookeeper.
1197    * Below is timing based.  We put up a connection to a table and then close the connection while
1198    * having a background thread running that is forcing close of the connection to try and
1199    * provoke a close catastrophe; we are hoping for a car crash so we can see if we are leaking
1200    * zk connections.
1201    * @throws Exception
1202    */
1203   @Ignore ("Flakey test: See HBASE-8996")@Test
1204   public void testDeleteForZKConnLeak() throws Exception {
1205     TEST_UTIL.createTable(TABLE_NAME4, FAM_NAM);
1206     final Configuration config = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
1207     config.setInt("zookeeper.recovery.retry", 1);
1208     config.setInt("zookeeper.recovery.retry.intervalmill", 1000);
1209     config.setInt("hbase.rpc.timeout", 2000);
1210     config.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
1211 
1212     ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 10,
1213       5, TimeUnit.SECONDS,
1214       new SynchronousQueue<Runnable>(),
1215       Threads.newDaemonThreadFactory("test-hcm-delete"));
1216 
1217     pool.submit(new Runnable() {
1218       @Override
1219       public void run() {
1220         while (!Thread.interrupted()) {
1221           try {
1222             HConnection conn = HConnectionManager.getConnection(config);
1223             LOG.info("Connection " + conn);
1224             HConnectionManager.deleteStaleConnection(conn);
1225             LOG.info("Connection closed " + conn);
1226             // TODO: This sleep time should be less than the time that it takes to open and close
1227             // a table.  Ideally we would do a few runs first to measure.  For now this is
1228             // timing based; hopefully we hit the bad condition.
1229             Threads.sleep(10);
1230           } catch (Exception e) {
1231           }
1232         }
1233       }
1234     });
1235 
1236     // Use connection multiple times.
1237     for (int i = 0; i < 30; i++) {
1238       Connection c1 = null;
1239       try {
1240         c1 = ConnectionManager.getConnectionInternal(config);
1241         LOG.info("HTable connection " + i + " " + c1);
1242         Table table = new HTable(config, TABLE_NAME4, pool);
1243         table.close();
1244         LOG.info("HTable connection " + i + " closed " + c1);
1245       } catch (Exception e) {
1246         LOG.info("We actually want this to happen!!!!  So we can see if we are leaking zk", e);
1247       } finally {
1248         if (c1 != null) {
1249           if (c1.isClosed()) {
1250             // cannot use getZooKeeper as method instantiates watcher if null
1251             Field zkwField = c1.getClass().getDeclaredField("keepAliveZookeeper");
1252             zkwField.setAccessible(true);
1253             Object watcher = zkwField.get(c1);
1254 
1255             if (watcher != null) {
1256               if (((ZooKeeperWatcher)watcher).getRecoverableZooKeeper().getState().isAlive()) {
1257                 // non-synchronized access to watcher; sleep and check again in case zk connection
1258                 // hasn't been cleaned up yet.
1259                 Thread.sleep(1000);
1260                 if (((ZooKeeperWatcher) watcher).getRecoverableZooKeeper().getState().isAlive()) {
1261                   pool.shutdownNow();
1262                   fail("Live zookeeper in closed connection");
1263                 }
1264               }
1265             }
1266           }
1267           c1.close();
1268         }
1269       }
1270     }
1271     pool.shutdownNow();
1272   }
1273 
1274   @Test(timeout = 60000)
1275   public void testConnectionRideOverClusterRestart() throws IOException, InterruptedException {
1276     Configuration config = new Configuration(TEST_UTIL.getConfiguration());
1277 
1278     TableName tableName = TableName.valueOf("testConnectionRideOverClusterRestart");
1279     TEST_UTIL.createTable(tableName.getName(), new byte[][] {FAM_NAM}, config).close();
1280 
1281     Connection connection = ConnectionFactory.createConnection(config);
1282     Table table = connection.getTable(tableName);
1283 
1284     // this will cache the meta location and table's region location
1285     table.get(new Get(Bytes.toBytes("foo")));
1286 
1287     // restart HBase
1288     TEST_UTIL.shutdownMiniHBaseCluster();
1289     TEST_UTIL.restartHBaseCluster(2);
1290     // this should be able to discover new locations for meta and table's region
1291     table.get(new Get(Bytes.toBytes("foo")));
1292     TEST_UTIL.deleteTable(tableName);
1293     table.close();
1294     connection.close();
1295   }
1296 }
1297