View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.client;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNull;
25  import static org.junit.Assert.assertTrue;
26  import static org.junit.Assert.fail;
27  
28  import java.io.IOException;
29  import java.lang.reflect.Field;
30  import java.lang.reflect.Modifier;
31  import java.net.SocketTimeoutException;
32  import java.util.ArrayList;
33  import java.util.HashMap;
34  import java.util.List;
35  import java.util.Map;
36  import java.util.Random;
37  import java.util.concurrent.ExecutorService;
38  import java.util.concurrent.SynchronousQueue;
39  import java.util.concurrent.ThreadPoolExecutor;
40  import java.util.concurrent.TimeUnit;
41  import java.util.concurrent.atomic.AtomicBoolean;
42  
43  import org.apache.commons.logging.Log;
44  import org.apache.commons.logging.LogFactory;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.hbase.TableName;
47  import org.apache.hadoop.hbase.HBaseConfiguration;
48  import org.apache.hadoop.hbase.HBaseTestingUtility;
49  import org.apache.hadoop.hbase.HConstants;
50  import org.apache.hadoop.hbase.HRegionInfo;
51  import org.apache.hadoop.hbase.HRegionLocation;
52  import org.apache.hadoop.hbase.MediumTests;
53  import org.apache.hadoop.hbase.ServerName;
54  import org.apache.hadoop.hbase.Waiter;
55  import org.apache.hadoop.hbase.client.HConnectionManager.HConnectionImplementation;
56  import org.apache.hadoop.hbase.exceptions.DeserializationException;
57  import org.apache.hadoop.hbase.filter.Filter;
58  import org.apache.hadoop.hbase.filter.FilterBase;
59  import org.apache.hadoop.hbase.master.ClusterStatusPublisher;
60  import org.apache.hadoop.hbase.master.HMaster;
61  import org.apache.hadoop.hbase.regionserver.HRegion;
62  import org.apache.hadoop.hbase.regionserver.HRegionServer;
63  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
64  import org.apache.hadoop.hbase.util.Bytes;
65  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
66  import org.apache.hadoop.hbase.util.JVMClusterUtil;
67  import org.apache.hadoop.hbase.util.ManualEnvironmentEdge;
68  import org.apache.hadoop.hbase.util.Threads;
69  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
70  import org.junit.AfterClass;
71  import org.junit.Assert;
72  import org.junit.BeforeClass;
73  import org.junit.Ignore;
74  import org.junit.Test;
75  import org.junit.experimental.categories.Category;
76  
77  import com.google.common.collect.Lists;
78  
79  /**
80   * Tests for {@link HConnectionManager} (HCM) features.
81   */
82  @Category(MediumTests.class)
83  public class TestHCM {
84    private static final Log LOG = LogFactory.getLog(TestHCM.class);
      // Shared mini-cluster harness: started in setUpBeforeClass(), stopped in tearDownAfterClass().
85    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
      // Separate table names so that the tests which create tables do not collide with each other.
      // NOTE(review): TABLE_NAME4 is not referenced in the part of the file reviewed here.
86    private static final TableName TABLE_NAME =
87        TableName.valueOf("test");
88    private static final TableName TABLE_NAME1 =
89        TableName.valueOf("test1");
90    private static final TableName TABLE_NAME2 =
91        TableName.valueOf("test2");
92    private static final TableName TABLE_NAME3 =
93        TableName.valueOf("test3");
94    private static final TableName TABLE_NAME4 =
95        TableName.valueOf("test4");
      // Column family used by most tables created in this class (testClusterStatus uses its own).
96    private static final byte[] FAM_NAM = Bytes.toBytes("f");
      // Row key written and looked up by the region-location cache tests.
97    private static final byte[] ROW = Bytes.toBytes("bbb");
      // NOTE(review): ROW_X is not referenced in the part of the file reviewed here.
98    private static final byte[] ROW_X = Bytes.toBytes("xxx");
      // Random values used to make otherwise identical Configuration instances differ.
99    private static Random _randy = new Random();
100 
101   @BeforeClass
102   public static void setUpBeforeClass() throws Exception {
        // Select the multicast cluster-status publisher and listener implementations before the
        // cluster is started.
        final Configuration clusterConf = TEST_UTIL.getConfiguration();
103     clusterConf.setClass(
            ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
104         ClusterStatusPublisher.MulticastPublisher.class,
            ClusterStatusPublisher.Publisher.class);
105     clusterConf.setClass(
            ClusterStatusListener.STATUS_LISTENER_CLASS,
106         ClusterStatusListener.MultiCastListener.class,
            ClusterStatusListener.Listener.class);
107 
        // The region-moving tests in this class pick between server ids 0 and 1.
108     TEST_UTIL.startMiniCluster(2);
109   }
110 
111   @AfterClass
      public static void tearDownAfterClass() throws Exception {
        // Stops the mini cluster started in setUpBeforeClass().
112     TEST_UTIL.shutdownMiniCluster();
113   }
114 
115 
116   private static int getHConnectionManagerCacheSize(){
117     return HConnectionTestingUtility.getConnectionCount();
118   }
119 
120   @Test
121   public void testClusterConnection() throws IOException {
        // Verifies which batch pool a connection and its tables use: a pool supplied by the caller
        // is shared and left running, a pool created on demand is shut down with the connection.
122     ThreadPoolExecutor callerPool = new ThreadPoolExecutor(1, 1,
123         5, TimeUnit.SECONDS,
124         new SynchronousQueue<Runnable>(),
125         Threads.newDaemonThreadFactory("test-hcm"));
126 
127     HConnection lazyPoolConn = HConnectionManager.createConnection(TEST_UTIL.getConfiguration());
128     HConnection callerPoolConn =
            HConnectionManager.createConnection(TEST_UTIL.getConfiguration(), callerPool);
129     // the connection must adopt the executor it was given instead of building its own
130     assertTrue(callerPool == ((HConnectionImplementation)callerPoolConn).getCurrentBatchPool());
131 
132     String tableName = "testClusterConnection";
133     TEST_UTIL.createTable(tableName.getBytes(), FAM_NAM).close();
134     HTable explicitPoolTable = (HTable)lazyPoolConn.getTable(tableName, callerPool);
135     // handing a pool to getTable must not make the connection create its internal pool
136     assertNull("Internal Thread pool should be null",
            ((HConnectionImplementation)lazyPoolConn).getCurrentBatchPool());
137     // the table uses the pool it was handed
138     assertTrue(callerPool == explicitPoolTable.getPool());
139     explicitPoolTable.close();
140 
141     HTable byStringName = (HTable)callerPoolConn.getTable(tableName);
142     // the table uses the connection's pool, which is the caller-supplied one
143     assertTrue(callerPool == byStringName.getPool());
144     byStringName.close();
145 
146     HTable byByteName = (HTable)callerPoolConn.getTable(Bytes.toBytes(tableName));
147     // same check through the byte[] overload
148     assertTrue(callerPool == byByteName.getPool());
149     byByteName.close();
150 
151     HTable byTableName = (HTable)callerPoolConn.getTable(TableName.valueOf(tableName));
152     // same check through the TableName overload
153     assertTrue(callerPool == byTableName.getPool());
154     byTableName.close();
155 
156     HTable firstLazyTable = (HTable)lazyPoolConn.getTable(tableName);
157     ExecutorService onDemandPool = ((HConnectionImplementation)lazyPoolConn).getCurrentBatchPool();
158     // getTable without a pool must have created the internal pool
159     assertNotNull("An internal Thread pool should have been created", onDemandPool);
160     // and the table must be using it
161     assertTrue(firstLazyTable.getPool() == onDemandPool);
162     firstLazyTable.close();
163 
164     HTable secondLazyTable = (HTable)lazyPoolConn.getTable(tableName);
165     // a second table reuses the *same* internal pool
166     assertTrue(secondLazyTable.getPool() == onDemandPool);
167     secondLazyTable.close();
168 
169     lazyPoolConn.close();
170     // a pool created on demand is shut down when the connection is closed
171     assertTrue(onDemandPool.isShutdown());
172 
173     callerPoolConn.close();
174     // a pool passed in by the caller is left running
175     assertFalse(callerPool.isShutdown());
176     callerPool.shutdownNow();
177   }
178 
179   @Ignore ("Fails in IDEs: HBASE-9042") @Test(expected = RegionServerStoppedException.class)
180   public void testClusterStatus() throws Exception {
        // Scenario: start an extra region server, move the test region onto it, abort that server,
        // wait until both the master and the client-side status listener report it dead, and then
        // expect hci.getClient(sn) to throw RegionServerStoppedException (see the @Test annotation).
181     TableName tn =
182         TableName.valueOf("testClusterStatus");
183     byte[] cf = "cf".getBytes();
184     byte[] rk = "rk1".getBytes();
185 
186     JVMClusterUtil.RegionServerThread rs = TEST_UTIL.getHBaseCluster().startRegionServer();
187     rs.waitForServerOnline();
188     final ServerName sn = rs.getRegionServer().getServerName();
189 
190     HTable t = TEST_UTIL.createTable(tn, cf);
191     TEST_UTIL.waitTableAvailable(tn.getName());
192 
        // Busy-wait until the master reports no region in transition.
193     while(TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
194         getRegionStates().isRegionsInTransition()){
195       Thread.sleep(1);
196     }
197     final HConnectionImplementation hci =  (HConnectionImplementation)t.getConnection();
        // Keep requesting a move until the client sees the region on the new server's port. The
        // region cache is cleared after each attempt before the location is looked up again.
198     while (t.getRegionLocation(rk).getPort() != sn.getPort()){
199       TEST_UTIL.getHBaseAdmin().move(t.getRegionLocation(rk).getRegionInfo().
200           getEncodedNameAsBytes(), Bytes.toBytes(sn.toString()));
201       while(TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
202           getRegionStates().isRegionsInTransition()){
203         Thread.sleep(1);
204       }
205       hci.clearRegionCache(tn);
206     }
        // The listener configured in setUpBeforeClass() must be present on this connection.
207     Assert.assertNotNull(hci.clusterStatusListener);
208     TEST_UTIL.assertRegionOnServer(t.getRegionLocation(rk).getRegionInfo(), sn, 20000);
209 
        // One successful write to the region on the new server before that server is aborted.
210     Put p1 = new Put(rk);
211     p1.add(cf, "qual".getBytes(), "val".getBytes());
212     t.put(p1);
213 
214     rs.getRegionServer().abort("I'm dead");
215 
216     // We want the status to be updated. That's at least 10 seconds.
        // First wait for the master to list the server as dead...
217     TEST_UTIL.waitFor(40000, 1000, true, new Waiter.Predicate<Exception>() {
218       @Override
219       public boolean evaluate() throws Exception {
220         return TEST_UTIL.getHBaseCluster().getMaster().getServerManager().
221             getDeadServers().isDeadServer(sn);
222       }
223     });
224 
        // ...then for the client-side listener to have received that information.
225     TEST_UTIL.waitFor(40000, 1000, true, new Waiter.Predicate<Exception>() {
226       @Override
227       public boolean evaluate() throws Exception {
228         return hci.clusterStatusListener.isDeadServer(sn);
229       }
230     });
231 
        // NOTE(review): the table handle t is never closed in this test.
232     hci.getClient(sn);  // will throw an exception: RegionServerStoppedException
233   }
234 
235   /**
236    * Test that the connection to the dead server is cut immediately when we receive the
237    *  notification.
       * <p>
       * A get carrying a {@link BlockingFilter} is issued; while it is in flight a helper thread
       * reports the hosting server as dead to the connection's cluster status listener. The get
       * must then fail with an {@link IOException} that is not a {@link SocketTimeoutException},
       * and it must fail while the filter is still blocking.
238    * @throws Exception
239    */
240   @Test
241   public void testConnectionCut() throws Exception {
242     String tableName = "HCM-testConnectionCut";
243 
244     TEST_UTIL.createTable(tableName.getBytes(), FAM_NAM).close();
        // Balancer off for the duration of the test; the previous state is restored in the finally.
245     boolean previousBalance = TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
246 
247     Configuration c2 = new Configuration(TEST_UTIL.getConfiguration());
248     // We want to work on a separate connection.
249     c2.set(HConstants.HBASE_CLIENT_INSTANCE_ID, String.valueOf(-1));
        // A single try and a 30 * 1000 RPC timeout: the failure asserted below must come from the
        // dead-server notification, not from a retry or a socket timeout.
250     c2.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
251     c2.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 30 * 1000);
252 
253     HTable table = new HTable(c2, tableName);
254 
        // The FAM_NAM bytes double as row key, family, qualifier and value.
255     Put p = new Put(FAM_NAM);
256     p.add(FAM_NAM, FAM_NAM, FAM_NAM);
257     table.put(p);
258 
259     final HConnectionImplementation hci =  (HConnectionImplementation)table.getConnection();
260     final HRegionLocation loc = table.getRegionLocation(FAM_NAM);
261 
        // A plain get works before the server is declared dead.
262     Get get = new Get(FAM_NAM);
263     Assert.assertNotNull(table.get(get));
264 
        // Same get again, this time with the filter that stalls until syncBlockingFilter is set.
265     get = new Get(FAM_NAM);
266     get.setFilter(new BlockingFilter());
267 
268     // This thread will mark the server as dead while we're waiting during a get.
269     Thread t = new Thread() {
270       @Override
271       public void run() {
            // Woken by the notifyAll() that BlockingFilter issues on every polling round.
272         synchronized (syncBlockingFilter) {
273           try {
274             syncBlockingFilter.wait();
275           } catch (InterruptedException e) {
276             throw new RuntimeException(e);
277           }
278         }
279         hci.clusterStatusListener.deadServerHandler.newDead(loc.getServerName());
280       }
281     };
282 
283     t.start();
284     try {
285       table.get(get);
286       Assert.fail();
287     } catch (IOException expected) {
288       LOG.debug("Received: " + expected);
289       Assert.assertFalse(expected instanceof SocketTimeoutException);
          // Flag still false: the filter had not finished, so the get was cut while it was blocked.
290       Assert.assertFalse(syncBlockingFilter.get());
291     } finally {
          // Release the filter and wait for the helper thread.
          // NOTE(review): if the filter never ran, the helper is never notified and join() blocks.
292       syncBlockingFilter.set(true);
293       t.join();
          // NOTE(review): getConnection() followed by close() looks like an acquire/release pair
          // with no net effect (testClosing shows these references are counted) - confirm intent.
294       HConnectionManager.getConnection(c2).close();
295       TEST_UTIL.getHBaseAdmin().setBalancerRunning(previousBalance, true);
296     }
297 
298     table.close();
299   }
300 
301   protected static final AtomicBoolean syncBlockingFilter = new AtomicBoolean(false);
302 
      /**
       * Filter that stalls row-key filtering until {@code syncBlockingFilter} becomes true or
       * 1000 polling rounds have elapsed. The flag object is also used as a monitor: every
       * round calls {@code notifyAll()} on it so that a thread waiting on it (see
       * testConnectionCut) is woken while the request is still blocked here.
       */
303   public static class BlockingFilter extends FilterBase {
304     @Override
305     public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException {
          // At most 1000 rounds; each round wakes any waiter and then sleeps.
307       for (int round = 0; round < 1000 && !syncBlockingFilter.get(); round++) {
308         synchronized (syncBlockingFilter) {
309           syncBlockingFilter.notifyAll();
310         }
311         Threads.sleep(100);
312       }
          // Mark the filter as finished, whichever way the loop ended; the row is never filtered out.
313       syncBlockingFilter.set(true);
314       return false;
315     }
316 
        /**
         * Builds a filter from its serialized form. The bytes are ignored; a fresh
         * instance is always returned.
         */
317     public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException{
318       return new BlockingFilter();
319     }
320   }
321 
322   @Test
323   public void abortingHConnectionRemovesItselfFromHCM() throws Exception {
        // An aborted connection must drop out of the manager's cache, so that the next lookup
        // with the same configuration hands out a different instance.
        final Configuration sharedConf = TEST_UTIL.getConfiguration();
        final String reason = "test abortingHConnectionRemovesItselfFromHCM";

324     // Snapshot the currently cached connections, then start from an empty cache
325     Map<HConnectionKey, HConnectionImplementation> savedInstances =
326         new HashMap<HConnectionKey, HConnectionImplementation>();
327     savedInstances.putAll(HConnectionManager.CONNECTION_INSTANCES);
329     HConnectionManager.CONNECTION_INSTANCES.clear();
330 
331     try {
332       HConnection aborted = HConnectionManager.getConnection(sharedConf);
333       aborted.abort(reason, new Exception(reason));
          HConnection replacement = HConnectionManager.getConnection(sharedConf);
335       Assert.assertNotSame(aborted, replacement);
337     } finally {
338       // Restore the snapshot taken above
339       HConnectionManager.CONNECTION_INSTANCES.clear();
340       HConnectionManager.CONNECTION_INSTANCES.putAll(savedInstances);
341     }
342   }
343 
344   /**
345    * Test that when we delete a location using the first row of a region
346    * that we really delete it.
       * <p>
       * The test then moves the region between the two region servers and checks that a failed
       * put, and later a failed scan, each leave the client's location cache pointing at the
       * server that now hosts the region.
       * <p>
       * The retry count written to the shared test configuration for the scan is put back to
       * its previous value in a {@code finally}, so a failure here cannot leak it to other tests.
347    * @throws Exception
348    */
349   @Test
350   public void testRegionCaching() throws Exception{
351     TEST_UTIL.createTable(TABLE_NAME, FAM_NAM).close();
352     Configuration conf =  new Configuration(TEST_UTIL.getConfiguration());
353     conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
354     HTable table = new HTable(conf, TABLE_NAME);
355 
356     TEST_UTIL.createMultiRegions(table, FAM_NAM);
357     TEST_UTIL.waitUntilAllRegionsAssigned(table.getName());
358     Put put = new Put(ROW);
359     put.add(FAM_NAM, ROW, ROW);
360     table.put(put);
361     HConnectionManager.HConnectionImplementation conn =
362       (HConnectionManager.HConnectionImplementation)table.getConnection();
363 
364     assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
365 
        // Overwrite the cached entry with a made-up server, then force-delete it.
366     final int nextPort = conn.getCachedLocation(TABLE_NAME, ROW).getPort() + 1;
367     HRegionLocation loc = conn.getCachedLocation(TABLE_NAME, ROW);
368     conn.updateCachedLocation(loc.getRegionInfo(), loc, new ServerName("127.0.0.1", nextPort,
369       HConstants.LATEST_TIMESTAMP), HConstants.LATEST_TIMESTAMP);
370     Assert.assertEquals(conn.getCachedLocation(TABLE_NAME, ROW).getPort(), nextPort);
371 
372     conn.forceDeleteCachedLocation(TABLE_NAME, ROW.clone());
373     HRegionLocation rl = conn.getCachedLocation(TABLE_NAME, ROW);
374     assertNull("What is this location?? " + rl, rl);
375 
376     // We're now going to move the region and check that it works for the client
377     // First a new put to add the location in the cache
378     conn.clearRegionCache(TABLE_NAME);
379     Assert.assertEquals(0, conn.getNumberOfCachedRegionLocations(TABLE_NAME));
380     Put put2 = new Put(ROW);
381     put2.add(FAM_NAM, ROW, ROW);
382     table.put(put2);
383     assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
384     assertNotNull(conn.getCachedLocation(TableName.valueOf(TABLE_NAME.getName()), ROW.clone()));
385 
        // NOTE(review): the balancer is switched off here and not switched back on by this test.
386     TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, false);
387     HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
388 
389     // We can wait for all regions to be online, that makes log reading easier when debugging
390     while (master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
391       Thread.sleep(1);
392     }
393 
394     // Now moving the region to the second server
395     HRegionLocation toMove = conn.getCachedLocation(TABLE_NAME, ROW);
396     byte[] regionName = toMove.getRegionInfo().getRegionName();
397     byte[] encodedRegionNameBytes = toMove.getRegionInfo().getEncodedNameAsBytes();
398 
399     // Choose the other server.
400     int curServerId = TEST_UTIL.getHBaseCluster().getServerWith(regionName);
401     int destServerId = (curServerId == 0 ? 1 : 0);
402 
403     HRegionServer curServer = TEST_UTIL.getHBaseCluster().getRegionServer(curServerId);
404     HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(destServerId);
405 
406     ServerName destServerName = destServer.getServerName();
407 
408     // Check that we are in the expected state
409     Assert.assertTrue(curServer != destServer);
410     Assert.assertFalse(curServer.getServerName().equals(destServer.getServerName()));
411     Assert.assertFalse( toMove.getPort() == destServerName.getPort());
412     Assert.assertNotNull(curServer.getOnlineRegion(regionName));
413     Assert.assertNull(destServer.getOnlineRegion(regionName));
414     Assert.assertFalse(TEST_UTIL.getMiniHBaseCluster().getMaster().
415         getAssignmentManager().getRegionStates().isRegionsInTransition());
416 
417     // Moving. It's possible that we don't have all the regions online at this point, so
418     //  the test must depends only on the region we're looking at.
419     LOG.info("Move starting region="+toMove.getRegionInfo().getRegionNameAsString());
420     TEST_UTIL.getHBaseAdmin().move(
421       toMove.getRegionInfo().getEncodedNameAsBytes(),
422       destServerName.getServerName().getBytes()
423     );
424 
425     while (destServer.getOnlineRegion(regionName) == null ||
426         destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
427         curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
428         master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
429       // wait for the move to be finished
430       Thread.sleep(1);
431     }
432 
433     LOG.info("Move finished for region="+toMove.getRegionInfo().getRegionNameAsString());
434 
435     // Check our new state.
436     Assert.assertNull(curServer.getOnlineRegion(regionName));
437     Assert.assertNotNull(destServer.getOnlineRegion(regionName));
438     Assert.assertFalse(destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
439     Assert.assertFalse(curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
440 
441 
442     // Cache was NOT updated and points to the wrong server
443     Assert.assertFalse(
444         conn.getCachedLocation(TABLE_NAME, ROW).getPort() == destServerName.getPort());
445 
446     // This part relies on a number of tries equals to 1.
447     // We do a put and expect the cache to be updated, even if we don't retry
448     LOG.info("Put starting");
449     Put put3 = new Put(ROW);
450     put3.add(FAM_NAM, ROW, ROW);
451     try {
452       table.put(put3);
453       Assert.fail("Unreachable point");
454     }catch (RetriesExhaustedWithDetailsException e){
455       LOG.info("Put done, exception caught: " + e.getClass());
456       Assert.assertEquals(1, e.getNumExceptions());
          // expected value first, so that a failure message reads the right way round
457       Assert.assertArrayEquals(ROW, e.getRow(0).getRow());
458     }
459     Assert.assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
460     Assert.assertEquals(
461       "Previous server was "+curServer.getServerName().getHostAndPort(),
462       destServerName.getPort(), conn.getCachedLocation(TABLE_NAME, ROW).getPort());
463 
464     Assert.assertFalse(destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
465     Assert.assertFalse(curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
466 
467     // We move it back to do another test with a scan
468     LOG.info("Move starting region=" + toMove.getRegionInfo().getRegionNameAsString());
469     TEST_UTIL.getHBaseAdmin().move(
470       toMove.getRegionInfo().getEncodedNameAsBytes(),
471       curServer.getServerName().getServerName().getBytes()
472     );
473 
474     while (curServer.getOnlineRegion(regionName) == null ||
475         destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
476         curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
477         master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
478       // wait for the move to be finished
479       Thread.sleep(1);
480     }
481 
482     // Check our new state.
483     Assert.assertNotNull(curServer.getOnlineRegion(regionName));
484     Assert.assertNull(destServer.getOnlineRegion(regionName));
485     LOG.info("Move finished for region=" + toMove.getRegionInfo().getRegionNameAsString());
486 
487     // Cache was NOT updated and points to the wrong server
488     Assert.assertFalse(conn.getCachedLocation(TABLE_NAME, ROW).getPort() ==
489       curServer.getServerName().getPort());
490 
491     Scan sc = new Scan();
492     sc.setStopRow(ROW);
493     sc.setStartRow(ROW);
494 
495     // The scanner takes the max retries from the connection configuration, not the table as
496     // the put.
        // The shared configuration is modified here, so remember what it held and restore exactly
        // that value even when an assertion below fails.
        final int previousRetries = TEST_UTIL.getConfiguration().getInt(
            HConstants.HBASE_CLIENT_RETRIES_NUMBER, HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
497     TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
498 
        try {
499       try {
500         ResultScanner rs = table.getScanner(sc);
501         while (rs.next() != null) {
502         }
503         Assert.fail("Unreachable point");
504       } catch (RetriesExhaustedException e) {
505         LOG.info("Scan done, expected exception caught: " + e.getClass());
506       }
507 
508       // Cache is updated with the right value.
509       Assert.assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
510       Assert.assertEquals(
511         "Previous server was "+destServer.getServerName().getHostAndPort(),
512         curServer.getServerName().getPort(), conn.getCachedLocation(TABLE_NAME, ROW).getPort());
        } finally {
514       TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
515           previousRetries);
        }
516     table.close();
517   }
518 
519   /**
520    * Test that Connection or Pool are not closed when managed externally
       * <p>
       * Closing a table obtained from a connection created with
       * {@link HConnectionManager#createConnection(Configuration)} must leave both the connection
       * and its pool running; only closing the connection shuts the pool down.
521    * @throws Exception
522    */
523   @Test
524   public void testConnectionManagement() throws Exception{
        // createTable returns a table handle; close it like the other tests in this class do.
525     TEST_UTIL.createTable(TABLE_NAME1, FAM_NAM).close();
526     HConnection conn = HConnectionManager.createConnection(TEST_UTIL.getConfiguration());
        try {
527       HTableInterface table = conn.getTable(TABLE_NAME1.getName());
528       table.close();
529       assertFalse(conn.isClosed());
530       assertFalse(((HTable)table).getPool().isShutdown());
531       table = conn.getTable(TABLE_NAME1.getName());
532       table.close();
533       assertFalse(((HTable)table).getPool().isShutdown());
534       conn.close();
535       assertTrue(((HTable)table).getPool().isShutdown());
        } finally {
          // Only reached with an open connection when an assertion above failed.
          if (!conn.isClosed()) {
            conn.close();
          }
        }
536   }
537 
538   /**
539    * Test that stale cache updates don't override newer cached values.
       * <p>
       * Each step asks the connection to replace the cached location of {@code ROW} with a
       * made-up server on the next port, varying the reporting source and the sequence number,
       * and then checks whether the cache took the update.
       * @throws Exception
540    */
541   @Test(timeout = 60000)
542   public void testCacheSeqNums() throws Exception{
543     HTable table = TEST_UTIL.createTable(TABLE_NAME2, FAM_NAM);
        try {
544       TEST_UTIL.createMultiRegions(table, FAM_NAM);
545       Put put = new Put(ROW);
546       put.add(FAM_NAM, ROW, ROW);
547       table.put(put);
548       HConnectionManager.HConnectionImplementation conn =
549         (HConnectionManager.HConnectionImplementation)table.getConnection();
550 
551       HRegionLocation location = conn.getCachedLocation(TABLE_NAME2, ROW);
552       assertNotNull(location);
553 
          // A source that differs from whatever is cached (same host, port - 1).
554       HRegionLocation anySource = new HRegionLocation(location.getRegionInfo(), new ServerName(
555           location.getHostname(), location.getPort() - 1, 0L));
556 
557       // Same server as already in cache reporting - overwrites any value despite seqNum.
558       int nextPort = location.getPort() + 1;
559       conn.updateCachedLocation(location.getRegionInfo(), location,
560           new ServerName("127.0.0.1", nextPort, 0), location.getSeqNum() - 1);
561       location = conn.getCachedLocation(TABLE_NAME2, ROW);
562       Assert.assertEquals(nextPort, location.getPort());
563 
564       // No source specified - same.
          // NOTE(review): the code passes the cached location as source, exactly like the step
          // above, so "no source" is not what is exercised here - confirm the intended argument.
565       nextPort = location.getPort() + 1;
566       conn.updateCachedLocation(location.getRegionInfo(), location,
567           new ServerName("127.0.0.1", nextPort, 0), location.getSeqNum() - 1);
568       location = conn.getCachedLocation(TABLE_NAME2, ROW);
569       Assert.assertEquals(nextPort, location.getPort());
570 
571       // Higher seqNum - overwrites lower seqNum.
572       nextPort = location.getPort() + 1;
573       conn.updateCachedLocation(location.getRegionInfo(), anySource,
574           new ServerName("127.0.0.1", nextPort, 0), location.getSeqNum() + 1);
575       location = conn.getCachedLocation(TABLE_NAME2, ROW);
576       Assert.assertEquals(nextPort, location.getPort());
577 
578       // Lower seqNum - does not overwrite higher seqNum.
579       nextPort = location.getPort() + 1;
580       conn.updateCachedLocation(location.getRegionInfo(), anySource,
581           new ServerName("127.0.0.1", nextPort, 0), location.getSeqNum() - 1);
582       location = conn.getCachedLocation(TABLE_NAME2, ROW);
583       Assert.assertEquals(nextPort - 1, location.getPort());
        } finally {
          // The table handle was previously never closed.
          table.close();
        }
584   }
585 
586   /**
587    * Make sure that {@link Configuration} instances that are essentially the
588    * same map to the same {@link HConnection} instance.
       * <p>
       * Every reference taken with {@code getConnection} is released once the loop is over.
       * Releasing inside the loop could close the connection between iterations and defeat
       * the check.
       * @throws Exception
589    */
590   @Test
591   public void testConnectionSameness() throws Exception {
        // One entry per getConnection() call; testClosing shows these references are counted.
        List<HConnection> acquired = new ArrayList<HConnection>(2);
592     HConnection previousConnection = null;
        try {
593       for (int i = 0; i < 2; i++) {
594         // set random key to differentiate the connection from previous ones
595         Configuration configuration = TEST_UTIL.getConfiguration();
596         configuration.set("some_key", String.valueOf(_randy.nextInt()));
597         LOG.info("The hash code of the current configuration is: "
598             + configuration.hashCode());
599         HConnection currentConnection = HConnectionManager
600             .getConnection(configuration);
            acquired.add(currentConnection);
601         if (previousConnection != null) {
602           assertTrue(
603               "Did not get the same connection even though its key didn't change",
604               previousConnection == currentConnection);
605         }
606         previousConnection = currentConnection;
607         // change the configuration, so that it is no longer reachable from the
608         // client's perspective. However, since its part of the LRU doubly linked
609         // list, it will eventually get thrown out, at which time it should also
610         // close the corresponding {@link HConnection}.
611         configuration.set("other_key", String.valueOf(_randy.nextInt()));
612       }
        } finally {
          // Give back every reference taken above.
          for (HConnection c : acquired) {
            c.close();
          }
        }
613   }
614 
615   /**
616    * Makes sure that there is no leaking of
617    * {@link HConnectionManager.HConnectionImplementation} in the {@link HConnectionManager}
618    * class.
       * <p>
       * Each iteration uses a configuration with a fresh client instance id and must therefore
       * receive a connection different from the previous one.
       * @throws Exception
619    */
620   @Test
621   public void testConnectionUniqueness() throws Exception {
622     int zkmaxconnections = TEST_UTIL.getConfiguration().
623       getInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS,
624           HConstants.DEFAULT_ZOOKEPER_MAX_CLIENT_CNXNS);
625     // Test up to a max that is < the maximum number of zk connections.  If we
626     // go above zk connections, we just fall into cycle where we are failing
627     // to set up a session and test runs for a long time.
628     int maxConnections = Math.min(zkmaxconnections - 1, 20);
629     List<HConnection> connections = new ArrayList<HConnection>(maxConnections);
630     HConnection previousConnection = null;
631     try {
632       for (int i = 0; i < maxConnections; i++) {
633         // set random key to differentiate the connection from previous ones
634         Configuration configuration = new Configuration(TEST_UTIL.getConfiguration());
635         configuration.set("some_key", String.valueOf(_randy.nextInt()));
636         configuration.set(HConstants.HBASE_CLIENT_INSTANCE_ID,
637             String.valueOf(_randy.nextInt()));
638         LOG.info("The hash code of the current configuration is: "
639             + configuration.hashCode());
640         HConnection currentConnection =
641           HConnectionManager.getConnection(configuration);
            // Register for cleanup straight away, so that a failed assertion or an interrupted
            // sleep below cannot leave this connection behind.
656         connections.add(currentConnection);
642         if (previousConnection != null) {
643           assertTrue("Got the same connection even though its key changed!",
644               previousConnection != currentConnection);
645         }
646         // change the configuration, so that it is no longer reachable from the
647         // client's perspective. However, since its part of the LRU doubly linked
648         // list, it will eventually get thrown out, at which time it should also
649         // close the corresponding {@link HConnection}.
650         configuration.set("other_key", String.valueOf(_randy.nextInt()));
651 
652         previousConnection = currentConnection;
653         LOG.info("The current HConnectionManager#HBASE_INSTANCES cache size is: "
654             + getHConnectionManagerCacheSize());
655         Thread.sleep(50);
657       }
658     } finally {
659       for (HConnection c: connections) {
660         // Clean up connections made so we don't interfere w/ subsequent tests.
661         HConnectionManager.deleteConnection(c.getConfiguration());
662       }
663     }
664   }
665 
666   @Test
667   public void testClosing() throws Exception {
        // Connections from createConnection() close independently of each other, while those
        // from getConnection() share one instance that closes only when its last reference is
        // released.
668     Configuration conf =
669       new Configuration(TEST_UTIL.getConfiguration());
670     conf.set(HConstants.HBASE_CLIENT_INSTANCE_ID,
671         String.valueOf(_randy.nextInt()));
672 
673     HConnection createdFirst = HConnectionManager.createConnection(conf);
674     // A second created connection with the same key.
675     HConnection createdSecond = HConnectionManager.createConnection(conf);
676 
677     HConnection shared = HConnectionManager.getConnection(conf);
678     HConnection sharedAgain = HConnectionManager.getConnection(conf);
679     assertTrue(shared == sharedAgain);
680 
        // Closing one created connection leaves the others untouched.
681     createdFirst.close();
682     assertTrue(createdFirst.isClosed());
683     assertFalse(createdSecond.isClosed());
684     assertFalse(shared.isClosed());
685 
686     shared.close();
687     // one of the two references is still outstanding
688     assertFalse(shared.isClosed());
689     shared.close();
690     assertTrue(shared.isClosed());
691     // the closed instance is gone from the cache, so a new lookup yields another object
692     HConnection sharedAfterClose = HConnectionManager.getConnection(conf);
693     assertTrue(sharedAfterClose != shared);
694 
695     assertFalse(createdSecond.isClosed());
696     createdSecond.close();
697     assertTrue(createdSecond.isClosed());
698     sharedAfterClose.close();
699     assertTrue(sharedAfterClose.isClosed());
700   }
701 
702   /**
703    * Trivial test to verify that nobody messes with
704    * {@link HConnectionManager#createConnection(Configuration)}
       * <p>
       * Two created connections must be distinct objects sharing the configuration they were
       * built from, and neither may be the instance handed out by {@code getConnection}. All
       * three connections are closed when the test ends, whether it passes or fails.
       * @throws Exception
705    */
706   @Test
707   public void testCreateConnection() throws Exception {
708     Configuration configuration = TEST_UTIL.getConfiguration();
709     HConnection c1 = HConnectionManager.createConnection(configuration);
        try {
710       HConnection c2 = HConnectionManager.createConnection(configuration);
          try {
711         // created from the same configuration, yet they are different
712         assertTrue(c1 != c2);
713         assertTrue(c1.getConfiguration() == c2.getConfiguration());
714         // make sure these were not cached
715         HConnection c3 = HConnectionManager.getConnection(configuration);
            try {
716           assertTrue(c1 != c3);
717           assertTrue(c2 != c3);
            } finally {
              // releases the reference taken by getConnection()
              c3.close();
            }
          } finally {
            c2.close();
          }
        } finally {
          c1.close();
        }
718   }
719 
720 
721   /**
722    * This test checks that one can connect to the cluster with only the
723    *  ZooKeeper quorum set. Other stuff like master address will be read
724    *  from ZK by the client.
725    */
726   @Test(timeout = 60000)
727   public void testConnection() throws Exception{
728     // We create an empty config and add the ZK address.
729     Configuration c = new Configuration();
730     c.set(HConstants.ZOOKEEPER_QUORUM,
731       TEST_UTIL.getConfiguration().get(HConstants.ZOOKEEPER_QUORUM));
732     c.set(HConstants.ZOOKEEPER_CLIENT_PORT ,
733       TEST_UTIL.getConfiguration().get(HConstants.ZOOKEEPER_CLIENT_PORT));
734 
735     // This should be enough to connect
736     HConnection conn = HConnectionManager.getConnection(c);
737     assertTrue( conn.isMasterRunning() );
738     conn.close();
739   }
740 
741   private int setNumTries(HConnectionImplementation hci, int newVal) throws Exception {
742     Field numTries = hci.getClass().getDeclaredField("numTries");
743     numTries.setAccessible(true);
744     Field modifiersField = Field.class.getDeclaredField("modifiers");
745     modifiersField.setAccessible(true);
746     modifiersField.setInt(numTries, numTries.getModifiers() & ~Modifier.FINAL);
747     final int prevNumRetriesVal = (Integer)numTries.get(hci);
748     numTries.set(hci, newVal);
749 
750     return prevNumRetriesVal;
751   }
752 
753   @Test
754   public void testMulti() throws Exception {
755     HTable table = TEST_UTIL.createTable(TABLE_NAME3, FAM_NAM);
756     TEST_UTIL.createMultiRegions(table, FAM_NAM);
757     HConnectionManager.HConnectionImplementation conn =
758       (HConnectionManager.HConnectionImplementation)
759         HConnectionManager.getConnection(TEST_UTIL.getConfiguration());
760 
761     // We're now going to move the region and check that it works for the client
762     // First a new put to add the location in the cache
763     conn.clearRegionCache(TABLE_NAME3);
764     Assert.assertEquals(0, conn.getNumberOfCachedRegionLocations(TABLE_NAME3));
765 
766     TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, false);
767     HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
768 
769     // We can wait for all regions to be online, that makes log reading easier when debugging
770     while (master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
771       Thread.sleep(1);
772     }
773 
774     Put put = new Put(ROW_X);
775     put.add(FAM_NAM, ROW_X, ROW_X);
776     table.put(put);
777 
778     // Now moving the region to the second server
779     HRegionLocation toMove = conn.getCachedLocation(TABLE_NAME3, ROW_X);
780     byte[] regionName = toMove.getRegionInfo().getRegionName();
781     byte[] encodedRegionNameBytes = toMove.getRegionInfo().getEncodedNameAsBytes();
782 
783     // Choose the other server.
784     int curServerId = TEST_UTIL.getHBaseCluster().getServerWith(regionName);
785     int destServerId = (curServerId == 0 ? 1 : 0);
786 
787     HRegionServer curServer = TEST_UTIL.getHBaseCluster().getRegionServer(curServerId);
788     HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(destServerId);
789 
790     ServerName destServerName = destServer.getServerName();
791 
792     //find another row in the cur server that is less than ROW_X
793     List<HRegion> regions = curServer.getOnlineRegions(TABLE_NAME3);
794     byte[] otherRow = null;
795     for (HRegion region : regions) {
796       if (!region.getRegionInfo().getEncodedName().equals(toMove.getRegionInfo().getEncodedName())
797           && Bytes.BYTES_COMPARATOR.compare(region.getRegionInfo().getStartKey(), ROW_X) < 0) {
798         otherRow = region.getRegionInfo().getStartKey();
799         break;
800       }
801     }
802     assertNotNull(otherRow);
803     // If empty row, set it to first row.-f
804     if (otherRow.length <= 0) otherRow = Bytes.toBytes("aaa");
805     Put put2 = new Put(otherRow);
806     put2.add(FAM_NAM, otherRow, otherRow);
807     table.put(put2); //cache put2's location
808 
809     // Check that we are in the expected state
810     Assert.assertTrue(curServer != destServer);
811     Assert.assertNotEquals(curServer.getServerName(), destServer.getServerName());
812     Assert.assertNotEquals(toMove.getPort(), destServerName.getPort());
813     Assert.assertNotNull(curServer.getOnlineRegion(regionName));
814     Assert.assertNull(destServer.getOnlineRegion(regionName));
815     Assert.assertFalse(TEST_UTIL.getMiniHBaseCluster().getMaster().
816         getAssignmentManager().getRegionStates().isRegionsInTransition());
817 
818     // Moving. It's possible that we don't have all the regions online at this point, so
819     //  the test must depends only on the region we're looking at.
820     LOG.info("Move starting region="+toMove.getRegionInfo().getRegionNameAsString());
821     TEST_UTIL.getHBaseAdmin().move(
822       toMove.getRegionInfo().getEncodedNameAsBytes(),
823       destServerName.getServerName().getBytes()
824     );
825 
826     while (destServer.getOnlineRegion(regionName) == null ||
827         destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
828         curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes) ||
829         master.getAssignmentManager().getRegionStates().isRegionsInTransition()) {
830       // wait for the move to be finished
831       Thread.sleep(1);
832     }
833 
834     LOG.info("Move finished for region="+toMove.getRegionInfo().getRegionNameAsString());
835 
836     // Check our new state.
837     Assert.assertNull(curServer.getOnlineRegion(regionName));
838     Assert.assertNotNull(destServer.getOnlineRegion(regionName));
839     Assert.assertFalse(destServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
840     Assert.assertFalse(curServer.getRegionsInTransitionInRS().containsKey(encodedRegionNameBytes));
841 
842 
843     // Cache was NOT updated and points to the wrong server
844     Assert.assertFalse(
845         conn.getCachedLocation(TABLE_NAME3, ROW_X).getPort() == destServerName.getPort());
846 
847     // Hijack the number of retry to fail after 2 tries
848     final int prevNumRetriesVal = setNumTries(conn, 2);
849 
850     Put put3 = new Put(ROW_X);
851     put3.add(FAM_NAM, ROW_X, ROW_X);
852     Put put4 = new Put(otherRow);
853     put4.add(FAM_NAM, otherRow, otherRow);
854 
855     // do multi
856     table.batch(Lists.newArrayList(put4, put3)); // first should be a valid row,
857                                                  // second we get RegionMovedException.
858 
859     setNumTries(conn, prevNumRetriesVal);
860     table.close();
861     conn.close();
862   }
863 
864   @Ignore ("Test presumes RETRY_BACKOFF will never change; it has") @Test
865   public void testErrorBackoffTimeCalculation() throws Exception {
866     // TODO: This test would seem to presume hardcoded RETRY_BACKOFF which it should not.
867     final long ANY_PAUSE = 100;
868     HRegionInfo ri = new HRegionInfo(TABLE_NAME);
869     HRegionLocation location = new HRegionLocation(ri, new ServerName("127.0.0.1", 1, 0));
870     HRegionLocation diffLocation = new HRegionLocation(ri, new ServerName("127.0.0.1", 2, 0));
871 
872     ManualEnvironmentEdge timeMachine = new ManualEnvironmentEdge();
873     EnvironmentEdgeManager.injectEdge(timeMachine);
874     try {
875       long timeBase = timeMachine.currentTimeMillis();
876       long largeAmountOfTime = ANY_PAUSE * 1000;
877       HConnectionManager.ServerErrorTracker tracker =
878           new HConnectionManager.ServerErrorTracker(largeAmountOfTime);
879 
880       // The default backoff is 0.
881       assertEquals(0, tracker.calculateBackoffTime(location, ANY_PAUSE));
882 
883       // Check some backoff values from HConstants sequence.
884       tracker.reportServerError(location);
885       assertEqualsWithJitter(ANY_PAUSE, tracker.calculateBackoffTime(location, ANY_PAUSE));
886       tracker.reportServerError(location);
887       tracker.reportServerError(location);
888       tracker.reportServerError(location);
889       assertEqualsWithJitter(ANY_PAUSE * 5, tracker.calculateBackoffTime(location, ANY_PAUSE));
890 
891       // All of this shouldn't affect backoff for different location.
892 
893       assertEquals(0, tracker.calculateBackoffTime(diffLocation, ANY_PAUSE));
894       tracker.reportServerError(diffLocation);
895       assertEqualsWithJitter(ANY_PAUSE, tracker.calculateBackoffTime(diffLocation, ANY_PAUSE));
896 
897       // But should still work for a different region in the same location.
898       HRegionInfo ri2 = new HRegionInfo(TABLE_NAME2);
899       HRegionLocation diffRegion = new HRegionLocation(ri2, location.getServerName());
900       assertEqualsWithJitter(ANY_PAUSE * 5, tracker.calculateBackoffTime(diffRegion, ANY_PAUSE));
901 
902       // Check with different base.
903       assertEqualsWithJitter(ANY_PAUSE * 10,
904           tracker.calculateBackoffTime(location, ANY_PAUSE * 2));
905 
906       // See that time from last error is taken into account. Time shift is applied after jitter,
907       // so pass the original expected backoff as the base for jitter.
908       long timeShift = (long)(ANY_PAUSE * 0.5);
909       timeMachine.setValue(timeBase + timeShift);
910       assertEqualsWithJitter((ANY_PAUSE * 5) - timeShift,
911         tracker.calculateBackoffTime(location, ANY_PAUSE), ANY_PAUSE * 2);
912 
913       // However we should not go into negative.
914       timeMachine.setValue(timeBase + ANY_PAUSE * 100);
915       assertEquals(0, tracker.calculateBackoffTime(location, ANY_PAUSE));
916 
917       // We also should not go over the boundary; last retry would be on it.
918       long timeLeft = (long)(ANY_PAUSE * 0.5);
919       timeMachine.setValue(timeBase + largeAmountOfTime - timeLeft);
920       assertTrue(tracker.canRetryMore());
921       tracker.reportServerError(location);
922       assertEquals(timeLeft, tracker.calculateBackoffTime(location, ANY_PAUSE));
923       timeMachine.setValue(timeBase + largeAmountOfTime);
924       assertFalse(tracker.canRetryMore());
925     } finally {
926       EnvironmentEdgeManager.reset();
927     }
928   }
929 
930   private static void assertEqualsWithJitter(long expected, long actual) {
931     assertEqualsWithJitter(expected, actual, expected);
932   }
933 
934   private static void assertEqualsWithJitter(long expected, long actual, long jitterBase) {
935     assertTrue("Value not within jitter: " + expected + " vs " + actual,
936         Math.abs(actual - expected) <= (0.01f * jitterBase));
937   }
938 
939   /**
940    * Tests that a destroyed connection does not have a live zookeeper.
941    * Below is timing based.  We put up a connection to a table and then close the connection while
942    * having a background thread running that is forcing close of the connection to try and
943    * provoke a close catastrophe; we are hoping for a car crash so we can see if we are leaking
944    * zk connections.
945    * @throws Exception
946    */
947   @Ignore ("Flakey test: See HBASE-8996")@Test
948   public void testDeleteForZKConnLeak() throws Exception {
949     TEST_UTIL.createTable(TABLE_NAME4, FAM_NAM);
950     final Configuration config = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
951     config.setInt("zookeeper.recovery.retry", 1);
952     config.setInt("zookeeper.recovery.retry.intervalmill", 1000);
953     config.setInt("hbase.rpc.timeout", 2000);
954     config.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
955 
956     ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 10,
957       5, TimeUnit.SECONDS,
958       new SynchronousQueue<Runnable>(),
959       Threads.newDaemonThreadFactory("test-hcm-delete"));
960 
961     pool.submit(new Runnable() {
962       @Override
963       public void run() {
964         while (!Thread.interrupted()) {
965           try {
966             HConnection conn = HConnectionManager.getConnection(config);
967             LOG.info("Connection " + conn);
968             HConnectionManager.deleteStaleConnection(conn);
969             LOG.info("Connection closed " + conn);
970             // TODO: This sleep time should be less than the time that it takes to open and close
971             // a table.  Ideally we would do a few runs first to measure.  For now this is
972             // timing based; hopefully we hit the bad condition.
973             Threads.sleep(10);
974           } catch (Exception e) {
975           }
976         }
977       }
978     });
979 
980     // Use connection multiple times.
981     for (int i = 0; i < 30; i++) {
982       HConnection c1 = null;
983       try {
984         c1 = HConnectionManager.getConnection(config);
985         LOG.info("HTable connection " + i + " " + c1);
986         HTable table = new HTable(TABLE_NAME4, c1, pool);
987         table.close();
988         LOG.info("HTable connection " + i + " closed " + c1);
989       } catch (Exception e) {
990         LOG.info("We actually want this to happen!!!!  So we can see if we are leaking zk", e);
991       } finally {
992         if (c1 != null) {
993           if (c1.isClosed()) {
994             // cannot use getZooKeeper as method instantiates watcher if null
995             Field zkwField = c1.getClass().getDeclaredField("keepAliveZookeeper");
996             zkwField.setAccessible(true);
997             Object watcher = zkwField.get(c1);
998 
999             if (watcher != null) {
1000               if (((ZooKeeperWatcher)watcher).getRecoverableZooKeeper().getState().isAlive()) {
1001                 // non-synchronized access to watcher; sleep and check again in case zk connection
1002                 // hasn't been cleaned up yet.
1003                 Thread.sleep(1000);
1004                 if (((ZooKeeperWatcher) watcher).getRecoverableZooKeeper().getState().isAlive()) {
1005                   pool.shutdownNow();
1006                   fail("Live zookeeper in closed connection");
1007                 }
1008               }
1009             }
1010           }
1011           c1.close();
1012         }
1013       }
1014     }
1015     pool.shutdownNow();
1016   }
1017 }
1018