/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.EmptyWatcher;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKConfig;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.ZooKeeper.States;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category(LargeTests.class)
public class TestZooKeeper {
  private final Log LOG = LogFactory.getLog(this.getClass());

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  /**
   * @throws java.lang.Exception
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // Test that we can first start the ZK cluster by itself
    Configuration conf = TEST_UTIL.getConfiguration();
    TEST_UTIL.startMiniDFSCluster(2);
    TEST_UTIL.startMiniZKCluster();
    conf.setBoolean("dfs.support.append", true);
    // Keep the ZK session timeout short so the session-expiry tests below run quickly.
    conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000);
    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockLoadBalancer.class,
        LoadBalancer.class);
  }

  /**
   * @throws java.lang.Exception
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * @throws java.lang.Exception
   */
  @Before
  public void setUp() throws Exception {
    TEST_UTIL.startMiniHBaseCluster(1, 2);
  }
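
  /**
   * Shuts down the mini HBase cluster and then wipes the HBase root directory and the /hbase
   * znodes, so every test starts from a clean filesystem and a clean ZooKeeper namespace.
   */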
  @After
  public void after() throws Exception {
    try {
      TEST_UTIL.shutdownMiniHBaseCluster();
    } finally {
      TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
      ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
    }
  }
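
  /**
   * Returns the keep-alive ZooKeeperWatcher held by the given connection. The getter is not part
   * of the public HConnection interface, so it is reached via reflection here.
   */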
  private ZooKeeperWatcher getZooKeeperWatcher(HConnection c)
      throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
    Method getterZK = c.getClass().getDeclaredMethod("getKeepAliveZooKeeperWatcher");
    getterZK.setAccessible(true);
    return (ZooKeeperWatcher) getterZK.invoke(c);
  }

  /**
   * See HBASE-1232 and http://wiki.apache.org/hadoop/ZooKeeper/FAQ#4.
   * @throws IOException
   * @throws InterruptedException
   */
  // fails frequently, disabled for now, see HBASE-6406
  //@Test
  public void testClientSessionExpired() throws Exception {
    Configuration c = new Configuration(TEST_UTIL.getConfiguration());

    // We don't want to share the connection as we will check its state
    c.set(HConstants.HBASE_CLIENT_INSTANCE_ID, "1111");

    HConnection connection = HConnectionManager.getConnection(c);

    ZooKeeperWatcher connectionZK = getZooKeeperWatcher(connection);
    LOG.info("ZooKeeperWatcher= 0x" + Integer.toHexString(connectionZK.hashCode()));
    LOG.info("getRecoverableZooKeeper= 0x" + Integer.toHexString(
      connectionZK.getRecoverableZooKeeper().hashCode()));
    LOG.info("session=" + Long.toHexString(
      connectionZK.getRecoverableZooKeeper().getSessionId()));

    TEST_UTIL.expireSession(connectionZK);

    LOG.info("Before using zkw state=" + connectionZK.getRecoverableZooKeeper().getState());
    // Provoke session expiration by doing something with ZK
    try {
      connectionZK.getRecoverableZooKeeper().getZooKeeper().exists("/1/1", false);
    } catch (KeeperException ignored) {
    }

    // Check that the old ZK connection is closed, which means we did expire
    States state = connectionZK.getRecoverableZooKeeper().getState();
    LOG.info("After using zkw state=" + state);
    LOG.info("session=" + Long.toHexString(
      connectionZK.getRecoverableZooKeeper().getSessionId()));

    // It's asynchronous, so we may have to wait a little...
    final long limit1 = System.currentTimeMillis() + 3000;
    while (System.currentTimeMillis() < limit1 && state != States.CLOSED) {
      state = connectionZK.getRecoverableZooKeeper().getState();
    }
    LOG.info("After using zkw loop=" + state);
    LOG.info("ZooKeeper should have timed out");
    LOG.info("session=" + Long.toHexString(
      connectionZK.getRecoverableZooKeeper().getSessionId()));

    // It's surprising, but sometimes we can still be in connected state.
    // As this is known (even if not understood), we don't make the test fail
    // for this reason.
    // Assert.assertTrue("state=" + state, state == States.CLOSED);

    // Check that the client recovered
    ZooKeeperWatcher newConnectionZK = getZooKeeperWatcher(connection);

    States state2 = newConnectionZK.getRecoverableZooKeeper().getState();
    LOG.info("After new get state=" + state2);

    // As it's an asynchronous event we may get the same ZKW if it's not
    // yet invalidated. Hence this loop.
    final long limit2 = System.currentTimeMillis() + 3000;
    while (System.currentTimeMillis() < limit2 &&
        state2 != States.CONNECTED && state2 != States.CONNECTING) {
      newConnectionZK = getZooKeeperWatcher(connection);
      state2 = newConnectionZK.getRecoverableZooKeeper().getState();
    }
    LOG.info("After new get state loop=" + state2);

    Assert.assertTrue(state2 == States.CONNECTED || state2 == States.CONNECTING);

    connection.close();
  }
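
  /**
   * Expires the ZooKeeper session of the region server hosting meta, then verifies via
   * {@link #testSanity(String)} that the cluster can still create a table and serve writes.
   */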
  @Test (timeout = 60000)
  public void testRegionServerSessionExpired() throws Exception {
    LOG.info("Starting testRegionServerSessionExpired");
    int metaIndex = TEST_UTIL.getMiniHBaseCluster().getServerWithMeta();
    TEST_UTIL.expireRegionServerSession(metaIndex);
    testSanity("testRegionServerSessionExpired");
  }

  // @Test Disabled because it seems to make no sense to expire the master session
  // and then try to create a table (down in testSanity); on the master side
  // it will fail because the master's session has expired -- St.Ack 07/24/2012
  public void testMasterSessionExpired() throws Exception {
    LOG.info("Starting testMasterSessionExpired");
    TEST_UTIL.expireMasterSession();
    testSanity("testMasterSessionExpired");
  }

  /**
   * Master recovery when the znode already exists. Internally, this
   * test differs from {@link #testMasterSessionExpired} because here
   * the master znode will exist in ZK.
   */
  @Test(timeout = 60000)
  public void testMasterZKSessionRecoveryFailure() throws Exception {
    LOG.info("Starting testMasterZKSessionRecoveryFailure");
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    HMaster m = cluster.getMaster();
    m.abort("Test recovery from zk session expired",
      new KeeperException.SessionExpiredException());
    assertFalse(m.isStopped());
    testSanity("testMasterZKSessionRecoveryFailure");
  }

  /**
   * Make sure we can use the cluster.
   * @throws Exception
   */
  private void testSanity(final String testName) throws Exception {
    String tableName = testName + "_" + System.currentTimeMillis();
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
    HColumnDescriptor family = new HColumnDescriptor("fam");
    desc.addFamily(family);
    LOG.info("Creating table " + tableName);
    HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    try {
      admin.createTable(desc);
    } finally {
      admin.close();
    }

    HTable table =
      new HTable(new Configuration(TEST_UTIL.getConfiguration()), tableName);
    Put put = new Put(Bytes.toBytes("testrow"));
    put.add(Bytes.toBytes("fam"),
        Bytes.toBytes("col"), Bytes.toBytes("testdata"));
    LOG.info("Putting into table " + tableName);
    table.put(put);
    table.close();
  }
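
  /**
   * Verifies that two connections configured with different ZooKeeper quorum strings end up with
   * distinct ZooKeeperWatcher instances pointing at distinct quorums.
   */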
  @Test
  public void testMultipleZK()
      throws IOException, NoSuchMethodException, InvocationTargetException, IllegalAccessException {
    HTable localMeta =
      new HTable(new Configuration(TEST_UTIL.getConfiguration()), TableName.META_TABLE_NAME);
    Configuration otherConf = new Configuration(TEST_UTIL.getConfiguration());
    otherConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1");
    HTable ipMeta = new HTable(otherConf, TableName.META_TABLE_NAME);

    // dummy reads, just to open the connections
    final byte [] row = new byte [] {'r'};
    localMeta.exists(new Get(row));
    ipMeta.exists(new Get(row));

    // make sure they aren't the same
    ZooKeeperWatcher z1 =
      getZooKeeperWatcher(HConnectionManager.getConnection(localMeta.getConfiguration()));
    ZooKeeperWatcher z2 =
      getZooKeeperWatcher(HConnectionManager.getConnection(otherConf));
    assertFalse(z1 == z2);
    assertFalse(z1.getQuorum().equals(z2.getQuorum()));

    localMeta.close();
    ipMeta.close();
  }

  /**
   * Create a znode with data.
   * @throws Exception
   */
  @Test
  public void testCreateWithParents() throws Exception {
    ZooKeeperWatcher zkw =
        new ZooKeeperWatcher(new Configuration(TEST_UTIL.getConfiguration()),
            TestZooKeeper.class.getName(), null);
    byte[] expectedData = new byte[] { 1, 2, 3 };
    ZKUtil.createWithParents(zkw, "/l1/l2/l3/l4/testCreateWithParents", expectedData);
    byte[] data = ZKUtil.getData(zkw, "/l1/l2/l3/l4/testCreateWithParents");
    assertTrue(Bytes.equals(expectedData, data));
    ZKUtil.deleteNodeRecursively(zkw, "/l1");

    ZKUtil.createWithParents(zkw, "/testCreateWithParents", expectedData);
    data = ZKUtil.getData(zkw, "/testCreateWithParents");
    assertTrue(Bytes.equals(expectedData, data));
    ZKUtil.deleteNodeRecursively(zkw, "/testCreateWithParents");
  }

  /**
   * Create a bunch of znodes in a hierarchy, try deleting one that has children (it will fail),
   * then delete it recursively, then delete the last znode.
   * @throws Exception
   */
  @Test
  public void testZNodeDeletes() throws Exception {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(
      new Configuration(TEST_UTIL.getConfiguration()),
      TestZooKeeper.class.getName(), null);
    ZKUtil.createWithParents(zkw, "/l1/l2/l3/l4");
    try {
      ZKUtil.deleteNode(zkw, "/l1/l2");
      fail("We should not be able to delete a znode that has children");
    } catch (KeeperException ex) {
      assertNotNull(ZKUtil.getDataNoWatch(zkw, "/l1/l2/l3/l4", null));
    }
    ZKUtil.deleteNodeRecursively(zkw, "/l1/l2");
    // make sure it really is deleted
    assertNull(ZKUtil.getDataNoWatch(zkw, "/l1/l2/l3/l4", null));

    // do the same delete again and make sure it doesn't crash
    ZKUtil.deleteNodeRecursively(zkw, "/l1/l2");

    ZKUtil.deleteNode(zkw, "/l1");
    assertNull(ZKUtil.getDataNoWatch(zkw, "/l1/l2", null));
  }

  /**
   * A test for HBASE-3238.
   * @throws IOException A connection attempt to ZK failed
   * @throws InterruptedException One of the non-ZKUtil actions was interrupted
   * @throws KeeperException Any of the ZooKeeper connections had a KeeperException
   */
  @Test
  public void testCreateSilentIsReallySilent() throws InterruptedException,
      KeeperException, IOException {
    Configuration c = TEST_UTIL.getConfiguration();

    String aclZnode = "/aclRoot";
    String quorumServers = ZKConfig.getZKQuorumServersString(c);
    int sessionTimeout = 5 * 1000; // 5 seconds
    ZooKeeper zk = new ZooKeeper(quorumServers, sessionTimeout, EmptyWatcher.instance);
    zk.addAuthInfo("digest", "hbase:rox".getBytes());

    // Assumes the root of the ZooKeeper space is writable as it creates a node
    // wherever the cluster home is defined.
    ZooKeeperWatcher zk2 = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
      "testCreateSilentIsReallySilent", null);

    // Save the previous ACL
    Stat s = null;
    List<ACL> oldACL = null;
    while (true) {
      try {
        s = new Stat();
        oldACL = zk.getACL("/", s);
        break;
      } catch (KeeperException e) {
        switch (e.code()) {
          case CONNECTIONLOSS:
          case SESSIONEXPIRED:
          case OPERATIONTIMEOUT:
            LOG.warn("Possibly transient ZooKeeper exception", e);
            Threads.sleep(100);
            break;
          default:
            throw e;
        }
      }
    }

    // Set this ACL after the attempted creation of the cluster home node.
    // Add retries in case of retryable zk exceptions.
    while (true) {
      try {
        zk.setACL("/", ZooDefs.Ids.CREATOR_ALL_ACL, -1);
        break;
      } catch (KeeperException e) {
        switch (e.code()) {
          case CONNECTIONLOSS:
          case SESSIONEXPIRED:
          case OPERATIONTIMEOUT:
            LOG.warn("Possibly transient ZooKeeper exception: " + e);
            Threads.sleep(100);
            break;
          default:
            throw e;
        }
      }
    }

    while (true) {
      try {
        zk.create(aclZnode, null, ZooDefs.Ids.CREATOR_ALL_ACL, CreateMode.PERSISTENT);
        break;
      } catch (KeeperException e) {
        switch (e.code()) {
          case CONNECTIONLOSS:
          case SESSIONEXPIRED:
          case OPERATIONTIMEOUT:
            LOG.warn("Possibly transient ZooKeeper exception: " + e);
            Threads.sleep(100);
            break;
          default:
            throw e;
        }
      }
    }
    zk.close();
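
    // The actual check of this test: /aclRoot already exists and is owned by the "hbase:rox"
    // digest user, so createAndFailSilent from the unauthenticated watcher must still return
    // without throwing, despite the restrictive ACL on the existing znode.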
    ZKUtil.createAndFailSilent(zk2, aclZnode);

    // Restore the ACL
    ZooKeeper zk3 = new ZooKeeper(quorumServers, sessionTimeout, EmptyWatcher.instance);
    zk3.addAuthInfo("digest", "hbase:rox".getBytes());
    try {
      zk3.setACL("/", oldACL, -1);
    } finally {
      zk3.close();
    }
  }

  /**
   * Test that getChildDataAndWatchForNewChildren does not throw an NPE when it is
   * invoked on a nonexistent znode.
   */
  @Test
  @SuppressWarnings("deprecation")
  public void testGetChildDataAndWatchForNewChildrenShouldNotThrowNPE()
      throws Exception {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
        "testGetChildDataAndWatchForNewChildrenShouldNotThrowNPE", null);
    ZKUtil.getChildDataAndWatchForNewChildren(zkw, "/wrongNode");
  }

  /**
   * Tests that the master does not call retainAssignment after recovery from an expired ZooKeeper
   * session. Without the HBASE-6046 fix, the master always tries to assign all the user regions by
   * calling retainAssignment.
   */
  @Test
  public void testRegionAssignmentAfterMasterRecoveryDueToZKExpiry() throws Exception {
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    cluster.startRegionServer();
    cluster.waitForActiveAndReadyMaster(10000);
    HMaster m = cluster.getMaster();
    ZooKeeperWatcher zkw = m.getZooKeeperWatcher();
    int expectedNumOfListeners = zkw.getNumberOfListeners();
    // Now the cluster is up, so assign some regions.
    HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    try {
      byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"),
        Bytes.toBytes("c"), Bytes.toBytes("d"), Bytes.toBytes("e"), Bytes.toBytes("f"),
        Bytes.toBytes("g"), Bytes.toBytes("h"), Bytes.toBytes("i"), Bytes.toBytes("j") };
      String tableName = "testRegionAssignmentAfterMasterRecoveryDueToZKExpiry";
      HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
      htd.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
      admin.createTable(htd, SPLIT_KEYS);
      ZooKeeperWatcher zooKeeperWatcher = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
      ZKAssign.blockUntilNoRIT(zooKeeperWatcher);
      // Simulate an expired session by closing the master's ZK watcher before aborting.
      m.getZooKeeperWatcher().close();
      MockLoadBalancer.retainAssignCalled = false;
      m.abort("Test recovery from zk session expired",
        new KeeperException.SessionExpiredException());
      assertFalse(m.isStopped());
      // The recovered master should not call retainAssignment, as it is not a
      // clean startup.
      assertFalse("Retain assignment should not be called", MockLoadBalancer.retainAssignCalled);
      // The number of listeners should be the same as before the master aborted;
      // wait for the new master to be initialized.
      cluster.waitForActiveAndReadyMaster(10000);
      assertEquals(expectedNumOfListeners, zkw.getNumberOfListeners());
    } finally {
      admin.close();
    }
  }

  /**
   * Tests whether the logs are split when the master recovers from an expired ZooKeeper session
   * and a region server goes down.
   */
  @Test(timeout = 240000)
  public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws IOException,
      KeeperException, InterruptedException {
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    cluster.startRegionServer();
    HMaster m = cluster.getMaster();
    // Now the cluster is up, so assign some regions.
    HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    HTable table = null;
    try {
      byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("1"), Bytes.toBytes("2"),
        Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") };

      String tableName = "testLogSplittingAfterMasterRecoveryDueToZKExpiry";
      HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
      HColumnDescriptor hcd = new HColumnDescriptor("col");
      htd.addFamily(hcd);
      admin.createTable(htd, SPLIT_KEYS);
      ZooKeeperWatcher zooKeeperWatcher = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
      ZKAssign.blockUntilNoRIT(zooKeeperWatcher);
      table = new HTable(TEST_UTIL.getConfiguration(), tableName);
      Put p;
      int numberOfPuts;
      for (numberOfPuts = 0; numberOfPuts < 6; numberOfPuts++) {
        p = new Put(Bytes.toBytes(numberOfPuts));
        p.add(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("value" + numberOfPuts));
        table.put(p);
      }
      m.getZooKeeperWatcher().close();
      m.abort("Test recovery from zk session expired",
        new KeeperException.SessionExpiredException());
      assertFalse(m.isStopped());
      cluster.getRegionServer(0).abort("Aborting");
      // Without the patch for HBASE-6046 this test case will always time out;
      // with the patch the test case should pass.
      Scan scan = new Scan();
      int numberOfRows = 0;
      ResultScanner scanner = table.getScanner(scan);
      Result[] result = scanner.next(1);
      while (result != null && result.length > 0) {
        numberOfRows++;
        result = scanner.next(1);
      }
      assertEquals("Number of rows should be equal to number of puts.", numberOfPuts,
        numberOfRows);
    } finally {
      if (table != null) table.close();
      admin.close();
    }
  }
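
  /**
   * Load balancer stub that records whether retainAssignment was invoked, letting the tests
   * above assert that a master recovering from ZK session expiry does not go through the
   * clean-startup retainAssignment path.
   */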
  static class MockLoadBalancer extends SimpleLoadBalancer {
    static boolean retainAssignCalled = false;

    @Override
    public Map<ServerName, List<HRegionInfo>> retainAssignment(
        Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
      retainAssignCalled = true;
      return super.retainAssignment(regions, servers);
    }
  }

}