View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.hbase.HBaseTestingUtility;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.HTableDescriptor;
28  import org.apache.hadoop.hbase.testclassification.MediumTests;
29  import org.apache.hadoop.hbase.NotServingRegionException;
30  import org.apache.hadoop.hbase.ServerName;
31  import org.apache.hadoop.hbase.MetaTableAccessor;
32  import org.apache.hadoop.hbase.TableName;
33  import org.apache.hadoop.hbase.client.HTable;
34  import org.apache.hadoop.hbase.client.Put;
35  import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
36  import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
37  import org.apache.hadoop.hbase.coordination.ZkOpenRegionCoordination;
38  import org.apache.hadoop.hbase.executor.EventType;
39  import org.apache.hadoop.hbase.protobuf.RequestConverter;
40  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
41  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionRequest;
42  import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler;
43  import org.apache.hadoop.hbase.util.Threads;
44  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
45  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
46  import org.apache.zookeeper.KeeperException;
47  import org.apache.zookeeper.KeeperException.NodeExistsException;
48  import org.junit.After;
49  import org.junit.AfterClass;
50  import org.junit.Assert;
51  import org.junit.BeforeClass;
52  import org.junit.Test;
53  import org.junit.experimental.categories.Category;
54  import org.mortbay.log.Log;
55  
56  import com.google.protobuf.ServiceException;
57  
58  
59  /**
60   * Tests on the region server, without the master.
61   */
62  @Category(MediumTests.class)
63  public class TestRegionServerNoMaster {
64  
65    private static final int NB_SERVERS = 1;
66    private static HTable table;
67    private static final byte[] row = "ee".getBytes();
68  
69    private static HRegionInfo hri;
70  
71    private static byte[] regionName;
72    private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
73  
74  
75    @BeforeClass
76    public static void before() throws Exception {
77      HTU.getConfiguration().setBoolean("hbase.assignment.usezk", true);
78      HTU.startMiniCluster(NB_SERVERS);
79      final TableName tableName = TableName.valueOf(TestRegionServerNoMaster.class.getSimpleName());
80  
81      // Create table then get the single region for our new table.
82      table = HTU.createTable(tableName,HConstants.CATALOG_FAMILY);
83      Put p = new Put(row);
84      p.add(HConstants.CATALOG_FAMILY, row, row);
85      table.put(p);
86  
87      hri = table.getRegionLocation(row, false).getRegionInfo();
88      regionName = hri.getRegionName();
89  
90      stopMasterAndAssignMeta(HTU);
91    }
92  
93    public static void stopMasterAndAssignMeta(HBaseTestingUtility HTU)
94        throws NodeExistsException, KeeperException, IOException, InterruptedException {
95      // No master
96      HTU.getHBaseCluster().getMaster().stopMaster();
97  
98      Log.info("Waiting until master thread exits");
99      while (HTU.getHBaseCluster().getMasterThread() != null
100         && HTU.getHBaseCluster().getMasterThread().isAlive()) {
101       Threads.sleep(100);
102     }
103   }
104 
105   /** Flush the given region in the mini cluster. Since no master, we cannot use HBaseAdmin.flush() */
106   public static void flushRegion(HBaseTestingUtility HTU, HRegionInfo regionInfo) throws IOException {
107     for (RegionServerThread rst : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
108       HRegion region = rst.getRegionServer().getRegionByEncodedName(regionInfo.getEncodedName());
109       if (region != null) {
110         region.flushcache();
111         return;
112       }
113     }
114     throw new IOException("Region to flush cannot be found");
115   }
116 
117   @AfterClass
118   public static void afterClass() throws Exception {
119     table.close();
120     HTU.shutdownMiniCluster();
121   }
122 
123   @After
124   public void after() throws Exception {
125     // Clean the state if the test failed before cleaning the znode
126     // It does not manage all bad failures, so if there are multiple failures, only
127     //  the first one should be looked at.
128     ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hri);
129   }
130 
131 
132   private static HRegionServer getRS() {
133     return HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
134   }
135 
136 
137   /**
138    * Reopen the region. Reused in multiple tests as we always leave the region open after a test.
139    */
140   private void reopenRegion() throws Exception {
141     // We reopen. We need a ZK node here, as a open is always triggered by a master.
142     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
143     // first version is '0'
144     AdminProtos.OpenRegionRequest orr =
145       RequestConverter.buildOpenRegionRequest(getRS().getServerName(), hri, 0, null, null);
146     AdminProtos.OpenRegionResponse responseOpen = getRS().rpcServices.openRegion(null, orr);
147     Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
148     Assert.assertTrue(responseOpen.getOpeningState(0).
149         equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED));
150 
151 
152     checkRegionIsOpened();
153   }
154 
155   private void checkRegionIsOpened() throws Exception {
156 
157     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
158       Thread.sleep(1);
159     }
160 
161     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
162 
163     Assert.assertTrue(
164       ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
165         getRS().getServerName()));
166   }
167 
168 
169   private void checkRegionIsClosed() throws Exception {
170 
171     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
172       Thread.sleep(1);
173     }
174 
175     try {
176       Assert.assertFalse(getRS().getRegion(regionName).isAvailable());
177     } catch (NotServingRegionException expected) {
178       // That's how it work: if the region is closed we have an exception.
179     }
180 
181     // We don't delete the znode here, because there is not always a znode.
182   }
183 
184 
185   /**
186    * Close the region without using ZK
187    */
188   private void closeNoZK() throws Exception {
189     // no transition in ZK
190     AdminProtos.CloseRegionRequest crr =
191         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
192     AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
193     Assert.assertTrue(responseClose.getClosed());
194 
195     // now waiting & checking. After a while, the transition should be done and the region closed
196     checkRegionIsClosed();
197   }
198 
199 
200   @Test(timeout = 60000)
201   public void testCloseByRegionServer() throws Exception {
202     closeNoZK();
203     reopenRegion();
204   }
205 
206   @Test(timeout = 60000)
207   public void testCloseByMasterWithoutZNode() throws Exception {
208 
209     // Transition in ZK on. This should fail, as there is no znode
210     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
211       getRS().getServerName(), regionName, true);
212     AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
213     Assert.assertTrue(responseClose.getClosed());
214 
215     // now waiting. After a while, the transition should be done
216     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
217       Thread.sleep(1);
218     }
219 
220     // the region is still available, the close got rejected at the end
221     Assert.assertTrue("The close should have failed", getRS().getRegion(regionName).isAvailable());
222   }
223 
224   @Test(timeout = 60000)
225   public void testOpenCloseByMasterWithZNode() throws Exception {
226 
227     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
228 
229     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
230       getRS().getServerName(), regionName, true);
231     AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
232     Assert.assertTrue(responseClose.getClosed());
233 
234     checkRegionIsClosed();
235 
236     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
237       getRS().getServerName());
238 
239     reopenRegion();
240   }
241 
242   /**
243    * Test that we can send multiple openRegion to the region server.
244    * This is used when:
245    * - there is a SocketTimeout: in this case, the master does not know if the region server
246    * received the request before the timeout.
247    * - We have a socket error during the operation: same stuff: we don't know
248    * - a master failover: if we find a znode in thz M_ZK_REGION_OFFLINE, we don't know if
249    * the region server has received the query or not. Only solution to be efficient: re-ask
250    * immediately.
251    */
252   @Test(timeout = 60000)
253   public void testMultipleOpen() throws Exception {
254 
255     // We close
256     closeNoZK();
257     checkRegionIsClosed();
258 
259     // We reopen. We need a ZK node here, as a open is always triggered by a master.
260     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
261 
262     // We're sending multiple requests in a row. The region server must handle this nicely.
263     for (int i = 0; i < 10; i++) {
264       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
265         getRS().getServerName(), hri, 0, null, null);
266       AdminProtos.OpenRegionResponse responseOpen = getRS().rpcServices.openRegion(null, orr);
267       Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
268 
269       AdminProtos.OpenRegionResponse.RegionOpeningState ors = responseOpen.getOpeningState(0);
270       Assert.assertTrue("request " + i + " failed",
271           ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED) ||
272               ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.ALREADY_OPENED)
273       );
274     }
275 
276     checkRegionIsOpened();
277   }
278 
279   @Test
280   public void testOpenClosingRegion() throws Exception {
281     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
282 
283     try {
284       // we re-opened meta so some of its data is lost
285       ServerName sn = getRS().getServerName();
286       MetaTableAccessor.updateRegionLocation(getRS().getConnection(),
287         hri, sn, getRS().getRegion(regionName).getOpenSeqNum());
288       // fake region to be closing now, need to clear state afterwards
289       getRS().regionsInTransitionInRS.put(hri.getEncodedNameAsBytes(), Boolean.FALSE);
290       AdminProtos.OpenRegionRequest orr =
291         RequestConverter.buildOpenRegionRequest(sn, hri, 0, null, null);
292       getRS().rpcServices.openRegion(null, orr);
293       Assert.fail("The closing region should not be opened");
294     } catch (ServiceException se) {
295       Assert.assertTrue("The region should be already in transition",
296         se.getCause() instanceof RegionAlreadyInTransitionException);
297     } finally {
298       getRS().regionsInTransitionInRS.remove(hri.getEncodedNameAsBytes());
299     }
300   }
301 
302   @Test(timeout = 60000)
303   public void testMultipleCloseFromMaster() throws Exception {
304 
305     // As opening, we must support multiple requests on the same region
306     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
307     for (int i = 0; i < 10; i++) {
308       AdminProtos.CloseRegionRequest crr =
309           RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, 0, null, true);
310       try {
311         AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
312         Assert.assertEquals("The first request should succeeds", 0, i);
313         Assert.assertTrue("request " + i + " failed",
314             responseClose.getClosed() || responseClose.hasClosed());
315       } catch (ServiceException se) {
316         Assert.assertTrue("The next queries should throw an exception.", i > 0);
317       }
318     }
319 
320     checkRegionIsClosed();
321 
322     Assert.assertTrue(
323       ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
324         getRS().getServerName())
325     );
326 
327     reopenRegion();
328   }
329 
330   /**
331    * Test that if we do a close while opening it stops the opening.
332    */
333   @Test(timeout = 60000)
334   public void testCancelOpeningWithoutZK() throws Exception {
335     // We close
336     closeNoZK();
337     checkRegionIsClosed();
338 
339     // Let do the initial steps, without having a handler
340     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
341     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
342 
343     // That's a close without ZK.
344     AdminProtos.CloseRegionRequest crr =
345         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
346     try {
347       getRS().rpcServices.closeRegion(null, crr);
348       Assert.assertTrue(false);
349     } catch (ServiceException expected) {
350     }
351 
352     // The state in RIT should have changed to close
353     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
354         hri.getEncodedNameAsBytes()));
355 
356     // Let's start the open handler
357     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
358 
359     BaseCoordinatedStateManager csm = new ZkCoordinatedStateManager();
360     csm.initialize(getRS());
361     csm.start();
362 
363     ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
364       new ZkOpenRegionCoordination.ZkOpenRegionDetails();
365     zkCrd.setServerName(getRS().getServerName());
366     zkCrd.setVersionOfOfflineNode(0);
367 
368     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd,
369       csm.getOpenRegionCoordination(), zkCrd));
370 
371     // The open handler should have removed the region from RIT but kept the region closed
372     checkRegionIsClosed();
373 
374     // The open handler should have updated the value in ZK.
375     Assert.assertTrue(ZKAssign.deleteNode(
376         getRS().getZooKeeper(), hri.getEncodedName(),
377         EventType.RS_ZK_REGION_FAILED_OPEN, 1)
378     );
379 
380     reopenRegion();
381   }
382 
383   /**
384    * Test an open then a close with ZK. This is going to mess-up the ZK states, so
385    * the opening will fail as well because it doesn't find what it expects in ZK.
386    */
387   @Test(timeout = 60000)
388   public void testCancelOpeningWithZK() throws Exception {
389     // We close
390     closeNoZK();
391     checkRegionIsClosed();
392 
393     // Let do the initial steps, without having a handler
394     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
395 
396     // That's a close without ZK.
397     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
398     AdminProtos.CloseRegionRequest crr =
399         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
400     try {
401       getRS().rpcServices.closeRegion(null, crr);
402       Assert.assertTrue(false);
403     } catch (ServiceException expected) {
404       Assert.assertTrue(expected.getCause() instanceof RegionAlreadyInTransitionException);
405     }
406 
407     // The close should have left the ZK state as it is: it's the job the AM to delete it
408     Assert.assertTrue(ZKAssign.deleteNode(
409         getRS().getZooKeeper(), hri.getEncodedName(),
410         EventType.M_ZK_REGION_CLOSING, 0)
411     );
412 
413     // The state in RIT should have changed to close
414     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
415         hri.getEncodedNameAsBytes()));
416 
417     // Let's start the open handler
418     // It should not succeed for two reasons:
419     //  1) There is no ZK node
420     //  2) The region in RIT was changed.
421     // The order is more or less implementation dependant.
422     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
423 
424     BaseCoordinatedStateManager csm = new ZkCoordinatedStateManager();
425     csm.initialize(getRS());
426     csm.start();
427 
428     ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
429       new ZkOpenRegionCoordination.ZkOpenRegionDetails();
430     zkCrd.setServerName(getRS().getServerName());
431     zkCrd.setVersionOfOfflineNode(0);
432 
433     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd,
434       csm.getOpenRegionCoordination(), zkCrd));
435 
436     // The open handler should have removed the region from RIT but kept the region closed
437     checkRegionIsClosed();
438 
439     // We should not find any znode here.
440     Assert.assertEquals(-1, ZKAssign.getVersion(HTU.getZooKeeperWatcher(), hri));
441 
442     reopenRegion();
443   }
444 
445   /**
446    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
447    * for openRegion. The region server should reject this RPC. (HBASE-9721)
448    */
449   @Test
450   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
451     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
452 
453     ServerName sn = getRS().getServerName();
454     ServerName earlierServerName = ServerName.valueOf(sn.getHostname(), sn.getPort(), 1);
455 
456     try {
457       CloseRegionRequest request = RequestConverter.buildCloseRegionRequest(earlierServerName, regionName, true);
458       getRS().getRSRpcServices().closeRegion(null, request);
459       Assert.fail("The closeRegion should have been rejected");
460     } catch (ServiceException se) {
461       Assert.assertTrue(se.getCause() instanceof IOException);
462       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
463     }
464 
465     //actual close
466     closeNoZK();
467     try {
468       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
469         earlierServerName, hri, 0, null, null);
470       getRS().getRSRpcServices().openRegion(null, orr);
471       Assert.fail("The openRegion should have been rejected");
472     } catch (ServiceException se) {
473       Assert.assertTrue(se.getCause() instanceof IOException);
474       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
475     } finally {
476       reopenRegion();
477     }
478   }
479 }