View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import org.apache.hadoop.hbase.HBaseTestingUtility;
23  import org.apache.hadoop.hbase.HConstants;
24  import org.apache.hadoop.hbase.HRegionInfo;
25  import org.apache.hadoop.hbase.HTableDescriptor;
26  import org.apache.hadoop.hbase.MediumTests;
27  import org.apache.hadoop.hbase.NotServingRegionException;
28  import org.apache.hadoop.hbase.client.HTable;
29  import org.apache.hadoop.hbase.client.Put;
30  import org.apache.hadoop.hbase.executor.EventType;
31  import org.apache.hadoop.hbase.protobuf.RequestConverter;
32  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
33  import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler;
34  import org.apache.hadoop.hbase.util.Bytes;
35  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
36  import org.junit.After;
37  import org.junit.AfterClass;
38  import org.junit.Assert;
39  import org.junit.BeforeClass;
40  import org.junit.Test;
41  import org.junit.experimental.categories.Category;
42  
43  import com.google.protobuf.ServiceException;
44  
45  
46  /**
47   * Tests on the region server, without the master.
48   */
49  @Category(MediumTests.class)
50  public class TestRegionServerNoMaster {
51  
52    private static final int NB_SERVERS = 1;
53    private static HTable table;
54    private static final byte[] row = "ee".getBytes();
55  
56    private static HRegionInfo hri;
57  
58    private static byte[] regionName;
59    private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
60  
61  
62    @BeforeClass
63    public static void before() throws Exception {
64      HTU.startMiniCluster(NB_SERVERS);
65      final byte[] tableName = Bytes.toBytes(TestRegionServerNoMaster.class.getSimpleName());
66  
67      // Create table then get the single region for our new table.
68      table = HTU.createTable(tableName, HConstants.CATALOG_FAMILY);
69      Put p = new Put(row);
70      p.add(HConstants.CATALOG_FAMILY, row, row);
71      table.put(p);
72  
73      hri = table.getRegionLocation(row, false).getRegionInfo();
74      regionName = hri.getRegionName();
75  
76      // No master
77      HTU.getHBaseCluster().getMaster().stopMaster();
78    }
79  
80    @AfterClass
81    public static void afterClass() throws Exception {
82      table.close();
83      HTU.shutdownMiniCluster();
84    }
85  
86    @After
87    public void after() throws Exception {
88      // Clean the state if the test failed before cleaning the znode
89      // It does not manage all bad failures, so if there are multiple failures, only
90      //  the first one should be looked at.
91      ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hri);
92    }
93  
94  
95    private static HRegionServer getRS() {
96      return HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
97    }
98  
99  
100   /**
101    * Reopen the region. Reused in multiple tests as we always leave the region open after a test.
102    */
103   private void reopenRegion() throws Exception {
104     // We reopen. We need a ZK node here, as a open is always triggered by a master.
105     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
106     // first version is '0'
107     AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null);
108     AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
109     Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
110     Assert.assertTrue(responseOpen.getOpeningState(0).
111         equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED));
112 
113 
114     checkRegionIsOpened();
115   }
116 
117   private void checkRegionIsOpened() throws Exception {
118 
119     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
120       Thread.sleep(1);
121     }
122 
123     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
124 
125     Assert.assertTrue(
126         ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName()));
127   }
128 
129 
130   private void checkRegionIsClosed() throws Exception {
131 
132     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
133       Thread.sleep(1);
134     }
135 
136     try {
137       Assert.assertFalse(getRS().getRegion(regionName).isAvailable());
138     } catch (NotServingRegionException expected) {
139       // That's how it work: if the region is closed we have an exception.
140     }
141 
142     // We don't delete the znode here, because there is not always a znode.
143   }
144 
145 
146   /**
147    * Close the region without using ZK
148    */
149   private void closeNoZK() throws Exception {
150     // no transition in ZK
151     AdminProtos.CloseRegionRequest crr =
152         RequestConverter.buildCloseRegionRequest(regionName, false);
153     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
154     Assert.assertTrue(responseClose.getClosed());
155 
156     // now waiting & checking. After a while, the transition should be done and the region closed
157     checkRegionIsClosed();
158   }
159 
160 
161   @Test(timeout = 60000)
162   public void testCloseByRegionServer() throws Exception {
163     closeNoZK();
164     reopenRegion();
165   }
166 
167   @Test(timeout = 60000)
168   public void testCloseByMasterWithoutZNode() throws Exception {
169 
170     // Transition in ZK on. This should fail, as there is no znode
171     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
172         regionName, true);
173     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
174     Assert.assertTrue(responseClose.getClosed());
175 
176     // now waiting. After a while, the transition should be done
177     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
178       Thread.sleep(1);
179     }
180 
181     // the region is still available, the close got rejected at the end
182     Assert.assertTrue("The close should have failed", getRS().getRegion(regionName).isAvailable());
183   }
184 
185   @Test(timeout = 60000)
186   public void testOpenCloseByMasterWithZNode() throws Exception {
187 
188     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
189 
190     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
191         regionName, true);
192     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
193     Assert.assertTrue(responseClose.getClosed());
194 
195     checkRegionIsClosed();
196 
197     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName());
198 
199     reopenRegion();
200   }
201 
202   /**
203    * Test that we can send multiple openRegion to the region server.
204    * This is used when:
205    * - there is a SocketTimeout: in this case, the master does not know if the region server
206    * received the request before the timeout.
207    * - We have a socket error during the operation: same stuff: we don't know
208    * - a master failover: if we find a znode in thz M_ZK_REGION_OFFLINE, we don't know if
209    * the region server has received the query or not. Only solution to be efficient: re-ask
210    * immediately.
211    */
212   @Test(timeout = 60000)
213   public void testMultipleOpen() throws Exception {
214 
215     // We close
216     closeNoZK();
217     checkRegionIsClosed();
218 
219     // We reopen. We need a ZK node here, as a open is always triggered by a master.
220     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
221 
222     // We're sending multiple requests in a row. The region server must handle this nicely.
223     for (int i = 0; i < 10; i++) {
224       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null);
225       AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
226       Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
227 
228       AdminProtos.OpenRegionResponse.RegionOpeningState ors = responseOpen.getOpeningState(0);
229       Assert.assertTrue("request " + i + " failed",
230           ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED) ||
231               ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.ALREADY_OPENED)
232       );
233     }
234 
235     checkRegionIsOpened();
236   }
237 
238   @Test
239   public void testOpenClosingRegion() throws Exception {
240     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
241 
242     try {
243       // fake region to be closing now, need to clear state afterwards
244       getRS().regionsInTransitionInRS.put(hri.getEncodedNameAsBytes(), Boolean.FALSE);
245       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null);
246       getRS().openRegion(null, orr);
247       Assert.fail("The closing region should not be opened");
248     } catch (ServiceException se) {
249       Assert.assertTrue("The region should be already in transition",
250         se.getCause() instanceof RegionAlreadyInTransitionException);
251     } finally {
252       getRS().regionsInTransitionInRS.remove(hri.getEncodedNameAsBytes());
253     }
254   }
255 
256   @Test(timeout = 60000)
257   public void testMultipleCloseFromMaster() throws Exception {
258 
259     // As opening, we must support multiple requests on the same region
260     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
261     for (int i = 0; i < 10; i++) {
262       AdminProtos.CloseRegionRequest crr =
263           RequestConverter.buildCloseRegionRequest(regionName, 0, null, true);
264       try {
265         AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
266         Assert.assertEquals("The first request should succeeds", 0, i);
267         Assert.assertTrue("request " + i + " failed",
268             responseClose.getClosed() || responseClose.hasClosed());
269       } catch (ServiceException se) {
270         Assert.assertTrue("The next queries should throw an exception.", i > 0);
271       }
272     }
273 
274     checkRegionIsClosed();
275 
276     Assert.assertTrue(
277         ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName())
278     );
279 
280     reopenRegion();
281   }
282 
283   /**
284    * Test that if we do a close while opening it stops the opening.
285    */
286   @Test(timeout = 60000)
287   public void testCancelOpeningWithoutZK() throws Exception {
288     // We close
289     closeNoZK();
290     checkRegionIsClosed();
291 
292     // Let do the initial steps, without having a handler
293     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
294     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
295 
296     // That's a close without ZK.
297     AdminProtos.CloseRegionRequest crr =
298         RequestConverter.buildCloseRegionRequest(regionName, false);
299     try {
300       getRS().closeRegion(null, crr);
301       Assert.assertTrue(false);
302     } catch (ServiceException expected) {
303     }
304 
305     // The state in RIT should have changed to close
306     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
307         hri.getEncodedNameAsBytes()));
308 
309     // Let's start the open handler
310     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTableName());
311     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd, 0));
312 
313     // The open handler should have removed the region from RIT but kept the region closed
314     checkRegionIsClosed();
315 
316     // The open handler should have updated the value in ZK.
317     Assert.assertTrue(ZKAssign.deleteNode(
318         getRS().getZooKeeperWatcher(), hri.getEncodedName(),
319         EventType.RS_ZK_REGION_FAILED_OPEN, 1)
320     );
321 
322     reopenRegion();
323   }
324 
325   /**
326    * Test an open then a close with ZK. This is going to mess-up the ZK states, so
327    * the opening will fail as well because it doesn't find what it expects in ZK.
328    */
329   @Test(timeout = 60000)
330   public void testCancelOpeningWithZK() throws Exception {
331     // We close
332     closeNoZK();
333     checkRegionIsClosed();
334 
335     // Let do the initial steps, without having a handler
336     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
337 
338     // That's a close without ZK.
339     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
340     AdminProtos.CloseRegionRequest crr =
341         RequestConverter.buildCloseRegionRequest(regionName, false);
342     try {
343       getRS().closeRegion(null, crr);
344       Assert.assertTrue(false);
345     } catch (ServiceException expected) {
346       Assert.assertTrue(expected.getCause() instanceof NotServingRegionException);
347     }
348 
349     // The close should have left the ZK state as it is: it's the job the AM to delete it
350     Assert.assertTrue(ZKAssign.deleteNode(
351         getRS().getZooKeeperWatcher(), hri.getEncodedName(),
352         EventType.M_ZK_REGION_CLOSING, 0)
353     );
354 
355     // The state in RIT should have changed to close
356     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
357         hri.getEncodedNameAsBytes()));
358 
359     // Let's start the open handler
360     // It should not succeed for two reasons:
361     //  1) There is no ZK node
362     //  2) The region in RIT was changed.
363     // The order is more or less implementation dependant.
364     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTableName());
365     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd, 0));
366 
367     // The open handler should have removed the region from RIT but kept the region closed
368     checkRegionIsClosed();
369 
370     // We should not find any znode here.
371     Assert.assertEquals(-1, ZKAssign.getVersion(HTU.getZooKeeperWatcher(), hri));
372 
373     reopenRegion();
374   }
375 }