View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import org.apache.hadoop.hbase.HBaseTestingUtility;
23  import org.apache.hadoop.hbase.HConstants;
24  import org.apache.hadoop.hbase.HRegionInfo;
25  import org.apache.hadoop.hbase.HTableDescriptor;
26  import org.apache.hadoop.hbase.MediumTests;
27  import org.apache.hadoop.hbase.NotServingRegionException;
28  import org.apache.hadoop.hbase.client.HTable;
29  import org.apache.hadoop.hbase.client.Put;
30  import org.apache.hadoop.hbase.executor.EventType;
31  import org.apache.hadoop.hbase.protobuf.RequestConverter;
32  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
33  import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler;
34  import org.apache.hadoop.hbase.util.Bytes;
35  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
36  import org.junit.After;
37  import org.junit.AfterClass;
38  import org.junit.Assert;
39  import org.junit.BeforeClass;
40  import org.junit.Test;
41  import org.junit.experimental.categories.Category;
42  
43  import com.google.protobuf.ServiceException;
44  
45  
46  /**
47   * Tests on the region server, without the master.
48   */
49  @Category(MediumTests.class)
50  public class TestRegionServerNoMaster {
51  
52    private static final int NB_SERVERS = 1;
53    private static HTable table;
54    private static final byte[] row = "ee".getBytes();
55  
56    private static HRegionInfo hri;
57  
58    private static byte[] regionName;
59    private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
60  
61  
62    @BeforeClass
63    public static void before() throws Exception {
64      HTU.startMiniCluster(NB_SERVERS);
65      final byte[] tableName = Bytes.toBytes(TestRegionServerNoMaster.class.getSimpleName());
66  
67      // Create table then get the single region for our new table.
68      table = HTU.createTable(tableName, HConstants.CATALOG_FAMILY);
69      Put p = new Put(row);
70      p.add(HConstants.CATALOG_FAMILY, row, row);
71      table.put(p);
72  
73      hri = table.getRegionLocation(row, false).getRegionInfo();
74      regionName = hri.getRegionName();
75  
76      // No master
77      HTU.getHBaseCluster().getMaster().stopMaster();
78    }
79  
80    @AfterClass
81    public static void afterClass() throws Exception {
82      table.close();
83      HTU.shutdownMiniCluster();
84    }
85  
86    @After
87    public void after() throws Exception {
88      // Clean the state if the test failed before cleaning the znode
89      // It does not manage all bad failures, so if there are multiple failures, only
90      //  the first one should be looked at.
91      ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hri);
92    }
93  
94  
95    private static HRegionServer getRS() {
96      return HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
97    }
98  
99  
100   /**
101    * Reopen the region. Reused in multiple tests as we always leave the region open after a test.
102    */
103   private void reopenRegion() throws Exception {
104     // We reopen. We need a ZK node here, as a open is always triggered by a master.
105     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
106     // first version is '0'
107     AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null);
108     AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
109     Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
110     Assert.assertTrue(responseOpen.getOpeningState(0).
111         equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED));
112 
113 
114     checkRegionIsOpened();
115   }
116 
117   private void checkRegionIsOpened() throws Exception {
118 
119     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
120       Thread.sleep(1);
121     }
122 
123     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
124 
125     Assert.assertTrue(
126       ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
127         getRS().getServerName()));
128   }
129 
130 
131   private void checkRegionIsClosed() throws Exception {
132 
133     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
134       Thread.sleep(1);
135     }
136 
137     try {
138       Assert.assertFalse(getRS().getRegion(regionName).isAvailable());
139     } catch (NotServingRegionException expected) {
140       // That's how it work: if the region is closed we have an exception.
141     }
142 
143     // We don't delete the znode here, because there is not always a znode.
144   }
145 
146 
147   /**
148    * Close the region without using ZK
149    */
150   private void closeNoZK() throws Exception {
151     // no transition in ZK
152     AdminProtos.CloseRegionRequest crr =
153         RequestConverter.buildCloseRegionRequest(regionName, false);
154     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
155     Assert.assertTrue(responseClose.getClosed());
156 
157     // now waiting & checking. After a while, the transition should be done and the region closed
158     checkRegionIsClosed();
159   }
160 
161 
162   @Test(timeout = 60000)
163   public void testCloseByRegionServer() throws Exception {
164     closeNoZK();
165     reopenRegion();
166   }
167 
168   @Test(timeout = 60000)
169   public void testCloseByMasterWithoutZNode() throws Exception {
170 
171     // Transition in ZK on. This should fail, as there is no znode
172     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
173         regionName, true);
174     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
175     Assert.assertTrue(responseClose.getClosed());
176 
177     // now waiting. After a while, the transition should be done
178     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
179       Thread.sleep(1);
180     }
181 
182     // the region is still available, the close got rejected at the end
183     Assert.assertTrue("The close should have failed", getRS().getRegion(regionName).isAvailable());
184   }
185 
186   @Test(timeout = 60000)
187   public void testOpenCloseByMasterWithZNode() throws Exception {
188 
189     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
190 
191     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
192         regionName, true);
193     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
194     Assert.assertTrue(responseClose.getClosed());
195 
196     checkRegionIsClosed();
197 
198     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
199       getRS().getServerName());
200 
201     reopenRegion();
202   }
203 
204   /**
205    * Test that we can send multiple openRegion to the region server.
206    * This is used when:
207    * - there is a SocketTimeout: in this case, the master does not know if the region server
208    * received the request before the timeout.
209    * - We have a socket error during the operation: same stuff: we don't know
210    * - a master failover: if we find a znode in thz M_ZK_REGION_OFFLINE, we don't know if
211    * the region server has received the query or not. Only solution to be efficient: re-ask
212    * immediately.
213    */
214   @Test(timeout = 60000)
215   public void testMultipleOpen() throws Exception {
216 
217     // We close
218     closeNoZK();
219     checkRegionIsClosed();
220 
221     // We reopen. We need a ZK node here, as a open is always triggered by a master.
222     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
223 
224     // We're sending multiple requests in a row. The region server must handle this nicely.
225     for (int i = 0; i < 10; i++) {
226       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null);
227       AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
228       Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
229 
230       AdminProtos.OpenRegionResponse.RegionOpeningState ors = responseOpen.getOpeningState(0);
231       Assert.assertTrue("request " + i + " failed",
232           ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED) ||
233               ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.ALREADY_OPENED)
234       );
235     }
236 
237     checkRegionIsOpened();
238   }
239 
240   @Test
241   public void testOpenClosingRegion() throws Exception {
242     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
243 
244     try {
245       // fake region to be closing now, need to clear state afterwards
246       getRS().regionsInTransitionInRS.put(hri.getEncodedNameAsBytes(), Boolean.FALSE);
247       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null);
248       getRS().openRegion(null, orr);
249       Assert.fail("The closing region should not be opened");
250     } catch (ServiceException se) {
251       Assert.assertTrue("The region should be already in transition",
252         se.getCause() instanceof RegionAlreadyInTransitionException);
253     } finally {
254       getRS().regionsInTransitionInRS.remove(hri.getEncodedNameAsBytes());
255     }
256   }
257 
258   @Test(timeout = 60000)
259   public void testMultipleCloseFromMaster() throws Exception {
260 
261     // As opening, we must support multiple requests on the same region
262     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
263     for (int i = 0; i < 10; i++) {
264       AdminProtos.CloseRegionRequest crr =
265           RequestConverter.buildCloseRegionRequest(regionName, 0, null, true);
266       try {
267         AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
268         Assert.assertEquals("The first request should succeeds", 0, i);
269         Assert.assertTrue("request " + i + " failed",
270             responseClose.getClosed() || responseClose.hasClosed());
271       } catch (ServiceException se) {
272         Assert.assertTrue("The next queries should throw an exception.", i > 0);
273       }
274     }
275 
276     checkRegionIsClosed();
277 
278     Assert.assertTrue(
279       ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
280         getRS().getServerName())
281     );
282 
283     reopenRegion();
284   }
285 
286   /**
287    * Test that if we do a close while opening it stops the opening.
288    */
289   @Test(timeout = 60000)
290   public void testCancelOpeningWithoutZK() throws Exception {
291     // We close
292     closeNoZK();
293     checkRegionIsClosed();
294 
295     // Let do the initial steps, without having a handler
296     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
297     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
298 
299     // That's a close without ZK.
300     AdminProtos.CloseRegionRequest crr =
301         RequestConverter.buildCloseRegionRequest(regionName, false);
302     try {
303       getRS().closeRegion(null, crr);
304       Assert.assertTrue(false);
305     } catch (ServiceException expected) {
306     }
307 
308     // The state in RIT should have changed to close
309     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
310         hri.getEncodedNameAsBytes()));
311 
312     // Let's start the open handler
313     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
314     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd, 0));
315 
316     // The open handler should have removed the region from RIT but kept the region closed
317     checkRegionIsClosed();
318 
319     // The open handler should have updated the value in ZK.
320     Assert.assertTrue(ZKAssign.deleteNode(
321         getRS().getZooKeeperWatcher(), hri.getEncodedName(),
322         EventType.RS_ZK_REGION_FAILED_OPEN, 1)
323     );
324 
325     reopenRegion();
326   }
327 
328   /**
329    * Test an open then a close with ZK. This is going to mess-up the ZK states, so
330    * the opening will fail as well because it doesn't find what it expects in ZK.
331    */
332   @Test(timeout = 60000)
333   public void testCancelOpeningWithZK() throws Exception {
334     // We close
335     closeNoZK();
336     checkRegionIsClosed();
337 
338     // Let do the initial steps, without having a handler
339     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
340 
341     // That's a close without ZK.
342     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
343     AdminProtos.CloseRegionRequest crr =
344         RequestConverter.buildCloseRegionRequest(regionName, false);
345     try {
346       getRS().closeRegion(null, crr);
347       Assert.assertTrue(false);
348     } catch (ServiceException expected) {
349       Assert.assertTrue(expected.getCause() instanceof NotServingRegionException);
350     }
351 
352     // The close should have left the ZK state as it is: it's the job the AM to delete it
353     Assert.assertTrue(ZKAssign.deleteNode(
354         getRS().getZooKeeperWatcher(), hri.getEncodedName(),
355         EventType.M_ZK_REGION_CLOSING, 0)
356     );
357 
358     // The state in RIT should have changed to close
359     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
360         hri.getEncodedNameAsBytes()));
361 
362     // Let's start the open handler
363     // It should not succeed for two reasons:
364     //  1) There is no ZK node
365     //  2) The region in RIT was changed.
366     // The order is more or less implementation dependant.
367     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
368     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd, 0));
369 
370     // The open handler should have removed the region from RIT but kept the region closed
371     checkRegionIsClosed();
372 
373     // We should not find any znode here.
374     Assert.assertEquals(-1, ZKAssign.getVersion(HTU.getZooKeeperWatcher(), hri));
375 
376     reopenRegion();
377   }
378 }