View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.hbase.HBaseTestingUtility;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.HTableDescriptor;
28  import org.apache.hadoop.hbase.MediumTests;
29  import org.apache.hadoop.hbase.NotServingRegionException;
30  import org.apache.hadoop.hbase.ServerName;
31  import org.apache.hadoop.hbase.client.HTable;
32  import org.apache.hadoop.hbase.client.Put;
33  import org.apache.hadoop.hbase.executor.EventType;
34  import org.apache.hadoop.hbase.protobuf.RequestConverter;
35  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
36  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionRequest;
37  import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler;
38  import org.apache.hadoop.hbase.util.Bytes;
39  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
40  import org.junit.After;
41  import org.junit.AfterClass;
42  import org.junit.Assert;
43  import org.junit.BeforeClass;
44  import org.junit.Test;
45  import org.junit.experimental.categories.Category;
46  
47  import com.google.protobuf.ServiceException;
48  
49  
50  /**
51   * Tests on the region server, without the master.
52   */
53  @Category(MediumTests.class)
54  public class TestRegionServerNoMaster {
55  
56    private static final int NB_SERVERS = 1;
57    private static HTable table;
58    private static final byte[] row = "ee".getBytes();
59  
60    private static HRegionInfo hri;
61  
62    private static byte[] regionName;
63    private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
64  
65  
66    @BeforeClass
67    public static void before() throws Exception {
68      HTU.startMiniCluster(NB_SERVERS);
69      final byte[] tableName = Bytes.toBytes(TestRegionServerNoMaster.class.getSimpleName());
70  
71      // Create table then get the single region for our new table.
72      table = HTU.createTable(tableName, HConstants.CATALOG_FAMILY);
73      Put p = new Put(row);
74      p.add(HConstants.CATALOG_FAMILY, row, row);
75      table.put(p);
76  
77      hri = table.getRegionLocation(row, false).getRegionInfo();
78      regionName = hri.getRegionName();
79  
80      // No master
81      HTU.getHBaseCluster().getMaster().stopMaster();
82    }
83  
84    @AfterClass
85    public static void afterClass() throws Exception {
86      table.close();
87      HTU.shutdownMiniCluster();
88    }
89  
90    @After
91    public void after() throws Exception {
92      // Clean the state if the test failed before cleaning the znode
93      // It does not manage all bad failures, so if there are multiple failures, only
94      //  the first one should be looked at.
95      ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hri);
96    }
97  
98  
99    private static HRegionServer getRS() {
100     return HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
101   }
102 
103 
104   /**
105    * Reopen the region. Reused in multiple tests as we always leave the region open after a test.
106    */
107   private void reopenRegion() throws Exception {
108     // We reopen. We need a ZK node here, as a open is always triggered by a master.
109     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
110     // first version is '0'
111     AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(getRS().getServerName(), hri, 0, null);
112     AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
113     Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
114     Assert.assertTrue(responseOpen.getOpeningState(0).
115         equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED));
116 
117 
118     checkRegionIsOpened();
119   }
120 
121   private void checkRegionIsOpened() throws Exception {
122 
123     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
124       Thread.sleep(1);
125     }
126 
127     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
128 
129     Assert.assertTrue(
130       ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
131         getRS().getServerName()));
132   }
133 
134 
135   private void checkRegionIsClosed() throws Exception {
136 
137     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
138       Thread.sleep(1);
139     }
140 
141     try {
142       Assert.assertFalse(getRS().getRegion(regionName).isAvailable());
143     } catch (NotServingRegionException expected) {
144       // That's how it work: if the region is closed we have an exception.
145     }
146 
147     // We don't delete the znode here, because there is not always a znode.
148   }
149 
150 
151   /**
152    * Close the region without using ZK
153    */
154   private void closeNoZK() throws Exception {
155     // no transition in ZK
156     AdminProtos.CloseRegionRequest crr =
157         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
158     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
159     Assert.assertTrue(responseClose.getClosed());
160 
161     // now waiting & checking. After a while, the transition should be done and the region closed
162     checkRegionIsClosed();
163   }
164 
165 
166   @Test(timeout = 60000)
167   public void testCloseByRegionServer() throws Exception {
168     closeNoZK();
169     reopenRegion();
170   }
171 
172   @Test(timeout = 60000)
173   public void testCloseByMasterWithoutZNode() throws Exception {
174 
175     // Transition in ZK on. This should fail, as there is no znode
176     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
177       getRS().getServerName(), regionName, true);
178     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
179     Assert.assertTrue(responseClose.getClosed());
180 
181     // now waiting. After a while, the transition should be done
182     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
183       Thread.sleep(1);
184     }
185 
186     // the region is still available, the close got rejected at the end
187     Assert.assertTrue("The close should have failed", getRS().getRegion(regionName).isAvailable());
188   }
189 
190   @Test(timeout = 60000)
191   public void testOpenCloseByMasterWithZNode() throws Exception {
192 
193     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
194 
195     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
196       getRS().getServerName(), regionName, true);
197     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
198     Assert.assertTrue(responseClose.getClosed());
199 
200     checkRegionIsClosed();
201 
202     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
203       getRS().getServerName());
204 
205     reopenRegion();
206   }
207 
208   /**
209    * Test that we can send multiple openRegion to the region server.
210    * This is used when:
211    * - there is a SocketTimeout: in this case, the master does not know if the region server
212    * received the request before the timeout.
213    * - We have a socket error during the operation: same stuff: we don't know
214    * - a master failover: if we find a znode in thz M_ZK_REGION_OFFLINE, we don't know if
215    * the region server has received the query or not. Only solution to be efficient: re-ask
216    * immediately.
217    */
218   @Test(timeout = 60000)
219   public void testMultipleOpen() throws Exception {
220 
221     // We close
222     closeNoZK();
223     checkRegionIsClosed();
224 
225     // We reopen. We need a ZK node here, as a open is always triggered by a master.
226     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
227 
228     // We're sending multiple requests in a row. The region server must handle this nicely.
229     for (int i = 0; i < 10; i++) {
230       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(getRS().getServerName(), hri, 0, null);
231       AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
232       Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
233 
234       AdminProtos.OpenRegionResponse.RegionOpeningState ors = responseOpen.getOpeningState(0);
235       Assert.assertTrue("request " + i + " failed",
236           ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED) ||
237               ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.ALREADY_OPENED)
238       );
239     }
240 
241     checkRegionIsOpened();
242   }
243 
244   @Test
245   public void testOpenClosingRegion() throws Exception {
246     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
247 
248     try {
249       // fake region to be closing now, need to clear state afterwards
250       getRS().regionsInTransitionInRS.put(hri.getEncodedNameAsBytes(), Boolean.FALSE);
251       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(getRS().getServerName(), hri, 0, null);
252       getRS().openRegion(null, orr);
253       Assert.fail("The closing region should not be opened");
254     } catch (ServiceException se) {
255       Assert.assertTrue("The region should be already in transition",
256         se.getCause() instanceof RegionAlreadyInTransitionException);
257     } finally {
258       getRS().regionsInTransitionInRS.remove(hri.getEncodedNameAsBytes());
259     }
260   }
261 
262   @Test(timeout = 60000)
263   public void testMultipleCloseFromMaster() throws Exception {
264 
265     // As opening, we must support multiple requests on the same region
266     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
267     for (int i = 0; i < 10; i++) {
268       AdminProtos.CloseRegionRequest crr =
269           RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, 0, null, true);
270       try {
271         AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
272         Assert.assertEquals("The first request should succeeds", 0, i);
273         Assert.assertTrue("request " + i + " failed",
274             responseClose.getClosed() || responseClose.hasClosed());
275       } catch (ServiceException se) {
276         Assert.assertTrue("The next queries should throw an exception.", i > 0);
277       }
278     }
279 
280     checkRegionIsClosed();
281 
282     Assert.assertTrue(
283       ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
284         getRS().getServerName())
285     );
286 
287     reopenRegion();
288   }
289 
290   /**
291    * Test that if we do a close while opening it stops the opening.
292    */
293   @Test(timeout = 60000)
294   public void testCancelOpeningWithoutZK() throws Exception {
295     // We close
296     closeNoZK();
297     checkRegionIsClosed();
298 
299     // Let do the initial steps, without having a handler
300     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
301     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
302 
303     // That's a close without ZK.
304     AdminProtos.CloseRegionRequest crr =
305         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
306     try {
307       getRS().closeRegion(null, crr);
308       Assert.assertTrue(false);
309     } catch (ServiceException expected) {
310     }
311 
312     // The state in RIT should have changed to close
313     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
314         hri.getEncodedNameAsBytes()));
315 
316     // Let's start the open handler
317     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
318     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd, 0));
319 
320     // The open handler should have removed the region from RIT but kept the region closed
321     checkRegionIsClosed();
322 
323     // The open handler should have updated the value in ZK.
324     Assert.assertTrue(ZKAssign.deleteNode(
325         getRS().getZooKeeperWatcher(), hri.getEncodedName(),
326         EventType.RS_ZK_REGION_FAILED_OPEN, 1)
327     );
328 
329     reopenRegion();
330   }
331 
332   /**
333    * Test an open then a close with ZK. This is going to mess-up the ZK states, so
334    * the opening will fail as well because it doesn't find what it expects in ZK.
335    */
336   @Test(timeout = 60000)
337   public void testCancelOpeningWithZK() throws Exception {
338     // We close
339     closeNoZK();
340     checkRegionIsClosed();
341 
342     // Let do the initial steps, without having a handler
343     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
344 
345     // That's a close without ZK.
346     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
347     AdminProtos.CloseRegionRequest crr =
348         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
349     try {
350       getRS().closeRegion(null, crr);
351       Assert.assertTrue(false);
352     } catch (ServiceException expected) {
353       Assert.assertTrue(expected.getCause() instanceof NotServingRegionException);
354     }
355 
356     // The close should have left the ZK state as it is: it's the job the AM to delete it
357     Assert.assertTrue(ZKAssign.deleteNode(
358         getRS().getZooKeeperWatcher(), hri.getEncodedName(),
359         EventType.M_ZK_REGION_CLOSING, 0)
360     );
361 
362     // The state in RIT should have changed to close
363     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
364         hri.getEncodedNameAsBytes()));
365 
366     // Let's start the open handler
367     // It should not succeed for two reasons:
368     //  1) There is no ZK node
369     //  2) The region in RIT was changed.
370     // The order is more or less implementation dependant.
371     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
372     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd, 0));
373 
374     // The open handler should have removed the region from RIT but kept the region closed
375     checkRegionIsClosed();
376 
377     // We should not find any znode here.
378     Assert.assertEquals(-1, ZKAssign.getVersion(HTU.getZooKeeperWatcher(), hri));
379 
380     reopenRegion();
381   }
382 
383   /**
384    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
385    * for openRegion. The region server should reject this RPC. (HBASE-9721)
386    */
387   @Test
388   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
389     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
390 
391     ServerName sn = getRS().getServerName();
392     ServerName earlierServerName = ServerName.valueOf(sn.getHostname(), sn.getPort(), 1);
393 
394     try {
395       CloseRegionRequest request = RequestConverter.buildCloseRegionRequest(earlierServerName, regionName, true);
396       getRS().closeRegion(null, request);
397       Assert.fail("The closeRegion should have been rejected");
398     } catch (ServiceException se) {
399       Assert.assertTrue(se.getCause() instanceof IOException);
400       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
401     }
402 
403     //actual close
404     closeNoZK();
405     try {
406       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(earlierServerName, hri, 0, null);
407       getRS().openRegion(null, orr);
408       Assert.fail("The openRegion should have been rejected");
409     } catch (ServiceException se) {
410       Assert.assertTrue(se.getCause() instanceof IOException);
411       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
412     } finally {
413       reopenRegion();
414     }
415   }
416 }