View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  import java.util.List;
29  import java.util.Set;
30  
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HBaseTestingUtility;
35  import org.apache.hadoop.hbase.HColumnDescriptor;
36  import org.apache.hadoop.hbase.HConstants;
37  import org.apache.hadoop.hbase.HRegionInfo;
38  import org.apache.hadoop.hbase.HTableDescriptor;
39  import org.apache.hadoop.hbase.MediumTests;
40  import org.apache.hadoop.hbase.ServerLoad;
41  import org.apache.hadoop.hbase.ServerName;
42  import org.apache.hadoop.hbase.TableName;
43  import org.apache.hadoop.hbase.catalog.MetaEditor;
44  import org.apache.hadoop.hbase.client.HBaseAdmin;
45  import org.apache.hadoop.hbase.client.HTable;
46  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
47  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
48  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
49  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
50  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
51  import org.apache.hadoop.hbase.executor.EventType;
52  import org.apache.hadoop.hbase.master.RegionState.State;
53  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
54  import org.apache.hadoop.hbase.regionserver.HRegionServer;
55  import org.apache.hadoop.hbase.util.Bytes;
56  import org.apache.hadoop.hbase.util.FSUtils;
57  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
58  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
59  import org.junit.AfterClass;
60  import org.junit.BeforeClass;
61  import org.junit.Test;
62  import org.junit.experimental.categories.Category;
63  
64  /**
65   * This tests AssignmentManager with a testing cluster.
66   */
67  @Category(MediumTests.class)
68  public class TestAssignmentManagerOnCluster {
69    private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
70    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
71    private final static Configuration conf = TEST_UTIL.getConfiguration();
72    private static HBaseAdmin admin;
73  
74    @BeforeClass
75    public static void setUpBeforeClass() throws Exception {
76      // Using the mock load balancer to control region plans
77      conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
78        MockLoadBalancer.class, LoadBalancer.class);
79      conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
80        MockRegionObserver.class, RegionObserver.class);
81      // Reduce the maximum attempts to speed up the test
82      conf.setInt("hbase.assignment.maximum.attempts", 3);
83  
84      TEST_UTIL.startMiniCluster(3);
85      admin = TEST_UTIL.getHBaseAdmin();
86    }
87  
88    @AfterClass
89    public static void tearDownAfterClass() throws Exception {
90      TEST_UTIL.shutdownMiniCluster();
91    }
92  
93    /**
94     * This tests region assignment
95     */
96    @Test (timeout=60000)
97    public void testAssignRegion() throws Exception {
98      String table = "testAssignRegion";
99      try {
100       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
101       desc.addFamily(new HColumnDescriptor(FAMILY));
102       admin.createTable(desc);
103 
104       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
105       HRegionInfo hri = new HRegionInfo(
106         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
107       MetaEditor.addRegionToMeta(meta, hri);
108 
109       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
110       master.assignRegion(hri);
111       master.getAssignmentManager().waitForAssignment(hri);
112 
113       ServerName serverName = master.getAssignmentManager().
114         getRegionStates().getRegionServerOfRegion(hri);
115       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
116     } finally {
117       TEST_UTIL.deleteTable(Bytes.toBytes(table));
118     }
119   }
120 
121   /**
122    * This tests region assignment on a simulated restarted server
123    */
124   @Test (timeout=60000)
125   public void testAssignRegionOnRestartedServer() throws Exception {
126     String table = "testAssignRegionOnRestartedServer";
127     ServerName deadServer = null;
128     HMaster master = null;
129     try {
130       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
131       desc.addFamily(new HColumnDescriptor(FAMILY));
132       admin.createTable(desc);
133 
134       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
135       HRegionInfo hri = new HRegionInfo(
136         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
137       MetaEditor.addRegionToMeta(meta, hri);
138 
139       master = TEST_UTIL.getHBaseCluster().getMaster();
140       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
141       assertFalse("There should be some servers online", onlineServers.isEmpty());
142 
143       // Use the first server as the destination server
144       ServerName destServer = onlineServers.iterator().next();
145 
146       // Created faked dead server
147       deadServer = new ServerName(destServer.getHostname(),
148         destServer.getPort(), destServer.getStartcode() - 100L);
149       master.serverManager.recordNewServer(deadServer, ServerLoad.EMPTY_SERVERLOAD);
150 
151       AssignmentManager am = master.getAssignmentManager();
152       RegionPlan plan = new RegionPlan(hri, null, deadServer);
153       am.addPlan(hri.getEncodedName(), plan);
154       master.assignRegion(hri);
155 
156       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
157         destServer, EventType.M_ZK_REGION_OFFLINE,
158         EventType.RS_ZK_REGION_OPENING, 0);
159       assertEquals("TansitionNode should fail", -1, version);
160 
161       // Give region 2 seconds to assign, which may not be enough.
162       // However, if HBASE-8545 is broken, this test will be flaky.
163       // Otherwise, this test should never be flaky.
164       Thread.sleep(2000);
165 
166       assertTrue("Region should still be in transition",
167         am.getRegionStates().isRegionInTransition(hri));
168       assertEquals("Assign node should still be in version 0", 0,
169         ZKAssign.getVersion(master.getZooKeeper(), hri));
170     } finally {
171       if (deadServer != null) {
172         master.serverManager.expireServer(deadServer);
173       }
174 
175       TEST_UTIL.deleteTable(Bytes.toBytes(table));
176     }
177   }
178 
179   /**
180    * This tests offlining a region
181    */
182   @Test (timeout=60000)
183   public void testOfflineRegion() throws Exception {
184     TableName table =
185         TableName.valueOf("testOfflineRegion");
186     try {
187       HRegionInfo hri = createTableAndGetOneRegion(table);
188 
189       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
190         getMaster().getAssignmentManager().getRegionStates();
191       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
192       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
193       admin.offline(hri.getRegionName());
194 
195       long timeoutTime = System.currentTimeMillis() + 800;
196       while (true) {
197         List<HRegionInfo> regions =
198           regionStates.getRegionsOfTable(table);
199         if (!regions.contains(hri)) break;
200         long now = System.currentTimeMillis();
201         if (now > timeoutTime) {
202           fail("Failed to offline the region in time");
203           break;
204         }
205         Thread.sleep(10);
206       }
207       RegionState regionState = regionStates.getRegionState(hri);
208       assertTrue(regionState.isOffline());
209     } finally {
210       TEST_UTIL.deleteTable(table);
211     }
212   }
213 
214   /**
215    * This tests moving a region
216    */
217   @Test (timeout=50000)
218   public void testMoveRegion() throws Exception {
219     TableName table =
220         TableName.valueOf("testMoveRegion");
221     try {
222       HRegionInfo hri = createTableAndGetOneRegion(table);
223 
224       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
225         getMaster().getAssignmentManager().getRegionStates();
226       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
227       ServerName destServerName = null;
228       for (int i = 0; i < 3; i++) {
229         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
230         if (!destServer.getServerName().equals(serverName)) {
231           destServerName = destServer.getServerName();
232           break;
233         }
234       }
235       assertTrue(destServerName != null
236         && !destServerName.equals(serverName));
237       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
238         Bytes.toBytes(destServerName.getServerName()));
239 
240       long timeoutTime = System.currentTimeMillis() + 30000;
241       while (true) {
242         ServerName sn = regionStates.getRegionServerOfRegion(hri);
243         if (sn != null && sn.equals(destServerName)) {
244           TEST_UTIL.assertRegionOnServer(hri, sn, 200);
245           break;
246         }
247         long now = System.currentTimeMillis();
248         if (now > timeoutTime) {
249           fail("Failed to move the region in time: "
250             + regionStates.getRegionState(hri));
251         }
252         regionStates.waitForUpdate(50);
253       }
254 
255     } finally {
256       TEST_UTIL.deleteTable(table);
257     }
258   }
259 
260   HRegionInfo createTableAndGetOneRegion(
261       final TableName tableName) throws IOException, InterruptedException {
262     HTableDescriptor desc = new HTableDescriptor(tableName);
263     desc.addFamily(new HColumnDescriptor(FAMILY));
264     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
265 
266     // wait till the table is assigned
267     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
268     long timeoutTime = System.currentTimeMillis() + 1000;
269     while (true) {
270       List<HRegionInfo> regions = master.getAssignmentManager().
271         getRegionStates().getRegionsOfTable(tableName);
272       if (regions.size() > 3) {
273         return regions.get(2);
274       }
275       long now = System.currentTimeMillis();
276       if (now > timeoutTime) {
277         fail("Could not find an online region");
278       }
279       Thread.sleep(10);
280     }
281   }
282 
283   /**
284    * This tests forcefully assign a region
285    * while it's closing and re-assigned.
286    *
287    * This test should not be flaky. If it is flaky, it means something
288    * wrong with AssignmentManager which should be reported and fixed
289    */
290   @Test (timeout=60000)
291   public void testForceAssignWhileClosing() throws Exception {
292     String table = "testForceAssignWhileClosing";
293     try {
294       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
295       desc.addFamily(new HColumnDescriptor(FAMILY));
296       admin.createTable(desc);
297 
298       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
299       HRegionInfo hri = new HRegionInfo(
300         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
301       MetaEditor.addRegionToMeta(meta, hri);
302 
303       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
304       master.assignRegion(hri);
305       AssignmentManager am = master.getAssignmentManager();
306       assertTrue(am.waitForAssignment(hri));
307 
308       MockRegionObserver.enabled = true;
309       am.unassign(hri);
310       RegionState state = am.getRegionStates().getRegionState(hri);
311       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
312 
313       MockRegionObserver.enabled = false;
314       am.unassign(hri, true);
315 
316       // region is closing now, will be re-assigned automatically.
317       // now, let's forcefully assign it again. it should be
318       // assigned properly and no double-assignment
319       am.assign(hri, true, true);
320 
321       // region should be closed and re-assigned
322       assertTrue(am.waitForAssignment(hri));
323 
324       ServerName serverName = master.getAssignmentManager().
325         getRegionStates().getRegionServerOfRegion(hri);
326       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
327     } finally {
328       MockRegionObserver.enabled = false;
329       TEST_UTIL.deleteTable(Bytes.toBytes(table));
330     }
331   }
332 
333   /**
334    * This tests region close failed
335    */
336   @Test (timeout=60000)
337   public void testCloseFailed() throws Exception {
338     String table = "testCloseFailed";
339     try {
340       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
341       desc.addFamily(new HColumnDescriptor(FAMILY));
342       admin.createTable(desc);
343 
344       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
345       HRegionInfo hri = new HRegionInfo(
346         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
347       MetaEditor.addRegionToMeta(meta, hri);
348 
349       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
350       master.assignRegion(hri);
351       AssignmentManager am = master.getAssignmentManager();
352       assertTrue(am.waitForAssignment(hri));
353 
354       MockRegionObserver.enabled = true;
355       am.unassign(hri);
356       RegionState state = am.getRegionStates().getRegionState(hri);
357       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
358 
359       MockRegionObserver.enabled = false;
360       am.unassign(hri, true);
361 
362       // region may still be assigned now since it's closing,
363       // let's check if it's assigned after it's out of transition
364       am.waitOnRegionToClearRegionsInTransition(hri);
365 
366       // region should be closed and re-assigned
367       assertTrue(am.waitForAssignment(hri));
368       ServerName serverName = master.getAssignmentManager().
369         getRegionStates().getRegionServerOfRegion(hri);
370       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
371     } finally {
372       MockRegionObserver.enabled = false;
373       TEST_UTIL.deleteTable(Bytes.toBytes(table));
374     }
375   }
376 
377   /**
378    * This tests region open failed
379    */
380   @Test (timeout=60000)
381   public void testOpenFailed() throws Exception {
382     String table = "testOpenFailed";
383     try {
384       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
385       desc.addFamily(new HColumnDescriptor(FAMILY));
386       admin.createTable(desc);
387 
388       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
389       HRegionInfo hri = new HRegionInfo(
390         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
391       MetaEditor.addRegionToMeta(meta, hri);
392 
393       MockLoadBalancer.controledRegion = hri.getEncodedName();
394 
395       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
396       master.assignRegion(hri);
397       AssignmentManager am = master.getAssignmentManager();
398       assertFalse(am.waitForAssignment(hri));
399 
400       RegionState state = am.getRegionStates().getRegionState(hri);
401       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
402       // Failed to open since no plan, so it's on no server
403       assertNull(state.getServerName());
404 
405       MockLoadBalancer.controledRegion = null;
406       master.assignRegion(hri);
407       assertTrue(am.waitForAssignment(hri));
408 
409       ServerName serverName = master.getAssignmentManager().
410         getRegionStates().getRegionServerOfRegion(hri);
411       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
412     } finally {
413       MockLoadBalancer.controledRegion = null;
414       TEST_UTIL.deleteTable(Bytes.toBytes(table));
415     }
416   }
417 
418   /**
419    * This tests region open failure which is not recoverable
420    */
421   @Test (timeout=60000)
422   public void testOpenFailedUnrecoverable() throws Exception {
423     TableName table =
424         TableName.valueOf("testOpenFailedUnrecoverable");
425     try {
426       HTableDescriptor desc = new HTableDescriptor(table);
427       desc.addFamily(new HColumnDescriptor(FAMILY));
428       admin.createTable(desc);
429 
430       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
431       HRegionInfo hri = new HRegionInfo(
432         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
433       MetaEditor.addRegionToMeta(meta, hri);
434 
435       FileSystem fs = FileSystem.get(conf);
436       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
437       Path regionDir = new Path(tableDir, hri.getEncodedName());
438       // create a file named the same as the region dir to
439       // mess up with region opening
440       fs.create(regionDir, true);
441 
442       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
443       master.assignRegion(hri);
444       AssignmentManager am = master.getAssignmentManager();
445       assertFalse(am.waitForAssignment(hri));
446 
447       RegionState state = am.getRegionStates().getRegionState(hri);
448       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
449       // Failed to open due to file system issue. Region state should
450       // carry the opening region server so that we can force close it
451       // later on before opening it again. See HBASE-9092.
452       assertNotNull(state.getServerName());
453 
454       // remove the blocking file, so that region can be opened
455       fs.delete(regionDir, true);
456       master.assignRegion(hri);
457       assertTrue(am.waitForAssignment(hri));
458 
459       ServerName serverName = master.getAssignmentManager().
460         getRegionStates().getRegionServerOfRegion(hri);
461       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
462     } finally {
463       TEST_UTIL.deleteTable(table);
464     }
465   }
466 
467   @Test (timeout=60000)
468   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
469     final TableName table =
470         TableName.valueOf
471             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
472     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
473     HRegionInfo hri = null;
474     ServerName serverName = null;
475     try {
476       hri = createTableAndGetOneRegion(table);
477       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
478       ServerName destServerName = null;
479       HRegionServer destServer = null;
480       for (int i = 0; i < 3; i++) {
481         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
482         if (!destServer.getServerName().equals(serverName)) {
483           destServerName = destServer.getServerName();
484           break;
485         }
486       }
487       am.regionOffline(hri);
488       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
489       am.getRegionStates().updateRegionState(hri, State.OFFLINE);
490       ZKAssign.createNodeOffline(zkw, hri, destServerName);
491       ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
492 
493       // Wait till the event is processed and the region is in transition
494       long timeoutTime = System.currentTimeMillis() + 20000;
495       while (!am.getRegionStates().isRegionInTransition(hri)) {
496         assertTrue("Failed to process ZK opening event in time",
497           System.currentTimeMillis() < timeoutTime);
498         Thread.sleep(100);
499       }
500 
501       am.getZKTable().setDisablingTable(table);
502       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
503       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
504       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
505           .getRegionState(hri).isOffline());
506     } finally {
507       if (hri != null && serverName != null) {
508         am.regionOnline(hri, serverName);
509       }
510       am.getZKTable().setDisabledTable(table);
511       TEST_UTIL.deleteTable(table);
512     }
513   }
514 
515   static class MockLoadBalancer extends StochasticLoadBalancer {
516     // For this region, if specified, always assign to nowhere
517     static volatile String controledRegion = null;
518 
519     @Override
520     public ServerName randomAssignment(HRegionInfo regionInfo,
521         List<ServerName> servers) {
522       if (regionInfo.getEncodedName().equals(controledRegion)) {
523         return null;
524       }
525       return super.randomAssignment(regionInfo, servers);
526     }
527   }
528 
529   public static class MockRegionObserver extends BaseRegionObserver {
530     // If enabled, fail all preClose calls
531     static volatile boolean enabled = false;
532 
533     @Override
534     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
535         boolean abortRequested) throws IOException {
536       if (enabled) throw new IOException("fail preClose from coprocessor");
537     }
538   }
539 }