View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  import java.util.List;
29  import java.util.Set;
30  import java.util.concurrent.atomic.AtomicBoolean;
31  
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.HBaseTestingUtility;
36  import org.apache.hadoop.hbase.HColumnDescriptor;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.HTableDescriptor;
40  import org.apache.hadoop.hbase.MediumTests;
41  import org.apache.hadoop.hbase.MiniHBaseCluster;
42  import org.apache.hadoop.hbase.ServerLoad;
43  import org.apache.hadoop.hbase.ServerName;
44  import org.apache.hadoop.hbase.TableName;
45  import org.apache.hadoop.hbase.catalog.MetaEditor;
46  import org.apache.hadoop.hbase.client.HBaseAdmin;
47  import org.apache.hadoop.hbase.client.HTable;
48  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
49  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
50  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
51  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
52  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
53  import org.apache.hadoop.hbase.executor.EventType;
54  import org.apache.hadoop.hbase.master.RegionState.State;
55  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
56  import org.apache.hadoop.hbase.regionserver.HRegionServer;
57  import org.apache.hadoop.hbase.util.Bytes;
58  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
59  import org.apache.hadoop.hbase.util.FSUtils;
60  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
61  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
62  import org.apache.zookeeper.KeeperException;
63  import org.junit.AfterClass;
64  import org.junit.BeforeClass;
65  import org.junit.Test;
66  import org.junit.experimental.categories.Category;
67  
68  /**
69   * This tests AssignmentManager with a testing cluster.
70   */
71  @Category(MediumTests.class)
72  public class TestAssignmentManagerOnCluster {
73    private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
74    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
75    private final static Configuration conf = TEST_UTIL.getConfiguration();
76    private static HBaseAdmin admin;
77  
78    @BeforeClass
79    public static void setUpBeforeClass() throws Exception {
80      // Using the our load balancer to control region plans
81      conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
82        MyLoadBalancer.class, LoadBalancer.class);
83      conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
84        MyRegionObserver.class, RegionObserver.class);
85      // Reduce the maximum attempts to speed up the test
86      conf.setInt("hbase.assignment.maximum.attempts", 3);
87  
88      TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, null);
89      admin = TEST_UTIL.getHBaseAdmin();
90    }
91  
92    @AfterClass
93    public static void tearDownAfterClass() throws Exception {
94      TEST_UTIL.shutdownMiniCluster();
95    }
96  
97    /**
98     * This tests region assignment
99     */
100   @Test (timeout=60000)
101   public void testAssignRegion() throws Exception {
102     String table = "testAssignRegion";
103     try {
104       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
105       desc.addFamily(new HColumnDescriptor(FAMILY));
106       admin.createTable(desc);
107 
108       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
109       HRegionInfo hri = new HRegionInfo(
110         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
111       MetaEditor.addRegionToMeta(meta, hri);
112 
113       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
114       master.assignRegion(hri);
115       AssignmentManager am = master.getAssignmentManager();
116       am.waitForAssignment(hri);
117 
118       RegionStates regionStates = am.getRegionStates();
119       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
120       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
121 
122       // Region is assigned now. Let's assign it again.
123       // Master should not abort, and region should be assigned.
124       RegionState oldState = regionStates.getRegionState(hri);
125       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
126       master.getAssignmentManager().waitForAssignment(hri);
127       RegionState newState = regionStates.getRegionState(hri);
128       assertTrue(newState.isOpened()
129         && newState.getStamp() != oldState.getStamp());
130     } finally {
131       TEST_UTIL.deleteTable(Bytes.toBytes(table));
132     }
133   }
134 
135   /**
136    * This tests region assignment on a simulated restarted server
137    */
138   @Test (timeout=60000)
139   public void testAssignRegionOnRestartedServer() throws Exception {
140     String table = "testAssignRegionOnRestartedServer";
141     ServerName deadServer = null;
142     HMaster master = null;
143     try {
144       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
145       desc.addFamily(new HColumnDescriptor(FAMILY));
146       admin.createTable(desc);
147 
148       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
149       HRegionInfo hri = new HRegionInfo(
150         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
151       MetaEditor.addRegionToMeta(meta, hri);
152 
153       master = TEST_UTIL.getHBaseCluster().getMaster();
154       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
155       assertFalse("There should be some servers online", onlineServers.isEmpty());
156 
157       // Use the first server as the destination server
158       ServerName destServer = onlineServers.iterator().next();
159 
160       // Created faked dead server
161       deadServer = ServerName.valueOf(destServer.getHostname(),
162           destServer.getPort(), destServer.getStartcode() - 100L);
163       master.serverManager.recordNewServer(deadServer, ServerLoad.EMPTY_SERVERLOAD);
164 
165       AssignmentManager am = master.getAssignmentManager();
166       RegionPlan plan = new RegionPlan(hri, null, deadServer);
167       am.addPlan(hri.getEncodedName(), plan);
168       master.assignRegion(hri);
169 
170       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
171         destServer, EventType.M_ZK_REGION_OFFLINE,
172         EventType.RS_ZK_REGION_OPENING, 0);
173       assertEquals("TansitionNode should fail", -1, version);
174 
175       // Give region 2 seconds to assign, which may not be enough.
176       // However, if HBASE-8545 is broken, this test will be flaky.
177       // Otherwise, this test should never be flaky.
178       Thread.sleep(2000);
179 
180       assertTrue("Region should still be in transition",
181         am.getRegionStates().isRegionInTransition(hri));
182       assertEquals("Assign node should still be in version 0", 0,
183         ZKAssign.getVersion(master.getZooKeeper(), hri));
184     } finally {
185       if (deadServer != null) {
186         master.serverManager.expireServer(deadServer);
187       }
188 
189       TEST_UTIL.deleteTable(Bytes.toBytes(table));
190     }
191   }
192 
193   /**
194    * This tests offlining a region
195    */
196   @Test (timeout=60000)
197   public void testOfflineRegion() throws Exception {
198     TableName table =
199         TableName.valueOf("testOfflineRegion");
200     try {
201       HRegionInfo hri = createTableAndGetOneRegion(table);
202 
203       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
204         getMaster().getAssignmentManager().getRegionStates();
205       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
206       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
207       admin.offline(hri.getRegionName());
208 
209       long timeoutTime = System.currentTimeMillis() + 800;
210       while (true) {
211         List<HRegionInfo> regions =
212           regionStates.getRegionsOfTable(table);
213         if (!regions.contains(hri)) break;
214         long now = System.currentTimeMillis();
215         if (now > timeoutTime) {
216           fail("Failed to offline the region in time");
217           break;
218         }
219         Thread.sleep(10);
220       }
221       RegionState regionState = regionStates.getRegionState(hri);
222       assertTrue(regionState.isOffline());
223     } finally {
224       TEST_UTIL.deleteTable(table);
225     }
226   }
227 
228   /**
229    * This tests moving a region
230    */
231   @Test (timeout=50000)
232   public void testMoveRegion() throws Exception {
233     TableName table =
234         TableName.valueOf("testMoveRegion");
235     try {
236       HRegionInfo hri = createTableAndGetOneRegion(table);
237 
238       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
239         getMaster().getAssignmentManager().getRegionStates();
240       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
241       ServerName destServerName = null;
242       for (int i = 0; i < 3; i++) {
243         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
244         if (!destServer.getServerName().equals(serverName)) {
245           destServerName = destServer.getServerName();
246           break;
247         }
248       }
249       assertTrue(destServerName != null
250         && !destServerName.equals(serverName));
251       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
252         Bytes.toBytes(destServerName.getServerName()));
253 
254       long timeoutTime = System.currentTimeMillis() + 30000;
255       while (true) {
256         ServerName sn = regionStates.getRegionServerOfRegion(hri);
257         if (sn != null && sn.equals(destServerName)) {
258           TEST_UTIL.assertRegionOnServer(hri, sn, 200);
259           break;
260         }
261         long now = System.currentTimeMillis();
262         if (now > timeoutTime) {
263           fail("Failed to move the region in time: "
264             + regionStates.getRegionState(hri));
265         }
266         regionStates.waitForUpdate(50);
267       }
268 
269     } finally {
270       TEST_UTIL.deleteTable(table);
271     }
272   }
273 
274   HRegionInfo createTableAndGetOneRegion(
275       final TableName tableName) throws IOException, InterruptedException {
276     HTableDescriptor desc = new HTableDescriptor(tableName);
277     desc.addFamily(new HColumnDescriptor(FAMILY));
278     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
279 
280     // wait till the table is assigned
281     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
282     long timeoutTime = System.currentTimeMillis() + 1000;
283     while (true) {
284       List<HRegionInfo> regions = master.getAssignmentManager().
285         getRegionStates().getRegionsOfTable(tableName);
286       if (regions.size() > 3) {
287         return regions.get(2);
288       }
289       long now = System.currentTimeMillis();
290       if (now > timeoutTime) {
291         fail("Could not find an online region");
292       }
293       Thread.sleep(10);
294     }
295   }
296 
297   /**
298    * This test should not be flaky. If it is flaky, it means something
299    * wrong with AssignmentManager which should be reported and fixed
300    *
301    * This tests forcefully assign a region while it's closing and re-assigned.
302    */
303   @Test (timeout=60000)
304   public void testForceAssignWhileClosing() throws Exception {
305     String table = "testForceAssignWhileClosing";
306     try {
307       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
308       desc.addFamily(new HColumnDescriptor(FAMILY));
309       admin.createTable(desc);
310 
311       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
312       HRegionInfo hri = new HRegionInfo(
313         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
314       MetaEditor.addRegionToMeta(meta, hri);
315 
316       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
317       master.assignRegion(hri);
318       AssignmentManager am = master.getAssignmentManager();
319       assertTrue(am.waitForAssignment(hri));
320 
321       MyRegionObserver.preCloseEnabled.set(true);
322       am.unassign(hri);
323       RegionState state = am.getRegionStates().getRegionState(hri);
324       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
325 
326       MyRegionObserver.preCloseEnabled.set(false);
327       am.unassign(hri, true);
328 
329       // region is closing now, will be re-assigned automatically.
330       // now, let's forcefully assign it again. it should be
331       // assigned properly and no double-assignment
332       am.assign(hri, true, true);
333 
334       // let's check if it's assigned after it's out of transition
335       am.waitOnRegionToClearRegionsInTransition(hri);
336       assertTrue(am.waitForAssignment(hri));
337 
338       ServerName serverName = master.getAssignmentManager().
339         getRegionStates().getRegionServerOfRegion(hri);
340       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
341     } finally {
342       MyRegionObserver.preCloseEnabled.set(false);
343       TEST_UTIL.deleteTable(Bytes.toBytes(table));
344     }
345   }
346 
347   /**
348    * This tests region close failed
349    */
350   @Test (timeout=60000)
351   public void testCloseFailed() throws Exception {
352     String table = "testCloseFailed";
353     try {
354       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
355       desc.addFamily(new HColumnDescriptor(FAMILY));
356       admin.createTable(desc);
357 
358       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
359       HRegionInfo hri = new HRegionInfo(
360         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
361       MetaEditor.addRegionToMeta(meta, hri);
362 
363       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
364       master.assignRegion(hri);
365       AssignmentManager am = master.getAssignmentManager();
366       assertTrue(am.waitForAssignment(hri));
367 
368       MyRegionObserver.preCloseEnabled.set(true);
369       am.unassign(hri);
370       RegionState state = am.getRegionStates().getRegionState(hri);
371       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
372 
373       MyRegionObserver.preCloseEnabled.set(false);
374       am.unassign(hri, true);
375 
376       // region may still be assigned now since it's closing,
377       // let's check if it's assigned after it's out of transition
378       am.waitOnRegionToClearRegionsInTransition(hri);
379 
380       // region should be closed and re-assigned
381       assertTrue(am.waitForAssignment(hri));
382       ServerName serverName = master.getAssignmentManager().
383         getRegionStates().getRegionServerOfRegion(hri);
384       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
385     } finally {
386       MyRegionObserver.preCloseEnabled.set(false);
387       TEST_UTIL.deleteTable(Bytes.toBytes(table));
388     }
389   }
390 
391   /**
392    * This tests region open failed
393    */
394   @Test (timeout=60000)
395   public void testOpenFailed() throws Exception {
396     String table = "testOpenFailed";
397     try {
398       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
399       desc.addFamily(new HColumnDescriptor(FAMILY));
400       admin.createTable(desc);
401 
402       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
403       HRegionInfo hri = new HRegionInfo(
404         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
405       MetaEditor.addRegionToMeta(meta, hri);
406 
407       MyLoadBalancer.controledRegion = hri.getEncodedName();
408 
409       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
410       master.assignRegion(hri);
411       AssignmentManager am = master.getAssignmentManager();
412       assertFalse(am.waitForAssignment(hri));
413 
414       RegionState state = am.getRegionStates().getRegionState(hri);
415       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
416       // Failed to open since no plan, so it's on no server
417       assertNull(state.getServerName());
418 
419       MyLoadBalancer.controledRegion = null;
420       master.assignRegion(hri);
421       assertTrue(am.waitForAssignment(hri));
422 
423       ServerName serverName = master.getAssignmentManager().
424         getRegionStates().getRegionServerOfRegion(hri);
425       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
426     } finally {
427       MyLoadBalancer.controledRegion = null;
428       TEST_UTIL.deleteTable(Bytes.toBytes(table));
429     }
430   }
431 
432   /**
433    * This tests region open failure which is not recoverable
434    */
435   @Test (timeout=60000)
436   public void testOpenFailedUnrecoverable() throws Exception {
437     TableName table =
438         TableName.valueOf("testOpenFailedUnrecoverable");
439     try {
440       HTableDescriptor desc = new HTableDescriptor(table);
441       desc.addFamily(new HColumnDescriptor(FAMILY));
442       admin.createTable(desc);
443 
444       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
445       HRegionInfo hri = new HRegionInfo(
446         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
447       MetaEditor.addRegionToMeta(meta, hri);
448 
449       FileSystem fs = FileSystem.get(conf);
450       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
451       Path regionDir = new Path(tableDir, hri.getEncodedName());
452       // create a file named the same as the region dir to
453       // mess up with region opening
454       fs.create(regionDir, true);
455 
456       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
457       master.assignRegion(hri);
458       AssignmentManager am = master.getAssignmentManager();
459       assertFalse(am.waitForAssignment(hri));
460 
461       RegionState state = am.getRegionStates().getRegionState(hri);
462       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
463       // Failed to open due to file system issue. Region state should
464       // carry the opening region server so that we can force close it
465       // later on before opening it again. See HBASE-9092.
466       assertNotNull(state.getServerName());
467 
468       // remove the blocking file, so that region can be opened
469       fs.delete(regionDir, true);
470       master.assignRegion(hri);
471       assertTrue(am.waitForAssignment(hri));
472 
473       ServerName serverName = master.getAssignmentManager().
474         getRegionStates().getRegionServerOfRegion(hri);
475       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
476     } finally {
477       TEST_UTIL.deleteTable(table);
478     }
479   }
480 
481   @Test (timeout=60000)
482   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
483     final TableName table =
484         TableName.valueOf
485             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
486     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
487     HRegionInfo hri = null;
488     ServerName serverName = null;
489     try {
490       hri = createTableAndGetOneRegion(table);
491       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
492       ServerName destServerName = null;
493       HRegionServer destServer = null;
494       for (int i = 0; i < 3; i++) {
495         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
496         if (!destServer.getServerName().equals(serverName)) {
497           destServerName = destServer.getServerName();
498           break;
499         }
500       }
501       am.regionOffline(hri);
502       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
503       am.getRegionStates().updateRegionState(hri, State.OFFLINE);
504       ZKAssign.createNodeOffline(zkw, hri, destServerName);
505       ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
506 
507       // Wait till the event is processed and the region is in transition
508       long timeoutTime = System.currentTimeMillis() + 20000;
509       while (!am.getRegionStates().isRegionInTransition(hri)) {
510         assertTrue("Failed to process ZK opening event in time",
511           System.currentTimeMillis() < timeoutTime);
512         Thread.sleep(100);
513       }
514 
515       am.getZKTable().setDisablingTable(table);
516       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
517       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
518       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
519           .getRegionState(hri).isOffline());
520     } finally {
521       if (hri != null && serverName != null) {
522         am.regionOnline(hri, serverName);
523       }
524       am.getZKTable().setDisabledTable(table);
525       TEST_UTIL.deleteTable(table);
526     }
527   }
528 
529   /**
530    * This tests region close hanging
531    */
532   @Test (timeout=60000)
533   public void testCloseHang() throws Exception {
534     String table = "testCloseHang";
535     try {
536       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
537       desc.addFamily(new HColumnDescriptor(FAMILY));
538       admin.createTable(desc);
539 
540       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
541       HRegionInfo hri = new HRegionInfo(
542         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
543       MetaEditor.addRegionToMeta(meta, hri);
544 
545       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
546       master.assignRegion(hri);
547       AssignmentManager am = master.getAssignmentManager();
548       assertTrue(am.waitForAssignment(hri));
549 
550       MyRegionObserver.postCloseEnabled.set(true);
551       am.unassign(hri);
552       // Now region should pending_close or closing
553       // Unassign it again forcefully so that we can trigger already
554       // in transition exception. This test is to make sure this scenario
555       // is handled properly.
556       am.server.getConfiguration().setLong(
557         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
558       am.unassign(hri, true);
559       RegionState state = am.getRegionStates().getRegionState(hri);
560       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
561 
562       // Let region closing move ahead. The region should be closed
563       // properly and re-assigned automatically
564       MyRegionObserver.postCloseEnabled.set(false);
565 
566       // region may still be assigned now since it's closing,
567       // let's check if it's assigned after it's out of transition
568       am.waitOnRegionToClearRegionsInTransition(hri);
569 
570       // region should be closed and re-assigned
571       assertTrue(am.waitForAssignment(hri));
572       ServerName serverName = master.getAssignmentManager().
573         getRegionStates().getRegionServerOfRegion(hri);
574       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
575     } finally {
576       MyRegionObserver.postCloseEnabled.set(false);
577       TEST_UTIL.deleteTable(Bytes.toBytes(table));
578     }
579   }
580 
581   /**
582    * This tests region close racing with open
583    */
584   @Test (timeout=60000)
585   public void testOpenCloseRacing() throws Exception {
586     String table = "testOpenCloseRacing";
587     try {
588       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
589       desc.addFamily(new HColumnDescriptor(FAMILY));
590       admin.createTable(desc);
591 
592       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
593       HRegionInfo hri = new HRegionInfo(
594         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
595       MetaEditor.addRegionToMeta(meta, hri);
596 
597       MyRegionObserver.postOpenEnabled.set(true);
598       MyRegionObserver.postOpenCalled = false;
599       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
600       // Region will be opened, but it won't complete
601       master.assignRegion(hri);
602       long end = EnvironmentEdgeManager.currentTimeMillis() + 20000;
603       // Wait till postOpen is called
604       while (!MyRegionObserver.postOpenCalled ) {
605         assertFalse("Timed out waiting for postOpen to be called",
606           EnvironmentEdgeManager.currentTimeMillis() > end);
607         Thread.sleep(300);
608       }
609 
610       AssignmentManager am = master.getAssignmentManager();
611       // Now let's unassign it, it should do nothing
612       am.unassign(hri);
613       RegionState state = am.getRegionStates().getRegionState(hri);
614       ServerName oldServerName = state.getServerName();
615       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
616 
617       // Now the region is stuck in opening
618       // Let's forcefully re-assign it to trigger closing/opening
619       // racing. This test is to make sure this scenario
620       // is handled properly.
621       MyRegionObserver.postOpenEnabled.set(false);
622       am.assign(hri, true, true);
623 
624       // let's check if it's assigned after it's out of transition
625       am.waitOnRegionToClearRegionsInTransition(hri);
626       assertTrue(am.waitForAssignment(hri));
627 
628       ServerName serverName = master.getAssignmentManager().
629         getRegionStates().getRegionServerOfRegion(hri);
630       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
631       assertFalse("Region should assigned on a new region server",
632         oldServerName.equals(serverName));
633     } finally {
634       MyRegionObserver.postOpenEnabled.set(false);
635       TEST_UTIL.deleteTable(Bytes.toBytes(table));
636     }
637   }
638 
639   /**
640    * Test force unassign/assign a region hosted on a dead server
641    */
642   @Test (timeout=60000)
643   public void testAssignRacingWithSSH() throws Exception {
644     String table = "testAssignRacingWithSSH";
645     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
646     MyMaster master = null;
647     try {
648       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
649       desc.addFamily(new HColumnDescriptor(FAMILY));
650       admin.createTable(desc);
651 
652       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
653       HRegionInfo hri = new HRegionInfo(
654         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
655       MetaEditor.addRegionToMeta(meta, hri);
656 
657       // Assign the region
658       master = (MyMaster)cluster.getMaster();
659       master.assignRegion(hri);
660 
661       // Hold SSH before killing the hosting server
662       master.enableSSH(false);
663 
664       AssignmentManager am = master.getAssignmentManager();
665       RegionStates regionStates = am.getRegionStates();
666       ServerName metaServer = regionStates.getRegionServerOfRegion(
667         HRegionInfo.FIRST_META_REGIONINFO);
668       while (true) {
669         assertTrue(am.waitForAssignment(hri));
670         RegionState state = regionStates.getRegionState(hri);
671         ServerName oldServerName = state.getServerName();
672         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
673           // Kill the hosting server, which doesn't have meta on it.
674           cluster.killRegionServer(oldServerName);
675           cluster.waitForRegionServerToStop(oldServerName, -1);
676           break;
677         }
678         int i = cluster.getServerWithMeta();
679         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
680         oldServerName = rs.getServerName();
681         master.move(hri.getEncodedNameAsBytes(),
682           Bytes.toBytes(oldServerName.getServerName()));
683       }
684 
685       // You can't assign a dead region before SSH
686       am.assign(hri, true, true);
687       RegionState state = regionStates.getRegionState(hri);
688       assertTrue(state.isFailedClose());
689 
690       // You can't unassign a dead region before SSH either
691       am.unassign(hri, true);
692       state = regionStates.getRegionState(hri);
693       assertTrue(state.isFailedClose());
694 
695       synchronized (regionStates) {
696         // Enable SSH so that log can be split
697         master.enableSSH(true);
698 
699         // We hold regionStates now, so logSplit
700         // won't be known to AM yet.
701         am.unassign(hri, true);
702         state = regionStates.getRegionState(hri);
703         assertTrue(state.isOffline());
704       }
705 
706       // let's check if it's assigned after it's out of transition.
707       // no need to assign it manually, SSH should do it
708       am.waitOnRegionToClearRegionsInTransition(hri);
709       assertTrue(am.waitForAssignment(hri));
710 
711       ServerName serverName = master.getAssignmentManager().
712         getRegionStates().getRegionServerOfRegion(hri);
713       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
714     } finally {
715       if (master != null) {
716         master.enableSSH(true);
717       }
718       TEST_UTIL.deleteTable(Bytes.toBytes(table));
719     }
720   }
721 
722   /**
723    * Test force unassign/assign a region of a disabled table
724    */
725   @Test (timeout=60000)
726   public void testAssignDisabledRegion() throws Exception {
727     String table = "testAssignDisabledRegion";
728     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
729     MyMaster master = null;
730     try {
731       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
732       desc.addFamily(new HColumnDescriptor(FAMILY));
733       admin.createTable(desc);
734 
735       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
736       HRegionInfo hri = new HRegionInfo(
737         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
738       MetaEditor.addRegionToMeta(meta, hri);
739 
740       // Assign the region
741       master = (MyMaster)cluster.getMaster();
742       master.assignRegion(hri);
743       AssignmentManager am = master.getAssignmentManager();
744       RegionStates regionStates = am.getRegionStates();
745       assertTrue(am.waitForAssignment(hri));
746 
747       // Disable the table
748       admin.disableTable(table);
749       assertTrue(regionStates.isRegionOffline(hri));
750 
751       // You can't assign a disabled region
752       am.assign(hri, true, true);
753       assertTrue(regionStates.isRegionOffline(hri));
754 
755       // You can't unassign a disabled region either
756       am.unassign(hri, true);
757       assertTrue(regionStates.isRegionOffline(hri));
758     } finally {
759       TEST_UTIL.deleteTable(Bytes.toBytes(table));
760     }
761   }
762 
763   static class MyLoadBalancer extends StochasticLoadBalancer {
764     // For this region, if specified, always assign to nowhere
765     static volatile String controledRegion = null;
766 
767     @Override
768     public ServerName randomAssignment(HRegionInfo regionInfo,
769         List<ServerName> servers) {
770       if (regionInfo.getEncodedName().equals(controledRegion)) {
771         return null;
772       }
773       return super.randomAssignment(regionInfo, servers);
774     }
775   }
776 
777   public static class MyMaster extends HMaster {
778     AtomicBoolean enabled = new AtomicBoolean(true);
779 
780     public MyMaster(Configuration conf) throws IOException, KeeperException,
781         InterruptedException {
782       super(conf);
783     }
784 
785     @Override
786     public boolean isServerShutdownHandlerEnabled() {
787       return enabled.get() && super.isServerShutdownHandlerEnabled();
788     }
789 
790     public void enableSSH(boolean enabled) {
791       this.enabled.set(enabled);
792       if (enabled) {
793         serverManager.processQueuedDeadServers();
794       }
795     }
796   }
797 
798   public static class MyRegionObserver extends BaseRegionObserver {
799     // If enabled, fail all preClose calls
800     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
801 
802     // If enabled, stall postClose calls
803     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
804 
805     // If enabled, stall postOpen calls
806     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
807 
808     // A flag to track if postOpen is called
809     static volatile boolean postOpenCalled = false;
810 
811     @Override
812     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
813         boolean abortRequested) throws IOException {
814       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
815     }
816 
817     @Override
818     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
819         boolean abortRequested) {
820       stallOnFlag(postCloseEnabled);
821     }
822 
823     @Override
824     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
825       postOpenCalled = true;
826       stallOnFlag(postOpenEnabled);
827     }
828 
829     private void stallOnFlag(final AtomicBoolean flag) {
830       try {
831         // If enabled, stall
832         while (flag.get()) {
833           Thread.sleep(1000);
834         }
835       } catch (InterruptedException ie) {
836         Thread.currentThread().interrupt();
837       }
838     }
839   }
840 }