View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  import java.util.List;
29  import java.util.Set;
30  import java.util.concurrent.atomic.AtomicBoolean;
31  
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.HBaseTestingUtility;
36  import org.apache.hadoop.hbase.HColumnDescriptor;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.HTableDescriptor;
40  import org.apache.hadoop.hbase.MediumTests;
41  import org.apache.hadoop.hbase.MiniHBaseCluster;
42  import org.apache.hadoop.hbase.ServerLoad;
43  import org.apache.hadoop.hbase.ServerName;
44  import org.apache.hadoop.hbase.TableName;
45  import org.apache.hadoop.hbase.UnknownRegionException;
46  import org.apache.hadoop.hbase.Waiter;
47  import org.apache.hadoop.hbase.catalog.MetaEditor;
48  import org.apache.hadoop.hbase.client.HBaseAdmin;
49  import org.apache.hadoop.hbase.client.HTable;
50  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
51  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
52  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
53  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
54  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
55  import org.apache.hadoop.hbase.executor.EventType;
56  import org.apache.hadoop.hbase.master.RegionState.State;
57  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
58  import org.apache.hadoop.hbase.regionserver.HRegionServer;
59  import org.apache.hadoop.hbase.util.Bytes;
60  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
61  import org.apache.hadoop.hbase.util.FSUtils;
62  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
63  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
64  import org.apache.zookeeper.KeeperException;
65  import org.junit.AfterClass;
66  import org.junit.BeforeClass;
67  import org.junit.Test;
68  import org.junit.experimental.categories.Category;
69  
70  /**
71   * This tests AssignmentManager with a testing cluster.
72   */
73  @Category(MediumTests.class)
74  public class TestAssignmentManagerOnCluster {
75    private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
76    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
77    private final static Configuration conf = TEST_UTIL.getConfiguration();
78    private static HBaseAdmin admin;
79  
80    @BeforeClass
81    public static void setUpBeforeClass() throws Exception {
82      // Using the our load balancer to control region plans
83      conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
84        MyLoadBalancer.class, LoadBalancer.class);
85      conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
86        MyRegionObserver.class, RegionObserver.class);
87      // Reduce the maximum attempts to speed up the test
88      conf.setInt("hbase.assignment.maximum.attempts", 3);
89  
90      TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, null);
91      admin = TEST_UTIL.getHBaseAdmin();
92    }
93  
94    @AfterClass
95    public static void tearDownAfterClass() throws Exception {
96      TEST_UTIL.shutdownMiniCluster();
97    }
98  
99    /**
100    * This tests region assignment
101    */
102   @Test (timeout=60000)
103   public void testAssignRegion() throws Exception {
104     String table = "testAssignRegion";
105     try {
106       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
107       desc.addFamily(new HColumnDescriptor(FAMILY));
108       admin.createTable(desc);
109 
110       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
111       HRegionInfo hri = new HRegionInfo(
112         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
113       MetaEditor.addRegionToMeta(meta, hri);
114 
115       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
116       master.assignRegion(hri);
117       AssignmentManager am = master.getAssignmentManager();
118       am.waitForAssignment(hri);
119 
120       RegionStates regionStates = am.getRegionStates();
121       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
122       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
123 
124       // Region is assigned now. Let's assign it again.
125       // Master should not abort, and region should be assigned.
126       RegionState oldState = regionStates.getRegionState(hri);
127       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
128       master.getAssignmentManager().waitForAssignment(hri);
129       RegionState newState = regionStates.getRegionState(hri);
130       assertTrue(newState.isOpened()
131         && newState.getStamp() != oldState.getStamp());
132     } finally {
133       TEST_UTIL.deleteTable(Bytes.toBytes(table));
134     }
135   }
136 
137   /**
138    * This tests region assignment on a simulated restarted server
139    */
140   @Test (timeout=120000)
141   public void testAssignRegionOnRestartedServer() throws Exception {
142     String table = "testAssignRegionOnRestartedServer";
143     TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 40);
144     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
145     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
146 
147     ServerName deadServer = null;
148     HMaster master = null;
149     try {
150       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
151       desc.addFamily(new HColumnDescriptor(FAMILY));
152       admin.createTable(desc);
153 
154       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
155       final HRegionInfo hri = new HRegionInfo(
156         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
157       MetaEditor.addRegionToMeta(meta, hri);
158 
159       master = TEST_UTIL.getHBaseCluster().getMaster();
160       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
161       assertFalse("There should be some servers online", onlineServers.isEmpty());
162 
163       // Use the first server as the destination server
164       ServerName destServer = onlineServers.iterator().next();
165 
166       // Created faked dead server
167       deadServer = ServerName.valueOf(destServer.getHostname(),
168           destServer.getPort(), destServer.getStartcode() - 100L);
169       master.serverManager.recordNewServer(deadServer, ServerLoad.EMPTY_SERVERLOAD);
170 
171       final AssignmentManager am = master.getAssignmentManager();
172       RegionPlan plan = new RegionPlan(hri, null, deadServer);
173       am.addPlan(hri.getEncodedName(), plan);
174       master.assignRegion(hri);
175 
176       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
177         destServer, EventType.M_ZK_REGION_OFFLINE,
178         EventType.RS_ZK_REGION_OPENING, 0);
179       assertEquals("TansitionNode should fail", -1, version);
180 
181       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
182         @Override
183         public boolean evaluate() throws Exception {
184           return ! am.getRegionStates().isRegionInTransition(hri);
185         }
186       });
187 
188     assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri));
189     } finally {
190       if (deadServer != null) {
191         master.serverManager.expireServer(deadServer);
192       }
193 
194       TEST_UTIL.deleteTable(Bytes.toBytes(table));
195     }
196   }
197 
198   /**
199    * This tests offlining a region
200    */
201   @Test (timeout=60000)
202   public void testOfflineRegion() throws Exception {
203     TableName table =
204         TableName.valueOf("testOfflineRegion");
205     try {
206       HRegionInfo hri = createTableAndGetOneRegion(table);
207 
208       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
209         getMaster().getAssignmentManager().getRegionStates();
210       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
211       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
212       admin.offline(hri.getRegionName());
213 
214       long timeoutTime = System.currentTimeMillis() + 800;
215       while (true) {
216         List<HRegionInfo> regions =
217           regionStates.getRegionsOfTable(table);
218         if (!regions.contains(hri)) break;
219         long now = System.currentTimeMillis();
220         if (now > timeoutTime) {
221           fail("Failed to offline the region in time");
222           break;
223         }
224         Thread.sleep(10);
225       }
226       RegionState regionState = regionStates.getRegionState(hri);
227       assertTrue(regionState.isOffline());
228     } finally {
229       TEST_UTIL.deleteTable(table);
230     }
231   }
232 
233   /**
234    * This tests moving a region
235    */
236   @Test (timeout=50000)
237   public void testMoveRegion() throws Exception {
238     TableName table =
239         TableName.valueOf("testMoveRegion");
240     try {
241       HRegionInfo hri = createTableAndGetOneRegion(table);
242 
243       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
244         getMaster().getAssignmentManager().getRegionStates();
245       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
246       ServerName destServerName = null;
247       for (int i = 0; i < 3; i++) {
248         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
249         if (!destServer.getServerName().equals(serverName)) {
250           destServerName = destServer.getServerName();
251           break;
252         }
253       }
254       assertTrue(destServerName != null
255         && !destServerName.equals(serverName));
256       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
257         Bytes.toBytes(destServerName.getServerName()));
258 
259       long timeoutTime = System.currentTimeMillis() + 30000;
260       while (true) {
261         ServerName sn = regionStates.getRegionServerOfRegion(hri);
262         if (sn != null && sn.equals(destServerName)) {
263           TEST_UTIL.assertRegionOnServer(hri, sn, 200);
264           break;
265         }
266         long now = System.currentTimeMillis();
267         if (now > timeoutTime) {
268           fail("Failed to move the region in time: "
269             + regionStates.getRegionState(hri));
270         }
271         regionStates.waitForUpdate(50);
272       }
273 
274     } finally {
275       TEST_UTIL.deleteTable(table);
276     }
277   }
278 
279   /**
280    * If a table is deleted, we should not be able to move it anymore.
281    * Otherwise, the region will be brought back.
282    * @throws Exception
283    */
284   @Test (timeout=50000)
285   public void testMoveRegionOfDeletedTable() throws Exception {
286     TableName table =
287         TableName.valueOf("testMoveRegionOfDeletedTable");
288     HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
289     try {
290       HRegionInfo hri = createTableAndGetOneRegion(table);
291 
292       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
293       AssignmentManager am = master.getAssignmentManager();
294       RegionStates regionStates = am.getRegionStates();
295       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
296       ServerName destServerName = null;
297       for (int i = 0; i < 3; i++) {
298         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
299         if (!destServer.getServerName().equals(serverName)) {
300           destServerName = destServer.getServerName();
301           break;
302         }
303       }
304       assertTrue(destServerName != null
305         && !destServerName.equals(serverName));
306 
307       TEST_UTIL.deleteTable(table);
308 
309       try {
310         admin.move(hri.getEncodedNameAsBytes(),
311           Bytes.toBytes(destServerName.getServerName()));
312         fail("We should not find the region");
313       } catch (IOException ioe) {
314         assertTrue(ioe instanceof UnknownRegionException);
315       }
316 
317       am.balance(new RegionPlan(hri, serverName, destServerName));
318       assertFalse("The region should not be in transition",
319         regionStates.isRegionInTransition(hri));
320     } finally {
321       if (admin.tableExists(table)) {
322         TEST_UTIL.deleteTable(table);
323       }
324     }
325   }
326 
327   HRegionInfo createTableAndGetOneRegion(
328       final TableName tableName) throws IOException, InterruptedException {
329     HTableDescriptor desc = new HTableDescriptor(tableName);
330     desc.addFamily(new HColumnDescriptor(FAMILY));
331     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
332 
333     // wait till the table is assigned
334     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
335     long timeoutTime = System.currentTimeMillis() + 1000;
336     while (true) {
337       List<HRegionInfo> regions = master.getAssignmentManager().
338         getRegionStates().getRegionsOfTable(tableName);
339       if (regions.size() > 3) {
340         return regions.get(2);
341       }
342       long now = System.currentTimeMillis();
343       if (now > timeoutTime) {
344         fail("Could not find an online region");
345       }
346       Thread.sleep(10);
347     }
348   }
349 
350   /**
351    * This test should not be flaky. If it is flaky, it means something
352    * wrong with AssignmentManager which should be reported and fixed
353    *
354    * This tests forcefully assign a region while it's closing and re-assigned.
355    */
356   @Test (timeout=60000)
357   public void testForceAssignWhileClosing() throws Exception {
358     String table = "testForceAssignWhileClosing";
359     try {
360       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
361       desc.addFamily(new HColumnDescriptor(FAMILY));
362       admin.createTable(desc);
363 
364       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
365       HRegionInfo hri = new HRegionInfo(
366         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
367       MetaEditor.addRegionToMeta(meta, hri);
368 
369       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
370       master.assignRegion(hri);
371       AssignmentManager am = master.getAssignmentManager();
372       assertTrue(am.waitForAssignment(hri));
373 
374       MyRegionObserver.preCloseEnabled.set(true);
375       am.unassign(hri);
376       RegionState state = am.getRegionStates().getRegionState(hri);
377       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
378 
379       MyRegionObserver.preCloseEnabled.set(false);
380       am.unassign(hri, true);
381 
382       // region is closing now, will be re-assigned automatically.
383       // now, let's forcefully assign it again. it should be
384       // assigned properly and no double-assignment
385       am.assign(hri, true, true);
386 
387       // let's check if it's assigned after it's out of transition
388       am.waitOnRegionToClearRegionsInTransition(hri);
389       assertTrue(am.waitForAssignment(hri));
390 
391       ServerName serverName = master.getAssignmentManager().
392         getRegionStates().getRegionServerOfRegion(hri);
393       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
394     } finally {
395       MyRegionObserver.preCloseEnabled.set(false);
396       TEST_UTIL.deleteTable(Bytes.toBytes(table));
397     }
398   }
399 
400   /**
401    * This tests region close failed
402    */
403   @Test (timeout=60000)
404   public void testCloseFailed() throws Exception {
405     String table = "testCloseFailed";
406     try {
407       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
408       desc.addFamily(new HColumnDescriptor(FAMILY));
409       admin.createTable(desc);
410 
411       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
412       HRegionInfo hri = new HRegionInfo(
413         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
414       MetaEditor.addRegionToMeta(meta, hri);
415 
416       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
417       master.assignRegion(hri);
418       AssignmentManager am = master.getAssignmentManager();
419       assertTrue(am.waitForAssignment(hri));
420 
421       MyRegionObserver.preCloseEnabled.set(true);
422       am.unassign(hri);
423       RegionState state = am.getRegionStates().getRegionState(hri);
424       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
425 
426       MyRegionObserver.preCloseEnabled.set(false);
427       am.unassign(hri, true);
428 
429       // region may still be assigned now since it's closing,
430       // let's check if it's assigned after it's out of transition
431       am.waitOnRegionToClearRegionsInTransition(hri);
432 
433       // region should be closed and re-assigned
434       assertTrue(am.waitForAssignment(hri));
435       ServerName serverName = master.getAssignmentManager().
436         getRegionStates().getRegionServerOfRegion(hri);
437       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
438     } finally {
439       MyRegionObserver.preCloseEnabled.set(false);
440       TEST_UTIL.deleteTable(Bytes.toBytes(table));
441     }
442   }
443 
444   /**
445    * This tests region open failed
446    */
447   @Test (timeout=60000)
448   public void testOpenFailed() throws Exception {
449     String table = "testOpenFailed";
450     try {
451       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
452       desc.addFamily(new HColumnDescriptor(FAMILY));
453       admin.createTable(desc);
454 
455       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
456       HRegionInfo hri = new HRegionInfo(
457         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
458       MetaEditor.addRegionToMeta(meta, hri);
459 
460       MyLoadBalancer.controledRegion = hri.getEncodedName();
461 
462       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
463       master.assignRegion(hri);
464       AssignmentManager am = master.getAssignmentManager();
465       assertFalse(am.waitForAssignment(hri));
466 
467       RegionState state = am.getRegionStates().getRegionState(hri);
468       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
469       // Failed to open since no plan, so it's on no server
470       assertNull(state.getServerName());
471 
472       MyLoadBalancer.controledRegion = null;
473       master.assignRegion(hri);
474       assertTrue(am.waitForAssignment(hri));
475 
476       ServerName serverName = master.getAssignmentManager().
477         getRegionStates().getRegionServerOfRegion(hri);
478       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
479     } finally {
480       MyLoadBalancer.controledRegion = null;
481       TEST_UTIL.deleteTable(Bytes.toBytes(table));
482     }
483   }
484 
485   /**
486    * This tests region open failure which is not recoverable
487    */
488   @Test (timeout=60000)
489   public void testOpenFailedUnrecoverable() throws Exception {
490     TableName table =
491         TableName.valueOf("testOpenFailedUnrecoverable");
492     try {
493       HTableDescriptor desc = new HTableDescriptor(table);
494       desc.addFamily(new HColumnDescriptor(FAMILY));
495       admin.createTable(desc);
496 
497       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
498       HRegionInfo hri = new HRegionInfo(
499         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
500       MetaEditor.addRegionToMeta(meta, hri);
501 
502       FileSystem fs = FileSystem.get(conf);
503       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
504       Path regionDir = new Path(tableDir, hri.getEncodedName());
505       // create a file named the same as the region dir to
506       // mess up with region opening
507       fs.create(regionDir, true);
508 
509       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
510       master.assignRegion(hri);
511       AssignmentManager am = master.getAssignmentManager();
512       assertFalse(am.waitForAssignment(hri));
513 
514       RegionState state = am.getRegionStates().getRegionState(hri);
515       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
516       // Failed to open due to file system issue. Region state should
517       // carry the opening region server so that we can force close it
518       // later on before opening it again. See HBASE-9092.
519       assertNotNull(state.getServerName());
520 
521       // remove the blocking file, so that region can be opened
522       fs.delete(regionDir, true);
523       master.assignRegion(hri);
524       assertTrue(am.waitForAssignment(hri));
525 
526       ServerName serverName = master.getAssignmentManager().
527         getRegionStates().getRegionServerOfRegion(hri);
528       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
529     } finally {
530       TEST_UTIL.deleteTable(table);
531     }
532   }
533 
534   @Test (timeout=60000)
535   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
536     final TableName table =
537         TableName.valueOf
538             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
539     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
540     HRegionInfo hri = null;
541     ServerName serverName = null;
542     try {
543       hri = createTableAndGetOneRegion(table);
544       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
545       ServerName destServerName = null;
546       HRegionServer destServer = null;
547       for (int i = 0; i < 3; i++) {
548         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
549         if (!destServer.getServerName().equals(serverName)) {
550           destServerName = destServer.getServerName();
551           break;
552         }
553       }
554       am.regionOffline(hri);
555       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
556       am.getRegionStates().updateRegionState(hri, State.OFFLINE);
557       ZKAssign.createNodeOffline(zkw, hri, destServerName);
558       ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
559 
560       // Wait till the event is processed and the region is in transition
561       long timeoutTime = System.currentTimeMillis() + 20000;
562       while (!am.getRegionStates().isRegionInTransition(hri)) {
563         assertTrue("Failed to process ZK opening event in time",
564           System.currentTimeMillis() < timeoutTime);
565         Thread.sleep(100);
566       }
567 
568       am.getZKTable().setDisablingTable(table);
569       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
570       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
571       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
572           .getRegionState(hri).isOffline());
573     } finally {
574       if (hri != null && serverName != null) {
575         am.regionOnline(hri, serverName);
576       }
577       am.getZKTable().setDisabledTable(table);
578       TEST_UTIL.deleteTable(table);
579     }
580   }
581 
582   /**
583    * This tests region close hanging
584    */
585   @Test (timeout=60000)
586   public void testCloseHang() throws Exception {
587     String table = "testCloseHang";
588     try {
589       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
590       desc.addFamily(new HColumnDescriptor(FAMILY));
591       admin.createTable(desc);
592 
593       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
594       HRegionInfo hri = new HRegionInfo(
595         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
596       MetaEditor.addRegionToMeta(meta, hri);
597 
598       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
599       master.assignRegion(hri);
600       AssignmentManager am = master.getAssignmentManager();
601       assertTrue(am.waitForAssignment(hri));
602 
603       MyRegionObserver.postCloseEnabled.set(true);
604       am.unassign(hri);
605       // Now region should pending_close or closing
606       // Unassign it again forcefully so that we can trigger already
607       // in transition exception. This test is to make sure this scenario
608       // is handled properly.
609       am.server.getConfiguration().setLong(
610         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
611       am.unassign(hri, true);
612       RegionState state = am.getRegionStates().getRegionState(hri);
613       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
614 
615       // Let region closing move ahead. The region should be closed
616       // properly and re-assigned automatically
617       MyRegionObserver.postCloseEnabled.set(false);
618 
619       // region may still be assigned now since it's closing,
620       // let's check if it's assigned after it's out of transition
621       am.waitOnRegionToClearRegionsInTransition(hri);
622 
623       // region should be closed and re-assigned
624       assertTrue(am.waitForAssignment(hri));
625       ServerName serverName = master.getAssignmentManager().
626         getRegionStates().getRegionServerOfRegion(hri);
627       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
628     } finally {
629       MyRegionObserver.postCloseEnabled.set(false);
630       TEST_UTIL.deleteTable(Bytes.toBytes(table));
631     }
632   }
633 
634   /**
635    * This tests region close racing with open
636    */
637   @Test (timeout=60000)
638   public void testOpenCloseRacing() throws Exception {
639     String table = "testOpenCloseRacing";
640     try {
641       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
642       desc.addFamily(new HColumnDescriptor(FAMILY));
643       admin.createTable(desc);
644 
645       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
646       HRegionInfo hri = new HRegionInfo(
647         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
648       MetaEditor.addRegionToMeta(meta, hri);
649 
650       MyRegionObserver.postOpenEnabled.set(true);
651       MyRegionObserver.postOpenCalled = false;
652       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
653       // Region will be opened, but it won't complete
654       master.assignRegion(hri);
655       long end = EnvironmentEdgeManager.currentTimeMillis() + 20000;
656       // Wait till postOpen is called
657       while (!MyRegionObserver.postOpenCalled ) {
658         assertFalse("Timed out waiting for postOpen to be called",
659           EnvironmentEdgeManager.currentTimeMillis() > end);
660         Thread.sleep(300);
661       }
662 
663       AssignmentManager am = master.getAssignmentManager();
664       // Now let's unassign it, it should do nothing
665       am.unassign(hri);
666       RegionState state = am.getRegionStates().getRegionState(hri);
667       ServerName oldServerName = state.getServerName();
668       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
669 
670       // Now the region is stuck in opening
671       // Let's forcefully re-assign it to trigger closing/opening
672       // racing. This test is to make sure this scenario
673       // is handled properly.
674       MyRegionObserver.postOpenEnabled.set(false);
675       am.assign(hri, true, true);
676 
677       // let's check if it's assigned after it's out of transition
678       am.waitOnRegionToClearRegionsInTransition(hri);
679       assertTrue(am.waitForAssignment(hri));
680 
681       ServerName serverName = master.getAssignmentManager().
682         getRegionStates().getRegionServerOfRegion(hri);
683       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
684       assertFalse("Region should assigned on a new region server",
685         oldServerName.equals(serverName));
686     } finally {
687       MyRegionObserver.postOpenEnabled.set(false);
688       TEST_UTIL.deleteTable(Bytes.toBytes(table));
689     }
690   }
691 
692   /**
693    * Test force unassign/assign a region hosted on a dead server
694    */
695   @Test (timeout=60000)
696   public void testAssignRacingWithSSH() throws Exception {
697     String table = "testAssignRacingWithSSH";
698     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
699     MyMaster master = null;
700     try {
701       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
702       desc.addFamily(new HColumnDescriptor(FAMILY));
703       admin.createTable(desc);
704 
705       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
706       HRegionInfo hri = new HRegionInfo(
707         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
708       MetaEditor.addRegionToMeta(meta, hri);
709 
710       // Assign the region
711       master = (MyMaster)cluster.getMaster();
712       master.assignRegion(hri);
713 
714       // Hold SSH before killing the hosting server
715       master.enableSSH(false);
716 
717       AssignmentManager am = master.getAssignmentManager();
718       RegionStates regionStates = am.getRegionStates();
719       ServerName metaServer = regionStates.getRegionServerOfRegion(
720         HRegionInfo.FIRST_META_REGIONINFO);
721       while (true) {
722         assertTrue(am.waitForAssignment(hri));
723         RegionState state = regionStates.getRegionState(hri);
724         ServerName oldServerName = state.getServerName();
725         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
726           // Kill the hosting server, which doesn't have meta on it.
727           cluster.killRegionServer(oldServerName);
728           cluster.waitForRegionServerToStop(oldServerName, -1);
729           break;
730         }
731         int i = cluster.getServerWithMeta();
732         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
733         oldServerName = rs.getServerName();
734         master.move(hri.getEncodedNameAsBytes(),
735           Bytes.toBytes(oldServerName.getServerName()));
736       }
737 
738       // You can't assign a dead region before SSH
739       am.assign(hri, true, true);
740       RegionState state = regionStates.getRegionState(hri);
741       assertTrue(state.isOffline());
742 
743       // You can't unassign a dead region before SSH either
744       am.unassign(hri, true);
745       assertTrue(state.isOffline());
746 
747       // Enable SSH so that log can be split
748       master.enableSSH(true);
749 
750       // let's check if it's assigned after it's out of transition.
751       // no need to assign it manually, SSH should do it
752       am.waitOnRegionToClearRegionsInTransition(hri);
753       assertTrue(am.waitForAssignment(hri));
754 
755       ServerName serverName = master.getAssignmentManager().
756         getRegionStates().getRegionServerOfRegion(hri);
757       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
758     } finally {
759       if (master != null) {
760         master.enableSSH(true);
761       }
762       TEST_UTIL.deleteTable(Bytes.toBytes(table));
763     }
764   }
765 
766   /**
767    * Test force unassign/assign a region of a disabled table
768    */
769   @Test (timeout=60000)
770   public void testAssignDisabledRegion() throws Exception {
771     String table = "testAssignDisabledRegion";
772     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
773     MyMaster master = null;
774     try {
775       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
776       desc.addFamily(new HColumnDescriptor(FAMILY));
777       admin.createTable(desc);
778 
779       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
780       HRegionInfo hri = new HRegionInfo(
781         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
782       MetaEditor.addRegionToMeta(meta, hri);
783 
784       // Assign the region
785       master = (MyMaster)cluster.getMaster();
786       master.assignRegion(hri);
787       AssignmentManager am = master.getAssignmentManager();
788       RegionStates regionStates = am.getRegionStates();
789       assertTrue(am.waitForAssignment(hri));
790 
791       // Disable the table
792       admin.disableTable(table);
793       assertTrue(regionStates.isRegionOffline(hri));
794 
795       // You can't assign a disabled region
796       am.assign(hri, true, true);
797       assertTrue(regionStates.isRegionOffline(hri));
798 
799       // You can't unassign a disabled region either
800       am.unassign(hri, true);
801       assertTrue(regionStates.isRegionOffline(hri));
802     } finally {
803       TEST_UTIL.deleteTable(Bytes.toBytes(table));
804     }
805   }
806 
807   static class MyLoadBalancer extends StochasticLoadBalancer {
808     // For this region, if specified, always assign to nowhere
809     static volatile String controledRegion = null;
810 
811     @Override
812     public ServerName randomAssignment(HRegionInfo regionInfo,
813         List<ServerName> servers) {
814       if (regionInfo.getEncodedName().equals(controledRegion)) {
815         return null;
816       }
817       return super.randomAssignment(regionInfo, servers);
818     }
819   }
820 
821   public static class MyMaster extends HMaster {
822     AtomicBoolean enabled = new AtomicBoolean(true);
823 
824     public MyMaster(Configuration conf) throws IOException, KeeperException,
825         InterruptedException {
826       super(conf);
827     }
828 
829     @Override
830     public boolean isServerShutdownHandlerEnabled() {
831       return enabled.get() && super.isServerShutdownHandlerEnabled();
832     }
833 
834     public void enableSSH(boolean enabled) {
835       this.enabled.set(enabled);
836       if (enabled) {
837         serverManager.processQueuedDeadServers();
838       }
839     }
840   }
841 
842   public static class MyRegionObserver extends BaseRegionObserver {
843     // If enabled, fail all preClose calls
844     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
845 
846     // If enabled, stall postClose calls
847     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
848 
849     // If enabled, stall postOpen calls
850     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
851 
852     // A flag to track if postOpen is called
853     static volatile boolean postOpenCalled = false;
854 
855     @Override
856     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
857         boolean abortRequested) throws IOException {
858       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
859     }
860 
861     @Override
862     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
863         boolean abortRequested) {
864       stallOnFlag(postCloseEnabled);
865     }
866 
867     @Override
868     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
869       postOpenCalled = true;
870       stallOnFlag(postOpenEnabled);
871     }
872 
873     private void stallOnFlag(final AtomicBoolean flag) {
874       try {
875         // If enabled, stall
876         while (flag.get()) {
877           Thread.sleep(1000);
878         }
879       } catch (InterruptedException ie) {
880         Thread.currentThread().interrupt();
881       }
882     }
883   }
884 }