View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  import java.util.ArrayList;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HColumnDescriptor;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.MediumTests;
42  import org.apache.hadoop.hbase.MiniHBaseCluster;
43  import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
44  import org.apache.hadoop.hbase.ServerLoad;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.TableName;
47  import org.apache.hadoop.hbase.UnknownRegionException;
48  import org.apache.hadoop.hbase.Waiter;
49  import org.apache.hadoop.hbase.catalog.MetaEditor;
50  import org.apache.hadoop.hbase.catalog.MetaReader;
51  import org.apache.hadoop.hbase.client.HBaseAdmin;
52  import org.apache.hadoop.hbase.client.HTable;
53  import org.apache.hadoop.hbase.client.Result;
54  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
55  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
56  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
57  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
58  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
59  import org.apache.hadoop.hbase.executor.EventType;
60  import org.apache.hadoop.hbase.master.RegionState.State;
61  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
62  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
63  import org.apache.hadoop.hbase.regionserver.HRegionServer;
64  import org.apache.hadoop.hbase.util.Bytes;
65  import org.apache.hadoop.hbase.util.ConfigUtil;
66  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
67  import org.apache.hadoop.hbase.util.FSUtils;
68  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
69  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
70  import org.apache.zookeeper.KeeperException;
71  import org.junit.AfterClass;
72  import org.junit.BeforeClass;
73  import org.junit.Test;
74  import org.junit.experimental.categories.Category;
75  
76  
77  /**
78   * This tests AssignmentManager with a testing cluster.
79   */
80  @Category(MediumTests.class)
81  public class TestAssignmentManagerOnCluster {
82    private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
83    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
84    final static Configuration conf = TEST_UTIL.getConfiguration();
85    private static HBaseAdmin admin;
86  
87    static void setupOnce() throws Exception {
88      // Using the our load balancer to control region plans
89      conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
90        MyLoadBalancer.class, LoadBalancer.class);
91      conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
92        MyRegionObserver.class, RegionObserver.class);
93      // Reduce the maximum attempts to speed up the test
94      conf.setInt("hbase.assignment.maximum.attempts", 3);
95  
96      TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, MyRegionServer.class);
97      admin = TEST_UTIL.getHBaseAdmin();
98    }
99  
100   @BeforeClass
101   public static void setUpBeforeClass() throws Exception {
102     // Use ZK for region assignment
103     conf.setBoolean("hbase.assignment.usezk", true);
104     setupOnce();
105   }
106 
107   @AfterClass
108   public static void tearDownAfterClass() throws Exception {
109     TEST_UTIL.shutdownMiniCluster();
110   }
111 
112   /**
113    * This tests region assignment
114    */
115   @Test (timeout=60000)
116   public void testAssignRegion() throws Exception {
117     String table = "testAssignRegion";
118     try {
119       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
120       desc.addFamily(new HColumnDescriptor(FAMILY));
121       admin.createTable(desc);
122 
123       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
124       HRegionInfo hri = new HRegionInfo(
125         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
126       MetaEditor.addRegionToMeta(meta, hri);
127 
128       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
129       master.assignRegion(hri);
130       AssignmentManager am = master.getAssignmentManager();
131       am.waitForAssignment(hri);
132 
133       RegionStates regionStates = am.getRegionStates();
134       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
135       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
136 
137       // Region is assigned now. Let's assign it again.
138       // Master should not abort, and region should be assigned.
139       RegionState oldState = regionStates.getRegionState(hri);
140       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
141       master.getAssignmentManager().waitForAssignment(hri);
142       RegionState newState = regionStates.getRegionState(hri);
143       assertTrue(newState.isOpened()
144         && newState.getStamp() != oldState.getStamp());
145     } finally {
146       TEST_UTIL.deleteTable(Bytes.toBytes(table));
147     }
148   }
149   
150   // Simulate a scenario where the AssignCallable and SSH are trying to assign a region
151   @Test (timeout=60000)
152   public void testAssignRegionBySSH() throws Exception {
153     if (!conf.getBoolean("hbase.assignment.usezk", true)) {
154       return;
155     }
156     String table = "testAssignRegionBySSH";
157     MyMaster master = (MyMaster) TEST_UTIL.getHBaseCluster().getMaster();
158     try {
159       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
160       desc.addFamily(new HColumnDescriptor(FAMILY));
161       admin.createTable(desc);
162 
163       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
164       HRegionInfo hri = new HRegionInfo(
165         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
166       MetaEditor.addRegionToMeta(meta, hri);
167       // Add some dummy server for the region entry
168       MetaEditor.updateRegionLocation(TEST_UTIL.getHBaseCluster().getMaster().getCatalogTracker(), hri,
169         ServerName.valueOf("example.org", 1234, System.currentTimeMillis()), 0);
170       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
171       int i = TEST_UTIL.getHBaseCluster().getServerWithMeta();
172       HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(i == 0 ? 1 : 0);
173       // Choose a server other than meta to kill
174       ServerName controlledServer = rs.getServerName();
175       master.enableSSH(false);
176       TEST_UTIL.getHBaseCluster().killRegionServer(controlledServer);
177       TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(controlledServer, -1);
178       AssignmentManager am = master.getAssignmentManager();
179       
180       // Simulate the AssignCallable trying to assign the region. Have the region in OFFLINE state,
181       // but not in transition and the server is the dead 'controlledServer'  
182       regionStates.createRegionState(hri, State.OFFLINE, controlledServer);
183       am.assign(hri, true, true);
184       // Region should remain in OFFLINE and go to transition
185       assertEquals(State.OFFLINE, regionStates.getRegionState(hri).getState());
186       assertTrue (regionStates.isRegionInTransition(hri));
187       
188       master.enableSSH(true);
189       am.waitForAssignment(hri);
190       assertTrue (regionStates.getRegionState(hri).isOpened());
191       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
192       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
193     } finally {
194       if (master != null) {
195         master.enableSSH(true);
196       }
197       TEST_UTIL.deleteTable(Bytes.toBytes(table));
198       TEST_UTIL.getHBaseCluster().startRegionServer();
199     }
200   }
201 
202   /**
203    * This tests region assignment on a simulated restarted server
204    */
205   @Test (timeout=120000)
206   public void testAssignRegionOnRestartedServer() throws Exception {
207     String table = "testAssignRegionOnRestartedServer";
208     TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
209     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
210     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
211 
212     ServerName deadServer = null;
213     HMaster master = null;
214     try {
215       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
216       desc.addFamily(new HColumnDescriptor(FAMILY));
217       admin.createTable(desc);
218 
219       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
220       final HRegionInfo hri = new HRegionInfo(
221         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
222       MetaEditor.addRegionToMeta(meta, hri);
223 
224       master = TEST_UTIL.getHBaseCluster().getMaster();
225       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
226       assertFalse("There should be some servers online", onlineServers.isEmpty());
227 
228       // Use the first server as the destination server
229       ServerName destServer = onlineServers.iterator().next();
230 
231       // Created faked dead server
232       deadServer = ServerName.valueOf(destServer.getHostname(),
233           destServer.getPort(), destServer.getStartcode() - 100L);
234       master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD);
235 
236       final AssignmentManager am = master.getAssignmentManager();
237       RegionPlan plan = new RegionPlan(hri, null, deadServer);
238       am.addPlan(hri.getEncodedName(), plan);
239       master.assignRegion(hri);
240 
241       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
242         destServer, EventType.M_ZK_REGION_OFFLINE,
243         EventType.RS_ZK_REGION_OPENING, 0);
244       assertEquals("TansitionNode should fail", -1, version);
245 
246       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
247         @Override
248         public boolean evaluate() throws Exception {
249           return ! am.getRegionStates().isRegionInTransition(hri);
250         }
251       });
252 
253     assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri));
254     } finally {
255       if (deadServer != null) {
256         master.serverManager.expireServer(deadServer);
257       }
258 
259       TEST_UTIL.deleteTable(Bytes.toBytes(table));
260     }
261   }
262 
263   /**
264    * This tests offlining a region
265    */
266   @Test (timeout=60000)
267   public void testOfflineRegion() throws Exception {
268     TableName table =
269         TableName.valueOf("testOfflineRegion");
270     try {
271       HRegionInfo hri = createTableAndGetOneRegion(table);
272 
273       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
274         getMaster().getAssignmentManager().getRegionStates();
275       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
276       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
277       admin.offline(hri.getRegionName());
278 
279       long timeoutTime = System.currentTimeMillis() + 800;
280       while (true) {
281         List<HRegionInfo> regions =
282           regionStates.getRegionsOfTable(table);
283         if (!regions.contains(hri)) break;
284         long now = System.currentTimeMillis();
285         if (now > timeoutTime) {
286           fail("Failed to offline the region in time");
287           break;
288         }
289         Thread.sleep(10);
290       }
291       RegionState regionState = regionStates.getRegionState(hri);
292       assertTrue(regionState.isOffline());
293     } finally {
294       TEST_UTIL.deleteTable(table);
295     }
296   }
297 
298   /**
299    * This tests moving a region
300    */
301   @Test (timeout=50000)
302   public void testMoveRegion() throws Exception {
303     TableName table =
304         TableName.valueOf("testMoveRegion");
305     try {
306       HRegionInfo hri = createTableAndGetOneRegion(table);
307 
308       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
309         getMaster().getAssignmentManager().getRegionStates();
310       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
311       ServerName destServerName = null;
312       for (int i = 0; i < 3; i++) {
313         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
314         if (!destServer.getServerName().equals(serverName)) {
315           destServerName = destServer.getServerName();
316           break;
317         }
318       }
319       assertTrue(destServerName != null
320         && !destServerName.equals(serverName));
321       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
322         Bytes.toBytes(destServerName.getServerName()));
323 
324       long timeoutTime = System.currentTimeMillis() + 30000;
325       while (true) {
326         ServerName sn = regionStates.getRegionServerOfRegion(hri);
327         if (sn != null && sn.equals(destServerName)) {
328           TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
329           break;
330         }
331         long now = System.currentTimeMillis();
332         if (now > timeoutTime) {
333           fail("Failed to move the region in time: "
334             + regionStates.getRegionState(hri));
335         }
336         regionStates.waitForUpdate(50);
337       }
338 
339     } finally {
340       TEST_UTIL.deleteTable(table);
341     }
342   }
343 
344   /**
345    * If a table is deleted, we should not be able to move it anymore.
346    * Otherwise, the region will be brought back.
347    * @throws Exception
348    */
349   @Test (timeout=50000)
350   public void testMoveRegionOfDeletedTable() throws Exception {
351     TableName table =
352         TableName.valueOf("testMoveRegionOfDeletedTable");
353     HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
354     try {
355       HRegionInfo hri = createTableAndGetOneRegion(table);
356 
357       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
358       AssignmentManager am = master.getAssignmentManager();
359       RegionStates regionStates = am.getRegionStates();
360       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
361       ServerName destServerName = null;
362       for (int i = 0; i < 3; i++) {
363         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
364         if (!destServer.getServerName().equals(serverName)) {
365           destServerName = destServer.getServerName();
366           break;
367         }
368       }
369       assertTrue(destServerName != null
370         && !destServerName.equals(serverName));
371 
372       TEST_UTIL.deleteTable(table);
373 
374       try {
375         admin.move(hri.getEncodedNameAsBytes(),
376           Bytes.toBytes(destServerName.getServerName()));
377         fail("We should not find the region");
378       } catch (IOException ioe) {
379         assertTrue(ioe instanceof UnknownRegionException);
380       }
381 
382       am.balance(new RegionPlan(hri, serverName, destServerName));
383       assertFalse("The region should not be in transition",
384         regionStates.isRegionInTransition(hri));
385     } finally {
386       if (admin.tableExists(table)) {
387         TEST_UTIL.deleteTable(table);
388       }
389     }
390   }
391 
392   HRegionInfo createTableAndGetOneRegion(
393       final TableName tableName) throws IOException, InterruptedException {
394     HTableDescriptor desc = new HTableDescriptor(tableName);
395     desc.addFamily(new HColumnDescriptor(FAMILY));
396     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
397 
398     // wait till the table is assigned
399     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
400     long timeoutTime = System.currentTimeMillis() + 1000;
401     while (true) {
402       List<HRegionInfo> regions = master.getAssignmentManager().
403         getRegionStates().getRegionsOfTable(tableName);
404       if (regions.size() > 3) {
405         return regions.get(2);
406       }
407       long now = System.currentTimeMillis();
408       if (now > timeoutTime) {
409         fail("Could not find an online region");
410       }
411       Thread.sleep(10);
412     }
413   }
414 
415   /**
416    * This test should not be flaky. If it is flaky, it means something
417    * wrong with AssignmentManager which should be reported and fixed
418    *
419    * This tests forcefully assign a region while it's closing and re-assigned.
420    */
421   @Test (timeout=60000)
422   public void testForceAssignWhileClosing() throws Exception {
423     String table = "testForceAssignWhileClosing";
424     try {
425       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
426       desc.addFamily(new HColumnDescriptor(FAMILY));
427       admin.createTable(desc);
428 
429       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
430       HRegionInfo hri = new HRegionInfo(
431         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
432       MetaEditor.addRegionToMeta(meta, hri);
433 
434       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
435       master.assignRegion(hri);
436       AssignmentManager am = master.getAssignmentManager();
437       assertTrue(am.waitForAssignment(hri));
438       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
439       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
440       
441       MyRegionObserver.preCloseEnabled.set(true);
442       am.unassign(hri);
443       RegionState state = am.getRegionStates().getRegionState(hri);
444       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
445 
446       MyRegionObserver.preCloseEnabled.set(false);
447       am.unassign(hri, true);
448 
449       // region is closing now, will be re-assigned automatically.
450       // now, let's forcefully assign it again. it should be
451       // assigned properly and no double-assignment
452       am.assign(hri, true, true);
453 
454       // let's check if it's assigned after it's out of transition
455       am.waitOnRegionToClearRegionsInTransition(hri);
456       assertTrue(am.waitForAssignment(hri));
457 
458       ServerName serverName = master.getAssignmentManager().
459         getRegionStates().getRegionServerOfRegion(hri);
460       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
461     } finally {
462       MyRegionObserver.preCloseEnabled.set(false);
463       TEST_UTIL.deleteTable(Bytes.toBytes(table));
464     }
465   }
466 
467   /**
468    * This tests region close failed
469    */
470   @Test (timeout=60000)
471   public void testCloseFailed() throws Exception {
472     String table = "testCloseFailed";
473     try {
474       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
475       desc.addFamily(new HColumnDescriptor(FAMILY));
476       admin.createTable(desc);
477 
478       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
479       HRegionInfo hri = new HRegionInfo(
480         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
481       MetaEditor.addRegionToMeta(meta, hri);
482 
483       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
484       master.assignRegion(hri);
485       AssignmentManager am = master.getAssignmentManager();
486       assertTrue(am.waitForAssignment(hri));
487       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
488       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
489 
490       MyRegionObserver.preCloseEnabled.set(true);
491       am.unassign(hri);
492       RegionState state = am.getRegionStates().getRegionState(hri);
493       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
494 
495       MyRegionObserver.preCloseEnabled.set(false);
496       am.unassign(hri, true);
497 
498       // region may still be assigned now since it's closing,
499       // let's check if it's assigned after it's out of transition
500       am.waitOnRegionToClearRegionsInTransition(hri);
501 
502       // region should be closed and re-assigned
503       assertTrue(am.waitForAssignment(hri));
504       ServerName serverName = master.getAssignmentManager().
505         getRegionStates().getRegionServerOfRegion(hri);
506       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
507     } finally {
508       MyRegionObserver.preCloseEnabled.set(false);
509       TEST_UTIL.deleteTable(Bytes.toBytes(table));
510     }
511   }
512 
513   /**
514    * This tests region open failed
515    */
516   @Test (timeout=60000)
517   public void testOpenFailed() throws Exception {
518     String table = "testOpenFailed";
519     try {
520       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
521       desc.addFamily(new HColumnDescriptor(FAMILY));
522       admin.createTable(desc);
523 
524       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
525       HRegionInfo hri = new HRegionInfo(
526         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
527       MetaEditor.addRegionToMeta(meta, hri);
528 
529       MyLoadBalancer.controledRegion = hri.getEncodedName();
530 
531       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
532       master.assignRegion(hri);
533       AssignmentManager am = master.getAssignmentManager();
534       assertFalse(am.waitForAssignment(hri));
535 
536       RegionState state = am.getRegionStates().getRegionState(hri);
537       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
538       // Failed to open since no plan, so it's on no server
539       assertNull(state.getServerName());
540 
541       MyLoadBalancer.controledRegion = null;
542       master.assignRegion(hri);
543       assertTrue(am.waitForAssignment(hri));
544 
545       ServerName serverName = master.getAssignmentManager().
546         getRegionStates().getRegionServerOfRegion(hri);
547       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
548     } finally {
549       MyLoadBalancer.controledRegion = null;
550       TEST_UTIL.deleteTable(Bytes.toBytes(table));
551     }
552   }
553 
554   /**
555    * This tests region open failure which is not recoverable
556    */
557   @Test (timeout=60000)
558   public void testOpenFailedUnrecoverable() throws Exception {
559     TableName table =
560         TableName.valueOf("testOpenFailedUnrecoverable");
561     try {
562       HTableDescriptor desc = new HTableDescriptor(table);
563       desc.addFamily(new HColumnDescriptor(FAMILY));
564       admin.createTable(desc);
565 
566       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
567       HRegionInfo hri = new HRegionInfo(
568         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
569       MetaEditor.addRegionToMeta(meta, hri);
570 
571       FileSystem fs = FileSystem.get(conf);
572       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
573       Path regionDir = new Path(tableDir, hri.getEncodedName());
574       // create a file named the same as the region dir to
575       // mess up with region opening
576       fs.create(regionDir, true);
577 
578       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
579       master.assignRegion(hri);
580       AssignmentManager am = master.getAssignmentManager();
581       assertFalse(am.waitForAssignment(hri));
582 
583       RegionState state = am.getRegionStates().getRegionState(hri);
584       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
585       // Failed to open due to file system issue. Region state should
586       // carry the opening region server so that we can force close it
587       // later on before opening it again. See HBASE-9092.
588       assertNotNull(state.getServerName());
589 
590       // remove the blocking file, so that region can be opened
591       fs.delete(regionDir, true);
592       master.assignRegion(hri);
593       assertTrue(am.waitForAssignment(hri));
594 
595       ServerName serverName = master.getAssignmentManager().
596         getRegionStates().getRegionServerOfRegion(hri);
597       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
598     } finally {
599       TEST_UTIL.deleteTable(table);
600     }
601   }
602 
603   @Test (timeout=60000)
604   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
605     final TableName table =
606         TableName.valueOf
607             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
608     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
609     HRegionInfo hri = null;
610     ServerName serverName = null;
611     try {
612       hri = createTableAndGetOneRegion(table);
613       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
614       ServerName destServerName = null;
615       HRegionServer destServer = null;
616       for (int i = 0; i < 3; i++) {
617         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
618         if (!destServer.getServerName().equals(serverName)) {
619           destServerName = destServer.getServerName();
620           break;
621         }
622       }
623       am.regionOffline(hri);
624       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
625       am.getRegionStates().updateRegionState(hri, State.PENDING_OPEN, destServerName);
626       if (ConfigUtil.useZKForAssignment(conf)) {
627         ZKAssign.createNodeOffline(zkw, hri, destServerName);
628         ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
629 
630         // Wait till the event is processed and the region is in transition
631         long timeoutTime = System.currentTimeMillis() + 20000;
632         while (!am.getRegionStates().isRegionInTransition(hri)) {
633           assertTrue("Failed to process ZK opening event in time",
634             System.currentTimeMillis() < timeoutTime);
635           Thread.sleep(100);
636         }
637       }
638 
639       am.getZKTable().setDisablingTable(table);
640       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
641       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
642       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
643           .getRegionState(hri).isOffline());
644     } finally {
645       if (hri != null && serverName != null) {
646         am.regionOnline(hri, serverName);
647       }
648       am.getZKTable().setDisabledTable(table);
649       TEST_UTIL.deleteTable(table);
650     }
651   }
652 
653   /**
654    * This tests region close hanging
655    */
656   @Test (timeout=60000)
657   public void testCloseHang() throws Exception {
658     String table = "testCloseHang";
659     try {
660       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
661       desc.addFamily(new HColumnDescriptor(FAMILY));
662       admin.createTable(desc);
663 
664       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
665       HRegionInfo hri = new HRegionInfo(
666         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
667       MetaEditor.addRegionToMeta(meta, hri);
668 
669       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
670       master.assignRegion(hri);
671       AssignmentManager am = master.getAssignmentManager();
672       assertTrue(am.waitForAssignment(hri));
673       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
674       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
675 
676       MyRegionObserver.postCloseEnabled.set(true);
677       am.unassign(hri);
678       // Now region should pending_close or closing
679       // Unassign it again forcefully so that we can trigger already
680       // in transition exception. This test is to make sure this scenario
681       // is handled properly.
682       am.server.getConfiguration().setLong(
683         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
684       am.unassign(hri, true);
685       RegionState state = am.getRegionStates().getRegionState(hri);
686       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
687 
688       // Let region closing move ahead. The region should be closed
689       // properly and re-assigned automatically
690       MyRegionObserver.postCloseEnabled.set(false);
691 
692       // region may still be assigned now since it's closing,
693       // let's check if it's assigned after it's out of transition
694       am.waitOnRegionToClearRegionsInTransition(hri);
695 
696       // region should be closed and re-assigned
697       assertTrue(am.waitForAssignment(hri));
698       ServerName serverName = master.getAssignmentManager().
699         getRegionStates().getRegionServerOfRegion(hri);
700       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
701     } finally {
702       MyRegionObserver.postCloseEnabled.set(false);
703       TEST_UTIL.deleteTable(Bytes.toBytes(table));
704     }
705   }
706 
707   /**
708    * This tests region close racing with open
709    */
710   @Test (timeout=60000)
711   public void testOpenCloseRacing() throws Exception {
712     String table = "testOpenCloseRacing";
713     try {
714       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
715       desc.addFamily(new HColumnDescriptor(FAMILY));
716       admin.createTable(desc);
717 
718       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
719       HRegionInfo hri = new HRegionInfo(
720         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
721       MetaEditor.addRegionToMeta(meta, hri);
722       meta.close();
723 
724       MyRegionObserver.postOpenEnabled.set(true);
725       MyRegionObserver.postOpenCalled = false;
726       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
727       // Region will be opened, but it won't complete
728       master.assignRegion(hri);
729       long end = EnvironmentEdgeManager.currentTimeMillis() + 20000;
730       // Wait till postOpen is called
731       while (!MyRegionObserver.postOpenCalled ) {
732         assertFalse("Timed out waiting for postOpen to be called",
733           EnvironmentEdgeManager.currentTimeMillis() > end);
734         Thread.sleep(300);
735       }
736 
737       AssignmentManager am = master.getAssignmentManager();
738       // Now let's unassign it, it should do nothing
739       am.unassign(hri);
740       RegionState state = am.getRegionStates().getRegionState(hri);
741       ServerName oldServerName = state.getServerName();
742       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
743 
744       // Now the region is stuck in opening
745       // Let's forcefully re-assign it to trigger closing/opening
746       // racing. This test is to make sure this scenario
747       // is handled properly.
748       ServerName destServerName = null;
749       int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
750       for (int i = 0; i < numRS; i++) {
751         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
752         if (!destServer.getServerName().equals(oldServerName)) {
753           destServerName = destServer.getServerName();
754           break;
755         }
756       }
757       assertNotNull(destServerName);
758       assertFalse("Region should be assigned on a new region server",
759         oldServerName.equals(destServerName));
760       List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
761       regions.add(hri);
762       am.assign(destServerName, regions);
763       
764       // let region open continue
765       MyRegionObserver.postOpenEnabled.set(false);
766 
767       // let's check if it's assigned after it's out of transition
768       am.waitOnRegionToClearRegionsInTransition(hri);
769       assertTrue(am.waitForAssignment(hri));
770 
771       ServerName serverName = master.getAssignmentManager().
772         getRegionStates().getRegionServerOfRegion(hri);
773       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
774     } finally {
775       MyRegionObserver.postOpenEnabled.set(false);
776       TEST_UTIL.deleteTable(Bytes.toBytes(table));
777     }
778   }
779 
780   /**
781    * Test force unassign/assign a region hosted on a dead server
782    */
783   @Test (timeout=60000)
784   public void testAssignRacingWithSSH() throws Exception {
785     String table = "testAssignRacingWithSSH";
786     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
787     MyMaster master = null;
788     try {
789       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
790       desc.addFamily(new HColumnDescriptor(FAMILY));
791       admin.createTable(desc);
792 
793       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
794       HRegionInfo hri = new HRegionInfo(
795         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
796       MetaEditor.addRegionToMeta(meta, hri);
797 
798       // Assign the region
799       master = (MyMaster)cluster.getMaster();
800       master.assignRegion(hri);
801 
802       // Hold SSH before killing the hosting server
803       master.enableSSH(false);
804 
805       AssignmentManager am = master.getAssignmentManager();
806       RegionStates regionStates = am.getRegionStates();
807       ServerName metaServer = regionStates.getRegionServerOfRegion(
808         HRegionInfo.FIRST_META_REGIONINFO);
809       while (true) {
810         assertTrue(am.waitForAssignment(hri));
811         RegionState state = regionStates.getRegionState(hri);
812         ServerName oldServerName = state.getServerName();
813         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
814           // Kill the hosting server, which doesn't have meta on it.
815           cluster.killRegionServer(oldServerName);
816           cluster.waitForRegionServerToStop(oldServerName, -1);
817           break;
818         }
819         int i = cluster.getServerWithMeta();
820         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
821         oldServerName = rs.getServerName();
822         master.move(hri.getEncodedNameAsBytes(),
823           Bytes.toBytes(oldServerName.getServerName()));
824       }
825 
826       // You can't assign a dead region before SSH
827       am.assign(hri, true, true);
828       RegionState state = regionStates.getRegionState(hri);
829       assertTrue(state.isFailedClose());
830 
831       // You can't unassign a dead region before SSH either
832       am.unassign(hri, true);
833       assertTrue(state.isFailedClose());
834 
835       // Enable SSH so that log can be split
836       master.enableSSH(true);
837 
838       // let's check if it's assigned after it's out of transition.
839       // no need to assign it manually, SSH should do it
840       am.waitOnRegionToClearRegionsInTransition(hri);
841       assertTrue(am.waitForAssignment(hri));
842 
843       ServerName serverName = master.getAssignmentManager().
844         getRegionStates().getRegionServerOfRegion(hri);
845       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
846     } finally {
847       if (master != null) {
848         master.enableSSH(true);
849       }
850       TEST_UTIL.deleteTable(Bytes.toBytes(table));
851     }
852   }
853 
854   /**
855    * Test force unassign/assign a region of a disabled table
856    */
857   @Test (timeout=60000)
858   public void testAssignDisabledRegion() throws Exception {
859     String table = "testAssignDisabledRegion";
860     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
861     MyMaster master = null;
862     try {
863       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
864       desc.addFamily(new HColumnDescriptor(FAMILY));
865       admin.createTable(desc);
866 
867       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
868       HRegionInfo hri = new HRegionInfo(
869         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
870       MetaEditor.addRegionToMeta(meta, hri);
871 
872       // Assign the region
873       master = (MyMaster)cluster.getMaster();
874       master.assignRegion(hri);
875       AssignmentManager am = master.getAssignmentManager();
876       RegionStates regionStates = am.getRegionStates();
877       assertTrue(am.waitForAssignment(hri));
878 
879       // Disable the table
880       admin.disableTable(table);
881       assertTrue(regionStates.isRegionOffline(hri));
882 
883       // You can't assign a disabled region
884       am.assign(hri, true, true);
885       assertTrue(regionStates.isRegionOffline(hri));
886 
887       // You can't unassign a disabled region either
888       am.unassign(hri, true);
889       assertTrue(regionStates.isRegionOffline(hri));
890     } finally {
891       TEST_UTIL.deleteTable(Bytes.toBytes(table));
892     }
893   }
894 
895   /**
896    * Test that region state transition call is idempotent
897    */
898   @Test(timeout = 60000)
899   public void testReportRegionStateTransition() throws Exception {
900     String table = "testReportRegionStateTransition";
901     try {
902       MyRegionServer.simulateRetry = true;
903       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
904       desc.addFamily(new HColumnDescriptor(FAMILY));
905       admin.createTable(desc);
906       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
907       HRegionInfo hri =
908           new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
909       MetaEditor.addRegionToMeta(meta, hri);
910       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
911       master.assignRegion(hri);
912       AssignmentManager am = master.getAssignmentManager();
913       am.waitForAssignment(hri);
914       RegionStates regionStates = am.getRegionStates();
915       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
916       // Assert the the region is actually open on the server
917       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
918       // Closing region should just work fine
919       admin.disableTable(TableName.valueOf(table));
920       assertTrue(regionStates.isRegionOffline(hri));
921       List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getOnlineRegions(serverName);
922       assertTrue(!regions.contains(hri));
923     } finally {
924       MyRegionServer.simulateRetry = false;
925       TEST_UTIL.deleteTable(Bytes.toBytes(table));
926     }
927   }
928 
929   /**
930    * Test concurrent updates to meta when meta is not on master. Only for zk-less assignment
931    * @throws Exception
932    */
933   @Test(timeout = 30000)
934   public void testUpdatesRemoteMeta() throws Exception {
935     // Not for zk less assignment
936     if (conf.getBoolean("hbase.assignment.usezk", true)) {
937       return;
938     }
939     conf.setInt("hbase.regionstatestore.meta.connection", 3);
940     final RegionStateStore rss = new RegionStateStore(new MyRegionServer(conf));
941     rss.start();
942     // Create 10 threads and make each do 10 puts related to region state update
943     Thread[] th = new Thread[10];
944     List<String> nameList = new ArrayList<String>();
945     List<TableName> tableNameList = new ArrayList<TableName>();
946     for (int i = 0; i < th.length; i++) {
947       th[i] = new Thread() {
948         @Override
949         public void run() {
950           HRegionInfo[] hri = new HRegionInfo[10];
951           ServerName serverName = ServerName.valueOf("dummyhost", 1000, 1234);
952           for (int i = 0; i < 10; i++) {
953             hri[i] = new HRegionInfo(TableName.valueOf(Thread.currentThread().getName() + "_" + i));
954             RegionState newState = new RegionState(hri[i], RegionState.State.OPEN, serverName);
955             RegionState oldState =
956                 new RegionState(hri[i], RegionState.State.PENDING_OPEN, serverName);
957             rss.updateRegionState(1, newState, oldState);
958           }
959         }
960       };
961       th[i].start();
962       nameList.add(th[i].getName());
963     }
964     for (int i = 0; i < th.length; i++) {
965       th[i].join();
966     }
967     // Add all the expected table names in meta to tableNameList
968     for (String name : nameList) {
969       for (int i = 0; i < 10; i++) {
970         tableNameList.add(TableName.valueOf(name + "_" + i));
971       }
972     }
973     List<Result> metaRows =
974         MetaReader.fullScan(TEST_UTIL.getMiniHBaseCluster().getMaster().getCatalogTracker());
975     int count = 0;
976     // Check all 100 rows are in meta
977     for (Result result : metaRows) {
978       if (tableNameList.contains(HRegionInfo.getTable(result.getRow()))) {
979         count++;
980         if (count == 100) {
981           break;
982         }
983       }
984     }
985     assertTrue(count == 100);
986     rss.stop();
987   }
988 
989   static class MyLoadBalancer extends StochasticLoadBalancer {
990     // For this region, if specified, always assign to nowhere
991     static volatile String controledRegion = null;
992 
993     @Override
994     public ServerName randomAssignment(HRegionInfo regionInfo,
995         List<ServerName> servers) {
996       if (regionInfo.getEncodedName().equals(controledRegion)) {
997         return null;
998       }
999       return super.randomAssignment(regionInfo, servers);
1000     }
1001   }
1002 
1003   public static class MyMaster extends HMaster {
1004     AtomicBoolean enabled = new AtomicBoolean(true);
1005 
1006     public MyMaster(Configuration conf) throws IOException, KeeperException,
1007         InterruptedException {
1008       super(conf);
1009     }
1010 
1011     @Override
1012     public boolean isServerShutdownHandlerEnabled() {
1013       return enabled.get() && super.isServerShutdownHandlerEnabled();
1014     }
1015 
1016     public void enableSSH(boolean enabled) {
1017       this.enabled.set(enabled);
1018       if (enabled) {
1019         serverManager.processQueuedDeadServers();
1020       }
1021     }
1022   }
1023   
1024   public static class MyRegionServer extends MiniHBaseClusterRegionServer {
1025     static volatile ServerName abortedServer = null;
1026     static volatile boolean simulateRetry;
1027 
1028     public MyRegionServer(Configuration conf)
1029       throws IOException, KeeperException,
1030         InterruptedException {
1031       super(conf);
1032     }
1033 
1034     @Override
1035     public boolean
1036         reportRegionStateTransition(TransitionCode code, long openSeqNum, HRegionInfo... hris) {
1037       if (simulateRetry == true) {
1038         // Simulate retry by calling the method twice
1039         super.reportRegionStateTransition(code, openSeqNum, hris);
1040         return super.reportRegionStateTransition(code, openSeqNum, hris);
1041       }
1042       return super.reportRegionStateTransition(code, openSeqNum, hris);
1043     }
1044 
1045     @Override
1046     public boolean isAborted() {
1047       return getServerName().equals(abortedServer) || super.isAborted();
1048     }
1049   }
1050 
1051 
1052   public static class MyRegionObserver extends BaseRegionObserver {
1053     // If enabled, fail all preClose calls
1054     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
1055 
1056     // If enabled, stall postClose calls
1057     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
1058 
1059     // If enabled, stall postOpen calls
1060     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
1061 
1062     // A flag to track if postOpen is called
1063     static volatile boolean postOpenCalled = false;
1064 
1065     @Override
1066     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
1067         boolean abortRequested) throws IOException {
1068       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
1069     }
1070 
1071     @Override
1072     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
1073         boolean abortRequested) {
1074       stallOnFlag(postCloseEnabled);
1075     }
1076 
1077     @Override
1078     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
1079       postOpenCalled = true;
1080       stallOnFlag(postOpenEnabled);
1081     }
1082 
1083     private void stallOnFlag(final AtomicBoolean flag) {
1084       try {
1085         // If enabled, stall
1086         while (flag.get()) {
1087           Thread.sleep(1000);
1088         }
1089       } catch (InterruptedException ie) {
1090         Thread.currentThread().interrupt();
1091       }
1092     }
1093   }
1094 }