View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotEquals;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNull;
25  import static org.junit.Assert.assertTrue;
26  import static org.junit.Assert.fail;
27  
28  import java.io.IOException;
29  import java.util.ArrayList;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.Set;
33  import java.util.concurrent.atomic.AtomicBoolean;
34  import java.util.concurrent.atomic.AtomicInteger;
35  
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.Path;
39  import org.apache.hadoop.hbase.CoordinatedStateManager;
40  import org.apache.hadoop.hbase.HBaseTestingUtility;
41  import org.apache.hadoop.hbase.HColumnDescriptor;
42  import org.apache.hadoop.hbase.HConstants;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.HTableDescriptor;
45  import org.apache.hadoop.hbase.testclassification.MediumTests;
46  import org.apache.hadoop.hbase.MetaTableAccessor;
47  import org.apache.hadoop.hbase.MiniHBaseCluster;
48  import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
49  import org.apache.hadoop.hbase.ServerLoad;
50  import org.apache.hadoop.hbase.ServerName;
51  import org.apache.hadoop.hbase.TableName;
52  import org.apache.hadoop.hbase.UnknownRegionException;
53  import org.apache.hadoop.hbase.Waiter;
54  import org.apache.hadoop.hbase.client.Admin;
55  import org.apache.hadoop.hbase.client.HBaseAdmin;
56  import org.apache.hadoop.hbase.client.HTable;
57  import org.apache.hadoop.hbase.client.Result;
58  import org.apache.hadoop.hbase.client.Table;
59  import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
60  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
61  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
62  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
63  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
64  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
65  import org.apache.hadoop.hbase.executor.EventType;
66  import org.apache.hadoop.hbase.master.RegionState.State;
67  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
68  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
69  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
70  import org.apache.hadoop.hbase.regionserver.HRegionServer;
71  import org.apache.hadoop.hbase.util.Bytes;
72  import org.apache.hadoop.hbase.util.ConfigUtil;
73  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
74  import org.apache.hadoop.hbase.util.FSUtils;
75  import org.apache.hadoop.hbase.util.JVMClusterUtil;
76  import org.apache.hadoop.hbase.util.Threads;
77  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
78  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
79  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
80  import org.apache.zookeeper.KeeperException;
81  import org.junit.AfterClass;
82  import org.junit.BeforeClass;
83  import org.junit.Test;
84  import org.junit.experimental.categories.Category;
85  
86  
87  /**
88   * This tests AssignmentManager with a testing cluster.
89   */
90  @Category(MediumTests.class)
91  @SuppressWarnings("deprecation")
92  public class TestAssignmentManagerOnCluster {
93    private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
94    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
95    final static Configuration conf = TEST_UTIL.getConfiguration();
96    private static HBaseAdmin admin;
97  
98    static void setupOnce() throws Exception {
99      // Using the our load balancer to control region plans
100     conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
101       MyLoadBalancer.class, LoadBalancer.class);
102     conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
103       MyRegionObserver.class, RegionObserver.class);
104     // Reduce the maximum attempts to speed up the test
105     conf.setInt("hbase.assignment.maximum.attempts", 3);
106     // Put meta on master to avoid meta server shutdown handling
107     conf.set("hbase.balancer.tablesOnMaster", "hbase:meta");
108     conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
109     conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
110 
111     TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, MyRegionServer.class);
112     admin = TEST_UTIL.getHBaseAdmin();
113   }
114 
115   @BeforeClass
116   public static void setUpBeforeClass() throws Exception {
117     // Use ZK for region assignment
118     conf.setBoolean("hbase.assignment.usezk", true);
119     setupOnce();
120   }
121 
122   @AfterClass
123   public static void tearDownAfterClass() throws Exception {
124     TEST_UTIL.shutdownMiniCluster();
125   }
126 
127   /**
128    * This tests restarting meta regionserver
129    */
130   @Test (timeout=180000)
131   public void testRestartMetaRegionServer() throws Exception {
132     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
133     boolean stoppedARegionServer = false;
134     try {
135       HMaster master = cluster.getMaster();
136       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
137       ServerName metaServerName = regionStates.getRegionServerOfRegion(
138         HRegionInfo.FIRST_META_REGIONINFO);
139       if (master.getServerName().equals(metaServerName) || metaServerName == null
140           || !metaServerName.equals(cluster.getServerHoldingMeta())) {
141         // Move meta off master
142         metaServerName = cluster.getLiveRegionServerThreads()
143           .get(0).getRegionServer().getServerName();
144         master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
145           Bytes.toBytes(metaServerName.getServerName()));
146         master.assignmentManager.waitUntilNoRegionsInTransition(60000);
147       }
148       RegionState metaState =
149           MetaTableLocator.getMetaRegionState(master.getZooKeeper());
150         assertEquals("Meta should be not in transition",
151             metaState.getState(), RegionState.State.OPEN);
152       assertNotEquals("Meta should be moved off master",
153         metaServerName, master.getServerName());
154       cluster.killRegionServer(metaServerName);
155       stoppedARegionServer = true;
156       cluster.waitForRegionServerToStop(metaServerName, 60000);
157       // Wait for SSH to finish
158       final ServerManager serverManager = master.getServerManager();
159       TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
160         @Override
161         public boolean evaluate() throws Exception {
162           return !serverManager.areDeadServersInProgress();
163         }
164       });
165 
166       // Now, make sure meta is assigned
167       assertTrue("Meta should be assigned",
168         regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
169       // Now, make sure meta is registered in zk
170       metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
171       assertEquals("Meta should be not in transition",
172           metaState.getState(), RegionState.State.OPEN);
173       assertEquals("Meta should be assigned", metaState.getServerName(),
174         regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
175       assertNotEquals("Meta should be assigned on a different server",
176         metaState.getServerName(), metaServerName);
177     } finally {
178       if (stoppedARegionServer) {
179         cluster.startRegionServer();
180       }
181     }
182   }
183 
184   /**
185    * This tests region assignment
186    */
187   @Test (timeout=60000)
188   public void testAssignRegion() throws Exception {
189     String table = "testAssignRegion";
190     try {
191       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
192       desc.addFamily(new HColumnDescriptor(FAMILY));
193       admin.createTable(desc);
194 
195       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
196       HRegionInfo hri = new HRegionInfo(
197         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
198       MetaTableAccessor.addRegionToMeta(meta, hri);
199 
200       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
201       master.assignRegion(hri);
202       AssignmentManager am = master.getAssignmentManager();
203       am.waitForAssignment(hri);
204 
205       RegionStates regionStates = am.getRegionStates();
206       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
207       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
208 
209       // Region is assigned now. Let's assign it again.
210       // Master should not abort, and region should be assigned.
211       RegionState oldState = regionStates.getRegionState(hri);
212       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
213       master.getAssignmentManager().waitForAssignment(hri);
214       RegionState newState = regionStates.getRegionState(hri);
215       assertTrue(newState.isOpened()
216         && newState.getStamp() != oldState.getStamp());
217     } finally {
218       TEST_UTIL.deleteTable(Bytes.toBytes(table));
219     }
220   }
221   
222   // Simulate a scenario where the AssignCallable and SSH are trying to assign a region
223   @Test (timeout=60000)
224   public void testAssignRegionBySSH() throws Exception {
225     if (!conf.getBoolean("hbase.assignment.usezk", true)) {
226       return;
227     }
228     String table = "testAssignRegionBySSH";
229     MyMaster master = (MyMaster) TEST_UTIL.getHBaseCluster().getMaster();
230     try {
231       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
232       desc.addFamily(new HColumnDescriptor(FAMILY));
233       admin.createTable(desc);
234 
235       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
236       HRegionInfo hri = new HRegionInfo(
237         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
238       MetaTableAccessor.addRegionToMeta(meta, hri);
239       // Add some dummy server for the region entry
240       MetaTableAccessor.updateRegionLocation(TEST_UTIL.getHBaseCluster().getMaster().getConnection(), hri,
241         ServerName.valueOf("example.org", 1234, System.currentTimeMillis()), 0);
242       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
243       int i = TEST_UTIL.getHBaseCluster().getServerWithMeta();
244       HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(i == 0 ? 1 : 0);
245       // Choose a server other than meta to kill
246       ServerName controlledServer = rs.getServerName();
247       master.enableSSH(false);
248       TEST_UTIL.getHBaseCluster().killRegionServer(controlledServer);
249       TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(controlledServer, -1);
250       AssignmentManager am = master.getAssignmentManager();
251       
252       // Simulate the AssignCallable trying to assign the region. Have the region in OFFLINE state,
253       // but not in transition and the server is the dead 'controlledServer'  
254       regionStates.createRegionState(hri, State.OFFLINE, controlledServer, null);
255       am.assign(hri, true, true);
256       // Region should remain OFFLINE and go to transition
257       assertEquals(State.OFFLINE, regionStates.getRegionState(hri).getState());
258       assertTrue (regionStates.isRegionInTransition(hri));
259       
260       master.enableSSH(true);
261       am.waitForAssignment(hri);
262       assertTrue (regionStates.getRegionState(hri).isOpened());
263       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
264       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
265     } finally {
266       if (master != null) {
267         master.enableSSH(true);
268       }
269       TEST_UTIL.deleteTable(Bytes.toBytes(table));
270       TEST_UTIL.getHBaseCluster().startRegionServer();
271     }
272   }
273 
274   /**
275    * This tests region assignment on a simulated restarted server
276    */
277   @Test (timeout=120000)
278   public void testAssignRegionOnRestartedServer() throws Exception {
279     String table = "testAssignRegionOnRestartedServer";
280     TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
281     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
282     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
283 
284     ServerName deadServer = null;
285     HMaster master = null;
286     try {
287       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
288       desc.addFamily(new HColumnDescriptor(FAMILY));
289       admin.createTable(desc);
290 
291       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
292       final HRegionInfo hri = new HRegionInfo(
293         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
294       MetaTableAccessor.addRegionToMeta(meta, hri);
295 
296       master = TEST_UTIL.getHBaseCluster().getMaster();
297       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
298       assertFalse("There should be some servers online", onlineServers.isEmpty());
299 
300       // Use the first server as the destination server
301       ServerName destServer = onlineServers.iterator().next();
302 
303       // Created faked dead server
304       deadServer = ServerName.valueOf(destServer.getHostname(),
305           destServer.getPort(), destServer.getStartcode() - 100L);
306       master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD);
307 
308       final AssignmentManager am = master.getAssignmentManager();
309       RegionPlan plan = new RegionPlan(hri, null, deadServer);
310       am.addPlan(hri.getEncodedName(), plan);
311       master.assignRegion(hri);
312 
313       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
314         destServer, EventType.M_ZK_REGION_OFFLINE,
315         EventType.RS_ZK_REGION_OPENING, 0);
316       assertEquals("TansitionNode should fail", -1, version);
317 
318       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
319         @Override
320         public boolean evaluate() throws Exception {
321           return ! am.getRegionStates().isRegionInTransition(hri);
322         }
323       });
324 
325     assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri));
326     } finally {
327       if (deadServer != null) {
328         master.serverManager.expireServer(deadServer);
329       }
330 
331       TEST_UTIL.deleteTable(Bytes.toBytes(table));
332 
333       // reset the value for other tests
334       TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 3);
335       ServerName masterServerName = TEST_UTIL.getMiniHBaseCluster().getMaster().getServerName();
336       TEST_UTIL.getMiniHBaseCluster().stopMaster(masterServerName);
337       TEST_UTIL.getMiniHBaseCluster().startMaster();
338       // Wait till master is active and is initialized
339       while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null || 
340           !TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
341         Threads.sleep(1);
342       }
343     }
344   }
345 
346   /**
347    * This tests offlining a region
348    */
349   @Test (timeout=60000)
350   public void testOfflineRegion() throws Exception {
351     TableName table =
352         TableName.valueOf("testOfflineRegion");
353     try {
354       HRegionInfo hri = createTableAndGetOneRegion(table);
355 
356       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
357         getMaster().getAssignmentManager().getRegionStates();
358       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
359       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
360       admin.offline(hri.getRegionName());
361 
362       long timeoutTime = System.currentTimeMillis() + 800;
363       while (true) {
364         List<HRegionInfo> regions =
365           regionStates.getRegionsOfTable(table);
366         if (!regions.contains(hri)) break;
367         long now = System.currentTimeMillis();
368         if (now > timeoutTime) {
369           fail("Failed to offline the region in time");
370           break;
371         }
372         Thread.sleep(10);
373       }
374       RegionState regionState = regionStates.getRegionState(hri);
375       assertTrue(regionState.isOffline());
376     } finally {
377       TEST_UTIL.deleteTable(table);
378     }
379   }
380 
381   /**
382    * This tests moving a region
383    */
384   @Test (timeout=50000)
385   public void testMoveRegion() throws Exception {
386     TableName table =
387         TableName.valueOf("testMoveRegion");
388     try {
389       HRegionInfo hri = createTableAndGetOneRegion(table);
390 
391       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
392       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
393       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
394       ServerManager serverManager = master.getServerManager();
395       ServerName destServerName = null;
396       List<JVMClusterUtil.RegionServerThread> regionServers =
397         TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads();
398       for (JVMClusterUtil.RegionServerThread regionServer: regionServers) {
399         HRegionServer destServer = regionServer.getRegionServer();
400         destServerName = destServer.getServerName();
401         if (!destServerName.equals(serverName)
402             && serverManager.isServerOnline(destServerName)) {
403           break;
404         }
405       }
406       assertTrue(destServerName != null
407         && !destServerName.equals(serverName));
408       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
409         Bytes.toBytes(destServerName.getServerName()));
410 
411       long timeoutTime = System.currentTimeMillis() + 30000;
412       while (true) {
413         ServerName sn = regionStates.getRegionServerOfRegion(hri);
414         if (sn != null && sn.equals(destServerName)) {
415           TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
416           break;
417         }
418         long now = System.currentTimeMillis();
419         if (now > timeoutTime) {
420           fail("Failed to move the region in time: "
421             + regionStates.getRegionState(hri));
422         }
423         regionStates.waitForUpdate(50);
424       }
425 
426     } finally {
427       TEST_UTIL.deleteTable(table);
428     }
429   }
430 
431   /**
432    * If a table is deleted, we should not be able to move it anymore.
433    * Otherwise, the region will be brought back.
434    * @throws Exception
435    */
436   @Test (timeout=50000)
437   public void testMoveRegionOfDeletedTable() throws Exception {
438     TableName table =
439         TableName.valueOf("testMoveRegionOfDeletedTable");
440     Admin admin = TEST_UTIL.getHBaseAdmin();
441     try {
442       HRegionInfo hri = createTableAndGetOneRegion(table);
443 
444       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
445       AssignmentManager am = master.getAssignmentManager();
446       RegionStates regionStates = am.getRegionStates();
447       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
448       ServerName destServerName = null;
449       for (int i = 0; i < 3; i++) {
450         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
451         if (!destServer.getServerName().equals(serverName)) {
452           destServerName = destServer.getServerName();
453           break;
454         }
455       }
456       assertTrue(destServerName != null
457         && !destServerName.equals(serverName));
458 
459       TEST_UTIL.deleteTable(table);
460 
461       try {
462         admin.move(hri.getEncodedNameAsBytes(),
463           Bytes.toBytes(destServerName.getServerName()));
464         fail("We should not find the region");
465       } catch (IOException ioe) {
466         assertTrue(ioe instanceof UnknownRegionException);
467       }
468 
469       am.balance(new RegionPlan(hri, serverName, destServerName));
470       assertFalse("The region should not be in transition",
471         regionStates.isRegionInTransition(hri));
472     } finally {
473       if (admin.tableExists(table)) {
474         TEST_UTIL.deleteTable(table);
475       }
476     }
477   }
478 
479   HRegionInfo createTableAndGetOneRegion(
480       final TableName tableName) throws IOException, InterruptedException {
481     HTableDescriptor desc = new HTableDescriptor(tableName);
482     desc.addFamily(new HColumnDescriptor(FAMILY));
483     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
484 
485     // wait till the table is assigned
486     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
487     long timeoutTime = System.currentTimeMillis() + 1000;
488     while (true) {
489       List<HRegionInfo> regions = master.getAssignmentManager().
490         getRegionStates().getRegionsOfTable(tableName);
491       if (regions.size() > 3) {
492         return regions.get(2);
493       }
494       long now = System.currentTimeMillis();
495       if (now > timeoutTime) {
496         fail("Could not find an online region");
497       }
498       Thread.sleep(10);
499     }
500   }
501 
502   /**
503    * This test should not be flaky. If it is flaky, it means something
504    * wrong with AssignmentManager which should be reported and fixed
505    *
506    * This tests forcefully assign a region while it's closing and re-assigned.
507    */
508   @Test (timeout=60000)
509   public void testForceAssignWhileClosing() throws Exception {
510     String table = "testForceAssignWhileClosing";
511     try {
512       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
513       desc.addFamily(new HColumnDescriptor(FAMILY));
514       admin.createTable(desc);
515 
516       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
517       HRegionInfo hri = new HRegionInfo(
518         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
519       MetaTableAccessor.addRegionToMeta(meta, hri);
520 
521       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
522       master.assignRegion(hri);
523       AssignmentManager am = master.getAssignmentManager();
524       assertTrue(am.waitForAssignment(hri));
525 
526       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
527       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
528       MyRegionObserver.preCloseEnabled.set(true);
529       am.unassign(hri);
530       RegionState state = am.getRegionStates().getRegionState(hri);
531       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
532 
533       MyRegionObserver.preCloseEnabled.set(false);
534       am.unassign(hri, true);
535 
536       // region is closing now, will be re-assigned automatically.
537       // now, let's forcefully assign it again. it should be
538       // assigned properly and no double-assignment
539       am.assign(hri, true, true);
540 
541       // let's check if it's assigned after it's out of transition
542       am.waitOnRegionToClearRegionsInTransition(hri);
543       assertTrue(am.waitForAssignment(hri));
544 
545       ServerName serverName = master.getAssignmentManager().
546         getRegionStates().getRegionServerOfRegion(hri);
547       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
548     } finally {
549       MyRegionObserver.preCloseEnabled.set(false);
550       TEST_UTIL.deleteTable(Bytes.toBytes(table));
551     }
552   }
553 
554   /**
555    * This tests region close failed
556    */
557   @Test (timeout=60000)
558   public void testCloseFailed() throws Exception {
559     String table = "testCloseFailed";
560     try {
561       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
562       desc.addFamily(new HColumnDescriptor(FAMILY));
563       admin.createTable(desc);
564 
565       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
566       HRegionInfo hri = new HRegionInfo(
567         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
568       MetaTableAccessor.addRegionToMeta(meta, hri);
569 
570       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
571       master.assignRegion(hri);
572       AssignmentManager am = master.getAssignmentManager();
573       assertTrue(am.waitForAssignment(hri));
574       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
575       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
576 
577       MyRegionObserver.preCloseEnabled.set(true);
578       am.unassign(hri);
579       RegionState state = am.getRegionStates().getRegionState(hri);
580       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
581 
582       MyRegionObserver.preCloseEnabled.set(false);
583       am.unassign(hri, true);
584 
585       // region may still be assigned now since it's closing,
586       // let's check if it's assigned after it's out of transition
587       am.waitOnRegionToClearRegionsInTransition(hri);
588 
589       // region should be closed and re-assigned
590       assertTrue(am.waitForAssignment(hri));
591       ServerName serverName = master.getAssignmentManager().
592         getRegionStates().getRegionServerOfRegion(hri);
593       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
594     } finally {
595       MyRegionObserver.preCloseEnabled.set(false);
596       TEST_UTIL.deleteTable(Bytes.toBytes(table));
597     }
598   }
599 
600   /**
601    * This tests region open failed
602    */
603   @Test (timeout=60000)
604   public void testOpenFailed() throws Exception {
605     String table = "testOpenFailed";
606     try {
607       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
608       desc.addFamily(new HColumnDescriptor(FAMILY));
609       admin.createTable(desc);
610 
611       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
612       HRegionInfo hri = new HRegionInfo(
613         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
614       MetaTableAccessor.addRegionToMeta(meta, hri);
615 
616       MyLoadBalancer.controledRegion = hri.getEncodedName();
617 
618       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
619       master.assignRegion(hri);
620       AssignmentManager am = master.getAssignmentManager();
621       assertFalse(am.waitForAssignment(hri));
622 
623       RegionState state = am.getRegionStates().getRegionState(hri);
624       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
625       // Failed to open since no plan, so it's on no server
626       assertNull(state.getServerName());
627 
628       MyLoadBalancer.controledRegion = null;
629       master.assignRegion(hri);
630       assertTrue(am.waitForAssignment(hri));
631 
632       ServerName serverName = master.getAssignmentManager().
633         getRegionStates().getRegionServerOfRegion(hri);
634       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
635     } finally {
636       MyLoadBalancer.controledRegion = null;
637       TEST_UTIL.deleteTable(Bytes.toBytes(table));
638     }
639   }
640 
641   /**
642    * This tests region open failure which is not recoverable
643    */
644   @Test (timeout=60000)
645   public void testOpenFailedUnrecoverable() throws Exception {
646     TableName table =
647         TableName.valueOf("testOpenFailedUnrecoverable");
648     try {
649       HTableDescriptor desc = new HTableDescriptor(table);
650       desc.addFamily(new HColumnDescriptor(FAMILY));
651       admin.createTable(desc);
652 
653       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
654       HRegionInfo hri = new HRegionInfo(
655         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
656       MetaTableAccessor.addRegionToMeta(meta, hri);
657 
658       FileSystem fs = FileSystem.get(conf);
659       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
660       Path regionDir = new Path(tableDir, hri.getEncodedName());
661       // create a file named the same as the region dir to
662       // mess up with region opening
663       fs.create(regionDir, true);
664 
665       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
666       master.assignRegion(hri);
667       AssignmentManager am = master.getAssignmentManager();
668       assertFalse(am.waitForAssignment(hri));
669 
670       RegionState state = am.getRegionStates().getRegionState(hri);
671       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
672       // Failed to open due to file system issue. Region state should
673       // carry the opening region server so that we can force close it
674       // later on before opening it again. See HBASE-9092.
675       assertNotNull(state.getServerName());
676 
677       // remove the blocking file, so that region can be opened
678       fs.delete(regionDir, true);
679       master.assignRegion(hri);
680       assertTrue(am.waitForAssignment(hri));
681 
682       ServerName serverName = master.getAssignmentManager().
683         getRegionStates().getRegionServerOfRegion(hri);
684       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
685     } finally {
686       TEST_UTIL.deleteTable(table);
687     }
688   }
689 
690   @Test (timeout=60000)
691   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
692     final TableName table =
693         TableName.valueOf
694             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
695     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
696     HRegionInfo hri = null;
697     ServerName serverName = null;
698     try {
699       hri = createTableAndGetOneRegion(table);
700       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
701       ServerName destServerName = null;
702       HRegionServer destServer = null;
703       for (int i = 0; i < 3; i++) {
704         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
705         if (!destServer.getServerName().equals(serverName)) {
706           destServerName = destServer.getServerName();
707           break;
708         }
709       }
710       am.regionOffline(hri);
711       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
712       am.getRegionStates().updateRegionState(hri, State.PENDING_OPEN, destServerName);
713       if (ConfigUtil.useZKForAssignment(conf)) {
714         ZKAssign.createNodeOffline(zkw, hri, destServerName);
715         ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
716 
717         // Wait till the event is processed and the region is in transition
718         long timeoutTime = System.currentTimeMillis() + 20000;
719         while (!am.getRegionStates().isRegionInTransition(hri)) {
720           assertTrue("Failed to process ZK opening event in time",
721             System.currentTimeMillis() < timeoutTime);
722           Thread.sleep(100);
723         }
724       }
725 
726       am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLING);
727       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
728       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
729       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
730           .getRegionState(hri).isOffline());
731     } finally {
732       if (hri != null && serverName != null) {
733         am.regionOnline(hri, serverName);
734       }
735       am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLED);
736       TEST_UTIL.deleteTable(table);
737     }
738   }
739 
740   /**
741    * This tests region close hanging
742    */
743   @Test (timeout=60000)
744   public void testCloseHang() throws Exception {
745     String table = "testCloseHang";
746     try {
747       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
748       desc.addFamily(new HColumnDescriptor(FAMILY));
749       admin.createTable(desc);
750 
751       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
752       HRegionInfo hri = new HRegionInfo(
753         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
754       MetaTableAccessor.addRegionToMeta(meta, hri);
755 
756       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
757       master.assignRegion(hri);
758       AssignmentManager am = master.getAssignmentManager();
759       assertTrue(am.waitForAssignment(hri));
760       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
761       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
762 
763       MyRegionObserver.postCloseEnabled.set(true);
764       am.unassign(hri);
765       // Now region should pending_close or closing
766       // Unassign it again forcefully so that we can trigger already
767       // in transition exception. This test is to make sure this scenario
768       // is handled properly.
769       am.server.getConfiguration().setLong(
770         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
771       am.unassign(hri, true);
772       RegionState state = am.getRegionStates().getRegionState(hri);
773       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
774 
775       // Let region closing move ahead. The region should be closed
776       // properly and re-assigned automatically
777       MyRegionObserver.postCloseEnabled.set(false);
778 
779       // region may still be assigned now since it's closing,
780       // let's check if it's assigned after it's out of transition
781       am.waitOnRegionToClearRegionsInTransition(hri);
782 
783       // region should be closed and re-assigned
784       assertTrue(am.waitForAssignment(hri));
785       ServerName serverName = master.getAssignmentManager().
786         getRegionStates().getRegionServerOfRegion(hri);
787       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
788     } finally {
789       MyRegionObserver.postCloseEnabled.set(false);
790       TEST_UTIL.deleteTable(Bytes.toBytes(table));
791     }
792   }
793 
794   /**
795    * This tests region close racing with open
796    */
797   @Test (timeout=60000)
798   public void testOpenCloseRacing() throws Exception {
799     String table = "testOpenCloseRacing";
800     try {
801       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
802       desc.addFamily(new HColumnDescriptor(FAMILY));
803       admin.createTable(desc);
804 
805       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
806       HRegionInfo hri = new HRegionInfo(
807         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
808       MetaTableAccessor.addRegionToMeta(meta, hri);
809       meta.close();
810 
811       MyRegionObserver.postOpenEnabled.set(true);
812       MyRegionObserver.postOpenCalled = false;
813       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
814       // Region will be opened, but it won't complete
815       master.assignRegion(hri);
816       long end = EnvironmentEdgeManager.currentTime() + 20000;
817       // Wait till postOpen is called
818       while (!MyRegionObserver.postOpenCalled ) {
819         assertFalse("Timed out waiting for postOpen to be called",
820           EnvironmentEdgeManager.currentTime() > end);
821         Thread.sleep(300);
822       }
823 
824       AssignmentManager am = master.getAssignmentManager();
825       // Now let's unassign it, it should do nothing
826       am.unassign(hri);
827       RegionState state = am.getRegionStates().getRegionState(hri);
828       ServerName oldServerName = state.getServerName();
829       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
830 
831       // Now the region is stuck in opening
832       // Let's forcefully re-assign it to trigger closing/opening
833       // racing. This test is to make sure this scenario
834       // is handled properly.
835       ServerName destServerName = null;
836       int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
837       for (int i = 0; i < numRS; i++) {
838         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
839         if (!destServer.getServerName().equals(oldServerName)) {
840           destServerName = destServer.getServerName();
841           break;
842         }
843       }
844       assertNotNull(destServerName);
845       assertFalse("Region should be assigned on a new region server",
846         oldServerName.equals(destServerName));
847       List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
848       regions.add(hri);
849       am.assign(destServerName, regions);
850       
851       // let region open continue
852       MyRegionObserver.postOpenEnabled.set(false);
853 
854       // let's check if it's assigned after it's out of transition
855       am.waitOnRegionToClearRegionsInTransition(hri);
856       assertTrue(am.waitForAssignment(hri));
857 
858       ServerName serverName = master.getAssignmentManager().
859         getRegionStates().getRegionServerOfRegion(hri);
860       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
861     } finally {
862       MyRegionObserver.postOpenEnabled.set(false);
863       TEST_UTIL.deleteTable(Bytes.toBytes(table));
864     }
865   }
866 
867   /**
868    * Test force unassign/assign a region hosted on a dead server
869    */
870   @Test (timeout=60000)
871   public void testAssignRacingWithSSH() throws Exception {
872     String table = "testAssignRacingWithSSH";
873     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
874     MyMaster master = null;
875     try {
876       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
877       desc.addFamily(new HColumnDescriptor(FAMILY));
878       admin.createTable(desc);
879 
880       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
881       HRegionInfo hri = new HRegionInfo(
882         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
883       MetaTableAccessor.addRegionToMeta(meta, hri);
884 
885       // Assign the region
886       master = (MyMaster)cluster.getMaster();
887       master.assignRegion(hri);
888 
889       // Hold SSH before killing the hosting server
890       master.enableSSH(false);
891 
892       AssignmentManager am = master.getAssignmentManager();
893       RegionStates regionStates = am.getRegionStates();
894       ServerName metaServer = regionStates.getRegionServerOfRegion(
895         HRegionInfo.FIRST_META_REGIONINFO);
896       while (true) {
897         assertTrue(am.waitForAssignment(hri));
898         RegionState state = regionStates.getRegionState(hri);
899         ServerName oldServerName = state.getServerName();
900         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
901           // Kill the hosting server, which doesn't have meta on it.
902           cluster.killRegionServer(oldServerName);
903           cluster.waitForRegionServerToStop(oldServerName, -1);
904           break;
905         }
906         int i = cluster.getServerWithMeta();
907         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
908         oldServerName = rs.getServerName();
909         master.move(hri.getEncodedNameAsBytes(),
910           Bytes.toBytes(oldServerName.getServerName()));
911       }
912 
913       // You can't assign a dead region before SSH
914       am.assign(hri, true, true);
915       RegionState state = regionStates.getRegionState(hri);
916       assertTrue(state.isFailedClose());
917 
918       // You can't unassign a dead region before SSH either
919       am.unassign(hri, true);
920       assertTrue(state.isFailedClose());
921 
922       // Enable SSH so that log can be split
923       master.enableSSH(true);
924 
925       // let's check if it's assigned after it's out of transition.
926       // no need to assign it manually, SSH should do it
927       am.waitOnRegionToClearRegionsInTransition(hri);
928       assertTrue(am.waitForAssignment(hri));
929 
930       ServerName serverName = master.getAssignmentManager().
931         getRegionStates().getRegionServerOfRegion(hri);
932       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
933     } finally {
934       if (master != null) {
935         master.enableSSH(true);
936       }
937       TEST_UTIL.deleteTable(Bytes.toBytes(table));
938       cluster.startRegionServer();
939     }
940   }
941 
942   /**
943    * Test force unassign/assign a region of a disabled table
944    */
945   @Test (timeout=60000)
946   public void testAssignDisabledRegion() throws Exception {
947     TableName table = TableName.valueOf("testAssignDisabledRegion");
948     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
949     MyMaster master = null;
950     try {
951       HTableDescriptor desc = new HTableDescriptor(table);
952       desc.addFamily(new HColumnDescriptor(FAMILY));
953       admin.createTable(desc);
954 
955       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
956       HRegionInfo hri = new HRegionInfo(
957         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
958       MetaTableAccessor.addRegionToMeta(meta, hri);
959 
960       // Assign the region
961       master = (MyMaster)cluster.getMaster();
962       master.assignRegion(hri);
963       AssignmentManager am = master.getAssignmentManager();
964       RegionStates regionStates = am.getRegionStates();
965       assertTrue(am.waitForAssignment(hri));
966 
967       // Disable the table
968       admin.disableTable(table);
969       assertTrue(regionStates.isRegionOffline(hri));
970 
971       // You can't assign a disabled region
972       am.assign(hri, true, true);
973       assertTrue(regionStates.isRegionOffline(hri));
974 
975       // You can't unassign a disabled region either
976       am.unassign(hri, true);
977       assertTrue(regionStates.isRegionOffline(hri));
978     } finally {
979       TEST_UTIL.deleteTable(table);
980     }
981   }
982 
983   /**
984    * Test offlined region is assigned by SSH
985    */
986   @Test (timeout=60000)
987   public void testAssignOfflinedRegionBySSH() throws Exception {
988     String table = "testAssignOfflinedRegionBySSH";
989     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
990     MyMaster master = null;
991     try {
992       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
993       desc.addFamily(new HColumnDescriptor(FAMILY));
994       admin.createTable(desc);
995 
996       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
997       HRegionInfo hri = new HRegionInfo(
998         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
999       MetaTableAccessor.addRegionToMeta(meta, hri);
1000 
1001       // Assign the region
1002       master = (MyMaster)cluster.getMaster();
1003       master.assignRegion(hri);
1004 
1005       AssignmentManager am = master.getAssignmentManager();
1006       RegionStates regionStates = am.getRegionStates();
1007       ServerName metaServer = regionStates.getRegionServerOfRegion(
1008         HRegionInfo.FIRST_META_REGIONINFO);
1009       ServerName oldServerName = null;
1010       while (true) {
1011         assertTrue(am.waitForAssignment(hri));
1012         RegionState state = regionStates.getRegionState(hri);
1013         oldServerName = state.getServerName();
1014         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
1015           // Mark the hosting server aborted, but don't actually kill it.
1016           // It doesn't have meta on it.
1017           MyRegionServer.abortedServer = oldServerName;
1018           break;
1019         }
1020         int i = cluster.getServerWithMeta();
1021         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
1022         oldServerName = rs.getServerName();
1023         master.move(hri.getEncodedNameAsBytes(),
1024           Bytes.toBytes(oldServerName.getServerName()));
1025       }
1026 
1027       // Make sure the region is assigned on the dead server
1028       assertTrue(regionStates.isRegionOnline(hri));
1029       assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
1030 
1031       // Try to unassign the dead region before SSH
1032       am.unassign(hri, false);
1033       // The region should be moved to offline since the server is dead
1034       RegionState state = regionStates.getRegionState(hri);
1035       assertTrue(state.isOffline());
1036 
1037       // Kill the hosting server, which doesn't have meta on it.
1038       cluster.killRegionServer(oldServerName);
1039       cluster.waitForRegionServerToStop(oldServerName, -1);
1040 
1041       ServerManager serverManager = master.getServerManager();
1042       while (!serverManager.isServerDead(oldServerName)
1043           || serverManager.getDeadServers().areDeadServersInProgress()) {
1044         Thread.sleep(100);
1045       }
1046 
1047       // Let's check if it's assigned after it's out of transition.
1048       // no need to assign it manually, SSH should do it
1049       am.waitOnRegionToClearRegionsInTransition(hri);
1050       assertTrue(am.waitForAssignment(hri));
1051 
1052       ServerName serverName = master.getAssignmentManager().
1053         getRegionStates().getRegionServerOfRegion(hri);
1054       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
1055     } finally {
1056       MyRegionServer.abortedServer = null;
1057       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1058       cluster.startRegionServer();
1059     }
1060   }
1061 
1062   /**
1063    * Test SSH waiting for extra region server for assignment
1064    */
1065   @Test (timeout=300000)
1066   public void testSSHWaitForServerToAssignRegion() throws Exception {
1067     TableName table = TableName.valueOf("testSSHWaitForServerToAssignRegion");
1068     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1069     boolean startAServer = false;
1070     try {
1071       HTableDescriptor desc = new HTableDescriptor(table);
1072       desc.addFamily(new HColumnDescriptor(FAMILY));
1073       admin.createTable(desc);
1074 
1075       HMaster master = cluster.getMaster();
1076       final ServerManager serverManager = master.getServerManager();
1077       MyLoadBalancer.countRegionServers = Integer.valueOf(
1078         serverManager.countOfRegionServers());
1079       HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(table);
1080       assertNotNull("First region should be assigned", rs);
1081       final ServerName serverName = rs.getServerName();
1082       // Wait till SSH tried to assign regions a several times
1083       int counter = MyLoadBalancer.counter.get() + 5;
1084       cluster.killRegionServer(serverName);
1085       startAServer = true;
1086       cluster.waitForRegionServerToStop(serverName, -1);
1087       while (counter > MyLoadBalancer.counter.get()) {
1088         Thread.sleep(1000);
1089       }
1090       cluster.startRegionServer();
1091       startAServer = false;
1092       // Wait till the dead server is processed by SSH
1093       TEST_UTIL.waitFor(120000, 1000, new Waiter.Predicate<Exception>() {
1094         @Override
1095         public boolean evaluate() throws Exception {
1096           return serverManager.isServerDead(serverName)
1097             && !serverManager.areDeadServersInProgress();
1098         }
1099       });
1100       TEST_UTIL.waitUntilAllRegionsAssigned(table, 300000);
1101 
1102       rs = TEST_UTIL.getRSForFirstRegionInTable(table);
1103       assertTrue("First region should be re-assigned to a different server",
1104         rs != null && !serverName.equals(rs.getServerName()));
1105     } finally {
1106       MyLoadBalancer.countRegionServers = null;
1107       TEST_UTIL.deleteTable(table);
1108       if (startAServer) {
1109         cluster.startRegionServer();
1110       }
1111     }
1112   }
1113 
1114   /**
1115    * Test disabled region is ignored by SSH
1116    */
1117   @Test (timeout=60000)
1118   public void testAssignDisabledRegionBySSH() throws Exception {
1119     String table = "testAssignDisabledRegionBySSH";
1120     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1121     MyMaster master = null;
1122     try {
1123       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1124       desc.addFamily(new HColumnDescriptor(FAMILY));
1125       admin.createTable(desc);
1126 
1127       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1128       HRegionInfo hri = new HRegionInfo(
1129         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1130       MetaTableAccessor.addRegionToMeta(meta, hri);
1131 
1132       // Assign the region
1133       master = (MyMaster)cluster.getMaster();
1134       master.assignRegion(hri);
1135 
1136       AssignmentManager am = master.getAssignmentManager();
1137       RegionStates regionStates = am.getRegionStates();
1138       ServerName metaServer = regionStates.getRegionServerOfRegion(
1139         HRegionInfo.FIRST_META_REGIONINFO);
1140       ServerName oldServerName = null;
1141       while (true) {
1142         assertTrue(am.waitForAssignment(hri));
1143         RegionState state = regionStates.getRegionState(hri);
1144         oldServerName = state.getServerName();
1145         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
1146           // Mark the hosting server aborted, but don't actually kill it.
1147           // It doesn't have meta on it.
1148           MyRegionServer.abortedServer = oldServerName;
1149           break;
1150         }
1151         int i = cluster.getServerWithMeta();
1152         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
1153         oldServerName = rs.getServerName();
1154         master.move(hri.getEncodedNameAsBytes(),
1155           Bytes.toBytes(oldServerName.getServerName()));
1156       }
1157 
1158       // Make sure the region is assigned on the dead server
1159       assertTrue(regionStates.isRegionOnline(hri));
1160       assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
1161 
1162       // Try to unassign the dead region before SSH
1163       am.unassign(hri, false);
1164       // The region should be moved to offline since the server is dead
1165       RegionState state = regionStates.getRegionState(hri);
1166       assertTrue(state.isOffline());
1167 
1168       // Disable the table now.
1169       master.disableTable(hri.getTable());
1170 
1171       // Kill the hosting server, which doesn't have meta on it.
1172       cluster.killRegionServer(oldServerName);
1173       cluster.waitForRegionServerToStop(oldServerName, -1);
1174 
1175       ServerManager serverManager = master.getServerManager();
1176       while (!serverManager.isServerDead(oldServerName)
1177           || serverManager.getDeadServers().areDeadServersInProgress()) {
1178         Thread.sleep(100);
1179       }
1180 
1181       // Wait till no more RIT, the region should be offline.
1182       am.waitUntilNoRegionsInTransition(60000);
1183       assertTrue(regionStates.isRegionOffline(hri));
1184     } finally {
1185       MyRegionServer.abortedServer = null;
1186       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1187       cluster.startRegionServer();
1188     }
1189   }
1190 
1191   /**
1192    * Test that region state transition call is idempotent
1193    */
1194   @Test(timeout = 60000)
1195   public void testReportRegionStateTransition() throws Exception {
1196     String table = "testReportRegionStateTransition";
1197     try {
1198       MyRegionServer.simulateRetry = true;
1199       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1200       desc.addFamily(new HColumnDescriptor(FAMILY));
1201       admin.createTable(desc);
1202       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1203       HRegionInfo hri =
1204           new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1205       MetaTableAccessor.addRegionToMeta(meta, hri);
1206       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1207       master.assignRegion(hri);
1208       AssignmentManager am = master.getAssignmentManager();
1209       am.waitForAssignment(hri);
1210       RegionStates regionStates = am.getRegionStates();
1211       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1212       // Assert the the region is actually open on the server
1213       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
1214       // Closing region should just work fine
1215       admin.disableTable(TableName.valueOf(table));
1216       assertTrue(regionStates.isRegionOffline(hri));
1217       List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getOnlineRegions(serverName);
1218       assertTrue(!regions.contains(hri));
1219     } finally {
1220       MyRegionServer.simulateRetry = false;
1221       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1222     }
1223   }
1224 
1225   /**
1226    * Test concurrent updates to meta when meta is not on master
1227    * @throws Exception
1228    */
1229   @Test(timeout = 30000)
1230   public void testUpdatesRemoteMeta() throws Exception {
1231     // Not for zk less assignment
1232     if (conf.getBoolean("hbase.assignment.usezk", true)) {
1233       return;
1234     }
1235     conf.setInt("hbase.regionstatestore.meta.connection", 3);
1236     final RegionStateStore rss =
1237         new RegionStateStore(new MyRegionServer(conf, new ZkCoordinatedStateManager()));
1238     rss.start();
1239     // Create 10 threads and make each do 10 puts related to region state update
1240     Thread[] th = new Thread[10];
1241     List<String> nameList = new ArrayList<String>();
1242     List<TableName> tableNameList = new ArrayList<TableName>();
1243     for (int i = 0; i < th.length; i++) {
1244       th[i] = new Thread() {
1245         @Override
1246         public void run() {
1247           HRegionInfo[] hri = new HRegionInfo[10];
1248           ServerName serverName = ServerName.valueOf("dummyhost", 1000, 1234);
1249           for (int i = 0; i < 10; i++) {
1250             hri[i] = new HRegionInfo(TableName.valueOf(Thread.currentThread().getName() + "_" + i));
1251             RegionState newState = new RegionState(hri[i], RegionState.State.OPEN, serverName);
1252             RegionState oldState =
1253                 new RegionState(hri[i], RegionState.State.PENDING_OPEN, serverName);
1254             rss.updateRegionState(1, newState, oldState);
1255           }
1256         }
1257       };
1258       th[i].start();
1259       nameList.add(th[i].getName());
1260     }
1261     for (int i = 0; i < th.length; i++) {
1262       th[i].join();
1263     }
1264     // Add all the expected table names in meta to tableNameList
1265     for (String name : nameList) {
1266       for (int i = 0; i < 10; i++) {
1267         tableNameList.add(TableName.valueOf(name + "_" + i));
1268       }
1269     }
1270     List<Result> metaRows = MetaTableAccessor.fullScanOfMeta(admin.getConnection());
1271     int count = 0;
1272     // Check all 100 rows are in meta
1273     for (Result result : metaRows) {
1274       if (tableNameList.contains(HRegionInfo.getTable(result.getRow()))) {
1275         count++;
1276         if (count == 100) {
1277           break;
1278         }
1279       }
1280     }
1281     assertTrue(count == 100);
1282     rss.stop();
1283   }
1284 
1285   static class MyLoadBalancer extends StochasticLoadBalancer {
1286     // For this region, if specified, always assign to nowhere
1287     static volatile String controledRegion = null;
1288 
1289     static volatile Integer countRegionServers = null;
1290     static AtomicInteger counter = new AtomicInteger(0);
1291 
1292     @Override
1293     public ServerName randomAssignment(HRegionInfo regionInfo,
1294         List<ServerName> servers) {
1295       if (regionInfo.getEncodedName().equals(controledRegion)) {
1296         return null;
1297       }
1298       return super.randomAssignment(regionInfo, servers);
1299     }
1300 
1301     @Override
1302     public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(
1303         List<HRegionInfo> regions, List<ServerName> servers) {
1304       if (countRegionServers != null && services != null) {
1305         int regionServers = services.getServerManager().countOfRegionServers();
1306         if (regionServers < countRegionServers.intValue()) {
1307           // Let's wait till more region servers join in.
1308           // Before that, fail region assignments.
1309           counter.incrementAndGet();
1310           return null;
1311         }
1312       }
1313       return super.roundRobinAssignment(regions, servers);
1314     }
1315   }
1316 
1317   public static class MyMaster extends HMaster {
1318     AtomicBoolean enabled = new AtomicBoolean(true);
1319 
1320     public MyMaster(Configuration conf, CoordinatedStateManager cp)
1321       throws IOException, KeeperException,
1322         InterruptedException {
1323       super(conf, cp);
1324     }
1325 
1326     @Override
1327     public boolean isServerShutdownHandlerEnabled() {
1328       return enabled.get() && super.isServerShutdownHandlerEnabled();
1329     }
1330 
1331     public void enableSSH(boolean enabled) {
1332       this.enabled.set(enabled);
1333       if (enabled) {
1334         serverManager.processQueuedDeadServers();
1335       }
1336     }
1337   }
1338 
1339   public static class MyRegionServer extends MiniHBaseClusterRegionServer {
1340     static volatile ServerName abortedServer = null;
1341     static volatile boolean simulateRetry = false;
1342 
1343     public MyRegionServer(Configuration conf, CoordinatedStateManager cp)
1344       throws IOException, KeeperException,
1345         InterruptedException {
1346       super(conf, cp);
1347     }
1348 
1349     @Override
1350     public boolean reportRegionStateTransition(TransitionCode code, long openSeqNum,
1351         HRegionInfo... hris) {
1352       if (simulateRetry) {
1353         // Simulate retry by calling the method twice
1354         super.reportRegionStateTransition(code, openSeqNum, hris);
1355         return super.reportRegionStateTransition(code, openSeqNum, hris);
1356       }
1357       return super.reportRegionStateTransition(code, openSeqNum, hris);
1358     }
1359 
1360     @Override
1361     public boolean isAborted() {
1362       return getServerName().equals(abortedServer) || super.isAborted();
1363     }
1364   }
1365 
1366   public static class MyRegionObserver extends BaseRegionObserver {
1367     // If enabled, fail all preClose calls
1368     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
1369 
1370     // If enabled, stall postClose calls
1371     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
1372 
1373     // If enabled, stall postOpen calls
1374     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
1375 
1376     // A flag to track if postOpen is called
1377     static volatile boolean postOpenCalled = false;
1378 
1379     @Override
1380     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
1381         boolean abortRequested) throws IOException {
1382       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
1383     }
1384 
1385     @Override
1386     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
1387         boolean abortRequested) {
1388       stallOnFlag(postCloseEnabled);
1389     }
1390 
1391     @Override
1392     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
1393       postOpenCalled = true;
1394       stallOnFlag(postOpenEnabled);
1395     }
1396 
1397     private void stallOnFlag(final AtomicBoolean flag) {
1398       try {
1399         // If enabled, stall
1400         while (flag.get()) {
1401           Thread.sleep(1000);
1402         }
1403       } catch (InterruptedException ie) {
1404         Thread.currentThread().interrupt();
1405       }
1406     }
1407   }
1408 }