View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotSame;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.hbase.CellScannable;
34  import org.apache.hadoop.hbase.CellUtil;
35  import org.apache.hadoop.hbase.CoordinatedStateException;
36  import org.apache.hadoop.hbase.CoordinatedStateManager;
37  import org.apache.hadoop.hbase.CoordinatedStateManagerFactory;
38  import org.apache.hadoop.hbase.DoNotRetryIOException;
39  import org.apache.hadoop.hbase.HBaseConfiguration;
40  import org.apache.hadoop.hbase.HBaseTestingUtility;
41  import org.apache.hadoop.hbase.HConstants;
42  import org.apache.hadoop.hbase.HRegionInfo;
43  import org.apache.hadoop.hbase.MetaMockingUtil;
44  import org.apache.hadoop.hbase.RegionException;
45  import org.apache.hadoop.hbase.RegionTransition;
46  import org.apache.hadoop.hbase.Server;
47  import org.apache.hadoop.hbase.ServerLoad;
48  import org.apache.hadoop.hbase.ServerName;
49  import org.apache.hadoop.hbase.TableName;
50  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
51  import org.apache.hadoop.hbase.client.ClusterConnection;
52  import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
53  import org.apache.hadoop.hbase.client.Result;
54  import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
55  import org.apache.hadoop.hbase.coordination.OpenRegionCoordination;
56  import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
57  import org.apache.hadoop.hbase.coordination.ZkOpenRegionCoordination;
58  import org.apache.hadoop.hbase.exceptions.DeserializationException;
59  import org.apache.hadoop.hbase.executor.EventType;
60  import org.apache.hadoop.hbase.executor.ExecutorService;
61  import org.apache.hadoop.hbase.executor.ExecutorType;
62  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
63  import org.apache.hadoop.hbase.master.RegionState.State;
64  import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager;
65  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
66  import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
67  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
68  import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
69  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
70  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
71  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
72  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
73  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
74  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
75  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
76  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
77  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
78  import org.apache.hadoop.hbase.testclassification.MediumTests;
79  import org.apache.hadoop.hbase.util.Bytes;
80  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
81  import org.apache.hadoop.hbase.util.Threads;
82  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
83  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
84  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
85  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
86  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
87  import org.apache.zookeeper.KeeperException;
88  import org.apache.zookeeper.KeeperException.NodeExistsException;
89  import org.apache.zookeeper.Watcher;
90  import org.junit.After;
91  import org.junit.AfterClass;
92  import org.junit.Before;
93  import org.junit.BeforeClass;
94  import org.junit.Test;
95  import org.junit.experimental.categories.Category;
96  import org.mockito.Mockito;
97  import org.mockito.internal.util.reflection.Whitebox;
98  import org.mockito.invocation.InvocationOnMock;
99  import org.mockito.stubbing.Answer;
100 
101 import com.google.protobuf.RpcController;
102 import com.google.protobuf.ServiceException;
103 
104 
105 /**
106  * Test {@link AssignmentManager}
107  */
108 @Category(MediumTests.class)
109 public class TestAssignmentManager {
110   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
111   private static final ServerName SERVERNAME_A =
112       ServerName.valueOf("example.org", 1234, 5678);
113   private static final ServerName SERVERNAME_B =
114       ServerName.valueOf("example.org", 0, 5678);
115   private static final HRegionInfo REGIONINFO =
116     new HRegionInfo(TableName.valueOf("t"),
117       HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW);
118   private static int assignmentCount;
119   private static boolean enabling = false;
120 
121   // Mocked objects or; get redone for each test.
122   private Server server;
123   private ServerManager serverManager;
124   private ZooKeeperWatcher watcher;
125   private CoordinatedStateManager cp;
126   private MetaTableLocator mtl;
127   private LoadBalancer balancer;
128   private HMaster master;
129   private ClusterConnection connection;
130 
131   @BeforeClass
132   public static void beforeClass() throws Exception {
133     HTU.getConfiguration().setBoolean("hbase.assignment.usezk", true);
134     HTU.startMiniZKCluster();
135   }
136 
137   @AfterClass
138   public static void afterClass() throws IOException {
139     HTU.shutdownMiniZKCluster();
140   }
141 
142   @Before
143   public void before() throws ZooKeeperConnectionException, IOException {
144     // TODO: Make generic versions of what we do below and put up in a mocking
145     // utility class or move up into HBaseTestingUtility.
146 
147     // Mock a Server.  Have it return a legit Configuration and ZooKeeperWatcher.
148     // If abort is called, be sure to fail the test (don't just swallow it
149     // silently as is mockito default).
150     this.server = Mockito.mock(Server.class);
151     Mockito.when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
152     Mockito.when(server.getConfiguration()).thenReturn(HTU.getConfiguration());
153     this.watcher =
154       new ZooKeeperWatcher(HTU.getConfiguration(), "mockedServer", this.server, true);
155     Mockito.when(server.getZooKeeper()).thenReturn(this.watcher);
156     Mockito.doThrow(new RuntimeException("Aborted")).
157       when(server).abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
158 
159     cp = new ZkCoordinatedStateManager();
160     cp.initialize(this.server);
161     cp.start();
162 
163     mtl = Mockito.mock(MetaTableLocator.class);
164 
165     Mockito.when(server.getCoordinatedStateManager()).thenReturn(cp);
166     Mockito.when(server.getMetaTableLocator()).thenReturn(mtl);
167 
168     // Get a connection w/ mocked up common methods.
169     this.connection =
170       (ClusterConnection)HConnectionTestingUtility.getMockedConnection(HTU.getConfiguration());
171 
172     // Make it so we can get a catalogtracker from servermanager.. .needed
173     // down in guts of server shutdown handler.
174     Mockito.when(server.getConnection()).thenReturn(connection);
175 
176     // Mock a ServerManager.  Say server SERVERNAME_{A,B} are online.  Also
177     // make it so if close or open, we return 'success'.
178     this.serverManager = Mockito.mock(ServerManager.class);
179     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
180     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
181     Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer());
182     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
183     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
184     onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
185     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
186         new ArrayList<ServerName>(onlineServers.keySet()));
187     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
188 
189     List<ServerName> avServers = new ArrayList<ServerName>();
190     avServers.addAll(onlineServers.keySet());
191     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
192     Mockito.when(this.serverManager.createDestinationServersList(null)).thenReturn(avServers);
193 
194     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)).
195       thenReturn(true);
196     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
197       thenReturn(true);
198     // Ditto on open.
199     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
200       thenReturn(RegionOpeningState.OPENED);
201     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
202       thenReturn(RegionOpeningState.OPENED);
203     this.master = Mockito.mock(HMaster.class);
204 
205     Mockito.when(this.master.getServerManager()).thenReturn(serverManager);
206   }
207 
208   @After public void after() throws KeeperException, IOException {
209     if (this.watcher != null) {
210       // Clean up all znodes
211       ZKAssign.deleteAllNodes(this.watcher);
212       this.watcher.close();
213       this.cp.stop();
214     }
215     if (this.connection != null) this.connection.close();
216   }
217 
218   /**
219    * Test a balance going on at same time as a master failover
220    *
221    * @throws IOException
222    * @throws KeeperException
223    * @throws InterruptedException
224    * @throws DeserializationException
225    */
226   @Test(timeout = 60000)
227   public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
228       throws IOException, KeeperException, InterruptedException, ServiceException,
229       DeserializationException, CoordinatedStateException {
230     AssignmentManagerWithExtrasForTesting am =
231       setUpMockedAssignmentManager(this.server, this.serverManager);
232     try {
233       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
234       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
235       while (!am.processRITInvoked) Thread.sleep(1);
236       // As part of the failover cleanup, the balancing region plan is removed.
237       // So a random server will be used to open the region. For testing purpose,
238       // let's assume it is going to open on server b:
239       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
240 
241       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
242 
243       // Now fake the region closing successfully over on the regionserver; the
244       // regionserver will have set the region in CLOSED state. This will
245       // trigger callback into AM. The below zk close call is from the RS close
246       // region handler duplicated here because its down deep in a private
247       // method hard to expose.
248       int versionid =
249         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
250       assertNotSame(versionid, -1);
251       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
252 
253       // Get current versionid else will fail on transition from OFFLINE to
254       // OPENING below
255       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
256       assertNotSame(-1, versionid);
257       // This uglyness below is what the openregionhandler on RS side does.
258       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
259         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
260         EventType.RS_ZK_REGION_OPENING, versionid);
261       assertNotSame(-1, versionid);
262       // Move znode from OPENING to OPENED as RS does on successful open.
263       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
264         SERVERNAME_B, versionid);
265       assertNotSame(-1, versionid);
266       am.gate.set(false);
267       // Block here until our znode is cleared or until this test times out.
268       ZKAssign.blockUntilNoRIT(watcher);
269     } finally {
270       am.getExecutorService().shutdown();
271       am.shutdown();
272     }
273   }
274 
275   @Test(timeout = 60000)
276   public void testBalanceOnMasterFailoverScenarioWithClosedNode()
277       throws IOException, KeeperException, InterruptedException, ServiceException,
278         DeserializationException, CoordinatedStateException {
279     AssignmentManagerWithExtrasForTesting am =
280       setUpMockedAssignmentManager(this.server, this.serverManager);
281     try {
282       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
283       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
284       while (!am.processRITInvoked) Thread.sleep(1);
285       // As part of the failover cleanup, the balancing region plan is removed.
286       // So a random server will be used to open the region. For testing purpose,
287       // let's assume it is going to open on server b:
288       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
289 
290       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
291 
292       // Now fake the region closing successfully over on the regionserver; the
293       // regionserver will have set the region in CLOSED state. This will
294       // trigger callback into AM. The below zk close call is from the RS close
295       // region handler duplicated here because its down deep in a private
296       // method hard to expose.
297       int versionid =
298         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
299       assertNotSame(versionid, -1);
300       am.gate.set(false);
301       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
302 
303       // Get current versionid else will fail on transition from OFFLINE to
304       // OPENING below
305       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
306       assertNotSame(-1, versionid);
307       // This uglyness below is what the openregionhandler on RS side does.
308       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
309           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
310           EventType.RS_ZK_REGION_OPENING, versionid);
311       assertNotSame(-1, versionid);
312       // Move znode from OPENING to OPENED as RS does on successful open.
313       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
314           SERVERNAME_B, versionid);
315       assertNotSame(-1, versionid);
316 
317       // Block here until our znode is cleared or until this test timesout.
318       ZKAssign.blockUntilNoRIT(watcher);
319     } finally {
320       am.getExecutorService().shutdown();
321       am.shutdown();
322     }
323   }
324 
325   @Test(timeout = 60000)
326   public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
327       throws IOException, KeeperException, InterruptedException, ServiceException,
328       DeserializationException, CoordinatedStateException {
329     AssignmentManagerWithExtrasForTesting am =
330       setUpMockedAssignmentManager(this.server, this.serverManager);
331     try {
332       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
333       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
334       while (!am.processRITInvoked) Thread.sleep(1);
335       // As part of the failover cleanup, the balancing region plan is removed.
336       // So a random server will be used to open the region. For testing purpose,
337       // let's assume it is going to open on server b:
338       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
339 
340       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
341 
342       // Now fake the region closing successfully over on the regionserver; the
343       // regionserver will have set the region in CLOSED state. This will
344       // trigger callback into AM. The below zk close call is from the RS close
345       // region handler duplicated here because its down deep in a private
346       // method hard to expose.
347       int versionid =
348         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
349       assertNotSame(versionid, -1);
350       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
351 
352       am.gate.set(false);
353       // Get current versionid else will fail on transition from OFFLINE to
354       // OPENING below
355       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
356       assertNotSame(-1, versionid);
357       // This uglyness below is what the openregionhandler on RS side does.
358       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
359           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
360           EventType.RS_ZK_REGION_OPENING, versionid);
361       assertNotSame(-1, versionid);
362       // Move znode from OPENING to OPENED as RS does on successful open.
363       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
364           SERVERNAME_B, versionid);
365       assertNotSame(-1, versionid);
366       // Block here until our znode is cleared or until this test timesout.
367       ZKAssign.blockUntilNoRIT(watcher);
368     } finally {
369       am.getExecutorService().shutdown();
370       am.shutdown();
371     }
372   }
373 
374   private void createRegionPlanAndBalance(
375       final AssignmentManager am, final ServerName from,
376       final ServerName to, final HRegionInfo hri) throws RegionException {
377     // Call the balance function but fake the region being online first at
378     // servername from.
379     am.regionOnline(hri, from);
380     // Balance region from 'from' to 'to'. It calls unassign setting CLOSING state
381     // up in zk.  Create a plan and balance
382     am.balance(new RegionPlan(hri, from, to));
383   }
384 
385   /**
386    * Tests AssignmentManager balance function.  Runs a balance moving a region
387    * from one server to another mocking regionserver responding over zk.
388    * @throws IOException
389    * @throws KeeperException
390    * @throws DeserializationException
391    */
392   @Test (timeout=180000)
393   public void testBalance() throws IOException, KeeperException, DeserializationException,
394       InterruptedException, CoordinatedStateException {
395     // Create and startup an executor.  This is used by AssignmentManager
396     // handling zk callbacks.
397     ExecutorService executor = startupMasterExecutor("testBalanceExecutor");
398 
399     // We need a mocked catalog tracker.
400     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
401         .getConfiguration());
402     // Create an AM.
403     AssignmentManager am = new AssignmentManager(this.server,
404       this.serverManager, balancer, executor, null, master.getTableLockManager());
405     am.failoverCleanupDone.set(true);
406     try {
407       // Make sure our new AM gets callbacks; once registered, can't unregister.
408       // Thats ok because we make a new zk watcher for each test.
409       this.watcher.registerListenerFirst(am);
410       // Call the balance function but fake the region being online first at
411       // SERVERNAME_A.  Create a balance plan.
412       am.regionOnline(REGIONINFO, SERVERNAME_A);
413       // Balance region from A to B.
414       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
415       am.balance(plan);
416 
417       RegionStates regionStates = am.getRegionStates();
418       // Must be failed to close since the server is fake
419       assertTrue(regionStates.isRegionInTransition(REGIONINFO)
420         && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
421       // Move it back to pending_close
422       regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
423 
424       // Now fake the region closing successfully over on the regionserver; the
425       // regionserver will have set the region in CLOSED state.  This will
426       // trigger callback into AM. The below zk close call is from the RS close
427       // region handler duplicated here because its down deep in a private
428       // method hard to expose.
429       int versionid =
430         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
431       assertNotSame(versionid, -1);
432       // AM is going to notice above CLOSED and queue up a new assign.  The
433       // assign will go to open the region in the new location set by the
434       // balancer.  The zk node will be OFFLINE waiting for regionserver to
435       // transition it through OPENING, OPENED.  Wait till we see the OFFLINE
436       // zk node before we proceed.
437       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
438 
439       // Get current versionid else will fail on transition from OFFLINE to OPENING below
440       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
441       assertNotSame(-1, versionid);
442       // This uglyness below is what the openregionhandler on RS side does.
443       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
444         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
445         EventType.RS_ZK_REGION_OPENING, versionid);
446       assertNotSame(-1, versionid);
447       // Move znode from OPENING to OPENED as RS does on successful open.
448       versionid =
449         ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
450       assertNotSame(-1, versionid);
451       // Wait on the handler removing the OPENED znode.
452       while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
453     } finally {
454       executor.shutdown();
455       am.shutdown();
456       // Clean up all znodes
457       ZKAssign.deleteAllNodes(this.watcher);
458     }
459   }
460 
461   /**
462    * Run a simple server shutdown handler.
463    * @throws KeeperException
464    * @throws IOException
465    */
466   @Test (timeout=180000)
467   public void testShutdownHandler()
468       throws KeeperException, IOException, CoordinatedStateException, ServiceException {
469     // Create and startup an executor.  This is used by AssignmentManager
470     // handling zk callbacks.
471     ExecutorService executor = startupMasterExecutor("testShutdownHandler");
472 
473     // Create an AM.
474     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
475         this.server, this.serverManager);
476     try {
477       processServerShutdownHandler(am, false);
478     } finally {
479       executor.shutdown();
480       am.shutdown();
481       // Clean up all znodes
482       ZKAssign.deleteAllNodes(this.watcher);
483     }
484   }
485 
486   /**
487    * To test closed region handler to remove rit and delete corresponding znode
488    * if region in pending close or closing while processing shutdown of a region
489    * server.(HBASE-5927).
490    *
491    * @throws KeeperException
492    * @throws IOException
493    * @throws ServiceException
494    */
495   @Test (timeout=180000)
496   public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException,
497     CoordinatedStateException, ServiceException {
498     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
499     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
500   }
501 
502 
503   /**
504    * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
505    * has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also HBASE-5806
506    *
507    * @throws KeeperException
508    * @throws IOException
509    */
510   @Test (timeout=180000)
511   public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
512     // true indicates the region is split but still in RIT
513     testCaseWithSplitRegionPartial(true);
514     // false indicate the region is not split
515     testCaseWithSplitRegionPartial(false);
516   }
517 
518   private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
519       IOException, InterruptedException,
520     CoordinatedStateException, ServiceException {
521     // Create and startup an executor. This is used by AssignmentManager
522     // handling zk callbacks.
523     ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
524     // We need a mocked catalog tracker.
525     ZKAssign.deleteAllNodes(this.watcher);
526 
527     // Create an AM.
528     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
529       this.server, this.serverManager);
530     // adding region to regions and servers maps.
531     am.regionOnline(REGIONINFO, SERVERNAME_A);
532     // adding region in pending close.
533     am.getRegionStates().updateRegionState(
534       REGIONINFO, State.SPLITTING, SERVERNAME_A);
535     am.getTableStateManager().setTableState(REGIONINFO.getTable(),
536       Table.State.ENABLED);
537     RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
538         REGIONINFO.getRegionName(), SERVERNAME_A);
539     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
540     // create znode in M_ZK_REGION_CLOSING state.
541     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
542 
543     try {
544       processServerShutdownHandler(am, regionSplitDone);
545       // check znode deleted or not.
546       // In both cases the znode should be deleted.
547 
548       if (regionSplitDone) {
549         assertFalse("Region state of region in SPLITTING should be removed from rit.",
550             am.getRegionStates().isRegionsInTransition());
551       } else {
552         while (!am.assignInvoked) {
553           Thread.sleep(1);
554         }
555         assertTrue("Assign should be invoked.", am.assignInvoked);
556       }
557     } finally {
558       REGIONINFO.setOffline(false);
559       REGIONINFO.setSplit(false);
560       executor.shutdown();
561       am.shutdown();
562       // Clean up all znodes
563       ZKAssign.deleteAllNodes(this.watcher);
564     }
565   }
566 
567   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
568       IOException, CoordinatedStateException, ServiceException {
569     // Create and startup an executor. This is used by AssignmentManager
570     // handling zk callbacks.
571     ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
572     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
573     ZKAssign.deleteAllNodes(this.watcher);
574 
575     // Create an AM.
576     AssignmentManager am = new AssignmentManager(this.server,
577       this.serverManager, balancer, executor, null, master.getTableLockManager());
578     // adding region to regions and servers maps.
579     am.regionOnline(REGIONINFO, SERVERNAME_A);
580     // adding region in pending close.
581     am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
582     if (state == Table.State.DISABLING) {
583       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
584         Table.State.DISABLING);
585     } else {
586       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
587         Table.State.DISABLED);
588     }
589     RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
590         REGIONINFO.getRegionName(), SERVERNAME_A);
591     // RegionTransitionData data = new
592     // RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
593     // REGIONINFO.getRegionName(), SERVERNAME_A);
594     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
595     // create znode in M_ZK_REGION_CLOSING state.
596     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
597 
598     try {
599       processServerShutdownHandler(am, false);
600       // check znode deleted or not.
601       // In both cases the znode should be deleted.
602       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
603       // check whether in rit or not. In the DISABLING case also the below
604       // assert will be true but the piece of code added for HBASE-5927 will not
605       // do that.
606       if (state == Table.State.DISABLED) {
607         assertFalse("Region state of region in pending close should be removed from rit.",
608             am.getRegionStates().isRegionsInTransition());
609       }
610     } finally {
611       am.setEnabledTable(REGIONINFO.getTable());
612       executor.shutdown();
613       am.shutdown();
614       // Clean up all znodes
615       ZKAssign.deleteAllNodes(this.watcher);
616     }
617   }
618 
619   private void processServerShutdownHandler(AssignmentManager am, boolean splitRegion)
620       throws IOException, ServiceException {
621     // Make sure our new AM gets callbacks; once registered, can't unregister.
622     // Thats ok because we make a new zk watcher for each test.
623     this.watcher.registerListenerFirst(am);
624 
625     // Need to set up a fake scan of meta for the servershutdown handler
626     // Make an RS Interface implementation.  Make it so a scanner can go against it.
627     ClientProtos.ClientService.BlockingInterface implementation =
628       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
629     // Get a meta row result that has region up on SERVERNAME_A
630 
631     Result r;
632     if (splitRegion) {
633       r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
634     } else {
635       r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
636     }
637 
638     final ScanResponse.Builder builder = ScanResponse.newBuilder();
639     builder.setMoreResults(true);
640     builder.addCellsPerResult(r.size());
641     final List<CellScannable> cellScannables = new ArrayList<CellScannable>(1);
642     cellScannables.add(r);
643     Mockito.when(implementation.scan(
644       (RpcController)Mockito.any(), (ScanRequest)Mockito.any())).
645       thenAnswer(new Answer<ScanResponse>() {
646           @Override
647           public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
648             PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
649                 .getArguments()[0];
650             if (controller != null) {
651               controller.setCellScanner(CellUtil.createCellScanner(cellScannables));
652             }
653             return builder.build();
654           }
655       });
656 
657     // Get a connection w/ mocked up common methods.
658     ClusterConnection connection =
659       HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(),
660         null, implementation, SERVERNAME_B, REGIONINFO);
661     // These mocks were done up when all connections were managed.  World is different now we
662     // moved to unmanaged connections.  It messes up the intercepts done in these tests.
663     // Just mark connections as marked and then down in MetaTableAccessor, it will go the path
664     // that picks up the above mocked up 'implementation' so 'scans' of meta return the expected
665     // result.  Redo in new realm of unmanaged connections.
666     Mockito.when(connection.isManaged()).thenReturn(true);
667     try {
668       // Make it so we can get a catalogtracker from servermanager.. .needed
669       // down in guts of server shutdown handler.
670       Mockito.when(this.server.getConnection()).thenReturn(connection);
671 
672       // Now make a server shutdown handler instance and invoke process.
673       // Have it that SERVERNAME_A died.
674       DeadServer deadServers = new DeadServer();
675       deadServers.add(SERVERNAME_A);
676       // I need a services instance that will return the AM
677       MasterFileSystem fs = Mockito.mock(MasterFileSystem.class);
678       Mockito.doNothing().when(fs).setLogRecoveryMode();
679       Mockito.when(fs.getLogRecoveryMode()).thenReturn(RecoveryMode.LOG_REPLAY);
680       MasterServices services = Mockito.mock(MasterServices.class);
681       Mockito.when(services.getAssignmentManager()).thenReturn(am);
682       Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
683       Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
684       Mockito.when(services.getMasterFileSystem()).thenReturn(fs);
685       Mockito.when(services.getConnection()).thenReturn(connection);
686       ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
687           services, deadServers, SERVERNAME_A, false);
688       am.failoverCleanupDone.set(true);
689       handler.process();
690       // The region in r will have been assigned.  It'll be up in zk as unassigned.
691     } finally {
692       if (connection != null) connection.close();
693     }
694   }
695 
696   /**
697    * Create and startup executor pools. Start same set as master does (just
698    * run a few less).
699    * @param name Name to give our executor
700    * @return Created executor (be sure to call shutdown when done).
701    */
702   private ExecutorService startupMasterExecutor(final String name) {
703     // TODO: Move up into HBaseTestingUtility?  Generally useful.
704     ExecutorService executor = new ExecutorService(name);
705     executor.startExecutorService(ExecutorType.MASTER_OPEN_REGION, 3);
706     executor.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, 3);
707     executor.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, 3);
708     executor.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, 3);
709     return executor;
710   }
711 
712   @Test (timeout=180000)
713   public void testUnassignWithSplitAtSameTime() throws KeeperException,
714       IOException, CoordinatedStateException {
715     // Region to use in test.
716     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
717     // First amend the servermanager mock so that when we do send close of the
718     // first meta region on SERVERNAME_A, it will return true rather than
719     // default null.
720     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true);
721     // Need a mocked catalog tracker.
722     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
723         .getConfiguration());
724     // Create an AM.
725     AssignmentManager am = new AssignmentManager(this.server,
726       this.serverManager, balancer, null, null, master.getTableLockManager());
727     try {
728       // First make sure my mock up basically works.  Unassign a region.
729       unassign(am, SERVERNAME_A, hri);
730       // This delete will fail if the previous unassign did wrong thing.
731       ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_A);
732       // Now put a SPLITTING region in the way.  I don't have to assert it
733       // go put in place.  This method puts it in place then asserts it still
734       // owns it by moving state from SPLITTING to SPLITTING.
735       int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A);
736       // Now, retry the unassign with the SPLTTING in place.  It should just
737       // complete without fail; a sort of 'silent' recognition that the
738       // region to unassign has been split and no longer exists: TOOD: what if
739       // the split fails and the parent region comes back to life?
740       unassign(am, SERVERNAME_A, hri);
741       // This transition should fail if the znode has been messed with.
742       ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A,
743         EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
744       assertFalse(am.getRegionStates().isRegionInTransition(hri));
745     } finally {
746       am.shutdown();
747     }
748   }
749 
750   /**
751    * Tests the processDeadServersAndRegionsInTransition should not fail with NPE
752    * when it failed to get the children. Let's abort the system in this
753    * situation
754    * @throws ServiceException
755    */
756   @Test(timeout = 60000)
757   public void testProcessDeadServersAndRegionsInTransitionShouldNotFailWithNPE()
758       throws IOException, KeeperException, CoordinatedStateException,
759       InterruptedException, ServiceException {
760     final RecoverableZooKeeper recoverableZk = Mockito
761         .mock(RecoverableZooKeeper.class);
762     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
763       this.server, this.serverManager);
764     Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest",
765         null) {
766       @Override
767       public RecoverableZooKeeper getRecoverableZooKeeper() {
768         return recoverableZk;
769       }
770     };
771     ((ZooKeeperWatcher) zkw).registerListener(am);
772     Mockito.doThrow(new InterruptedException()).when(recoverableZk)
773         .getChildren("/hbase/region-in-transition", null);
774     am.setWatcher((ZooKeeperWatcher) zkw);
775     try {
776       am.processDeadServersAndRegionsInTransition(null);
777       fail("Expected to abort");
778     } catch (NullPointerException e) {
779       fail("Should not throw NPE");
780     } catch (RuntimeException e) {
781       assertEquals("Aborted", e.getLocalizedMessage());
782     } finally {
783       am.shutdown();
784     }
785   }
786   /**
787    * TestCase verifies that the regionPlan is updated whenever a region fails to open
788    * and the master tries to process RS_ZK_FAILED_OPEN state.(HBASE-5546).
789    */
790   @Test(timeout = 60000)
791   public void testRegionPlanIsUpdatedWhenRegionFailsToOpen() throws IOException, KeeperException,
792       ServiceException, InterruptedException, CoordinatedStateException {
793     this.server.getConfiguration().setClass(
794       HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockedLoadBalancer.class,
795       LoadBalancer.class);
796     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
797       this.server, this.serverManager);
798     try {
799       // Boolean variable used for waiting until randomAssignment is called and
800       // new
801       // plan is generated.
802       AtomicBoolean gate = new AtomicBoolean(false);
803       if (balancer instanceof MockedLoadBalancer) {
804         ((MockedLoadBalancer) balancer).setGateVariable(gate);
805       }
806       ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
807       int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
808       ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A,
809           EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
810       String path = ZKAssign.getNodeName(this.watcher, REGIONINFO
811           .getEncodedName());
812       am.getRegionStates().updateRegionState(
813         REGIONINFO, State.OPENING, SERVERNAME_A);
814       // a dummy plan inserted into the regionPlans. This plan is cleared and
815       // new one is formed
816       am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(
817           REGIONINFO, null, SERVERNAME_A));
818       RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
819       List<ServerName> serverList = new ArrayList<ServerName>(2);
820       serverList.add(SERVERNAME_B);
821       Mockito.when(
822           this.serverManager.createDestinationServersList(SERVERNAME_A))
823           .thenReturn(serverList);
824       am.nodeDataChanged(path);
825       // here we are waiting until the random assignment in the load balancer is
826       // called.
827       while (!gate.get()) {
828         Thread.sleep(10);
829       }
830       // new region plan may take some time to get updated after random
831       // assignment is called and
832       // gate is set to true.
833       RegionPlan newRegionPlan = am.regionPlans
834           .get(REGIONINFO.getEncodedName());
835       while (newRegionPlan == null) {
836         Thread.sleep(10);
837         newRegionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
838       }
839       // the new region plan created may contain the same RS as destination but
840       // it should
841       // be new plan.
842       assertNotSame("Same region plan should not come", regionPlan,
843           newRegionPlan);
844       assertTrue("Destination servers should be different.", !(regionPlan
845           .getDestination().equals(newRegionPlan.getDestination())));
846 
847       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
848     } finally {
849       this.server.getConfiguration().setClass(
850           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
851           LoadBalancer.class);
852       am.getExecutorService().shutdown();
853       am.shutdown();
854     }
855   }
856 
857   /**
858    * Mocked load balancer class used in the testcase to make sure that the testcase waits until
859    * random assignment is called and the gate variable is set to true.
860    */
861   public static class MockedLoadBalancer extends SimpleLoadBalancer {
862     private AtomicBoolean gate;
863 
864     public void setGateVariable(AtomicBoolean gate) {
865       this.gate = gate;
866     }
867 
868     @Override
869     public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
870       ServerName randomServerName = super.randomAssignment(regionInfo, servers);
871       this.gate.set(true);
872       return randomServerName;
873     }
874 
875     @Override
876     public Map<ServerName, List<HRegionInfo>> retainAssignment(
877         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
878       this.gate.set(true);
879       return super.retainAssignment(regions, servers);
880     }
881   }
882 
883   /**
884    * Test the scenario when the master is in failover and trying to process a
885    * region which is in Opening state on a dead RS. Master will force offline the
886    * region and put it in transition. AM relies on SSH to reassign it.
887    */
888   @Test(timeout = 60000)
889   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
890       KeeperException, ServiceException, CoordinatedStateException, InterruptedException {
891     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
892       this.server, this.serverManager);
893     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
894     int version = ZKAssign.getVersion(this.watcher, REGIONINFO);
895     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
896         EventType.RS_ZK_REGION_OPENING, version);
897     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING,
898         REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY);
899     version = ZKAssign.getVersion(this.watcher, REGIONINFO);
900     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
901     am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done
902     am.getRegionStates().createRegionState(REGIONINFO);
903     am.gate.set(false);
904 
905     BaseCoordinatedStateManager cp = new ZkCoordinatedStateManager();
906     cp.initialize(server);
907     cp.start();
908 
909     OpenRegionCoordination orc = cp.getOpenRegionCoordination();
910     ZkOpenRegionCoordination.ZkOpenRegionDetails zkOrd =
911       new ZkOpenRegionCoordination.ZkOpenRegionDetails();
912     zkOrd.setServerName(server.getServerName());
913     zkOrd.setVersion(version);
914 
915     assertFalse(am.processRegionsInTransition(rt, REGIONINFO, orc, zkOrd));
916     am.getTableStateManager().setTableState(REGIONINFO.getTable(), Table.State.ENABLED);
917     processServerShutdownHandler(am, false);
918     // Waiting for the assignment to get completed.
919     while (!am.gate.get()) {
920       Thread.sleep(10);
921     }
922     assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO
923         .getEncodedName()));
924     am.shutdown();
925   }
926 
927   /**
928    * Test verifies whether assignment is skipped for regions of tables in DISABLING state during
929    * clean cluster startup. See HBASE-6281.
930    *
931    * @throws KeeperException
932    * @throws IOException
933    * @throws Exception
934    */
935   @Test(timeout = 60000)
936   public void testDisablingTableRegionsAssignmentDuringCleanClusterStartup()
937       throws KeeperException, IOException, Exception {
938     this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
939         MockedLoadBalancer.class, LoadBalancer.class);
940     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(
941         new HashMap<ServerName, ServerLoad>(0));
942     List<ServerName> destServers = new ArrayList<ServerName>(1);
943     destServers.add(SERVERNAME_A);
944     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
945     // To avoid cast exception in DisableTableHandler process.
946     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
947 
948     CoordinatedStateManager csm = CoordinatedStateManagerFactory.getCoordinatedStateManager(
949       HTU.getConfiguration());
950     Server server = new HMaster(HTU.getConfiguration(), csm);
951     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
952         this.serverManager);
953 
954     Whitebox.setInternalState(server, "metaTableLocator", Mockito.mock(MetaTableLocator.class));
955 
956     // Make it so we can get a catalogtracker from servermanager.. .needed
957     // down in guts of server shutdown handler.
958     Whitebox.setInternalState(server, "clusterConnection", am.getConnection());
959 
960     AtomicBoolean gate = new AtomicBoolean(false);
961     if (balancer instanceof MockedLoadBalancer) {
962       ((MockedLoadBalancer) balancer).setGateVariable(gate);
963     }
964     try{
965       // set table in disabling state.
966       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
967         Table.State.DISABLING);
968       am.joinCluster();
969       // should not call retainAssignment if we get empty regions in assignAllUserRegions.
970       assertFalse(
971           "Assign should not be invoked for disabling table regions during clean cluster startup.",
972           gate.get());
973       // need to change table state from disabling to disabled.
974       assertTrue("Table should be disabled.",
975           am.getTableStateManager().isTableState(REGIONINFO.getTable(),
976             Table.State.DISABLED));
977     } finally {
978       this.server.getConfiguration().setClass(
979         HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
980         LoadBalancer.class);
981       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
982         Table.State.ENABLED);
983       am.shutdown();
984     }
985   }
986 
987   /**
988    * Test verifies whether all the enabling table regions assigned only once during master startup.
989    *
990    * @throws KeeperException
991    * @throws IOException
992    * @throws Exception
993    */
994   @Test (timeout=180000)
995   public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception {
996     enabling = true;
997     List<ServerName> destServers = new ArrayList<ServerName>(1);
998     destServers.add(SERVERNAME_A);
999     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
1000     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
1001     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
1002     CoordinatedStateManager csm = CoordinatedStateManagerFactory.getCoordinatedStateManager(
1003       HTU.getConfiguration());
1004     Server server = new HMaster(HTU.getConfiguration(), csm);
1005     Whitebox.setInternalState(server, "serverManager", this.serverManager);
1006     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
1007         this.serverManager);
1008 
1009     Whitebox.setInternalState(server, "metaTableLocator", Mockito.mock(MetaTableLocator.class));
1010 
1011     // Make it so we can get a catalogtracker from servermanager.. .needed
1012     // down in guts of server shutdown handler.
1013     Whitebox.setInternalState(server, "clusterConnection", am.getConnection());
1014 
1015     try {
1016       // set table in enabling state.
1017       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
1018         Table.State.ENABLING);
1019       new EnableTableHandler(server, REGIONINFO.getTable(),
1020           am, new NullTableLockManager(), true).prepare()
1021           .process();
1022       assertEquals("Number of assignments should be 1.", 1, assignmentCount);
1023       assertTrue("Table should be enabled.",
1024           am.getTableStateManager().isTableState(REGIONINFO.getTable(),
1025             Table.State.ENABLED));
1026     } finally {
1027       enabling = false;
1028       assignmentCount = 0;
1029       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
1030         Table.State.ENABLED);
1031       am.shutdown();
1032       ZKAssign.deleteAllNodes(this.watcher);
1033     }
1034   }
1035 
1036   /**
1037    * Test verifies whether stale znodes of unknown tables as for the hbase:meta will be removed or
1038    * not.
1039    * @throws KeeperException
1040    * @throws IOException
1041    * @throws Exception
1042    */
1043   @Test (timeout=180000)
1044   public void testMasterRestartShouldRemoveStaleZnodesOfUnknownTableAsForMeta()
1045       throws Exception {
1046     List<ServerName> destServers = new ArrayList<ServerName>(1);
1047     destServers.add(SERVERNAME_A);
1048     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
1049     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
1050     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
1051     CoordinatedStateManager csm = CoordinatedStateManagerFactory.getCoordinatedStateManager(
1052       HTU.getConfiguration());
1053     Server server = new HMaster(HTU.getConfiguration(), csm);
1054     Whitebox.setInternalState(server, "serverManager", this.serverManager);
1055     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
1056         this.serverManager);
1057 
1058     Whitebox.setInternalState(server, "metaTableLocator", Mockito.mock(MetaTableLocator.class));
1059 
1060     // Make it so we can get a catalogtracker from servermanager.. .needed
1061     // down in guts of server shutdown handler.
1062     Whitebox.setInternalState(server, "clusterConnection", am.getConnection());
1063 
1064     try {
1065       TableName tableName = TableName.valueOf("dummyTable");
1066       // set table in enabling state.
1067       am.getTableStateManager().setTableState(tableName,
1068         Table.State.ENABLING);
1069       am.joinCluster();
1070       assertFalse("Table should not be present in zookeeper.",
1071         am.getTableStateManager().isTablePresent(tableName));
1072     } finally {
1073       am.shutdown();
1074     }
1075   }
1076   /**
1077    * When a region is in transition, if the region server opening the region goes down,
1078    * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign).
1079    * This test is to make sure SSH reassigns it right away.
1080    */
1081   @Test (timeout=180000)
1082   public void testSSHTimesOutOpeningRegionTransition()
1083       throws KeeperException, IOException, CoordinatedStateException, ServiceException {
1084     // Create an AM.
1085     AssignmentManagerWithExtrasForTesting am =
1086       setUpMockedAssignmentManager(this.server, this.serverManager);
1087     // adding region in pending open.
1088     RegionState state = new RegionState(REGIONINFO,
1089       State.OPENING, System.currentTimeMillis(), SERVERNAME_A);
1090     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B);
1091     am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state);
1092     // adding region plan
1093     am.regionPlans.put(REGIONINFO.getEncodedName(),
1094       new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A));
1095     am.getTableStateManager().setTableState(REGIONINFO.getTable(),
1096       Table.State.ENABLED);
1097 
1098     try {
1099       am.assignInvoked = false;
1100       processServerShutdownHandler(am, false);
1101       assertTrue(am.assignInvoked);
1102     } finally {
1103       am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName());
1104       am.regionPlans.remove(REGIONINFO.getEncodedName());
1105       am.shutdown();
1106     }
1107   }
1108 
1109   /**
1110    * Scenario:<ul>
1111    *  <li> master starts a close, and creates a znode</li>
1112    *  <li> it fails just at this moment, before contacting the RS</li>
1113    *  <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
1114    *    we don't know, and we have an exception.</li>
1115    *  <li> the master must handle this nicely and reassign.
1116    *  </ul>
1117    */
1118   @Test (timeout=180000)
1119   public void testClosingFailureDuringRecovery() throws Exception {
1120 
1121     AssignmentManagerWithExtrasForTesting am =
1122         setUpMockedAssignmentManager(this.server, this.serverManager);
1123     ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
1124     try {
1125       am.getRegionStates().createRegionState(REGIONINFO);
1126 
1127       assertFalse( am.getRegionStates().isRegionsInTransition() );
1128 
1129       am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
1130 
1131       assertTrue( am.getRegionStates().isRegionsInTransition() );
1132     } finally {
1133       am.shutdown();
1134     }
1135   }
1136 
1137   /**
1138    * Creates a new ephemeral node in the SPLITTING state for the specified region.
1139    * Create it ephemeral in case regionserver dies mid-split.
1140    *
1141    * <p>Does not transition nodes from other states.  If a node already exists
1142    * for this region, a {@link NodeExistsException} will be thrown.
1143    *
1144    * @param zkw zk reference
1145    * @param region region to be created as offline
1146    * @param serverName server event originates from
1147    * @return Version of znode created.
1148    * @throws KeeperException
1149    * @throws IOException
1150    */
1151   // Copied from SplitTransaction rather than open the method over there in
1152   // the regionserver package.
1153   private static int createNodeSplitting(final ZooKeeperWatcher zkw,
1154       final HRegionInfo region, final ServerName serverName)
1155   throws KeeperException, IOException {
1156     RegionTransition rt =
1157       RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
1158         region.getRegionName(), serverName);
1159 
1160     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1161     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1162       throw new IOException("Failed create of ephemeral " + node);
1163     }
1164     // Transition node from SPLITTING to SPLITTING and pick up version so we
1165     // can be sure this znode is ours; version is needed deleting.
1166     return transitionNodeSplitting(zkw, region, serverName, -1);
1167   }
1168 
1169   // Copied from SplitTransaction rather than open the method over there in
1170   // the regionserver package.
1171   private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
1172       final HRegionInfo parent,
1173       final ServerName serverName, final int version)
1174   throws KeeperException, IOException {
1175     return ZKAssign.transitionNode(zkw, parent, serverName,
1176       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
1177   }
1178 
1179   private void unassign(final AssignmentManager am, final ServerName sn,
1180       final HRegionInfo hri) throws RegionException {
1181     // Before I can unassign a region, I need to set it online.
1182     am.regionOnline(hri, sn);
1183     // Unassign region.
1184     am.unassign(hri);
1185   }
1186 
1187   /**
1188    * Create an {@link AssignmentManagerWithExtrasForTesting} that has mocked
1189    * {@link CatalogTracker} etc.
1190    * @param server
1191    * @param manager
1192    * @return An AssignmentManagerWithExtras with mock connections, etc.
1193    * @throws IOException
1194    * @throws KeeperException
1195    */
1196   private AssignmentManagerWithExtrasForTesting setUpMockedAssignmentManager(final Server server,
1197       final ServerManager manager) throws IOException, KeeperException,
1198         ServiceException, CoordinatedStateException {
1199     // Make an RS Interface implementation. Make it so a scanner can go against
1200     // it and a get to return the single region, REGIONINFO, this test is
1201     // messing with. Needed when "new master" joins cluster. AM will try and
1202     // rebuild its list of user regions and it will also get the HRI that goes
1203     // with an encoded name by doing a Get on hbase:meta
1204     ClientProtos.ClientService.BlockingInterface ri =
1205       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
1206     // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO
1207     Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
1208     final ScanResponse.Builder builder = ScanResponse.newBuilder();
1209     builder.setMoreResults(true);
1210     builder.addCellsPerResult(r.size());
1211     final List<CellScannable> rows = new ArrayList<CellScannable>(1);
1212     rows.add(r);
1213     Answer<ScanResponse> ans = new Answer<ClientProtos.ScanResponse>() {
1214       @Override
1215       public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
1216         PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
1217             .getArguments()[0];
1218         if (controller != null) {
1219           controller.setCellScanner(CellUtil.createCellScanner(rows));
1220         }
1221         return builder.build();
1222       }
1223     };
1224     if (enabling) {
1225       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any()))
1226           .thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans)
1227           .thenReturn(ScanResponse.newBuilder().setMoreResults(false).build());
1228     } else {
1229       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any())).thenAnswer(
1230           ans);
1231     }
1232     // If a get, return the above result too for REGIONINFO
1233     GetResponse.Builder getBuilder = GetResponse.newBuilder();
1234     getBuilder.setResult(ProtobufUtil.toResult(r));
1235     Mockito.when(ri.get((RpcController)Mockito.any(), (GetRequest) Mockito.any())).
1236       thenReturn(getBuilder.build());
1237     // Get a connection w/ mocked up common methods.
1238     ClusterConnection connection = (ClusterConnection)HConnectionTestingUtility.
1239       getMockedConnectionAndDecorate(HTU.getConfiguration(), null,
1240         ri, SERVERNAME_B, REGIONINFO);
1241     // These mocks were done up when all connections were managed.  World is different now we
1242     // moved to unmanaged connections.  It messes up the intercepts done in these tests.
1243     // Just mark connections as marked and then down in MetaTableAccessor, it will go the path
1244     // that picks up the above mocked up 'implementation' so 'scans' of meta return the expected
1245     // result.  Redo in new realm of unmanaged connections.
1246     Mockito.when(connection.isManaged()).thenReturn(true);
1247     // Make it so we can get the connection from our mocked catalogtracker
1248     // Create and startup an executor. Used by AM handling zk callbacks.
1249     ExecutorService executor = startupMasterExecutor("mockedAMExecutor");
1250     this.balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
1251     AssignmentManagerWithExtrasForTesting am = new AssignmentManagerWithExtrasForTesting(
1252       server, connection, manager, this.balancer, executor, new NullTableLockManager());
1253     return am;
1254   }
1255 
1256   /**
1257    * An {@link AssignmentManager} with some extra facility used testing
1258    */
1259   class AssignmentManagerWithExtrasForTesting extends AssignmentManager {
1260     // Keep a reference so can give it out below in {@link #getExecutorService}
1261     private final ExecutorService es;
1262     boolean processRITInvoked = false;
1263     boolean assignInvoked = false;
1264     AtomicBoolean gate = new AtomicBoolean(true);
1265     private ClusterConnection connection;
1266 
1267     public AssignmentManagerWithExtrasForTesting(
1268         final Server master, ClusterConnection connection, final ServerManager serverManager,
1269         final LoadBalancer balancer,
1270         final ExecutorService service, final TableLockManager tableLockManager)
1271             throws KeeperException, IOException, CoordinatedStateException {
1272       super(master, serverManager, balancer, service, null, tableLockManager);
1273       this.es = service;
1274       this.connection = connection;
1275     }
1276 
1277     @Override
1278     boolean processRegionInTransition(String encodedRegionName,
1279         HRegionInfo regionInfo) throws KeeperException, IOException {
1280       this.processRITInvoked = true;
1281       return super.processRegionInTransition(encodedRegionName, regionInfo);
1282     }
1283 
1284     @Override
1285     public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) {
1286       if (enabling) {
1287         assignmentCount++;
1288         this.regionOnline(region, SERVERNAME_A);
1289       } else {
1290         super.assign(region, setOfflineInZK, forceNewPlan);
1291         this.gate.set(true);
1292       }
1293     }
1294 
1295     @Override
1296     boolean assign(ServerName destination, List<HRegionInfo> regions)
1297         throws InterruptedException {
1298       if (enabling) {
1299         for (HRegionInfo region : regions) {
1300           assignmentCount++;
1301           this.regionOnline(region, SERVERNAME_A);
1302         }
1303         return true;
1304       }
1305       return super.assign(destination, regions);
1306     }
1307 
1308     @Override
1309     public void assign(List<HRegionInfo> regions)
1310         throws IOException, InterruptedException {
1311       assignInvoked = (regions != null && regions.size() > 0);
1312       super.assign(regions);
1313       this.gate.set(true);
1314     }
1315 
1316     /** reset the watcher */
1317     void setWatcher(ZooKeeperWatcher watcher) {
1318       this.watcher = watcher;
1319     }
1320 
1321     /**
1322      * @return ExecutorService used by this instance.
1323      */
1324     ExecutorService getExecutorService() {
1325       return this.es;
1326     }
1327 
1328     /*
1329      * Convenient method to retrieve mocked up connection
1330      */
1331     ClusterConnection getConnection() {
1332       return this.connection;
1333     }
1334 
1335     @Override
1336     public void shutdown() {
1337       super.shutdown();
1338       if (this.connection != null)
1339         try {
1340           this.connection.close();
1341         } catch (IOException e) {
1342           fail("Failed to close connection");
1343         }
1344     }
1345   }
1346 
1347   /**
1348    * Call joinCluster on the passed AssignmentManager.  Do it in a thread
1349    * so it runs independent of what all else is going on.  Try to simulate
1350    * an AM running insided a failed over master by clearing all in-memory
1351    * AM state first.
1352   */
1353   private void startFakeFailedOverMasterAssignmentManager(final AssignmentManager am,
1354       final ZooKeeperWatcher watcher) {
1355     // Make sure our new AM gets callbacks; once registered, we can't unregister.
1356     // Thats ok because we make a new zk watcher for each test.
1357     watcher.registerListenerFirst(am);
1358     Thread t = new Thread("RunAmJoinCluster") {
1359       @Override
1360       public void run() {
1361         // Call the joinCluster function as though we were doing a master
1362         // failover at this point. It will stall just before we go to add
1363         // the RIT region to our RIT Map in AM at processRegionsInTransition.
1364         // First clear any inmemory state from AM so it acts like a new master
1365         // coming on line.
1366         am.getRegionStates().regionsInTransition.clear();
1367         am.regionPlans.clear();
1368         try {
1369           am.joinCluster();
1370         } catch (IOException e) {
1371           throw new RuntimeException(e);
1372         } catch (KeeperException e) {
1373           throw new RuntimeException(e);
1374         } catch (InterruptedException e) {
1375           throw new RuntimeException(e);
1376         } catch (CoordinatedStateException e) {
1377           throw new RuntimeException(e);
1378         }
1379       }
1380     };
1381     t.start();
1382     while (!t.isAlive()) Threads.sleep(1);
1383   }
1384 
1385   @Test (timeout=180000)
1386   public void testForceAssignMergingRegion() throws Exception {
1387     // Region to use in test.
1388     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1389     // Need a mocked catalog tracker.
1390     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1391       server.getConfiguration());
1392     // Create an AM.
1393     AssignmentManager am = new AssignmentManager(this.server,
1394       this.serverManager, balancer, null, null, master.getTableLockManager());
1395     RegionStates regionStates = am.getRegionStates();
1396     try {
1397       // First set the state of the region to merging
1398       regionStates.updateRegionState(hri, RegionState.State.MERGING);
1399       // Now, try to assign it with force new plan
1400       am.assign(hri, true, true);
1401       assertEquals("The region should be still in merging state",
1402         RegionState.State.MERGING, regionStates.getRegionState(hri).getState());
1403     } finally {
1404       am.shutdown();
1405     }
1406   }
1407 
1408   /**
1409    * Test assignment related ZK events are ignored by AM if the region is not known
1410    * by AM to be in transition. During normal operation, all assignments are started
1411    * by AM (not considering split/merge), if an event is received but the region
1412    * is not in transition, the event must be a very late one. So it can be ignored.
1413    * During master failover, since AM watches assignment znodes after failover cleanup
1414    * is completed, when an event comes in, AM should already have the region in transition
1415    * if ZK is used during the assignment action (only hbck doesn't use ZK for region
1416    * assignment). So during master failover, we can ignored such events too.
1417    */
1418   @Test (timeout=180000)
1419   public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException,
1420       CoordinatedStateException {
1421     // Region to use in test.
1422     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1423     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1424       server.getConfiguration());
1425     final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
1426     // Create an AM.
1427     AssignmentManager am = new AssignmentManager(this.server,
1428       this.serverManager, balancer, null, null, master.getTableLockManager()) {
1429 
1430       @Override
1431       void handleRegion(final RegionTransition rt, OpenRegionCoordination coordination,
1432                         OpenRegionCoordination.OpenRegionDetails ord) {
1433         super.handleRegion(rt, coordination, ord);
1434         if (rt != null && Bytes.equals(hri.getRegionName(),
1435           rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
1436           zkEventProcessed.set(true);
1437         }
1438       }
1439     };
1440     try {
1441       // First make sure the region is not in transition
1442       am.getRegionStates().regionOffline(hri);
1443       zkEventProcessed.set(false); // Reset it before faking zk transition
1444       this.watcher.registerListenerFirst(am);
1445       assertFalse("The region should not be in transition",
1446         am.getRegionStates().isRegionInTransition(hri));
1447       ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
1448       // Trigger a transition event
1449       ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
1450       long startTime = EnvironmentEdgeManager.currentTime();
1451       while (!zkEventProcessed.get()) {
1452         assertTrue("Timed out in waiting for ZK event to be processed",
1453           EnvironmentEdgeManager.currentTime() - startTime < 30000);
1454         Threads.sleepWithoutInterrupt(100);
1455       }
1456       assertFalse(am.getRegionStates().isRegionInTransition(hri));
1457     } finally {
1458       am.shutdown();
1459     }
1460   }
1461 
1462   /**
1463    * If a table is deleted, we should not be able to balance it anymore.
1464    * Otherwise, the region will be brought back.
1465    * @throws Exception
1466    */
1467   @Test (timeout=180000)
1468   public void testBalanceRegionOfDeletedTable() throws Exception {
1469     AssignmentManager am = new AssignmentManager(this.server, this.serverManager,
1470       balancer, null, null, master.getTableLockManager());
1471     RegionStates regionStates = am.getRegionStates();
1472     HRegionInfo hri = REGIONINFO;
1473     regionStates.createRegionState(hri);
1474     assertFalse(regionStates.isRegionInTransition(hri));
1475     RegionPlan plan = new RegionPlan(hri, SERVERNAME_A, SERVERNAME_B);
1476     // Fake table is deleted
1477     regionStates.tableDeleted(hri.getTable());
1478     am.balance(plan);
1479     assertFalse("The region should not in transition",
1480       regionStates.isRegionInTransition(hri));
1481     am.shutdown();
1482   }
1483 
1484   /**
1485    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
1486    * for openRegion. AM should assign this somewhere else. (HBASE-9721)
1487    */
1488   @SuppressWarnings("unchecked")
1489   @Test (timeout=180000)
1490   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
1491     Mockito.when(this.serverManager.sendRegionOpen(Mockito.eq(SERVERNAME_B), Mockito.eq(REGIONINFO),
1492       Mockito.anyInt(), (List<ServerName>)Mockito.any()))
1493       .thenThrow(new DoNotRetryIOException());
1494     this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 100);
1495 
1496     HRegionInfo hri = REGIONINFO;
1497     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1498       server.getConfiguration());
1499     // Create an AM.
1500     AssignmentManager am = new AssignmentManager(this.server,
1501       this.serverManager, balancer, null, null, master.getTableLockManager());
1502     RegionStates regionStates = am.getRegionStates();
1503     try {
1504       am.regionPlans.put(REGIONINFO.getEncodedName(),
1505         new RegionPlan(REGIONINFO, null, SERVERNAME_B));
1506 
1507       // Should fail once, but succeed on the second attempt for the SERVERNAME_A
1508       am.assign(hri, true, false);
1509     } finally {
1510       assertEquals(SERVERNAME_A, regionStates.getRegionState(REGIONINFO).getServerName());
1511       am.shutdown();
1512     }
1513   }
1514 }