View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotSame;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.hbase.CellScannable;
34  import org.apache.hadoop.hbase.CellUtil;
35  import org.apache.hadoop.hbase.HBaseConfiguration;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.MediumTests;
40  import org.apache.hadoop.hbase.RegionException;
41  import org.apache.hadoop.hbase.RegionTransition;
42  import org.apache.hadoop.hbase.Server;
43  import org.apache.hadoop.hbase.ServerLoad;
44  import org.apache.hadoop.hbase.ServerName;
45  import org.apache.hadoop.hbase.TableName;
46  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
47  import org.apache.hadoop.hbase.catalog.CatalogTracker;
48  import org.apache.hadoop.hbase.catalog.MetaMockingUtil;
49  import org.apache.hadoop.hbase.client.HConnection;
50  import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
51  import org.apache.hadoop.hbase.client.Result;
52  import org.apache.hadoop.hbase.exceptions.DeserializationException;
53  import org.apache.hadoop.hbase.executor.EventType;
54  import org.apache.hadoop.hbase.executor.ExecutorService;
55  import org.apache.hadoop.hbase.executor.ExecutorType;
56  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
57  import org.apache.hadoop.hbase.master.RegionState.State;
58  import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager;
59  import org.apache.hadoop.hbase.master.balancer.DefaultLoadBalancer;
60  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
61  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
62  import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
63  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
64  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
65  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
66  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
67  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ResultCellMeta;
68  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
69  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
70  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
71  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
72  import org.apache.hadoop.hbase.util.Bytes;
73  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
74  import org.apache.hadoop.hbase.util.Threads;
75  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
76  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
77  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
78  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
79  import org.apache.zookeeper.KeeperException;
80  import org.apache.zookeeper.KeeperException.NodeExistsException;
81  import org.apache.zookeeper.Watcher;
82  import org.junit.After;
83  import org.junit.AfterClass;
84  import org.junit.Before;
85  import org.junit.BeforeClass;
86  import org.junit.Test;
87  import org.junit.experimental.categories.Category;
88  import org.mockito.Mockito;
89  import org.mockito.internal.util.reflection.Whitebox;
90  import org.mockito.invocation.InvocationOnMock;
91  import org.mockito.stubbing.Answer;
92  
93  import com.google.protobuf.RpcController;
94  import com.google.protobuf.ServiceException;
95  
96  
97  /**
98   * Test {@link AssignmentManager}
99   */
100 @Category(MediumTests.class)
101 public class TestAssignmentManager {
102   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
103   private static final ServerName SERVERNAME_A =
104     new ServerName("example.org", 1234, 5678);
105   private static final ServerName SERVERNAME_B =
106     new ServerName("example.org", 0, 5678);
107   private static final HRegionInfo REGIONINFO =
108     new HRegionInfo(TableName.valueOf("t"),
109       HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW);
110   private static int assignmentCount;
111   private static boolean enabling = false;
112 
113   // Mocked objects or; get redone for each test.
114   private Server server;
115   private ServerManager serverManager;
116   private ZooKeeperWatcher watcher;
117   private LoadBalancer balancer;
118   private HMaster master;
119 
120   @BeforeClass
121   public static void beforeClass() throws Exception {
122     HTU.startMiniZKCluster();
123   }
124 
125   @AfterClass
126   public static void afterClass() throws IOException {
127     HTU.shutdownMiniZKCluster();
128   }
129 
130   @Before
131   public void before() throws ZooKeeperConnectionException, IOException {
132     // TODO: Make generic versions of what we do below and put up in a mocking
133     // utility class or move up into HBaseTestingUtility.
134 
135     // Mock a Server.  Have it return a legit Configuration and ZooKeeperWatcher.
136     // If abort is called, be sure to fail the test (don't just swallow it
137     // silently as is mockito default).
138     this.server = Mockito.mock(Server.class);
139     Mockito.when(server.getServerName()).thenReturn(new ServerName("master,1,1"));
140     Mockito.when(server.getConfiguration()).thenReturn(HTU.getConfiguration());
141     this.watcher =
142       new ZooKeeperWatcher(HTU.getConfiguration(), "mockedServer", this.server, true);
143     Mockito.when(server.getZooKeeper()).thenReturn(this.watcher);
144     Mockito.doThrow(new RuntimeException("Aborted")).
145       when(server).abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
146 
147     // Mock a ServerManager.  Say server SERVERNAME_{A,B} are online.  Also
148     // make it so if close or open, we return 'success'.
149     this.serverManager = Mockito.mock(ServerManager.class);
150     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
151     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
152     Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer());
153     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
154     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
155     onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
156     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
157         new ArrayList<ServerName>(onlineServers.keySet()));
158     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
159 
160     List<ServerName> avServers = new ArrayList<ServerName>();
161     avServers.addAll(onlineServers.keySet());
162     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
163     Mockito.when(this.serverManager.createDestinationServersList(null)).thenReturn(avServers);
164 
165     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)).
166       thenReturn(true);
167     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
168       thenReturn(true);
169     // Ditto on open.
170     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
171       thenReturn(RegionOpeningState.OPENED);
172     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
173       thenReturn(RegionOpeningState.OPENED);
174     this.master = Mockito.mock(HMaster.class);
175 
176     Mockito.when(this.master.getServerManager()).thenReturn(serverManager);
177   }
178 
179   @After
180     public void after() throws KeeperException {
181     if (this.watcher != null) {
182       // Clean up all znodes
183       ZKAssign.deleteAllNodes(this.watcher);
184       this.watcher.close();
185     }
186   }
187 
188   /**
189    * Test a balance going on at same time as a master failover
190    *
191    * @throws IOException
192    * @throws KeeperException
193    * @throws InterruptedException
194    * @throws DeserializationException
195    */
196   @Test(timeout = 60000)
197   public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
198   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
199     AssignmentManagerWithExtrasForTesting am =
200       setUpMockedAssignmentManager(this.server, this.serverManager);
201     try {
202       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
203       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
204       while (!am.processRITInvoked) Thread.sleep(1);
205       // As part of the failover cleanup, the balancing region plan is removed.
206       // So a random server will be used to open the region. For testing purpose,
207       // let's assume it is going to open on server b:
208       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
209 
210       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
211 
212       // Now fake the region closing successfully over on the regionserver; the
213       // regionserver will have set the region in CLOSED state. This will
214       // trigger callback into AM. The below zk close call is from the RS close
215       // region handler duplicated here because its down deep in a private
216       // method hard to expose.
217       int versionid =
218         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
219       assertNotSame(versionid, -1);
220       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
221 
222       // Get current versionid else will fail on transition from OFFLINE to
223       // OPENING below
224       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
225       assertNotSame(-1, versionid);
226       // This uglyness below is what the openregionhandler on RS side does.
227       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
228         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
229         EventType.RS_ZK_REGION_OPENING, versionid);
230       assertNotSame(-1, versionid);
231       // Move znode from OPENING to OPENED as RS does on successful open.
232       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
233         SERVERNAME_B, versionid);
234       assertNotSame(-1, versionid);
235       am.gate.set(false);
236       // Block here until our znode is cleared or until this test times out.
237       ZKAssign.blockUntilNoRIT(watcher);
238     } finally {
239       am.getExecutorService().shutdown();
240       am.shutdown();
241     }
242   }
243 
244   @Test(timeout = 60000)
245   public void testBalanceOnMasterFailoverScenarioWithClosedNode()
246   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
247     AssignmentManagerWithExtrasForTesting am =
248       setUpMockedAssignmentManager(this.server, this.serverManager);
249     try {
250       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
251       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
252       while (!am.processRITInvoked) Thread.sleep(1);
253       // As part of the failover cleanup, the balancing region plan is removed.
254       // So a random server will be used to open the region. For testing purpose,
255       // let's assume it is going to open on server b:
256       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
257 
258       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
259 
260       // Now fake the region closing successfully over on the regionserver; the
261       // regionserver will have set the region in CLOSED state. This will
262       // trigger callback into AM. The below zk close call is from the RS close
263       // region handler duplicated here because its down deep in a private
264       // method hard to expose.
265       int versionid =
266         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
267       assertNotSame(versionid, -1);
268       am.gate.set(false);
269       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
270 
271       // Get current versionid else will fail on transition from OFFLINE to
272       // OPENING below
273       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
274       assertNotSame(-1, versionid);
275       // This uglyness below is what the openregionhandler on RS side does.
276       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
277           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
278           EventType.RS_ZK_REGION_OPENING, versionid);
279       assertNotSame(-1, versionid);
280       // Move znode from OPENING to OPENED as RS does on successful open.
281       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
282           SERVERNAME_B, versionid);
283       assertNotSame(-1, versionid);
284 
285       // Block here until our znode is cleared or until this test timesout.
286       ZKAssign.blockUntilNoRIT(watcher);
287     } finally {
288       am.getExecutorService().shutdown();
289       am.shutdown();
290     }
291   }
292 
293   @Test(timeout = 60000)
294   public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
295   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
296     AssignmentManagerWithExtrasForTesting am =
297       setUpMockedAssignmentManager(this.server, this.serverManager);
298     try {
299       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
300       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
301       while (!am.processRITInvoked) Thread.sleep(1);
302       // As part of the failover cleanup, the balancing region plan is removed.
303       // So a random server will be used to open the region. For testing purpose,
304       // let's assume it is going to open on server b:
305       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
306 
307       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
308 
309       // Now fake the region closing successfully over on the regionserver; the
310       // regionserver will have set the region in CLOSED state. This will
311       // trigger callback into AM. The below zk close call is from the RS close
312       // region handler duplicated here because its down deep in a private
313       // method hard to expose.
314       int versionid =
315         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
316       assertNotSame(versionid, -1);
317       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
318 
319       am.gate.set(false);
320       // Get current versionid else will fail on transition from OFFLINE to
321       // OPENING below
322       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
323       assertNotSame(-1, versionid);
324       // This uglyness below is what the openregionhandler on RS side does.
325       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
326           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
327           EventType.RS_ZK_REGION_OPENING, versionid);
328       assertNotSame(-1, versionid);
329       // Move znode from OPENING to OPENED as RS does on successful open.
330       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
331           SERVERNAME_B, versionid);
332       assertNotSame(-1, versionid);
333       // Block here until our znode is cleared or until this test timesout.
334       ZKAssign.blockUntilNoRIT(watcher);
335     } finally {
336       am.getExecutorService().shutdown();
337       am.shutdown();
338     }
339   }
340 
341   private void createRegionPlanAndBalance(
342       final AssignmentManager am, final ServerName from,
343       final ServerName to, final HRegionInfo hri) throws RegionException {
344     // Call the balance function but fake the region being online first at
345     // servername from.
346     am.regionOnline(hri, from);
347     // Balance region from 'from' to 'to'. It calls unassign setting CLOSING state
348     // up in zk.  Create a plan and balance
349     am.balance(new RegionPlan(hri, from, to));
350   }
351 
352   /**
353    * Tests AssignmentManager balance function.  Runs a balance moving a region
354    * from one server to another mocking regionserver responding over zk.
355    * @throws IOException
356    * @throws KeeperException
357    * @throws DeserializationException
358    */
359   @Test
360   public void testBalance()
361     throws IOException, KeeperException, DeserializationException, InterruptedException {
362     // Create and startup an executor.  This is used by AssignmentManager
363     // handling zk callbacks.
364     ExecutorService executor = startupMasterExecutor("testBalanceExecutor");
365 
366     // We need a mocked catalog tracker.
367     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
368     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
369         .getConfiguration());
370     // Create an AM.
371     AssignmentManager am = new AssignmentManager(this.server,
372       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
373     am.failoverCleanupDone.set(true);
374     try {
375       // Make sure our new AM gets callbacks; once registered, can't unregister.
376       // Thats ok because we make a new zk watcher for each test.
377       this.watcher.registerListenerFirst(am);
378       // Call the balance function but fake the region being online first at
379       // SERVERNAME_A.  Create a balance plan.
380       am.regionOnline(REGIONINFO, SERVERNAME_A);
381       // Balance region from A to B.
382       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
383       am.balance(plan);
384 
385       RegionStates regionStates = am.getRegionStates();
386       // Must be failed to close since the server is fake
387       assertTrue(regionStates.isRegionInTransition(REGIONINFO)
388         && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
389       // Move it back to pending_close
390       regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
391 
392       // Now fake the region closing successfully over on the regionserver; the
393       // regionserver will have set the region in CLOSED state.  This will
394       // trigger callback into AM. The below zk close call is from the RS close
395       // region handler duplicated here because its down deep in a private
396       // method hard to expose.
397       int versionid =
398         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
399       assertNotSame(versionid, -1);
400       // AM is going to notice above CLOSED and queue up a new assign.  The
401       // assign will go to open the region in the new location set by the
402       // balancer.  The zk node will be OFFLINE waiting for regionserver to
403       // transition it through OPENING, OPENED.  Wait till we see the OFFLINE
404       // zk node before we proceed.
405       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
406 
407       // Get current versionid else will fail on transition from OFFLINE to OPENING below
408       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
409       assertNotSame(-1, versionid);
410       // This uglyness below is what the openregionhandler on RS side does.
411       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
412         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
413         EventType.RS_ZK_REGION_OPENING, versionid);
414       assertNotSame(-1, versionid);
415       // Move znode from OPENING to OPENED as RS does on successful open.
416       versionid =
417         ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
418       assertNotSame(-1, versionid);
419       // Wait on the handler removing the OPENED znode.
420       while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
421     } finally {
422       executor.shutdown();
423       am.shutdown();
424       // Clean up all znodes
425       ZKAssign.deleteAllNodes(this.watcher);
426     }
427   }
428 
429   /**
430    * Run a simple server shutdown handler.
431    * @throws KeeperException
432    * @throws IOException
433    */
434   @Test
435   public void testShutdownHandler()
436       throws KeeperException, IOException, ServiceException {
437     // Create and startup an executor.  This is used by AssignmentManager
438     // handling zk callbacks.
439     ExecutorService executor = startupMasterExecutor("testShutdownHandler");
440 
441     // We need a mocked catalog tracker.
442     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
443     // Create an AM.
444     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
445         this.server, this.serverManager);
446     try {
447       processServerShutdownHandler(ct, am, false);
448     } finally {
449       executor.shutdown();
450       am.shutdown();
451       // Clean up all znodes
452       ZKAssign.deleteAllNodes(this.watcher);
453     }
454   }
455 
456   /**
457    * To test closed region handler to remove rit and delete corresponding znode
458    * if region in pending close or closing while processing shutdown of a region
459    * server.(HBASE-5927).
460    *
461    * @throws KeeperException
462    * @throws IOException
463    * @throws ServiceException
464    */
465   @Test
466   public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException,
467       ServiceException {
468     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
469     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
470   }
471 
472 
473   /**
474    * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
475    * has actually completed the splitting in META but went down. See HBASE-6070 and also HBASE-5806
476    *
477    * @throws KeeperException
478    * @throws IOException
479    */
480   @Test
481   public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
482     // true indicates the region is split but still in RIT
483     testCaseWithSplitRegionPartial(true);
484     // false indicate the region is not split
485     testCaseWithSplitRegionPartial(false);
486   }
487 
488   private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
489       IOException, NodeExistsException, InterruptedException, ServiceException {
490     // Create and startup an executor. This is used by AssignmentManager
491     // handling zk callbacks.
492     ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
493     // We need a mocked catalog tracker.
494     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
495     ZKAssign.deleteAllNodes(this.watcher);
496 
497     // Create an AM.
498     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
499       this.server, this.serverManager);
500     // adding region to regions and servers maps.
501     am.regionOnline(REGIONINFO, SERVERNAME_A);
502     // adding region in pending close.
503     am.getRegionStates().updateRegionState(
504       REGIONINFO, State.SPLITTING, SERVERNAME_A);
505     am.getZKTable().setEnabledTable(REGIONINFO.getTableName());
506     RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
507         REGIONINFO.getRegionName(), SERVERNAME_A);
508     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
509     // create znode in M_ZK_REGION_CLOSING state.
510     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
511 
512     try {
513       processServerShutdownHandler(ct, am, regionSplitDone);
514       // check znode deleted or not.
515       // In both cases the znode should be deleted.
516 
517       if (regionSplitDone) {
518         assertFalse("Region state of region in SPLITTING should be removed from rit.",
519             am.getRegionStates().isRegionsInTransition());
520       } else {
521         while (!am.assignInvoked) {
522           Thread.sleep(1);
523         }
524         assertTrue("Assign should be invoked.", am.assignInvoked);
525       }
526     } finally {
527       REGIONINFO.setOffline(false);
528       REGIONINFO.setSplit(false);
529       executor.shutdown();
530       am.shutdown();
531       // Clean up all znodes
532       ZKAssign.deleteAllNodes(this.watcher);
533     }
534   }
535 
536   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
537       IOException, NodeExistsException, ServiceException {
538     // Create and startup an executor. This is used by AssignmentManager
539     // handling zk callbacks.
540     ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
541     // We need a mocked catalog tracker.
542     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
543     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
544     ZKAssign.deleteAllNodes(this.watcher);
545 
546     // Create an AM.
547     AssignmentManager am = new AssignmentManager(this.server,
548       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
549     // adding region to regions and servers maps.
550     am.regionOnline(REGIONINFO, SERVERNAME_A);
551     // adding region in pending close.
552     am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
553     if (state == Table.State.DISABLING) {
554       am.getZKTable().setDisablingTable(REGIONINFO.getTableName());
555     } else {
556       am.getZKTable().setDisabledTable(REGIONINFO.getTableName());
557     }
558     RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
559         REGIONINFO.getRegionName(), SERVERNAME_A);
560     // RegionTransitionData data = new
561     // RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
562     // REGIONINFO.getRegionName(), SERVERNAME_A);
563     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
564     // create znode in M_ZK_REGION_CLOSING state.
565     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
566 
567     try {
568       processServerShutdownHandler(ct, am, false);
569       // check znode deleted or not.
570       // In both cases the znode should be deleted.
571       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
572       // check whether in rit or not. In the DISABLING case also the below
573       // assert will be true but the piece of code added for HBASE-5927 will not
574       // do that.
575       if (state == Table.State.DISABLED) {
576         assertFalse("Region state of region in pending close should be removed from rit.",
577             am.getRegionStates().isRegionsInTransition());
578       }
579     } finally {
580       am.setEnabledTable(REGIONINFO.getTableName());
581       executor.shutdown();
582       am.shutdown();
583       // Clean up all znodes
584       ZKAssign.deleteAllNodes(this.watcher);
585     }
586   }
587 
588   private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion)
589       throws IOException, ServiceException {
590     // Make sure our new AM gets callbacks; once registered, can't unregister.
591     // Thats ok because we make a new zk watcher for each test.
592     this.watcher.registerListenerFirst(am);
593 
594     // Need to set up a fake scan of meta for the servershutdown handler
595     // Make an RS Interface implementation.  Make it so a scanner can go against it.
596     ClientProtos.ClientService.BlockingInterface implementation =
597       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
598     // Get a meta row result that has region up on SERVERNAME_A
599 
600     Result r;
601     if (splitRegion) {
602       r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
603     } else {
604       r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
605     }
606 
607     final ScanResponse.Builder builder = ScanResponse.newBuilder();
608     builder.setMoreResults(true);
609     ResultCellMeta.Builder metaBuilder = ResultCellMeta.newBuilder();
610     metaBuilder.addCellsLength(r.size());
611     builder.setResultCellMeta(metaBuilder.build());
612     final List<CellScannable> cellScannables = new ArrayList<CellScannable>(1);
613     cellScannables.add(r);
614     Mockito.when(implementation.scan(
615       (RpcController)Mockito.any(), (ScanRequest)Mockito.any())).
616       thenAnswer(new Answer<ScanResponse>() {
617           public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
618             PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
619                 .getArguments()[0];
620             if (controller != null) {
621               controller.setCellScanner(CellUtil.createCellScanner(cellScannables));
622             }
623             return builder.build();
624           }
625       });
626 
627     // Get a connection w/ mocked up common methods.
628     HConnection connection =
629       HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(),
630         null, implementation, SERVERNAME_B, REGIONINFO);
631 
632     // Make it so we can get a catalogtracker from servermanager.. .needed
633     // down in guts of server shutdown handler.
634     Mockito.when(ct.getConnection()).thenReturn(connection);
635     Mockito.when(this.server.getCatalogTracker()).thenReturn(ct);
636 
637     // Now make a server shutdown handler instance and invoke process.
638     // Have it that SERVERNAME_A died.
639     DeadServer deadServers = new DeadServer();
640     deadServers.add(SERVERNAME_A);
641     // I need a services instance that will return the AM
642     MasterServices services = Mockito.mock(MasterServices.class);
643     Mockito.when(services.getAssignmentManager()).thenReturn(am);
644     Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
645     Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
646     ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
647       services, deadServers, SERVERNAME_A, false);
648     am.failoverCleanupDone.set(true);
649     handler.process();
650     // The region in r will have been assigned.  It'll be up in zk as unassigned.
651   }
652 
653   /**
654    * Create and startup executor pools. Start same set as master does (just
655    * run a few less).
656    * @param name Name to give our executor
657    * @return Created executor (be sure to call shutdown when done).
658    */
659   private ExecutorService startupMasterExecutor(final String name) {
660     // TODO: Move up into HBaseTestingUtility?  Generally useful.
661     ExecutorService executor = new ExecutorService(name);
662     executor.startExecutorService(ExecutorType.MASTER_OPEN_REGION, 3);
663     executor.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, 3);
664     executor.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, 3);
665     executor.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, 3);
666     return executor;
667   }
668 
669   @Test
670   public void testUnassignWithSplitAtSameTime() throws KeeperException, IOException {
671     // Region to use in test.
672     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
673     // First amend the servermanager mock so that when we do send close of the
674     // first meta region on SERVERNAME_A, it will return true rather than
675     // default null.
676     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true);
677     // Need a mocked catalog tracker.
678     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
679     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
680         .getConfiguration());
681     // Create an AM.
682     AssignmentManager am = new AssignmentManager(this.server,
683       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
684     try {
685       // First make sure my mock up basically works.  Unassign a region.
686       unassign(am, SERVERNAME_A, hri);
687       // This delete will fail if the previous unassign did wrong thing.
688       ZKAssign.deleteClosingNode(this.watcher, hri);
689       // Now put a SPLITTING region in the way.  I don't have to assert it
690       // go put in place.  This method puts it in place then asserts it still
691       // owns it by moving state from SPLITTING to SPLITTING.
692       int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A);
693       // Now, retry the unassign with the SPLTTING in place.  It should just
694       // complete without fail; a sort of 'silent' recognition that the
695       // region to unassign has been split and no longer exists: TOOD: what if
696       // the split fails and the parent region comes back to life?
697       unassign(am, SERVERNAME_A, hri);
698       // This transition should fail if the znode has been messed with.
699       ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A,
700         EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
701       assertFalse(am.getRegionStates().isRegionInTransition(hri));
702     } finally {
703       am.shutdown();
704     }
705   }
706 
707   /**
708    * Tests the processDeadServersAndRegionsInTransition should not fail with NPE
709    * when it failed to get the children. Let's abort the system in this
710    * situation
711    * @throws ServiceException
712    */
713   @Test(timeout = 60000)
714   public void testProcessDeadServersAndRegionsInTransitionShouldNotFailWithNPE()
715       throws IOException, KeeperException, InterruptedException, ServiceException {
716     final RecoverableZooKeeper recoverableZk = Mockito
717         .mock(RecoverableZooKeeper.class);
718     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
719       this.server, this.serverManager);
720     Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest",
721         null) {
722       public RecoverableZooKeeper getRecoverableZooKeeper() {
723         return recoverableZk;
724       }
725     };
726     ((ZooKeeperWatcher) zkw).registerListener(am);
727     Mockito.doThrow(new InterruptedException()).when(recoverableZk)
728         .getChildren("/hbase/region-in-transition", null);
729     am.setWatcher((ZooKeeperWatcher) zkw);
730     try {
731       am.processDeadServersAndRegionsInTransition(null);
732       fail("Expected to abort");
733     } catch (NullPointerException e) {
734       fail("Should not throw NPE");
735     } catch (RuntimeException e) {
736       assertEquals("Aborted", e.getLocalizedMessage());
737     }
738   }
739   /**
740    * TestCase verifies that the regionPlan is updated whenever a region fails to open
741    * and the master tries to process RS_ZK_FAILED_OPEN state.(HBASE-5546).
742    */
743   @Test(timeout = 60000)
744   public void testRegionPlanIsUpdatedWhenRegionFailsToOpen() throws IOException, KeeperException,
745       ServiceException, InterruptedException {
746     this.server.getConfiguration().setClass(
747       HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockedLoadBalancer.class,
748       LoadBalancer.class);
749     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
750       this.server, this.serverManager);
751     try {
752       // Boolean variable used for waiting until randomAssignment is called and
753       // new
754       // plan is generated.
755       AtomicBoolean gate = new AtomicBoolean(false);
756       if (balancer instanceof MockedLoadBalancer) {
757         ((MockedLoadBalancer) balancer).setGateVariable(gate);
758       }
759       ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
760       int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
761       ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A,
762           EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
763       String path = ZKAssign.getNodeName(this.watcher, REGIONINFO
764           .getEncodedName());
765       am.getRegionStates().updateRegionState(
766         REGIONINFO, State.OPENING, SERVERNAME_A);
767       // a dummy plan inserted into the regionPlans. This plan is cleared and
768       // new one is formed
769       am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(
770           REGIONINFO, null, SERVERNAME_A));
771       RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
772       List<ServerName> serverList = new ArrayList<ServerName>(2);
773       serverList.add(SERVERNAME_B);
774       Mockito.when(
775           this.serverManager.createDestinationServersList(SERVERNAME_A))
776           .thenReturn(serverList);
777       am.nodeDataChanged(path);
778       // here we are waiting until the random assignment in the load balancer is
779       // called.
780       while (!gate.get()) {
781         Thread.sleep(10);
782       }
783       // new region plan may take some time to get updated after random
784       // assignment is called and
785       // gate is set to true.
786       RegionPlan newRegionPlan = am.regionPlans
787           .get(REGIONINFO.getEncodedName());
788       while (newRegionPlan == null) {
789         Thread.sleep(10);
790         newRegionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
791       }
792       // the new region plan created may contain the same RS as destination but
793       // it should
794       // be new plan.
795       assertNotSame("Same region plan should not come", regionPlan,
796           newRegionPlan);
797       assertTrue("Destination servers should be different.", !(regionPlan
798           .getDestination().equals(newRegionPlan.getDestination())));
799 
800       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
801     } finally {
802       this.server.getConfiguration().setClass(
803           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, DefaultLoadBalancer.class,
804           LoadBalancer.class);
805       am.getExecutorService().shutdown();
806       am.shutdown();
807     }
808   }
809 
810   /**
811    * Mocked load balancer class used in the testcase to make sure that the testcase waits until
812    * random assignment is called and the gate variable is set to true.
813    */
814   public static class MockedLoadBalancer extends DefaultLoadBalancer {
815     private AtomicBoolean gate;
816 
817     public void setGateVariable(AtomicBoolean gate) {
818       this.gate = gate;
819     }
820 
821     @Override
822     public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
823       ServerName randomServerName = super.randomAssignment(regionInfo, servers);
824       this.gate.set(true);
825       return randomServerName;
826     }
827 
828     @Override
829     public Map<ServerName, List<HRegionInfo>> retainAssignment(
830         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
831       this.gate.set(true);
832       return super.retainAssignment(regions, servers);
833     }
834   }
835 
836   /**
837    * Test the scenario when the master is in failover and trying to process a
838    * region which is in Opening state on a dead RS. Master should immediately
839    * assign the region and not wait for Timeout Monitor.(Hbase-5882).
840    */
841   @Test(timeout = 60000)
842   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
843       KeeperException, ServiceException, InterruptedException {
844     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
845       this.server, this.serverManager);
846     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
847     int version = ZKAssign.getVersion(this.watcher, REGIONINFO);
848     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
849         EventType.RS_ZK_REGION_OPENING, version);
850     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING,
851         REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY);
852     version = ZKAssign.getVersion(this.watcher, REGIONINFO);
853     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
854     am.getRegionStates().createRegionState(REGIONINFO);
855     am.gate.set(false);
856     am.processRegionsInTransition(rt, REGIONINFO, version);
857     // Waiting for the assignment to get completed.
858     while (!am.gate.get()) {
859       Thread.sleep(10);
860     }
861     assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO
862         .getEncodedName()));
863   }
864 
865   /**
866    * Test verifies whether assignment is skipped for regions of tables in DISABLING state during
867    * clean cluster startup. See HBASE-6281.
868    *
869    * @throws KeeperException
870    * @throws IOException
871    * @throws Exception
872    */
873   @Test(timeout = 60000)
874   public void testDisablingTableRegionsAssignmentDuringCleanClusterStartup()
875       throws KeeperException, IOException, Exception {
876     this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
877         MockedLoadBalancer.class, LoadBalancer.class);
878     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(
879         new HashMap<ServerName, ServerLoad>(0));
880     List<ServerName> destServers = new ArrayList<ServerName>(1);
881     destServers.add(SERVERNAME_A);
882     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
883     // To avoid cast exception in DisableTableHandler process.
884     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
885     Server server = new HMaster(HTU.getConfiguration());
886     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
887         this.serverManager);
888     AtomicBoolean gate = new AtomicBoolean(false);
889     if (balancer instanceof MockedLoadBalancer) {
890       ((MockedLoadBalancer) balancer).setGateVariable(gate);
891     }
892     try{
893       // set table in disabling state.
894       am.getZKTable().setDisablingTable(REGIONINFO.getTableName());
895       am.joinCluster();
896       // should not call retainAssignment if we get empty regions in assignAllUserRegions.
897       assertFalse(
898           "Assign should not be invoked for disabling table regions during clean cluster startup.",
899           gate.get());
900       // need to change table state from disabling to disabled.
901       assertTrue("Table should be disabled.",
902           am.getZKTable().isDisabledTable(REGIONINFO.getTableName()));
903     } finally {
904       this.server.getConfiguration().setClass(
905         HConstants.HBASE_MASTER_LOADBALANCER_CLASS, DefaultLoadBalancer.class,
906         LoadBalancer.class);
907       am.getZKTable().setEnabledTable(REGIONINFO.getTableName());
908       am.shutdown();
909     }
910   }
911 
912   /**
913    * Test verifies whether all the enabling table regions assigned only once during master startup.
914    *
915    * @throws KeeperException
916    * @throws IOException
917    * @throws Exception
918    */
919   @Test
920   public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception {
921     enabling = true;
922     List<ServerName> destServers = new ArrayList<ServerName>(1);
923     destServers.add(SERVERNAME_A);
924     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
925     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
926     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
927     Server server = new HMaster(HTU.getConfiguration());
928     Whitebox.setInternalState(server, "serverManager", this.serverManager);
929     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
930         this.serverManager);
931     try {
932       // set table in enabling state.
933       am.getZKTable().setEnablingTable(REGIONINFO.getTableName());
934       new EnableTableHandler(server, REGIONINFO.getTableName(),
935           am.getCatalogTracker(), am, new NullTableLockManager(), true).prepare()
936           .process();
937       assertEquals("Number of assignments should be 1.", 1, assignmentCount);
938       assertTrue("Table should be enabled.",
939           am.getZKTable().isEnabledTable(REGIONINFO.getTableName()));
940     } finally {
941       enabling = false;
942       assignmentCount = 0;
943       am.getZKTable().setEnabledTable(REGIONINFO.getTableName());
944       am.shutdown();
945       ZKAssign.deleteAllNodes(this.watcher);
946     }
947   }
948 
949   /**
950    * When a region is in transition, if the region server opening the region goes down,
951    * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign).
952    * This test is to make sure SSH reassigns it right away.
953    */
954   @Test
955   public void testSSHTimesOutOpeningRegionTransition()
956       throws KeeperException, IOException, ServiceException {
957     // We need a mocked catalog tracker.
958     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
959     // Create an AM.
960     AssignmentManagerWithExtrasForTesting am =
961       setUpMockedAssignmentManager(this.server, this.serverManager);
962     // adding region in pending open.
963     RegionState state = new RegionState(REGIONINFO,
964       State.OPENING, System.currentTimeMillis(), SERVERNAME_A);
965     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B);
966     am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state);
967     // adding region plan
968     am.regionPlans.put(REGIONINFO.getEncodedName(),
969       new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A));
970     am.getZKTable().setEnabledTable(REGIONINFO.getTableName());
971 
972     try {
973       am.assignInvoked = false;
974       processServerShutdownHandler(ct, am, false);
975       assertTrue(am.assignInvoked);
976     } finally {
977       am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName());
978       am.regionPlans.remove(REGIONINFO.getEncodedName());
979     }
980   }
981 
982   /**
983    * Scenario:<ul>
984    *  <li> master starts a close, and creates a znode</li>
985    *  <li> it fails just at this moment, before contacting the RS</li>
986    *  <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
987    *    we don't know, and we have an exception.</li>
988    *  <li> the master must handle this nicely and reassign.
989    *  </ul>
990    */
991   @Test
992   public void testClosingFailureDuringRecovery() throws Exception {
993 
994     AssignmentManagerWithExtrasForTesting am =
995         setUpMockedAssignmentManager(this.server, this.serverManager);
996     ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
997     am.getRegionStates().createRegionState(REGIONINFO);
998 
999     assertFalse( am.getRegionStates().isRegionsInTransition() );
1000 
1001     am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
1002 
1003     assertTrue( am.getRegionStates().isRegionsInTransition() );
1004   }
1005 
1006   /**
1007    * Creates a new ephemeral node in the SPLITTING state for the specified region.
1008    * Create it ephemeral in case regionserver dies mid-split.
1009    *
1010    * <p>Does not transition nodes from other states.  If a node already exists
1011    * for this region, a {@link NodeExistsException} will be thrown.
1012    *
1013    * @param zkw zk reference
1014    * @param region region to be created as offline
1015    * @param serverName server event originates from
1016    * @return Version of znode created.
1017    * @throws KeeperException
1018    * @throws IOException
1019    */
1020   // Copied from SplitTransaction rather than open the method over there in
1021   // the regionserver package.
1022   private static int createNodeSplitting(final ZooKeeperWatcher zkw,
1023       final HRegionInfo region, final ServerName serverName)
1024   throws KeeperException, IOException {
1025     RegionTransition rt =
1026       RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
1027         region.getRegionName(), serverName);
1028 
1029     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1030     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1031       throw new IOException("Failed create of ephemeral " + node);
1032     }
1033     // Transition node from SPLITTING to SPLITTING and pick up version so we
1034     // can be sure this znode is ours; version is needed deleting.
1035     return transitionNodeSplitting(zkw, region, serverName, -1);
1036   }
1037 
1038   // Copied from SplitTransaction rather than open the method over there in
1039   // the regionserver package.
1040   private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
1041       final HRegionInfo parent,
1042       final ServerName serverName, final int version)
1043   throws KeeperException, IOException {
1044     return ZKAssign.transitionNode(zkw, parent, serverName,
1045       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
1046   }
1047 
1048   private void unassign(final AssignmentManager am, final ServerName sn,
1049       final HRegionInfo hri) throws RegionException {
1050     // Before I can unassign a region, I need to set it online.
1051     am.regionOnline(hri, sn);
1052     // Unassign region.
1053     am.unassign(hri);
1054   }
1055 
1056   /**
1057    * Create an {@link AssignmentManagerWithExtrasForTesting} that has mocked
1058    * {@link CatalogTracker} etc.
1059    * @param server
1060    * @param manager
1061    * @return An AssignmentManagerWithExtras with mock connections, etc.
1062    * @throws IOException
1063    * @throws KeeperException
1064    */
1065   private AssignmentManagerWithExtrasForTesting setUpMockedAssignmentManager(final Server server,
1066       final ServerManager manager) throws IOException, KeeperException, ServiceException {
1067     // We need a mocked catalog tracker. Its used by our AM instance.
1068     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1069     // Make an RS Interface implementation. Make it so a scanner can go against
1070     // it and a get to return the single region, REGIONINFO, this test is
1071     // messing with. Needed when "new master" joins cluster. AM will try and
1072     // rebuild its list of user regions and it will also get the HRI that goes
1073     // with an encoded name by doing a Get on .META.
1074     ClientProtos.ClientService.BlockingInterface ri =
1075       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
1076     // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO
1077     Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
1078     final ScanResponse.Builder builder = ScanResponse.newBuilder();
1079     builder.setMoreResults(true);
1080     ResultCellMeta.Builder metaBuilder = ResultCellMeta.newBuilder();
1081     metaBuilder.addCellsLength(r.size());
1082     builder.setResultCellMeta(metaBuilder.build());
1083     final List<CellScannable> rows = new ArrayList<CellScannable>(1);
1084     rows.add(r);
1085     Answer<ScanResponse> ans = new Answer<ClientProtos.ScanResponse>() {
1086       public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
1087         PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
1088             .getArguments()[0];
1089         if (controller != null) {
1090           controller.setCellScanner(CellUtil.createCellScanner(rows));
1091         }
1092         return builder.build();
1093       }
1094     };
1095     if (enabling) {
1096       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any()))
1097           .thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans)
1098           .thenReturn(ScanResponse.newBuilder().setMoreResults(false).build());
1099     } else {
1100       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any())).thenAnswer(
1101           ans);
1102     }
1103     // If a get, return the above result too for REGIONINFO
1104     GetResponse.Builder getBuilder = GetResponse.newBuilder();
1105     getBuilder.setResult(ProtobufUtil.toResult(r));
1106     Mockito.when(ri.get((RpcController)Mockito.any(), (GetRequest) Mockito.any())).
1107       thenReturn(getBuilder.build());
1108     // Get a connection w/ mocked up common methods.
1109     HConnection connection = HConnectionTestingUtility.
1110       getMockedConnectionAndDecorate(HTU.getConfiguration(), null,
1111         ri, SERVERNAME_B, REGIONINFO);
1112     // Make it so we can get the connection from our mocked catalogtracker
1113     Mockito.when(ct.getConnection()).thenReturn(connection);
1114     // Create and startup an executor. Used by AM handling zk callbacks.
1115     ExecutorService executor = startupMasterExecutor("mockedAMExecutor");
1116     this.balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
1117     AssignmentManagerWithExtrasForTesting am = new AssignmentManagerWithExtrasForTesting(
1118       server, manager, ct, this.balancer, executor, new NullTableLockManager());
1119     return am;
1120   }
1121 
1122   /**
1123    * An {@link AssignmentManager} with some extra facility used testing
1124    */
1125   class AssignmentManagerWithExtrasForTesting extends AssignmentManager {
1126     // Keep a reference so can give it out below in {@link #getExecutorService}
1127     private final ExecutorService es;
1128     // Ditto for ct
1129     private final CatalogTracker ct;
1130     boolean processRITInvoked = false;
1131     boolean assignInvoked = false;
1132     AtomicBoolean gate = new AtomicBoolean(true);
1133 
1134     public AssignmentManagerWithExtrasForTesting(
1135         final Server master, final ServerManager serverManager,
1136         final CatalogTracker catalogTracker, final LoadBalancer balancer,
1137         final ExecutorService service, final TableLockManager tableLockManager)
1138             throws KeeperException, IOException {
1139       super(master, serverManager, catalogTracker, balancer, service, null, tableLockManager);
1140       this.es = service;
1141       this.ct = catalogTracker;
1142     }
1143 
1144     @Override
1145     boolean processRegionInTransition(String encodedRegionName,
1146         HRegionInfo regionInfo) throws KeeperException, IOException {
1147       this.processRITInvoked = true;
1148       return super.processRegionInTransition(encodedRegionName, regionInfo);
1149     }
1150 
1151     @Override
1152     public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) {
1153       if (enabling) {
1154         assignmentCount++;
1155         this.regionOnline(region, SERVERNAME_A);
1156       } else {
1157         super.assign(region, setOfflineInZK, forceNewPlan);
1158         this.gate.set(true);
1159       }
1160     }
1161 
1162     @Override
1163     public void assign(List<HRegionInfo> regions)
1164         throws IOException, InterruptedException {
1165       assignInvoked = (regions != null && regions.size() > 0);
1166     }
1167 
1168     /** reset the watcher */
1169     void setWatcher(ZooKeeperWatcher watcher) {
1170       this.watcher = watcher;
1171     }
1172 
1173     /**
1174      * @return ExecutorService used by this instance.
1175      */
1176     ExecutorService getExecutorService() {
1177       return this.es;
1178     }
1179 
1180     /**
1181      * @return CatalogTracker used by this AM (Its a mock).
1182      */
1183     CatalogTracker getCatalogTracker() {
1184       return this.ct;
1185     }
1186   }
1187 
1188   /**
1189    * Call joinCluster on the passed AssignmentManager.  Do it in a thread
1190    * so it runs independent of what all else is going on.  Try to simulate
1191    * an AM running insided a failed over master by clearing all in-memory
1192    * AM state first.
1193   */
1194   private void startFakeFailedOverMasterAssignmentManager(final AssignmentManager am,
1195       final ZooKeeperWatcher watcher) {
1196     // Make sure our new AM gets callbacks; once registered, we can't unregister.
1197     // Thats ok because we make a new zk watcher for each test.
1198     watcher.registerListenerFirst(am);
1199     Thread t = new Thread("RunAmJoinCluster") {
1200       public void run() {
1201         // Call the joinCluster function as though we were doing a master
1202         // failover at this point. It will stall just before we go to add
1203         // the RIT region to our RIT Map in AM at processRegionsInTransition.
1204         // First clear any inmemory state from AM so it acts like a new master
1205         // coming on line.
1206         am.getRegionStates().regionsInTransition.clear();
1207         am.regionPlans.clear();
1208         try {
1209           am.joinCluster();
1210         } catch (IOException e) {
1211           throw new RuntimeException(e);
1212         } catch (KeeperException e) {
1213           throw new RuntimeException(e);
1214         } catch (InterruptedException e) {
1215           throw new RuntimeException(e);
1216         }
1217       }
1218     };
1219     t.start();
1220     while (!t.isAlive()) Threads.sleep(1);
1221   }
1222 
1223   @Test
1224   public void testForceAssignMergingRegion() throws Exception {
1225     // Region to use in test.
1226     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1227     // Need a mocked catalog tracker.
1228     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1229     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1230       server.getConfiguration());
1231     // Create an AM.
1232     AssignmentManager am = new AssignmentManager(this.server,
1233       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1234     RegionStates regionStates = am.getRegionStates();
1235     try {
1236       // First set the state of the region to merging
1237       regionStates.updateRegionState(hri, RegionState.State.MERGING);
1238       // Now, try to assign it with force new plan
1239       am.assign(hri, true, true);
1240       assertEquals("The region should be still in merging state",
1241         RegionState.State.MERGING, regionStates.getRegionState(hri).getState());
1242     } finally {
1243       am.shutdown();
1244     }
1245   }
1246 
1247   /**
1248    * Test assignment related ZK events are ignored by AM if the region is not known
1249    * by AM to be in transition. During normal operation, all assignments are started
1250    * by AM (not considering split/merge), if an event is received but the region
1251    * is not in transition, the event must be a very late one. So it can be ignored.
1252    * During master failover, since AM watches assignment znodes after failover cleanup
1253    * is completed, when an event comes in, AM should already have the region in transition
1254    * if ZK is used during the assignment action (only hbck doesn't use ZK for region
1255    * assignment). So during master failover, we can ignored such events too.
1256    */
1257   @Test
1258   public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException {
1259     // Region to use in test.
1260     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1261     // Need a mocked catalog tracker.
1262     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1263     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1264       server.getConfiguration());
1265     final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
1266     // Create an AM.
1267     AssignmentManager am = new AssignmentManager(this.server,
1268       this.serverManager, ct, balancer, null, null, master.getTableLockManager()) {
1269 
1270       @Override
1271       void handleRegion(final RegionTransition rt, int expectedVersion) {
1272         super.handleRegion(rt, expectedVersion);
1273         if (rt != null && Bytes.equals(hri.getRegionName(),
1274           rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
1275           zkEventProcessed.set(true);
1276         }
1277       }
1278     };
1279     try {
1280       // First make sure the region is not in transition
1281       am.getRegionStates().regionOffline(hri);
1282       zkEventProcessed.set(false); // Reset it before faking zk transition
1283       this.watcher.registerListenerFirst(am);
1284       assertFalse("The region should not be in transition",
1285         am.getRegionStates().isRegionInTransition(hri));
1286       ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
1287       // Trigger a transition event
1288       ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
1289       long startTime = EnvironmentEdgeManager.currentTimeMillis();
1290       while (!zkEventProcessed.get()) {
1291         assertTrue("Timed out in waiting for ZK event to be processed",
1292           EnvironmentEdgeManager.currentTimeMillis() - startTime < 30000);
1293         Threads.sleepWithoutInterrupt(100);
1294       }
1295       assertFalse(am.getRegionStates().isRegionInTransition(hri));
1296     } finally {
1297       am.shutdown();
1298     }
1299   }
1300 }