View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotSame;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.hbase.CellScannable;
34  import org.apache.hadoop.hbase.CellUtil;
35  import org.apache.hadoop.hbase.HBaseConfiguration;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.MediumTests;
40  import org.apache.hadoop.hbase.RegionException;
41  import org.apache.hadoop.hbase.RegionTransition;
42  import org.apache.hadoop.hbase.Server;
43  import org.apache.hadoop.hbase.ServerLoad;
44  import org.apache.hadoop.hbase.ServerName;
45  import org.apache.hadoop.hbase.TableName;
46  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
47  import org.apache.hadoop.hbase.catalog.CatalogTracker;
48  import org.apache.hadoop.hbase.catalog.MetaMockingUtil;
49  import org.apache.hadoop.hbase.client.HConnection;
50  import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
51  import org.apache.hadoop.hbase.client.Result;
52  import org.apache.hadoop.hbase.exceptions.DeserializationException;
53  import org.apache.hadoop.hbase.executor.EventType;
54  import org.apache.hadoop.hbase.executor.ExecutorService;
55  import org.apache.hadoop.hbase.executor.ExecutorType;
56  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
57  import org.apache.hadoop.hbase.master.RegionState.State;
58  import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager;
59  import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
60  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
61  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
62  import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
63  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
64  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
65  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
66  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
67  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
68  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
69  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
70  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
71  import org.apache.hadoop.hbase.util.Bytes;
72  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
73  import org.apache.hadoop.hbase.util.Threads;
74  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
75  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
76  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
77  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
78  import org.apache.zookeeper.KeeperException;
79  import org.apache.zookeeper.KeeperException.NodeExistsException;
80  import org.apache.zookeeper.Watcher;
81  import org.junit.After;
82  import org.junit.AfterClass;
83  import org.junit.Before;
84  import org.junit.BeforeClass;
85  import org.junit.Test;
86  import org.junit.experimental.categories.Category;
87  import org.mockito.Mockito;
88  import org.mockito.internal.util.reflection.Whitebox;
89  import org.mockito.invocation.InvocationOnMock;
90  import org.mockito.stubbing.Answer;
91  
92  import com.google.protobuf.RpcController;
93  import com.google.protobuf.ServiceException;
94  
95  
96  /**
97   * Test {@link AssignmentManager}
98   */
99  @Category(MediumTests.class)
100 public class TestAssignmentManager {
101   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
102   private static final ServerName SERVERNAME_A =
103       ServerName.valueOf("example.org", 1234, 5678);
104   private static final ServerName SERVERNAME_B =
105       ServerName.valueOf("example.org", 0, 5678);
106   private static final HRegionInfo REGIONINFO =
107     new HRegionInfo(TableName.valueOf("t"),
108       HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW);
109   private static int assignmentCount;
110   private static boolean enabling = false;
111 
112   // Mocked objects or; get redone for each test.
113   private Server server;
114   private ServerManager serverManager;
115   private ZooKeeperWatcher watcher;
116   private LoadBalancer balancer;
117   private HMaster master;
118 
119   @BeforeClass
120   public static void beforeClass() throws Exception {
121     HTU.startMiniZKCluster();
122   }
123 
124   @AfterClass
125   public static void afterClass() throws IOException {
126     HTU.shutdownMiniZKCluster();
127   }
128 
129   @Before
130   public void before() throws ZooKeeperConnectionException, IOException {
131     // TODO: Make generic versions of what we do below and put up in a mocking
132     // utility class or move up into HBaseTestingUtility.
133 
134     // Mock a Server.  Have it return a legit Configuration and ZooKeeperWatcher.
135     // If abort is called, be sure to fail the test (don't just swallow it
136     // silently as is mockito default).
137     this.server = Mockito.mock(Server.class);
138     Mockito.when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
139     Mockito.when(server.getConfiguration()).thenReturn(HTU.getConfiguration());
140     this.watcher =
141       new ZooKeeperWatcher(HTU.getConfiguration(), "mockedServer", this.server, true);
142     Mockito.when(server.getZooKeeper()).thenReturn(this.watcher);
143     Mockito.doThrow(new RuntimeException("Aborted")).
144       when(server).abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
145 
146     // Mock a ServerManager.  Say server SERVERNAME_{A,B} are online.  Also
147     // make it so if close or open, we return 'success'.
148     this.serverManager = Mockito.mock(ServerManager.class);
149     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
150     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
151     Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer());
152     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
153     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
154     onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
155     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
156         new ArrayList<ServerName>(onlineServers.keySet()));
157     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
158 
159     List<ServerName> avServers = new ArrayList<ServerName>();
160     avServers.addAll(onlineServers.keySet());
161     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
162     Mockito.when(this.serverManager.createDestinationServersList(null)).thenReturn(avServers);
163 
164     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)).
165       thenReturn(true);
166     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
167       thenReturn(true);
168     // Ditto on open.
169     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
170       thenReturn(RegionOpeningState.OPENED);
171     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
172       thenReturn(RegionOpeningState.OPENED);
173     this.master = Mockito.mock(HMaster.class);
174 
175     Mockito.when(this.master.getServerManager()).thenReturn(serverManager);
176   }
177 
178   @After
179     public void after() throws KeeperException {
180     if (this.watcher != null) {
181       // Clean up all znodes
182       ZKAssign.deleteAllNodes(this.watcher);
183       this.watcher.close();
184     }
185   }
186 
187   /**
188    * Test a balance going on at same time as a master failover
189    *
190    * @throws IOException
191    * @throws KeeperException
192    * @throws InterruptedException
193    * @throws DeserializationException
194    */
195   @Test(timeout = 60000)
196   public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
197   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
198     AssignmentManagerWithExtrasForTesting am =
199       setUpMockedAssignmentManager(this.server, this.serverManager);
200     try {
201       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
202       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
203       while (!am.processRITInvoked) Thread.sleep(1);
204       // As part of the failover cleanup, the balancing region plan is removed.
205       // So a random server will be used to open the region. For testing purpose,
206       // let's assume it is going to open on server b:
207       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
208 
209       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
210 
211       // Now fake the region closing successfully over on the regionserver; the
212       // regionserver will have set the region in CLOSED state. This will
213       // trigger callback into AM. The below zk close call is from the RS close
214       // region handler duplicated here because its down deep in a private
215       // method hard to expose.
216       int versionid =
217         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
218       assertNotSame(versionid, -1);
219       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
220 
221       // Get current versionid else will fail on transition from OFFLINE to
222       // OPENING below
223       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
224       assertNotSame(-1, versionid);
225       // This uglyness below is what the openregionhandler on RS side does.
226       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
227         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
228         EventType.RS_ZK_REGION_OPENING, versionid);
229       assertNotSame(-1, versionid);
230       // Move znode from OPENING to OPENED as RS does on successful open.
231       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
232         SERVERNAME_B, versionid);
233       assertNotSame(-1, versionid);
234       am.gate.set(false);
235       // Block here until our znode is cleared or until this test times out.
236       ZKAssign.blockUntilNoRIT(watcher);
237     } finally {
238       am.getExecutorService().shutdown();
239       am.shutdown();
240     }
241   }
242 
243   @Test(timeout = 60000)
244   public void testBalanceOnMasterFailoverScenarioWithClosedNode()
245   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
246     AssignmentManagerWithExtrasForTesting am =
247       setUpMockedAssignmentManager(this.server, this.serverManager);
248     try {
249       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
250       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
251       while (!am.processRITInvoked) Thread.sleep(1);
252       // As part of the failover cleanup, the balancing region plan is removed.
253       // So a random server will be used to open the region. For testing purpose,
254       // let's assume it is going to open on server b:
255       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
256 
257       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
258 
259       // Now fake the region closing successfully over on the regionserver; the
260       // regionserver will have set the region in CLOSED state. This will
261       // trigger callback into AM. The below zk close call is from the RS close
262       // region handler duplicated here because its down deep in a private
263       // method hard to expose.
264       int versionid =
265         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
266       assertNotSame(versionid, -1);
267       am.gate.set(false);
268       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
269 
270       // Get current versionid else will fail on transition from OFFLINE to
271       // OPENING below
272       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
273       assertNotSame(-1, versionid);
274       // This uglyness below is what the openregionhandler on RS side does.
275       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
276           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
277           EventType.RS_ZK_REGION_OPENING, versionid);
278       assertNotSame(-1, versionid);
279       // Move znode from OPENING to OPENED as RS does on successful open.
280       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
281           SERVERNAME_B, versionid);
282       assertNotSame(-1, versionid);
283 
284       // Block here until our znode is cleared or until this test timesout.
285       ZKAssign.blockUntilNoRIT(watcher);
286     } finally {
287       am.getExecutorService().shutdown();
288       am.shutdown();
289     }
290   }
291 
292   @Test(timeout = 60000)
293   public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
294   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
295     AssignmentManagerWithExtrasForTesting am =
296       setUpMockedAssignmentManager(this.server, this.serverManager);
297     try {
298       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
299       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
300       while (!am.processRITInvoked) Thread.sleep(1);
301       // As part of the failover cleanup, the balancing region plan is removed.
302       // So a random server will be used to open the region. For testing purpose,
303       // let's assume it is going to open on server b:
304       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
305 
306       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
307 
308       // Now fake the region closing successfully over on the regionserver; the
309       // regionserver will have set the region in CLOSED state. This will
310       // trigger callback into AM. The below zk close call is from the RS close
311       // region handler duplicated here because its down deep in a private
312       // method hard to expose.
313       int versionid =
314         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
315       assertNotSame(versionid, -1);
316       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
317 
318       am.gate.set(false);
319       // Get current versionid else will fail on transition from OFFLINE to
320       // OPENING below
321       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
322       assertNotSame(-1, versionid);
323       // This uglyness below is what the openregionhandler on RS side does.
324       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
325           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
326           EventType.RS_ZK_REGION_OPENING, versionid);
327       assertNotSame(-1, versionid);
328       // Move znode from OPENING to OPENED as RS does on successful open.
329       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
330           SERVERNAME_B, versionid);
331       assertNotSame(-1, versionid);
332       // Block here until our znode is cleared or until this test timesout.
333       ZKAssign.blockUntilNoRIT(watcher);
334     } finally {
335       am.getExecutorService().shutdown();
336       am.shutdown();
337     }
338   }
339 
340   private void createRegionPlanAndBalance(
341       final AssignmentManager am, final ServerName from,
342       final ServerName to, final HRegionInfo hri) throws RegionException {
343     // Call the balance function but fake the region being online first at
344     // servername from.
345     am.regionOnline(hri, from);
346     // Balance region from 'from' to 'to'. It calls unassign setting CLOSING state
347     // up in zk.  Create a plan and balance
348     am.balance(new RegionPlan(hri, from, to));
349   }
350 
351   /**
352    * Tests AssignmentManager balance function.  Runs a balance moving a region
353    * from one server to another mocking regionserver responding over zk.
354    * @throws IOException
355    * @throws KeeperException
356    * @throws DeserializationException
357    */
358   @Test
359   public void testBalance()
360     throws IOException, KeeperException, DeserializationException, InterruptedException {
361     // Create and startup an executor.  This is used by AssignmentManager
362     // handling zk callbacks.
363     ExecutorService executor = startupMasterExecutor("testBalanceExecutor");
364 
365     // We need a mocked catalog tracker.
366     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
367     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
368         .getConfiguration());
369     // Create an AM.
370     AssignmentManager am = new AssignmentManager(this.server,
371       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
372     am.failoverCleanupDone.set(true);
373     try {
374       // Make sure our new AM gets callbacks; once registered, can't unregister.
375       // Thats ok because we make a new zk watcher for each test.
376       this.watcher.registerListenerFirst(am);
377       // Call the balance function but fake the region being online first at
378       // SERVERNAME_A.  Create a balance plan.
379       am.regionOnline(REGIONINFO, SERVERNAME_A);
380       // Balance region from A to B.
381       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
382       am.balance(plan);
383 
384       RegionStates regionStates = am.getRegionStates();
385       // Must be failed to close since the server is fake
386       assertTrue(regionStates.isRegionInTransition(REGIONINFO)
387         && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
388       // Move it back to pending_close
389       regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
390 
391       // Now fake the region closing successfully over on the regionserver; the
392       // regionserver will have set the region in CLOSED state.  This will
393       // trigger callback into AM. The below zk close call is from the RS close
394       // region handler duplicated here because its down deep in a private
395       // method hard to expose.
396       int versionid =
397         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
398       assertNotSame(versionid, -1);
399       // AM is going to notice above CLOSED and queue up a new assign.  The
400       // assign will go to open the region in the new location set by the
401       // balancer.  The zk node will be OFFLINE waiting for regionserver to
402       // transition it through OPENING, OPENED.  Wait till we see the OFFLINE
403       // zk node before we proceed.
404       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
405 
406       // Get current versionid else will fail on transition from OFFLINE to OPENING below
407       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
408       assertNotSame(-1, versionid);
409       // This uglyness below is what the openregionhandler on RS side does.
410       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
411         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
412         EventType.RS_ZK_REGION_OPENING, versionid);
413       assertNotSame(-1, versionid);
414       // Move znode from OPENING to OPENED as RS does on successful open.
415       versionid =
416         ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
417       assertNotSame(-1, versionid);
418       // Wait on the handler removing the OPENED znode.
419       while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
420     } finally {
421       executor.shutdown();
422       am.shutdown();
423       // Clean up all znodes
424       ZKAssign.deleteAllNodes(this.watcher);
425     }
426   }
427 
428   /**
429    * Run a simple server shutdown handler.
430    * @throws KeeperException
431    * @throws IOException
432    */
433   @Test
434   public void testShutdownHandler()
435       throws KeeperException, IOException, ServiceException {
436     // Create and startup an executor.  This is used by AssignmentManager
437     // handling zk callbacks.
438     ExecutorService executor = startupMasterExecutor("testShutdownHandler");
439 
440     // We need a mocked catalog tracker.
441     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
442     // Create an AM.
443     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
444         this.server, this.serverManager);
445     try {
446       processServerShutdownHandler(ct, am, false);
447     } finally {
448       executor.shutdown();
449       am.shutdown();
450       // Clean up all znodes
451       ZKAssign.deleteAllNodes(this.watcher);
452     }
453   }
454 
455   /**
456    * To test closed region handler to remove rit and delete corresponding znode
457    * if region in pending close or closing while processing shutdown of a region
458    * server.(HBASE-5927).
459    *
460    * @throws KeeperException
461    * @throws IOException
462    * @throws ServiceException
463    */
464   @Test
465   public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException,
466       ServiceException {
467     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
468     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
469   }
470 
471 
472   /**
473    * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
474    * has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also HBASE-5806
475    *
476    * @throws KeeperException
477    * @throws IOException
478    */
479   @Test
480   public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
481     // true indicates the region is split but still in RIT
482     testCaseWithSplitRegionPartial(true);
483     // false indicate the region is not split
484     testCaseWithSplitRegionPartial(false);
485   }
486 
487   private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
488       IOException, NodeExistsException, InterruptedException, ServiceException {
489     // Create and startup an executor. This is used by AssignmentManager
490     // handling zk callbacks.
491     ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
492     // We need a mocked catalog tracker.
493     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
494     ZKAssign.deleteAllNodes(this.watcher);
495 
496     // Create an AM.
497     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
498       this.server, this.serverManager);
499     // adding region to regions and servers maps.
500     am.regionOnline(REGIONINFO, SERVERNAME_A);
501     // adding region in pending close.
502     am.getRegionStates().updateRegionState(
503       REGIONINFO, State.SPLITTING, SERVERNAME_A);
504     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
505     RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
506         REGIONINFO.getRegionName(), SERVERNAME_A);
507     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
508     // create znode in M_ZK_REGION_CLOSING state.
509     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
510 
511     try {
512       processServerShutdownHandler(ct, am, regionSplitDone);
513       // check znode deleted or not.
514       // In both cases the znode should be deleted.
515 
516       if (regionSplitDone) {
517         assertFalse("Region state of region in SPLITTING should be removed from rit.",
518             am.getRegionStates().isRegionsInTransition());
519       } else {
520         while (!am.assignInvoked) {
521           Thread.sleep(1);
522         }
523         assertTrue("Assign should be invoked.", am.assignInvoked);
524       }
525     } finally {
526       REGIONINFO.setOffline(false);
527       REGIONINFO.setSplit(false);
528       executor.shutdown();
529       am.shutdown();
530       // Clean up all znodes
531       ZKAssign.deleteAllNodes(this.watcher);
532     }
533   }
534 
535   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
536       IOException, NodeExistsException, ServiceException {
537     // Create and startup an executor. This is used by AssignmentManager
538     // handling zk callbacks.
539     ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
540     // We need a mocked catalog tracker.
541     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
542     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
543     ZKAssign.deleteAllNodes(this.watcher);
544 
545     // Create an AM.
546     AssignmentManager am = new AssignmentManager(this.server,
547       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
548     // adding region to regions and servers maps.
549     am.regionOnline(REGIONINFO, SERVERNAME_A);
550     // adding region in pending close.
551     am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
552     if (state == Table.State.DISABLING) {
553       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
554     } else {
555       am.getZKTable().setDisabledTable(REGIONINFO.getTable());
556     }
557     RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
558         REGIONINFO.getRegionName(), SERVERNAME_A);
559     // RegionTransitionData data = new
560     // RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
561     // REGIONINFO.getRegionName(), SERVERNAME_A);
562     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
563     // create znode in M_ZK_REGION_CLOSING state.
564     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
565 
566     try {
567       processServerShutdownHandler(ct, am, false);
568       // check znode deleted or not.
569       // In both cases the znode should be deleted.
570       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
571       // check whether in rit or not. In the DISABLING case also the below
572       // assert will be true but the piece of code added for HBASE-5927 will not
573       // do that.
574       if (state == Table.State.DISABLED) {
575         assertFalse("Region state of region in pending close should be removed from rit.",
576             am.getRegionStates().isRegionsInTransition());
577       }
578     } finally {
579       am.setEnabledTable(REGIONINFO.getTable());
580       executor.shutdown();
581       am.shutdown();
582       // Clean up all znodes
583       ZKAssign.deleteAllNodes(this.watcher);
584     }
585   }
586 
587   private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion)
588       throws IOException, ServiceException {
589     // Make sure our new AM gets callbacks; once registered, can't unregister.
590     // Thats ok because we make a new zk watcher for each test.
591     this.watcher.registerListenerFirst(am);
592 
593     // Need to set up a fake scan of meta for the servershutdown handler
594     // Make an RS Interface implementation.  Make it so a scanner can go against it.
595     ClientProtos.ClientService.BlockingInterface implementation =
596       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
597     // Get a meta row result that has region up on SERVERNAME_A
598 
599     Result r;
600     if (splitRegion) {
601       r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
602     } else {
603       r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
604     }
605 
606     final ScanResponse.Builder builder = ScanResponse.newBuilder();
607     builder.setMoreResults(true);
608     builder.addCellsPerResult(r.size());
609     final List<CellScannable> cellScannables = new ArrayList<CellScannable>(1);
610     cellScannables.add(r);
611     Mockito.when(implementation.scan(
612       (RpcController)Mockito.any(), (ScanRequest)Mockito.any())).
613       thenAnswer(new Answer<ScanResponse>() {
614           public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
615             PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
616                 .getArguments()[0];
617             if (controller != null) {
618               controller.setCellScanner(CellUtil.createCellScanner(cellScannables));
619             }
620             return builder.build();
621           }
622       });
623 
624     // Get a connection w/ mocked up common methods.
625     HConnection connection =
626       HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(),
627         null, implementation, SERVERNAME_B, REGIONINFO);
628 
629     // Make it so we can get a catalogtracker from servermanager.. .needed
630     // down in guts of server shutdown handler.
631     Mockito.when(ct.getConnection()).thenReturn(connection);
632     Mockito.when(this.server.getCatalogTracker()).thenReturn(ct);
633 
634     // Now make a server shutdown handler instance and invoke process.
635     // Have it that SERVERNAME_A died.
636     DeadServer deadServers = new DeadServer();
637     deadServers.add(SERVERNAME_A);
638     // I need a services instance that will return the AM
639     MasterServices services = Mockito.mock(MasterServices.class);
640     Mockito.when(services.getAssignmentManager()).thenReturn(am);
641     Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
642     Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
643     ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
644       services, deadServers, SERVERNAME_A, false);
645     am.failoverCleanupDone.set(true);
646     handler.process();
647     // The region in r will have been assigned.  It'll be up in zk as unassigned.
648   }
649 
650   /**
651    * Create and startup executor pools. Start same set as master does (just
652    * run a few less).
653    * @param name Name to give our executor
654    * @return Created executor (be sure to call shutdown when done).
655    */
656   private ExecutorService startupMasterExecutor(final String name) {
657     // TODO: Move up into HBaseTestingUtility?  Generally useful.
658     ExecutorService executor = new ExecutorService(name);
659     executor.startExecutorService(ExecutorType.MASTER_OPEN_REGION, 3);
660     executor.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, 3);
661     executor.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, 3);
662     executor.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, 3);
663     return executor;
664   }
665 
666   @Test
667   public void testUnassignWithSplitAtSameTime() throws KeeperException, IOException {
668     // Region to use in test.
669     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
670     // First amend the servermanager mock so that when we do send close of the
671     // first meta region on SERVERNAME_A, it will return true rather than
672     // default null.
673     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true);
674     // Need a mocked catalog tracker.
675     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
676     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
677         .getConfiguration());
678     // Create an AM.
679     AssignmentManager am = new AssignmentManager(this.server,
680       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
681     try {
682       // First make sure my mock up basically works.  Unassign a region.
683       unassign(am, SERVERNAME_A, hri);
684       // This delete will fail if the previous unassign did wrong thing.
685       ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_A);
686       // Now put a SPLITTING region in the way.  I don't have to assert it
687       // go put in place.  This method puts it in place then asserts it still
688       // owns it by moving state from SPLITTING to SPLITTING.
689       int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A);
690       // Now, retry the unassign with the SPLTTING in place.  It should just
691       // complete without fail; a sort of 'silent' recognition that the
692       // region to unassign has been split and no longer exists: TOOD: what if
693       // the split fails and the parent region comes back to life?
694       unassign(am, SERVERNAME_A, hri);
695       // This transition should fail if the znode has been messed with.
696       ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A,
697         EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
698       assertFalse(am.getRegionStates().isRegionInTransition(hri));
699     } finally {
700       am.shutdown();
701     }
702   }
703 
704   /**
705    * Tests the processDeadServersAndRegionsInTransition should not fail with NPE
706    * when it failed to get the children. Let's abort the system in this
707    * situation
708    * @throws ServiceException
709    */
710   @Test(timeout = 60000)
711   public void testProcessDeadServersAndRegionsInTransitionShouldNotFailWithNPE()
712       throws IOException, KeeperException, InterruptedException, ServiceException {
713     final RecoverableZooKeeper recoverableZk = Mockito
714         .mock(RecoverableZooKeeper.class);
715     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
716       this.server, this.serverManager);
717     Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest",
718         null) {
719       public RecoverableZooKeeper getRecoverableZooKeeper() {
720         return recoverableZk;
721       }
722     };
723     ((ZooKeeperWatcher) zkw).registerListener(am);
724     Mockito.doThrow(new InterruptedException()).when(recoverableZk)
725         .getChildren("/hbase/region-in-transition", null);
726     am.setWatcher((ZooKeeperWatcher) zkw);
727     try {
728       am.processDeadServersAndRegionsInTransition(null);
729       fail("Expected to abort");
730     } catch (NullPointerException e) {
731       fail("Should not throw NPE");
732     } catch (RuntimeException e) {
733       assertEquals("Aborted", e.getLocalizedMessage());
734     }
735   }
736   /**
737    * TestCase verifies that the regionPlan is updated whenever a region fails to open
738    * and the master tries to process RS_ZK_FAILED_OPEN state.(HBASE-5546).
739    */
740   @Test(timeout = 60000)
741   public void testRegionPlanIsUpdatedWhenRegionFailsToOpen() throws IOException, KeeperException,
742       ServiceException, InterruptedException {
743     this.server.getConfiguration().setClass(
744       HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockedLoadBalancer.class,
745       LoadBalancer.class);
746     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
747       this.server, this.serverManager);
748     try {
749       // Boolean variable used for waiting until randomAssignment is called and
750       // new
751       // plan is generated.
752       AtomicBoolean gate = new AtomicBoolean(false);
753       if (balancer instanceof MockedLoadBalancer) {
754         ((MockedLoadBalancer) balancer).setGateVariable(gate);
755       }
756       ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
757       int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
758       ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A,
759           EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
760       String path = ZKAssign.getNodeName(this.watcher, REGIONINFO
761           .getEncodedName());
762       am.getRegionStates().updateRegionState(
763         REGIONINFO, State.OPENING, SERVERNAME_A);
764       // a dummy plan inserted into the regionPlans. This plan is cleared and
765       // new one is formed
766       am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(
767           REGIONINFO, null, SERVERNAME_A));
768       RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
769       List<ServerName> serverList = new ArrayList<ServerName>(2);
770       serverList.add(SERVERNAME_B);
771       Mockito.when(
772           this.serverManager.createDestinationServersList(SERVERNAME_A))
773           .thenReturn(serverList);
774       am.nodeDataChanged(path);
775       // here we are waiting until the random assignment in the load balancer is
776       // called.
777       while (!gate.get()) {
778         Thread.sleep(10);
779       }
780       // new region plan may take some time to get updated after random
781       // assignment is called and
782       // gate is set to true.
783       RegionPlan newRegionPlan = am.regionPlans
784           .get(REGIONINFO.getEncodedName());
785       while (newRegionPlan == null) {
786         Thread.sleep(10);
787         newRegionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
788       }
789       // the new region plan created may contain the same RS as destination but
790       // it should
791       // be new plan.
792       assertNotSame("Same region plan should not come", regionPlan,
793           newRegionPlan);
794       assertTrue("Destination servers should be different.", !(regionPlan
795           .getDestination().equals(newRegionPlan.getDestination())));
796 
797       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
798     } finally {
799       this.server.getConfiguration().setClass(
800           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
801           LoadBalancer.class);
802       am.getExecutorService().shutdown();
803       am.shutdown();
804     }
805   }
806 
807   /**
808    * Mocked load balancer class used in the testcase to make sure that the testcase waits until
809    * random assignment is called and the gate variable is set to true.
810    */
811   public static class MockedLoadBalancer extends SimpleLoadBalancer {
812     private AtomicBoolean gate;
813 
814     public void setGateVariable(AtomicBoolean gate) {
815       this.gate = gate;
816     }
817 
818     @Override
819     public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
820       ServerName randomServerName = super.randomAssignment(regionInfo, servers);
821       this.gate.set(true);
822       return randomServerName;
823     }
824 
825     @Override
826     public Map<ServerName, List<HRegionInfo>> retainAssignment(
827         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
828       this.gate.set(true);
829       return super.retainAssignment(regions, servers);
830     }
831   }
832 
833   /**
834    * Test the scenario when the master is in failover and trying to process a
835    * region which is in Opening state on a dead RS. Master will force offline the
836    * region and put it in transition. AM relies on SSH to reassign it.
837    */
838   @Test(timeout = 60000)
839   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
840       KeeperException, ServiceException, InterruptedException {
841     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
842       this.server, this.serverManager);
843     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
844     int version = ZKAssign.getVersion(this.watcher, REGIONINFO);
845     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
846         EventType.RS_ZK_REGION_OPENING, version);
847     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING,
848         REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY);
849     version = ZKAssign.getVersion(this.watcher, REGIONINFO);
850     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
851     am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done
852     am.getRegionStates().createRegionState(REGIONINFO);
853     am.gate.set(false);
854     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
855     assertFalse(am.processRegionsInTransition(rt, REGIONINFO, version));
856     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
857     processServerShutdownHandler(ct, am, false);
858     // Waiting for the assignment to get completed.
859     while (!am.gate.get()) {
860       Thread.sleep(10);
861     }
862     assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO
863         .getEncodedName()));
864   }
865 
866   /**
867    * Test verifies whether assignment is skipped for regions of tables in DISABLING state during
868    * clean cluster startup. See HBASE-6281.
869    *
870    * @throws KeeperException
871    * @throws IOException
872    * @throws Exception
873    */
874   @Test(timeout = 60000)
875   public void testDisablingTableRegionsAssignmentDuringCleanClusterStartup()
876       throws KeeperException, IOException, Exception {
877     this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
878         MockedLoadBalancer.class, LoadBalancer.class);
879     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(
880         new HashMap<ServerName, ServerLoad>(0));
881     List<ServerName> destServers = new ArrayList<ServerName>(1);
882     destServers.add(SERVERNAME_A);
883     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
884     // To avoid cast exception in DisableTableHandler process.
885     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
886     Server server = new HMaster(HTU.getConfiguration());
887     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
888         this.serverManager);
889     AtomicBoolean gate = new AtomicBoolean(false);
890     if (balancer instanceof MockedLoadBalancer) {
891       ((MockedLoadBalancer) balancer).setGateVariable(gate);
892     }
893     try{
894       // set table in disabling state.
895       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
896       am.joinCluster();
897       // should not call retainAssignment if we get empty regions in assignAllUserRegions.
898       assertFalse(
899           "Assign should not be invoked for disabling table regions during clean cluster startup.",
900           gate.get());
901       // need to change table state from disabling to disabled.
902       assertTrue("Table should be disabled.",
903           am.getZKTable().isDisabledTable(REGIONINFO.getTable()));
904     } finally {
905       this.server.getConfiguration().setClass(
906         HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
907         LoadBalancer.class);
908       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
909       am.shutdown();
910     }
911   }
912 
913   /**
914    * Test verifies whether all the enabling table regions assigned only once during master startup.
915    *
916    * @throws KeeperException
917    * @throws IOException
918    * @throws Exception
919    */
920   @Test
921   public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception {
922     enabling = true;
923     List<ServerName> destServers = new ArrayList<ServerName>(1);
924     destServers.add(SERVERNAME_A);
925     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
926     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
927     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
928     Server server = new HMaster(HTU.getConfiguration());
929     Whitebox.setInternalState(server, "serverManager", this.serverManager);
930     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
931         this.serverManager);
932     try {
933       // set table in enabling state.
934       am.getZKTable().setEnablingTable(REGIONINFO.getTable());
935       new EnableTableHandler(server, REGIONINFO.getTable(),
936           am.getCatalogTracker(), am, new NullTableLockManager(), true).prepare()
937           .process();
938       assertEquals("Number of assignments should be 1.", 1, assignmentCount);
939       assertTrue("Table should be enabled.",
940           am.getZKTable().isEnabledTable(REGIONINFO.getTable()));
941     } finally {
942       enabling = false;
943       assignmentCount = 0;
944       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
945       am.shutdown();
946       ZKAssign.deleteAllNodes(this.watcher);
947     }
948   }
949 
950   /**
951    * When a region is in transition, if the region server opening the region goes down,
952    * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign).
953    * This test is to make sure SSH reassigns it right away.
954    */
955   @Test
956   public void testSSHTimesOutOpeningRegionTransition()
957       throws KeeperException, IOException, ServiceException {
958     // We need a mocked catalog tracker.
959     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
960     // Create an AM.
961     AssignmentManagerWithExtrasForTesting am =
962       setUpMockedAssignmentManager(this.server, this.serverManager);
963     // adding region in pending open.
964     RegionState state = new RegionState(REGIONINFO,
965       State.OPENING, System.currentTimeMillis(), SERVERNAME_A);
966     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B);
967     am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state);
968     // adding region plan
969     am.regionPlans.put(REGIONINFO.getEncodedName(),
970       new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A));
971     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
972 
973     try {
974       am.assignInvoked = false;
975       processServerShutdownHandler(ct, am, false);
976       assertTrue(am.assignInvoked);
977     } finally {
978       am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName());
979       am.regionPlans.remove(REGIONINFO.getEncodedName());
980     }
981   }
982 
983   /**
984    * Scenario:<ul>
985    *  <li> master starts a close, and creates a znode</li>
986    *  <li> it fails just at this moment, before contacting the RS</li>
987    *  <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
988    *    we don't know, and we have an exception.</li>
989    *  <li> the master must handle this nicely and reassign.
990    *  </ul>
991    */
992   @Test
993   public void testClosingFailureDuringRecovery() throws Exception {
994 
995     AssignmentManagerWithExtrasForTesting am =
996         setUpMockedAssignmentManager(this.server, this.serverManager);
997     ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
998     am.getRegionStates().createRegionState(REGIONINFO);
999 
1000     assertFalse( am.getRegionStates().isRegionsInTransition() );
1001 
1002     am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
1003 
1004     assertTrue( am.getRegionStates().isRegionsInTransition() );
1005   }
1006 
1007   /**
1008    * Creates a new ephemeral node in the SPLITTING state for the specified region.
1009    * Create it ephemeral in case regionserver dies mid-split.
1010    *
1011    * <p>Does not transition nodes from other states.  If a node already exists
1012    * for this region, a {@link NodeExistsException} will be thrown.
1013    *
1014    * @param zkw zk reference
1015    * @param region region to be created as offline
1016    * @param serverName server event originates from
1017    * @return Version of znode created.
1018    * @throws KeeperException
1019    * @throws IOException
1020    */
1021   // Copied from SplitTransaction rather than open the method over there in
1022   // the regionserver package.
1023   private static int createNodeSplitting(final ZooKeeperWatcher zkw,
1024       final HRegionInfo region, final ServerName serverName)
1025   throws KeeperException, IOException {
1026     RegionTransition rt =
1027       RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
1028         region.getRegionName(), serverName);
1029 
1030     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1031     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1032       throw new IOException("Failed create of ephemeral " + node);
1033     }
1034     // Transition node from SPLITTING to SPLITTING and pick up version so we
1035     // can be sure this znode is ours; version is needed deleting.
1036     return transitionNodeSplitting(zkw, region, serverName, -1);
1037   }
1038 
1039   // Copied from SplitTransaction rather than open the method over there in
1040   // the regionserver package.
1041   private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
1042       final HRegionInfo parent,
1043       final ServerName serverName, final int version)
1044   throws KeeperException, IOException {
1045     return ZKAssign.transitionNode(zkw, parent, serverName,
1046       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
1047   }
1048 
1049   private void unassign(final AssignmentManager am, final ServerName sn,
1050       final HRegionInfo hri) throws RegionException {
1051     // Before I can unassign a region, I need to set it online.
1052     am.regionOnline(hri, sn);
1053     // Unassign region.
1054     am.unassign(hri);
1055   }
1056 
1057   /**
1058    * Create an {@link AssignmentManagerWithExtrasForTesting} that has mocked
1059    * {@link CatalogTracker} etc.
1060    * @param server
1061    * @param manager
1062    * @return An AssignmentManagerWithExtras with mock connections, etc.
1063    * @throws IOException
1064    * @throws KeeperException
1065    */
1066   private AssignmentManagerWithExtrasForTesting setUpMockedAssignmentManager(final Server server,
1067       final ServerManager manager) throws IOException, KeeperException, ServiceException {
1068     // We need a mocked catalog tracker. Its used by our AM instance.
1069     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1070     // Make an RS Interface implementation. Make it so a scanner can go against
1071     // it and a get to return the single region, REGIONINFO, this test is
1072     // messing with. Needed when "new master" joins cluster. AM will try and
1073     // rebuild its list of user regions and it will also get the HRI that goes
1074     // with an encoded name by doing a Get on hbase:meta
1075     ClientProtos.ClientService.BlockingInterface ri =
1076       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
1077     // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO
1078     Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
1079     final ScanResponse.Builder builder = ScanResponse.newBuilder();
1080     builder.setMoreResults(true);
1081     builder.addCellsPerResult(r.size());
1082     final List<CellScannable> rows = new ArrayList<CellScannable>(1);
1083     rows.add(r);
1084     Answer<ScanResponse> ans = new Answer<ClientProtos.ScanResponse>() {
1085       public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
1086         PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
1087             .getArguments()[0];
1088         if (controller != null) {
1089           controller.setCellScanner(CellUtil.createCellScanner(rows));
1090         }
1091         return builder.build();
1092       }
1093     };
1094     if (enabling) {
1095       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any()))
1096           .thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans)
1097           .thenReturn(ScanResponse.newBuilder().setMoreResults(false).build());
1098     } else {
1099       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any())).thenAnswer(
1100           ans);
1101     }
1102     // If a get, return the above result too for REGIONINFO
1103     GetResponse.Builder getBuilder = GetResponse.newBuilder();
1104     getBuilder.setResult(ProtobufUtil.toResult(r));
1105     Mockito.when(ri.get((RpcController)Mockito.any(), (GetRequest) Mockito.any())).
1106       thenReturn(getBuilder.build());
1107     // Get a connection w/ mocked up common methods.
1108     HConnection connection = HConnectionTestingUtility.
1109       getMockedConnectionAndDecorate(HTU.getConfiguration(), null,
1110         ri, SERVERNAME_B, REGIONINFO);
1111     // Make it so we can get the connection from our mocked catalogtracker
1112     Mockito.when(ct.getConnection()).thenReturn(connection);
1113     // Create and startup an executor. Used by AM handling zk callbacks.
1114     ExecutorService executor = startupMasterExecutor("mockedAMExecutor");
1115     this.balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
1116     AssignmentManagerWithExtrasForTesting am = new AssignmentManagerWithExtrasForTesting(
1117       server, manager, ct, this.balancer, executor, new NullTableLockManager());
1118     return am;
1119   }
1120 
1121   /**
1122    * An {@link AssignmentManager} with some extra facility used testing
1123    */
1124   class AssignmentManagerWithExtrasForTesting extends AssignmentManager {
1125     // Keep a reference so can give it out below in {@link #getExecutorService}
1126     private final ExecutorService es;
1127     // Ditto for ct
1128     private final CatalogTracker ct;
1129     boolean processRITInvoked = false;
1130     boolean assignInvoked = false;
1131     AtomicBoolean gate = new AtomicBoolean(true);
1132 
1133     public AssignmentManagerWithExtrasForTesting(
1134         final Server master, final ServerManager serverManager,
1135         final CatalogTracker catalogTracker, final LoadBalancer balancer,
1136         final ExecutorService service, final TableLockManager tableLockManager)
1137             throws KeeperException, IOException {
1138       super(master, serverManager, catalogTracker, balancer, service, null, tableLockManager);
1139       this.es = service;
1140       this.ct = catalogTracker;
1141     }
1142 
1143     @Override
1144     boolean processRegionInTransition(String encodedRegionName,
1145         HRegionInfo regionInfo) throws KeeperException, IOException {
1146       this.processRITInvoked = true;
1147       return super.processRegionInTransition(encodedRegionName, regionInfo);
1148     }
1149 
1150     @Override
1151     public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) {
1152       if (enabling) {
1153         assignmentCount++;
1154         this.regionOnline(region, SERVERNAME_A);
1155       } else {
1156         super.assign(region, setOfflineInZK, forceNewPlan);
1157         this.gate.set(true);
1158       }
1159     }
1160 
1161     @Override
1162     public void assign(List<HRegionInfo> regions)
1163         throws IOException, InterruptedException {
1164       assignInvoked = (regions != null && regions.size() > 0);
1165       super.assign(regions);
1166       this.gate.set(true);
1167     }
1168 
1169     /** reset the watcher */
1170     void setWatcher(ZooKeeperWatcher watcher) {
1171       this.watcher = watcher;
1172     }
1173 
1174     /**
1175      * @return ExecutorService used by this instance.
1176      */
1177     ExecutorService getExecutorService() {
1178       return this.es;
1179     }
1180 
1181     /**
1182      * @return CatalogTracker used by this AM (Its a mock).
1183      */
1184     CatalogTracker getCatalogTracker() {
1185       return this.ct;
1186     }
1187   }
1188 
1189   /**
1190    * Call joinCluster on the passed AssignmentManager.  Do it in a thread
1191    * so it runs independent of what all else is going on.  Try to simulate
1192    * an AM running insided a failed over master by clearing all in-memory
1193    * AM state first.
1194   */
1195   private void startFakeFailedOverMasterAssignmentManager(final AssignmentManager am,
1196       final ZooKeeperWatcher watcher) {
1197     // Make sure our new AM gets callbacks; once registered, we can't unregister.
1198     // Thats ok because we make a new zk watcher for each test.
1199     watcher.registerListenerFirst(am);
1200     Thread t = new Thread("RunAmJoinCluster") {
1201       public void run() {
1202         // Call the joinCluster function as though we were doing a master
1203         // failover at this point. It will stall just before we go to add
1204         // the RIT region to our RIT Map in AM at processRegionsInTransition.
1205         // First clear any inmemory state from AM so it acts like a new master
1206         // coming on line.
1207         am.getRegionStates().regionsInTransition.clear();
1208         am.regionPlans.clear();
1209         try {
1210           am.joinCluster();
1211         } catch (IOException e) {
1212           throw new RuntimeException(e);
1213         } catch (KeeperException e) {
1214           throw new RuntimeException(e);
1215         } catch (InterruptedException e) {
1216           throw new RuntimeException(e);
1217         }
1218       }
1219     };
1220     t.start();
1221     while (!t.isAlive()) Threads.sleep(1);
1222   }
1223 
1224   @Test
1225   public void testForceAssignMergingRegion() throws Exception {
1226     // Region to use in test.
1227     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1228     // Need a mocked catalog tracker.
1229     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1230     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1231       server.getConfiguration());
1232     // Create an AM.
1233     AssignmentManager am = new AssignmentManager(this.server,
1234       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1235     RegionStates regionStates = am.getRegionStates();
1236     try {
1237       // First set the state of the region to merging
1238       regionStates.updateRegionState(hri, RegionState.State.MERGING);
1239       // Now, try to assign it with force new plan
1240       am.assign(hri, true, true);
1241       assertEquals("The region should be still in merging state",
1242         RegionState.State.MERGING, regionStates.getRegionState(hri).getState());
1243     } finally {
1244       am.shutdown();
1245     }
1246   }
1247 
1248   /**
1249    * Test assignment related ZK events are ignored by AM if the region is not known
1250    * by AM to be in transition. During normal operation, all assignments are started
1251    * by AM (not considering split/merge), if an event is received but the region
1252    * is not in transition, the event must be a very late one. So it can be ignored.
1253    * During master failover, since AM watches assignment znodes after failover cleanup
1254    * is completed, when an event comes in, AM should already have the region in transition
1255    * if ZK is used during the assignment action (only hbck doesn't use ZK for region
1256    * assignment). So during master failover, we can ignored such events too.
1257    */
1258   @Test
1259   public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException {
1260     // Region to use in test.
1261     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1262     // Need a mocked catalog tracker.
1263     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1264     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1265       server.getConfiguration());
1266     final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
1267     // Create an AM.
1268     AssignmentManager am = new AssignmentManager(this.server,
1269       this.serverManager, ct, balancer, null, null, master.getTableLockManager()) {
1270 
1271       @Override
1272       void handleRegion(final RegionTransition rt, int expectedVersion) {
1273         super.handleRegion(rt, expectedVersion);
1274         if (rt != null && Bytes.equals(hri.getRegionName(),
1275           rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
1276           zkEventProcessed.set(true);
1277         }
1278       }
1279     };
1280     try {
1281       // First make sure the region is not in transition
1282       am.getRegionStates().regionOffline(hri);
1283       zkEventProcessed.set(false); // Reset it before faking zk transition
1284       this.watcher.registerListenerFirst(am);
1285       assertFalse("The region should not be in transition",
1286         am.getRegionStates().isRegionInTransition(hri));
1287       ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
1288       // Trigger a transition event
1289       ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
1290       long startTime = EnvironmentEdgeManager.currentTimeMillis();
1291       while (!zkEventProcessed.get()) {
1292         assertTrue("Timed out in waiting for ZK event to be processed",
1293           EnvironmentEdgeManager.currentTimeMillis() - startTime < 30000);
1294         Threads.sleepWithoutInterrupt(100);
1295       }
1296       assertFalse(am.getRegionStates().isRegionInTransition(hri));
1297     } finally {
1298       am.shutdown();
1299     }
1300   }
1301 }