View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotSame;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.hbase.CellScannable;
34  import org.apache.hadoop.hbase.CellUtil;
35  import org.apache.hadoop.hbase.HBaseConfiguration;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.MediumTests;
40  import org.apache.hadoop.hbase.RegionException;
41  import org.apache.hadoop.hbase.RegionTransition;
42  import org.apache.hadoop.hbase.Server;
43  import org.apache.hadoop.hbase.ServerLoad;
44  import org.apache.hadoop.hbase.ServerName;
45  import org.apache.hadoop.hbase.TableName;
46  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
47  import org.apache.hadoop.hbase.catalog.CatalogTracker;
48  import org.apache.hadoop.hbase.catalog.MetaMockingUtil;
49  import org.apache.hadoop.hbase.client.HConnection;
50  import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
51  import org.apache.hadoop.hbase.client.Result;
52  import org.apache.hadoop.hbase.exceptions.DeserializationException;
53  import org.apache.hadoop.hbase.executor.EventType;
54  import org.apache.hadoop.hbase.executor.ExecutorService;
55  import org.apache.hadoop.hbase.executor.ExecutorType;
56  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
57  import org.apache.hadoop.hbase.master.RegionState.State;
58  import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager;
59  import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
60  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
61  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
62  import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
63  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
64  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
65  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
66  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
67  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
68  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
69  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
70  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
71  import org.apache.hadoop.hbase.util.Bytes;
72  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
73  import org.apache.hadoop.hbase.util.Threads;
74  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
75  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
76  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
77  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
78  import org.apache.zookeeper.KeeperException;
79  import org.apache.zookeeper.KeeperException.NodeExistsException;
80  import org.apache.zookeeper.Watcher;
81  import org.junit.After;
82  import org.junit.AfterClass;
83  import org.junit.Before;
84  import org.junit.BeforeClass;
85  import org.junit.Test;
86  import org.junit.experimental.categories.Category;
87  import org.mockito.Mockito;
88  import org.mockito.internal.util.reflection.Whitebox;
89  import org.mockito.invocation.InvocationOnMock;
90  import org.mockito.stubbing.Answer;
91  
92  import com.google.protobuf.RpcController;
93  import com.google.protobuf.ServiceException;
94  
95  
96  /**
97   * Test {@link AssignmentManager}
98   */
99  @Category(MediumTests.class)
100 public class TestAssignmentManager {
101   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
      // Two fake region server names on the same host; they differ only in the
      // second argument (1234 vs 0).  before() reports both of them as online
      // through the mocked ServerManager.
102   private static final ServerName SERVERNAME_A =
103       ServerName.valueOf("example.org", 1234, 5678);
104   private static final ServerName SERVERNAME_B =
105       ServerName.valueOf("example.org", 0, 5678);
      // The single region (of table "t") that the tests online, close and move.
      // NOTE(review): EMPTY_START_ROW is passed for both key arguments;
      // EMPTY_END_ROW was presumably meant for the second one -- confirm.
106   private static final HRegionInfo REGIONINFO =
107     new HRegionInfo(TableName.valueOf("t"),
108       HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW);
109   private static int assignmentCount;
110   private static boolean enabling = false;
111 
112   // Per-test fixtures (mostly Mockito mocks); before() rebuilds server,
      // serverManager, watcher and master for each test.
113   private Server server;
114   private ServerManager serverManager;
115   private ZooKeeperWatcher watcher;
116   private LoadBalancer balancer;
117   private HMaster master;
118 
119   @BeforeClass
120   public static void beforeClass() throws Exception {
        // One mini ZooKeeper cluster is started up front for the whole class;
        // afterClass() shuts it down again.
121     HTU.startMiniZKCluster();
122   }
123 
124   @AfterClass
125   public static void afterClass() throws IOException {
        // Counterpart of beforeClass(): stop the mini ZooKeeper cluster once all
        // tests in the class have run.
126     HTU.shutdownMiniZKCluster();
127   }
128 
129   @Before
130   public void before() throws ZooKeeperConnectionException, IOException {
        // Builds the per-test fixtures: a mocked Server backed by a real
        // ZooKeeperWatcher, a mocked ServerManager that knows SERVERNAME_A and
        // SERVERNAME_B, and a mocked HMaster that hands back that ServerManager.
131     // TODO: Make generic versions of what we do below and put up in a mocking
132     // utility class or move up into HBaseTestingUtility.
133 
134     // Mock a Server.  Have it return a legit Configuration and ZooKeeperWatcher.
135     // If abort is called, be sure to fail the test (don't just swallow it
136     // silently as is mockito default).
137     this.server = Mockito.mock(Server.class);
138     Mockito.when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
139     Mockito.when(server.getConfiguration()).thenReturn(HTU.getConfiguration());
140     this.watcher =
141       new ZooKeeperWatcher(HTU.getConfiguration(), "mockedServer", this.server, true);
142     Mockito.when(server.getZooKeeper()).thenReturn(this.watcher);
143     Mockito.doThrow(new RuntimeException("Aborted")).
144       when(server).abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
145 
146     // Mock a ServerManager.  Say server SERVERNAME_{A,B} are online.  Also
147     // make it so if close or open, we return 'success'.
148     this.serverManager = Mockito.mock(ServerManager.class);
149     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
150     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
151     Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer());
        // Both servers carry an empty load; the same map backs getOnlineServers()
        // and (as a copied key list) getOnlineServersList().
152     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
153     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
154     onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
155     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
156         new ArrayList<ServerName>(onlineServers.keySet()));
157     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
158 
        // The same two servers are returned as assignment destinations, both for
        // the no-argument call and for the call that passes null.
159     List<ServerName> avServers = new ArrayList<ServerName>();
160     avServers.addAll(onlineServers.keySet());
161     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
162     Mockito.when(this.serverManager.createDestinationServersList(null)).thenReturn(avServers);
163 
        // These stubs match only REGIONINFO with a version argument of -1; tests
        // that use another region add their own stubbing.
164     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)).
165       thenReturn(true);
166     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
167       thenReturn(true);
168     // Ditto on open.
169     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
170       thenReturn(RegionOpeningState.OPENED);
171     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
172       thenReturn(RegionOpeningState.OPENED);
        // The master mock is stubbed for getServerManager() only; every other
        // call on it returns the Mockito default.
173     this.master = Mockito.mock(HMaster.class);
174 
175     Mockito.when(this.master.getServerManager()).thenReturn(serverManager);
176   }
177 
178   @After
179   public void after() throws KeeperException {
        // Per-test cleanup of the watcher created in before(): wipe the
        // assignment znodes, then close the watcher.
180     if (this.watcher != null) {
          try {
181         // Clean up all znodes
182         ZKAssign.deleteAllNodes(this.watcher);
          } finally {
            // Close the watcher even when the znode cleanup throws; otherwise
            // its ZooKeeper connection would outlive the failed test.
183         this.watcher.close();
          }
184     }
185   }
186 
187   /**
188    * Test a balance going on at same time as a master failover.
       * <p>
       * A balance of REGIONINFO from SERVERNAME_A to SERVERNAME_B is started,
       * then the fake failed-over master processing is kicked off while the test
       * plays the region server's part by moving the znode through CLOSED,
       * OPENING and OPENED.  In this variant <code>am.gate</code> is set to false
       * only after the znode has reached OPENED.
189    *
190    * @throws IOException
191    * @throws KeeperException
192    * @throws InterruptedException
       * @throws ServiceException
193    * @throws DeserializationException
194    */
195   @Test(timeout = 60000)
196   public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
197   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
198     AssignmentManagerWithExtrasForTesting am =
199       setUpMockedAssignmentManager(this.server, this.serverManager);
200     try {
201       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
202       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
          // Spin until the test AM reports that its regions-in-transition
          // processing was invoked; the test timeout bounds this loop.
203       while (!am.processRITInvoked) Thread.sleep(1);
204       // As part of the failover cleanup, the balancing region plan is removed.
205       // So a random server will be used to open the region. For testing purpose,
206       // let's assume it is going to open on server b:
207       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
208 
209       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
210 
211       // Now fake the region closing successfully over on the regionserver; the
212       // regionserver will have set the region in CLOSED state. This will
213       // trigger callback into AM. The below zk close call is from the RS close
214       // region handler duplicated here because its down deep in a private
215       // method hard to expose.
216       int versionid =
217         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
218       assertNotSame(versionid, -1);
219       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
220 
221       // Get current versionid else will fail on transition from OFFLINE to
222       // OPENING below
223       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
224       assertNotSame(-1, versionid);
225       // This uglyness below is what the openregionhandler on RS side does.
226       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
227         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
228         EventType.RS_ZK_REGION_OPENING, versionid);
229       assertNotSame(-1, versionid);
230       // Move znode from OPENING to OPENED as RS does on successful open.
231       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
232         SERVERNAME_B, versionid);
233       assertNotSame(-1, versionid);
          // NOTE(review): gate is declared on the test AM outside this view;
          // presumably it holds back the failover processing until cleared -- confirm.
234       am.gate.set(false);
235       // Block here until our znode is cleared or until this test times out.
236       ZKAssign.blockUntilNoRIT(watcher);
237     } finally {
238       am.getExecutorService().shutdown();
239       am.shutdown();
240     }
241   }
242 
243   @Test(timeout = 60000)
244   public void testBalanceOnMasterFailoverScenarioWithClosedNode()
245   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
        // Same balance-during-failover scenario as the "OpenedNode" test, except
        // that am.gate is set to false right after the znode has been moved to
        // CLOSED, i.e. before the region shows up as pending open.
246     AssignmentManagerWithExtrasForTesting am =
247       setUpMockedAssignmentManager(this.server, this.serverManager);
248     try {
249       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
250       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
          // Spin until the test AM reports that its regions-in-transition
          // processing was invoked; the test timeout bounds this loop.
251       while (!am.processRITInvoked) Thread.sleep(1);
252       // As part of the failover cleanup, the balancing region plan is removed.
253       // So a random server will be used to open the region. For testing purpose,
254       // let's assume it is going to open on server b:
255       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
256 
257       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
258 
259       // Now fake the region closing successfully over on the regionserver; the
260       // regionserver will have set the region in CLOSED state. This will
261       // trigger callback into AM. The below zk close call is from the RS close
262       // region handler duplicated here because its down deep in a private
263       // method hard to expose.
264       int versionid =
265         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
266       assertNotSame(versionid, -1);
          // NOTE(review): gate is declared on the test AM outside this view;
          // presumably it holds back the failover processing until cleared -- confirm.
267       am.gate.set(false);
268       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
269 
270       // Get current versionid else will fail on transition from OFFLINE to
271       // OPENING below
272       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
273       assertNotSame(-1, versionid);
274       // This uglyness below is what the openregionhandler on RS side does.
275       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
276           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
277           EventType.RS_ZK_REGION_OPENING, versionid);
278       assertNotSame(-1, versionid);
279       // Move znode from OPENING to OPENED as RS does on successful open.
280       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
281           SERVERNAME_B, versionid);
282       assertNotSame(-1, versionid);
283 
284       // Block here until our znode is cleared or until this test times out.
285       ZKAssign.blockUntilNoRIT(watcher);
286     } finally {
287       am.getExecutorService().shutdown();
288       am.shutdown();
289     }
290   }
291 
292   @Test(timeout = 60000)
293   public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
294   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
        // Same balance-during-failover scenario as the "OpenedNode" test, except
        // that am.gate is set to false once the region is seen pending open,
        // before the test moves the znode from OFFLINE to OPENING.
295     AssignmentManagerWithExtrasForTesting am =
296       setUpMockedAssignmentManager(this.server, this.serverManager);
297     try {
298       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
299       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
          // Spin until the test AM reports that its regions-in-transition
          // processing was invoked; the test timeout bounds this loop.
300       while (!am.processRITInvoked) Thread.sleep(1);
301       // As part of the failover cleanup, the balancing region plan is removed.
302       // So a random server will be used to open the region. For testing purpose,
303       // let's assume it is going to open on server b:
304       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
305 
306       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
307 
308       // Now fake the region closing successfully over on the regionserver; the
309       // regionserver will have set the region in CLOSED state. This will
310       // trigger callback into AM. The below zk close call is from the RS close
311       // region handler duplicated here because its down deep in a private
312       // method hard to expose.
313       int versionid =
314         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
315       assertNotSame(versionid, -1);
316       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
317 
          // NOTE(review): gate is declared on the test AM outside this view;
          // presumably it holds back the failover processing until cleared -- confirm.
318       am.gate.set(false);
319       // Get current versionid else will fail on transition from OFFLINE to
320       // OPENING below
321       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
322       assertNotSame(-1, versionid);
323       // This uglyness below is what the openregionhandler on RS side does.
324       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
325           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
326           EventType.RS_ZK_REGION_OPENING, versionid);
327       assertNotSame(-1, versionid);
328       // Move znode from OPENING to OPENED as RS does on successful open.
329       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
330           SERVERNAME_B, versionid);
331       assertNotSame(-1, versionid);
332       // Block here until our znode is cleared or until this test times out.
333       ZKAssign.blockUntilNoRIT(watcher);
334     } finally {
335       am.getExecutorService().shutdown();
336       am.shutdown();
337     }
338   }
339 
340   private void createRegionPlanAndBalance(
341       final AssignmentManager am, final ServerName from,
342       final ServerName to, final HRegionInfo hri) throws RegionException {
        // Shared setup for the failover tests: mark hri as online on 'from' and
        // then ask the AM to balance it over to 'to'.
343     // Call the balance function but fake the region being online first at
344     // servername from.
345     am.regionOnline(hri, from);
346     // Balance region from 'from' to 'to'. It calls unassign setting CLOSING state
347     // up in zk.  Create a plan and balance
348     am.balance(new RegionPlan(hri, from, to));
349   }
350 
351   /**
352    * Tests AssignmentManager balance function.  Runs a balance moving a region
353    * from one server to another mocking regionserver responding over zk.
       * <p>
       * The test carries a timeout like the other balance tests in this class:
       * it ends in a polling loop that would otherwise spin forever if the
       * region never left the in-transition state.
354    * @throws IOException
355    * @throws KeeperException
356    * @throws DeserializationException
       * @throws InterruptedException
357    */
358   @Test(timeout = 60000)
359   public void testBalance()
360     throws IOException, KeeperException, DeserializationException, InterruptedException {
361     // Create and startup an executor.  This is used by AssignmentManager
362     // handling zk callbacks.
363     ExecutorService executor = startupMasterExecutor("testBalanceExecutor");
364 
365     // We need a mocked catalog tracker.
366     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
367     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
368         .getConfiguration());
369     // Create an AM.
370     AssignmentManager am = new AssignmentManager(this.server,
371       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
372     am.failoverCleanupDone.set(true);
373     try {
374       // Make sure our new AM gets callbacks; once registered, can't unregister.
375       // Thats ok because we make a new zk watcher for each test.
376       this.watcher.registerListenerFirst(am);
377       // Call the balance function but fake the region being online first at
378       // SERVERNAME_A.  Create a balance plan.
379       am.regionOnline(REGIONINFO, SERVERNAME_A);
380       // Balance region from A to B.
381       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
382       am.balance(plan);
383 
384       RegionStates regionStates = am.getRegionStates();
385       // Must be failed to close since the server is fake
386       assertTrue(regionStates.isRegionInTransition(REGIONINFO)
387         && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
388       // Move it back to pending_close
389       regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
390 
391       // Now fake the region closing successfully over on the regionserver; the
392       // regionserver will have set the region in CLOSED state.  This will
393       // trigger callback into AM. The below zk close call is from the RS close
394       // region handler duplicated here because its down deep in a private
395       // method hard to expose.
396       int versionid =
397         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
398       assertNotSame(versionid, -1);
399       // AM is going to notice above CLOSED and queue up a new assign.  The
400       // assign will go to open the region in the new location set by the
401       // balancer.  The zk node will be OFFLINE waiting for regionserver to
402       // transition it through OPENING, OPENED.  Wait till we see the OFFLINE
403       // zk node before we proceed.
404       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
405 
406       // Get current versionid else will fail on transition from OFFLINE to OPENING below
407       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
408       assertNotSame(-1, versionid);
409       // This uglyness below is what the openregionhandler on RS side does.
410       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
411         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
412         EventType.RS_ZK_REGION_OPENING, versionid);
413       assertNotSame(-1, versionid);
414       // Move znode from OPENING to OPENED as RS does on successful open.
415       versionid =
416         ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
417       assertNotSame(-1, versionid);
418       // Wait on the handler removing the OPENED znode.
419       while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
420     } finally {
421       executor.shutdown();
422       am.shutdown();
423       // Clean up all znodes
424       ZKAssign.deleteAllNodes(this.watcher);
425     }
426   }
427 
428   /**
429    * Run a simple server shutdown handler.
       * <p>
       * Builds a test AssignmentManager, runs a ServerShutdownHandler for
       * SERVERNAME_A through {@code processServerShutdownHandler} and then
       * releases everything the test started.
430    * @throws KeeperException
431    * @throws IOException
       * @throws ServiceException
432    */
433   @Test
434   public void testShutdownHandler()
435       throws KeeperException, IOException, ServiceException {
436     // Create and startup an executor.  Note that it is not handed to the AM
437     // below: setUpMockedAssignmentManager takes only the server and the
        // server manager, so the AM's own executor service is shut down
        // separately in the finally block.
438     ExecutorService executor = startupMasterExecutor("testShutdownHandler");
439 
440     // We need a mocked catalog tracker.
441     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
442     // Create an AM.
443     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
444         this.server, this.serverManager);
445     try {
446       processServerShutdownHandler(ct, am, false);
447     } finally {
448       executor.shutdown();
          // Same teardown as the failover tests: stop the executor service the
          // test AM holds so its threads do not outlive this test.
          am.getExecutorService().shutdown();
449       am.shutdown();
450       // Clean up all znodes
451       ZKAssign.deleteAllNodes(this.watcher);
452     }
453   }
454 
455   /**
456    * Tests that processing the shutdown of a region server removes the region
457    * from regions-in-transition and deletes the corresponding znode when the
458    * region is in pending close or closing (HBASE-5927).  The scenario is run
       * twice, once with the table DISABLING and once with it DISABLED.
459    *
460    * @throws KeeperException
461    * @throws IOException
462    * @throws ServiceException
463    */
464   @Test
465   public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException,
466       ServiceException {
467     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
468     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
469   }
470 
471 
472   /**
473    * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
474    * has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also HBASE-5806
       * <p>
       * The test carries a timeout because the "not split" case polls until the
       * test AM reports that assign was invoked; without a bound a regression
       * there would hang the run instead of failing it.
475    *
476    * @throws KeeperException
477    * @throws IOException
478    */
479   @Test(timeout = 60000)
480   public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
481     // true indicates the region is split but still in RIT
482     testCaseWithSplitRegionPartial(true);
483     // false indicate the region is not split
484     testCaseWithSplitRegionPartial(false);
485   }
486 
487   private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
488       IOException, NodeExistsException, InterruptedException, ServiceException {
        // Puts REGIONINFO in SPLITTING on SERVERNAME_A (region state plus a
        // RS_ZK_REGION_SPLITTING znode), then processes the shutdown of
        // SERVERNAME_A.  regionSplitDone selects whether the mocked meta row
        // describes the region as already split.
489     // Create and startup an executor.  Note that it is not handed to the AM
490     // below, which is built by setUpMockedAssignmentManager.
491     ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
492     // We need a mocked catalog tracker.
493     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
494     ZKAssign.deleteAllNodes(this.watcher);
495 
496     // Create an AM.
497     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
498       this.server, this.serverManager);
499     // adding region to regions and servers maps.
500     am.regionOnline(REGIONINFO, SERVERNAME_A);
501     // put the region in SPLITTING state on SERVERNAME_A.
502     am.getRegionStates().updateRegionState(
503       REGIONINFO, State.SPLITTING, SERVERNAME_A);
504     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
505     RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
506         REGIONINFO.getRegionName(), SERVERNAME_A);
507     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
508     // create znode in RS_ZK_REGION_SPLITTING state.
509     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
510 
511     try {
512       processServerShutdownHandler(ct, am, regionSplitDone);
513       // Split already recorded: nothing may be left in transition.
514       // Split not done: the region has to be assigned again.
515 
516       if (regionSplitDone) {
517         assertFalse("Region state of region in SPLITTING should be removed from rit.",
518             am.getRegionStates().isRegionsInTransition());
519       } else {
            // Unbounded poll; relies on the calling test's timeout to end it.
520         while (!am.assignInvoked) {
521           Thread.sleep(1);
522         }
523         assertTrue("Assign should be invoked.", am.assignInvoked);
524       }
525     } finally {
          // REGIONINFO is a shared static; clear the offline/split flags so the
          // next run starts from a plain region.
526       REGIONINFO.setOffline(false);
527       REGIONINFO.setSplit(false);
528       executor.shutdown();
          // Same teardown as the failover tests: stop the executor service the
          // test AM holds so its threads do not outlive this run.
          am.getExecutorService().shutdown();
529       am.shutdown();
530       // Clean up all znodes
531       ZKAssign.deleteAllNodes(this.watcher);
532     }
533   }
534 
535   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
536       IOException, NodeExistsException, ServiceException {
        // Puts REGIONINFO in PENDING_CLOSE on SERVERNAME_A with a
        // M_ZK_REGION_CLOSING znode while its table is DISABLING or DISABLED
        // (chosen by 'state'), then processes the shutdown of SERVERNAME_A and
        // checks that the znode is gone.
537     // Create and startup an executor. This is used by AssignmentManager
538     // handling zk callbacks.
539     ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
540     // We need a mocked catalog tracker.
541     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
542     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
543     ZKAssign.deleteAllNodes(this.watcher);
544 
545     // Create an AM.
546     AssignmentManager am = new AssignmentManager(this.server,
547       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
548     // adding region to regions and servers maps.
549     am.regionOnline(REGIONINFO, SERVERNAME_A);
550     // adding region in pending close.
551     am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
        // Any state other than DISABLING is treated as DISABLED here.
552     if (state == Table.State.DISABLING) {
553       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
554     } else {
555       am.getZKTable().setDisabledTable(REGIONINFO.getTable());
556     }
557     RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
558         REGIONINFO.getRegionName(), SERVERNAME_A);
559     // RegionTransitionData data = new
560     // RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
561     // REGIONINFO.getRegionName(), SERVERNAME_A);
562     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
563     // create znode in M_ZK_REGION_CLOSING state.
564     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
565 
566     try {
567       processServerShutdownHandler(ct, am, false);
568       // check znode deleted or not.
569       // In both cases the znode should be deleted.
570       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
571       // check whether in rit or not. In the DISABLING case also the below
572       // assert will be true but the piece of code added for HBASE-5927 will not
573       // do that.
574       if (state == Table.State.DISABLED) {
575         assertFalse("Region state of region in pending close should be removed from rit.",
576             am.getRegionStates().isRegionsInTransition());
577       }
578     } finally {
          // Put the table back to enabled so the disabling/disabled marker set
          // above does not carry over to whatever runs next.
579       am.setEnabledTable(REGIONINFO.getTable());
580       executor.shutdown();
581       am.shutdown();
582       // Clean up all znodes
583       ZKAssign.deleteAllNodes(this.watcher);
584     }
585   }
586 
587   private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion)
588       throws IOException, ServiceException {
        // Runs a ServerShutdownHandler for SERVERNAME_A against the given AM.
        // The meta scan the handler performs is answered by a mocked client
        // service that returns one row for REGIONINFO; splitRegion selects
        // whether that row is built as a split region.
589     // Make sure our new AM gets callbacks; once registered, can't unregister.
590     // Thats ok because we make a new zk watcher for each test.
591     this.watcher.registerListenerFirst(am);
592 
593     // Need to set up a fake scan of meta for the servershutdown handler
594     // Make an RS Interface implementation.  Make it so a scanner can go against it.
595     ClientProtos.ClientService.BlockingInterface implementation =
596       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
597     // Get a meta row result that has region up on SERVERNAME_A
598 
599     Result r;
600     if (splitRegion) {
601       r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
602     } else {
603       r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
604     }
605 
        // The canned scan response announces one result holding r.size() cells;
        // the cells themselves are attached to the rpc controller by the answer
        // below rather than being put into the response.
606     final ScanResponse.Builder builder = ScanResponse.newBuilder();
607     builder.setMoreResults(true);
608     builder.addCellsPerResult(r.size());
609     final List<CellScannable> cellScannables = new ArrayList<CellScannable>(1);
610     cellScannables.add(r);
611     Mockito.when(implementation.scan(
612       (RpcController)Mockito.any(), (ScanRequest)Mockito.any())).
613       thenAnswer(new Answer<ScanResponse>() {
614           public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
615             PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
616                 .getArguments()[0];
617             if (controller != null) {
618               controller.setCellScanner(CellUtil.createCellScanner(cellScannables));
619             }
620             return builder.build();
621           }
622       });
623 
624     // Get a connection w/ mocked up common methods.
625     HConnection connection =
626       HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(),
627         null, implementation, SERVERNAME_B, REGIONINFO);
628 
629     // Make it so the mocked server hands out the catalog tracker, and the
630     // tracker the mocked connection; needed down in guts of server shutdown handler.
631     Mockito.when(ct.getConnection()).thenReturn(connection);
632     Mockito.when(this.server.getCatalogTracker()).thenReturn(ct);
633 
634     // Now make a server shutdown handler instance and invoke process.
635     // Have it that SERVERNAME_A died.
636     DeadServer deadServers = new DeadServer();
637     deadServers.add(SERVERNAME_A);
638     // I need a services instance that will return the AM
639     MasterServices services = Mockito.mock(MasterServices.class);
640     Mockito.when(services.getAssignmentManager()).thenReturn(am);
641     Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
642     Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
643     ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
644       services, deadServers, SERVERNAME_A, false);
        // NOTE(review): the flag is set just before process(); presumably the
        // handler will not go ahead until failover cleanup is marked done -- confirm.
645     am.failoverCleanupDone.set(true);
646     handler.process();
647     // The region in r will have been assigned.  It'll be up in zk as unassigned.
648   }
649 
650   /**
651    * Create and startup executor pools. Start same set as master does (just
652    * run a few less).
653    * @param name Name to give our executor
654    * @return Created executor (be sure to call shutdown when done).
655    */
656   private ExecutorService startupMasterExecutor(final String name) {
657     // TODO: Move up into HBaseTestingUtility?  Generally useful.
658     ExecutorService executor = new ExecutorService(name);
659     executor.startExecutorService(ExecutorType.MASTER_OPEN_REGION, 3);
660     executor.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, 3);
661     executor.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, 3);
662     executor.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, 3);
663     return executor;
664   }
665 
666   @Test
667   public void testUnassignWithSplitAtSameTime() throws KeeperException, IOException {
668     // Region to use in test.
669     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
670     // First amend the servermanager mock so that when we do send close of the
671     // first meta region on SERVERNAME_A, it will return true rather than
672     // default null.
673     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true);
674     // Need a mocked catalog tracker.
675     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
676     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
677         .getConfiguration());
678     // Create an AM.
679     AssignmentManager am = new AssignmentManager(this.server,
680       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
681     try {
682       // First make sure my mock up basically works.  Unassign a region.
683       unassign(am, SERVERNAME_A, hri);
684       // This delete will fail if the previous unassign did wrong thing.
685       ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_A);
686       // Now put a SPLITTING region in the way.  I don't have to assert it
687       // go put in place.  This method puts it in place then asserts it still
688       // owns it by moving state from SPLITTING to SPLITTING.
689       int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A);
690       // Now, retry the unassign with the SPLTTING in place.  It should just
691       // complete without fail; a sort of 'silent' recognition that the
692       // region to unassign has been split and no longer exists: TOOD: what if
693       // the split fails and the parent region comes back to life?
694       unassign(am, SERVERNAME_A, hri);
695       // This transition should fail if the znode has been messed with.
696       ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A,
697         EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
698       assertFalse(am.getRegionStates().isRegionInTransition(hri));
699     } finally {
700       am.shutdown();
701     }
702   }
703 
704   /**
705    * Tests the processDeadServersAndRegionsInTransition should not fail with NPE
706    * when it failed to get the children. Let's abort the system in this
707    * situation
708    * @throws ServiceException
709    */
710   @Test(timeout = 60000)
711   public void testProcessDeadServersAndRegionsInTransitionShouldNotFailWithNPE()
712       throws IOException, KeeperException, InterruptedException, ServiceException {
713     final RecoverableZooKeeper recoverableZk = Mockito
714         .mock(RecoverableZooKeeper.class);
715     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
716       this.server, this.serverManager);
717     Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest",
718         null) {
719       public RecoverableZooKeeper getRecoverableZooKeeper() {
720         return recoverableZk;
721       }
722     };
723     ((ZooKeeperWatcher) zkw).registerListener(am);
724     Mockito.doThrow(new InterruptedException()).when(recoverableZk)
725         .getChildren("/hbase/region-in-transition", null);
726     am.setWatcher((ZooKeeperWatcher) zkw);
727     try {
728       am.processDeadServersAndRegionsInTransition(null);
729       fail("Expected to abort");
730     } catch (NullPointerException e) {
731       fail("Should not throw NPE");
732     } catch (RuntimeException e) {
733       assertEquals("Aborted", e.getLocalizedMessage());
734     }
735   }
736   /**
737    * TestCase verifies that the regionPlan is updated whenever a region fails to open
738    * and the master tries to process RS_ZK_FAILED_OPEN state.(HBASE-5546).
739    */
740   @Test(timeout = 60000)
741   public void testRegionPlanIsUpdatedWhenRegionFailsToOpen() throws IOException, KeeperException,
742       ServiceException, InterruptedException {
743     this.server.getConfiguration().setClass(
744       HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockedLoadBalancer.class,
745       LoadBalancer.class);
746     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
747       this.server, this.serverManager);
748     try {
749       // Boolean variable used for waiting until randomAssignment is called and
750       // new
751       // plan is generated.
752       AtomicBoolean gate = new AtomicBoolean(false);
753       if (balancer instanceof MockedLoadBalancer) {
754         ((MockedLoadBalancer) balancer).setGateVariable(gate);
755       }
756       ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
757       int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
758       ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A,
759           EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
760       String path = ZKAssign.getNodeName(this.watcher, REGIONINFO
761           .getEncodedName());
762       am.getRegionStates().updateRegionState(
763         REGIONINFO, State.OPENING, SERVERNAME_A);
764       // a dummy plan inserted into the regionPlans. This plan is cleared and
765       // new one is formed
766       am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(
767           REGIONINFO, null, SERVERNAME_A));
768       RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
769       List<ServerName> serverList = new ArrayList<ServerName>(2);
770       serverList.add(SERVERNAME_B);
771       Mockito.when(
772           this.serverManager.createDestinationServersList(SERVERNAME_A))
773           .thenReturn(serverList);
774       am.nodeDataChanged(path);
775       // here we are waiting until the random assignment in the load balancer is
776       // called.
777       while (!gate.get()) {
778         Thread.sleep(10);
779       }
780       // new region plan may take some time to get updated after random
781       // assignment is called and
782       // gate is set to true.
783       RegionPlan newRegionPlan = am.regionPlans
784           .get(REGIONINFO.getEncodedName());
785       while (newRegionPlan == null) {
786         Thread.sleep(10);
787         newRegionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
788       }
789       // the new region plan created may contain the same RS as destination but
790       // it should
791       // be new plan.
792       assertNotSame("Same region plan should not come", regionPlan,
793           newRegionPlan);
794       assertTrue("Destination servers should be different.", !(regionPlan
795           .getDestination().equals(newRegionPlan.getDestination())));
796 
797       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
798     } finally {
799       this.server.getConfiguration().setClass(
800           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
801           LoadBalancer.class);
802       am.getExecutorService().shutdown();
803       am.shutdown();
804     }
805   }
806 
807   /**
808    * Mocked load balancer class used in the testcase to make sure that the testcase waits until
809    * random assignment is called and the gate variable is set to true.
810    */
811   public static class MockedLoadBalancer extends SimpleLoadBalancer {
812     private AtomicBoolean gate;
813 
814     public void setGateVariable(AtomicBoolean gate) {
815       this.gate = gate;
816     }
817 
818     @Override
819     public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
820       ServerName randomServerName = super.randomAssignment(regionInfo, servers);
821       this.gate.set(true);
822       return randomServerName;
823     }
824 
825     @Override
826     public Map<ServerName, List<HRegionInfo>> retainAssignment(
827         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
828       this.gate.set(true);
829       return super.retainAssignment(regions, servers);
830     }
831   }
832 
833   /**
834    * Test the scenario when the master is in failover and trying to process a
835    * region which is in Opening state on a dead RS. Master will force offline the
836    * region and put it in transition. AM relies on SSH to reassign it.
837    */
838   @Test(timeout = 60000)
839   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
840       KeeperException, ServiceException, InterruptedException {
841     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
842       this.server, this.serverManager);
843     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
844     int version = ZKAssign.getVersion(this.watcher, REGIONINFO);
845     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
846         EventType.RS_ZK_REGION_OPENING, version);
847     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING,
848         REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY);
849     version = ZKAssign.getVersion(this.watcher, REGIONINFO);
850     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
851     am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done
852     am.getRegionStates().createRegionState(REGIONINFO);
853     am.gate.set(false);
854     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
855     assertFalse(am.processRegionsInTransition(rt, REGIONINFO, version));
856     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
857     processServerShutdownHandler(ct, am, false);
858     // Waiting for the assignment to get completed.
859     while (!am.gate.get()) {
860       Thread.sleep(10);
861     }
862     assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO
863         .getEncodedName()));
864   }
865 
866   /**
867    * Test verifies whether assignment is skipped for regions of tables in DISABLING state during
868    * clean cluster startup. See HBASE-6281.
869    *
870    * @throws KeeperException
871    * @throws IOException
872    * @throws Exception
873    */
874   @Test(timeout = 60000)
875   public void testDisablingTableRegionsAssignmentDuringCleanClusterStartup()
876       throws KeeperException, IOException, Exception {
877     this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
878         MockedLoadBalancer.class, LoadBalancer.class);
879     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(
880         new HashMap<ServerName, ServerLoad>(0));
881     List<ServerName> destServers = new ArrayList<ServerName>(1);
882     destServers.add(SERVERNAME_A);
883     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
884     // To avoid cast exception in DisableTableHandler process.
885     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
886     Server server = new HMaster(HTU.getConfiguration());
887     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
888         this.serverManager);
889     AtomicBoolean gate = new AtomicBoolean(false);
890     if (balancer instanceof MockedLoadBalancer) {
891       ((MockedLoadBalancer) balancer).setGateVariable(gate);
892     }
893     try{
894       // set table in disabling state.
895       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
896       am.joinCluster();
897       // should not call retainAssignment if we get empty regions in assignAllUserRegions.
898       assertFalse(
899           "Assign should not be invoked for disabling table regions during clean cluster startup.",
900           gate.get());
901       // need to change table state from disabling to disabled.
902       assertTrue("Table should be disabled.",
903           am.getZKTable().isDisabledTable(REGIONINFO.getTable()));
904     } finally {
905       this.server.getConfiguration().setClass(
906         HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
907         LoadBalancer.class);
908       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
909       am.shutdown();
910     }
911   }
912 
913   /**
914    * Test verifies whether all the enabling table regions assigned only once during master startup.
915    *
916    * @throws KeeperException
917    * @throws IOException
918    * @throws Exception
919    */
920   @Test
921   public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception {
922     enabling = true;
923     List<ServerName> destServers = new ArrayList<ServerName>(1);
924     destServers.add(SERVERNAME_A);
925     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
926     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
927     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
928     Server server = new HMaster(HTU.getConfiguration());
929     Whitebox.setInternalState(server, "serverManager", this.serverManager);
930     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
931         this.serverManager);
932     try {
933       // set table in enabling state.
934       am.getZKTable().setEnablingTable(REGIONINFO.getTable());
935       new EnableTableHandler(server, REGIONINFO.getTable(),
936           am.getCatalogTracker(), am, new NullTableLockManager(), true).prepare()
937           .process();
938       assertEquals("Number of assignments should be 1.", 1, assignmentCount);
939       assertTrue("Table should be enabled.",
940           am.getZKTable().isEnabledTable(REGIONINFO.getTable()));
941     } finally {
942       enabling = false;
943       assignmentCount = 0;
944       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
945       am.shutdown();
946       ZKAssign.deleteAllNodes(this.watcher);
947     }
948   }
949 
950   /**
951    * Test verifies whether stale znodes of unknown tables as for the hbase:meta will be removed or
952    * not.
953    * @throws KeeperException
954    * @throws IOException
955    * @throws Exception
956    */
957   @Test
958   public void testMasterRestartShouldRemoveStaleZnodesOfUnknownTableAsForMeta()
959       throws KeeperException, IOException, Exception {
960     List<ServerName> destServers = new ArrayList<ServerName>(1);
961     destServers.add(SERVERNAME_A);
962     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
963     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
964     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
965     Server server = new HMaster(HTU.getConfiguration());
966     Whitebox.setInternalState(server, "serverManager", this.serverManager);
967     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
968         this.serverManager);
969     try {
970       TableName tableName = TableName.valueOf("dummyTable");
971       // set table in enabling state.
972       am.getZKTable().setEnablingTable(tableName);
973       am.joinCluster();
974       assertFalse("Table should not be present in zookeeper.",
975         am.getZKTable().isTablePresent(tableName));
976     } finally {
977     }
978   }
979   /**
980    * When a region is in transition, if the region server opening the region goes down,
981    * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign).
982    * This test is to make sure SSH reassigns it right away.
983    */
984   @Test
985   public void testSSHTimesOutOpeningRegionTransition()
986       throws KeeperException, IOException, ServiceException {
987     // We need a mocked catalog tracker.
988     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
989     // Create an AM.
990     AssignmentManagerWithExtrasForTesting am =
991       setUpMockedAssignmentManager(this.server, this.serverManager);
992     // adding region in pending open.
993     RegionState state = new RegionState(REGIONINFO,
994       State.OPENING, System.currentTimeMillis(), SERVERNAME_A);
995     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B);
996     am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state);
997     // adding region plan
998     am.regionPlans.put(REGIONINFO.getEncodedName(),
999       new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A));
1000     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
1001 
1002     try {
1003       am.assignInvoked = false;
1004       processServerShutdownHandler(ct, am, false);
1005       assertTrue(am.assignInvoked);
1006     } finally {
1007       am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName());
1008       am.regionPlans.remove(REGIONINFO.getEncodedName());
1009     }
1010   }
1011 
1012   /**
1013    * Scenario:<ul>
1014    *  <li> master starts a close, and creates a znode</li>
1015    *  <li> it fails just at this moment, before contacting the RS</li>
1016    *  <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
1017    *    we don't know, and we have an exception.</li>
1018    *  <li> the master must handle this nicely and reassign.
1019    *  </ul>
1020    */
1021   @Test
1022   public void testClosingFailureDuringRecovery() throws Exception {
1023 
1024     AssignmentManagerWithExtrasForTesting am =
1025         setUpMockedAssignmentManager(this.server, this.serverManager);
1026     ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
1027     am.getRegionStates().createRegionState(REGIONINFO);
1028 
1029     assertFalse( am.getRegionStates().isRegionsInTransition() );
1030 
1031     am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
1032 
1033     assertTrue( am.getRegionStates().isRegionsInTransition() );
1034   }
1035 
1036   /**
1037    * Creates a new ephemeral node in the SPLITTING state for the specified region.
1038    * Create it ephemeral in case regionserver dies mid-split.
1039    *
1040    * <p>Does not transition nodes from other states.  If a node already exists
1041    * for this region, a {@link NodeExistsException} will be thrown.
1042    *
1043    * @param zkw zk reference
1044    * @param region region to be created as offline
1045    * @param serverName server event originates from
1046    * @return Version of znode created.
1047    * @throws KeeperException
1048    * @throws IOException
1049    */
1050   // Copied from SplitTransaction rather than open the method over there in
1051   // the regionserver package.
1052   private static int createNodeSplitting(final ZooKeeperWatcher zkw,
1053       final HRegionInfo region, final ServerName serverName)
1054   throws KeeperException, IOException {
1055     RegionTransition rt =
1056       RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
1057         region.getRegionName(), serverName);
1058 
1059     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1060     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1061       throw new IOException("Failed create of ephemeral " + node);
1062     }
1063     // Transition node from SPLITTING to SPLITTING and pick up version so we
1064     // can be sure this znode is ours; version is needed deleting.
1065     return transitionNodeSplitting(zkw, region, serverName, -1);
1066   }
1067 
1068   // Copied from SplitTransaction rather than open the method over there in
1069   // the regionserver package.
1070   private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
1071       final HRegionInfo parent,
1072       final ServerName serverName, final int version)
1073   throws KeeperException, IOException {
1074     return ZKAssign.transitionNode(zkw, parent, serverName,
1075       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
1076   }
1077 
1078   private void unassign(final AssignmentManager am, final ServerName sn,
1079       final HRegionInfo hri) throws RegionException {
1080     // Before I can unassign a region, I need to set it online.
1081     am.regionOnline(hri, sn);
1082     // Unassign region.
1083     am.unassign(hri);
1084   }
1085 
1086   /**
1087    * Create an {@link AssignmentManagerWithExtrasForTesting} that has mocked
1088    * {@link CatalogTracker} etc.
1089    * @param server
1090    * @param manager
1091    * @return An AssignmentManagerWithExtras with mock connections, etc.
1092    * @throws IOException
1093    * @throws KeeperException
1094    */
1095   private AssignmentManagerWithExtrasForTesting setUpMockedAssignmentManager(final Server server,
1096       final ServerManager manager) throws IOException, KeeperException, ServiceException {
1097     // We need a mocked catalog tracker. Its used by our AM instance.
1098     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1099     // Make an RS Interface implementation. Make it so a scanner can go against
1100     // it and a get to return the single region, REGIONINFO, this test is
1101     // messing with. Needed when "new master" joins cluster. AM will try and
1102     // rebuild its list of user regions and it will also get the HRI that goes
1103     // with an encoded name by doing a Get on hbase:meta
1104     ClientProtos.ClientService.BlockingInterface ri =
1105       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
1106     // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO
1107     Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
1108     final ScanResponse.Builder builder = ScanResponse.newBuilder();
1109     builder.setMoreResults(true);
1110     builder.addCellsPerResult(r.size());
1111     final List<CellScannable> rows = new ArrayList<CellScannable>(1);
1112     rows.add(r);
1113     Answer<ScanResponse> ans = new Answer<ClientProtos.ScanResponse>() {
1114       public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
1115         PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
1116             .getArguments()[0];
1117         if (controller != null) {
1118           controller.setCellScanner(CellUtil.createCellScanner(rows));
1119         }
1120         return builder.build();
1121       }
1122     };
1123     if (enabling) {
1124       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any()))
1125           .thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans)
1126           .thenReturn(ScanResponse.newBuilder().setMoreResults(false).build());
1127     } else {
1128       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any())).thenAnswer(
1129           ans);
1130     }
1131     // If a get, return the above result too for REGIONINFO
1132     GetResponse.Builder getBuilder = GetResponse.newBuilder();
1133     getBuilder.setResult(ProtobufUtil.toResult(r));
1134     Mockito.when(ri.get((RpcController)Mockito.any(), (GetRequest) Mockito.any())).
1135       thenReturn(getBuilder.build());
1136     // Get a connection w/ mocked up common methods.
1137     HConnection connection = HConnectionTestingUtility.
1138       getMockedConnectionAndDecorate(HTU.getConfiguration(), null,
1139         ri, SERVERNAME_B, REGIONINFO);
1140     // Make it so we can get the connection from our mocked catalogtracker
1141     Mockito.when(ct.getConnection()).thenReturn(connection);
1142     // Create and startup an executor. Used by AM handling zk callbacks.
1143     ExecutorService executor = startupMasterExecutor("mockedAMExecutor");
1144     this.balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
1145     AssignmentManagerWithExtrasForTesting am = new AssignmentManagerWithExtrasForTesting(
1146       server, manager, ct, this.balancer, executor, new NullTableLockManager());
1147     return am;
1148   }
1149 
1150   /**
1151    * An {@link AssignmentManager} with some extra facility used testing
1152    */
1153   class AssignmentManagerWithExtrasForTesting extends AssignmentManager {
1154     // Keep a reference so can give it out below in {@link #getExecutorService}
1155     private final ExecutorService es;
1156     // Ditto for ct
1157     private final CatalogTracker ct;
1158     boolean processRITInvoked = false;
1159     boolean assignInvoked = false;
1160     AtomicBoolean gate = new AtomicBoolean(true);
1161 
1162     public AssignmentManagerWithExtrasForTesting(
1163         final Server master, final ServerManager serverManager,
1164         final CatalogTracker catalogTracker, final LoadBalancer balancer,
1165         final ExecutorService service, final TableLockManager tableLockManager)
1166             throws KeeperException, IOException {
1167       super(master, serverManager, catalogTracker, balancer, service, null, tableLockManager);
1168       this.es = service;
1169       this.ct = catalogTracker;
1170     }
1171 
1172     @Override
1173     boolean processRegionInTransition(String encodedRegionName,
1174         HRegionInfo regionInfo) throws KeeperException, IOException {
1175       this.processRITInvoked = true;
1176       return super.processRegionInTransition(encodedRegionName, regionInfo);
1177     }
1178 
1179     @Override
1180     public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) {
1181       if (enabling) {
1182         assignmentCount++;
1183         this.regionOnline(region, SERVERNAME_A);
1184       } else {
1185         super.assign(region, setOfflineInZK, forceNewPlan);
1186         this.gate.set(true);
1187       }
1188     }
1189 
1190     @Override
1191     boolean assign(ServerName destination, List<HRegionInfo> regions) {
1192       if (enabling) {
1193         for (HRegionInfo region : regions) {
1194           assignmentCount++;
1195           this.regionOnline(region, SERVERNAME_A);
1196         }
1197         return true;
1198       }
1199       return super.assign(destination, regions);
1200     }
1201 
1202     @Override
1203     public void assign(List<HRegionInfo> regions)
1204         throws IOException, InterruptedException {
1205       assignInvoked = (regions != null && regions.size() > 0);
1206       super.assign(regions);
1207       this.gate.set(true);
1208     }
1209 
1210     /** reset the watcher */
1211     void setWatcher(ZooKeeperWatcher watcher) {
1212       this.watcher = watcher;
1213     }
1214 
1215     /**
1216      * @return ExecutorService used by this instance.
1217      */
1218     ExecutorService getExecutorService() {
1219       return this.es;
1220     }
1221 
1222     /**
1223      * @return CatalogTracker used by this AM (Its a mock).
1224      */
1225     CatalogTracker getCatalogTracker() {
1226       return this.ct;
1227     }
1228   }
1229 
1230   /**
1231    * Call joinCluster on the passed AssignmentManager.  Do it in a thread
1232    * so it runs independent of what all else is going on.  Try to simulate
1233    * an AM running insided a failed over master by clearing all in-memory
1234    * AM state first.
1235   */
1236   private void startFakeFailedOverMasterAssignmentManager(final AssignmentManager am,
1237       final ZooKeeperWatcher watcher) {
1238     // Make sure our new AM gets callbacks; once registered, we can't unregister.
1239     // Thats ok because we make a new zk watcher for each test.
1240     watcher.registerListenerFirst(am);
1241     Thread t = new Thread("RunAmJoinCluster") {
1242       public void run() {
1243         // Call the joinCluster function as though we were doing a master
1244         // failover at this point. It will stall just before we go to add
1245         // the RIT region to our RIT Map in AM at processRegionsInTransition.
1246         // First clear any inmemory state from AM so it acts like a new master
1247         // coming on line.
1248         am.getRegionStates().regionsInTransition.clear();
1249         am.regionPlans.clear();
1250         try {
1251           am.joinCluster();
1252         } catch (IOException e) {
1253           throw new RuntimeException(e);
1254         } catch (KeeperException e) {
1255           throw new RuntimeException(e);
1256         } catch (InterruptedException e) {
1257           throw new RuntimeException(e);
1258         }
1259       }
1260     };
1261     t.start();
1262     while (!t.isAlive()) Threads.sleep(1);
1263   }
1264 
1265   @Test
1266   public void testForceAssignMergingRegion() throws Exception {
1267     // Region to use in test.
1268     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1269     // Need a mocked catalog tracker.
1270     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1271     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1272       server.getConfiguration());
1273     // Create an AM.
1274     AssignmentManager am = new AssignmentManager(this.server,
1275       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1276     RegionStates regionStates = am.getRegionStates();
1277     try {
1278       // First set the state of the region to merging
1279       regionStates.updateRegionState(hri, RegionState.State.MERGING);
1280       // Now, try to assign it with force new plan
1281       am.assign(hri, true, true);
1282       assertEquals("The region should be still in merging state",
1283         RegionState.State.MERGING, regionStates.getRegionState(hri).getState());
1284     } finally {
1285       am.shutdown();
1286     }
1287   }
1288 
1289   /**
1290    * Test assignment related ZK events are ignored by AM if the region is not known
1291    * by AM to be in transition. During normal operation, all assignments are started
1292    * by AM (not considering split/merge), if an event is received but the region
1293    * is not in transition, the event must be a very late one. So it can be ignored.
1294    * During master failover, since AM watches assignment znodes after failover cleanup
1295    * is completed, when an event comes in, AM should already have the region in transition
1296    * if ZK is used during the assignment action (only hbck doesn't use ZK for region
1297    * assignment). So during master failover, we can ignored such events too.
1298    */
1299   @Test
1300   public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException {
1301     // Region to use in test.
1302     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1303     // Need a mocked catalog tracker.
1304     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1305     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1306       server.getConfiguration());
1307     final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
1308     // Create an AM.
1309     AssignmentManager am = new AssignmentManager(this.server,
1310       this.serverManager, ct, balancer, null, null, master.getTableLockManager()) {
1311 
1312       @Override
1313       void handleRegion(final RegionTransition rt, int expectedVersion) {
1314         super.handleRegion(rt, expectedVersion);
1315         if (rt != null && Bytes.equals(hri.getRegionName(),
1316           rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
1317           zkEventProcessed.set(true);
1318         }
1319       }
1320     };
1321     try {
1322       // First make sure the region is not in transition
1323       am.getRegionStates().regionOffline(hri);
1324       zkEventProcessed.set(false); // Reset it before faking zk transition
1325       this.watcher.registerListenerFirst(am);
1326       assertFalse("The region should not be in transition",
1327         am.getRegionStates().isRegionInTransition(hri));
1328       ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
1329       // Trigger a transition event
1330       ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
1331       long startTime = EnvironmentEdgeManager.currentTimeMillis();
1332       while (!zkEventProcessed.get()) {
1333         assertTrue("Timed out in waiting for ZK event to be processed",
1334           EnvironmentEdgeManager.currentTimeMillis() - startTime < 30000);
1335         Threads.sleepWithoutInterrupt(100);
1336       }
1337       assertFalse(am.getRegionStates().isRegionInTransition(hri));
1338     } finally {
1339       am.shutdown();
1340     }
1341   }
1342 
1343   /**
1344    * If a table is deleted, we should not be able to balance it anymore.
1345    * Otherwise, the region will be brought back.
1346    * @throws Exception
1347    */
1348   @Test
1349   public void testBalanceRegionOfDeletedTable() throws Exception {
1350     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1351     AssignmentManager am = new AssignmentManager(this.server, this.serverManager,
1352       ct, balancer, null, null, master.getTableLockManager());
1353     RegionStates regionStates = am.getRegionStates();
1354     HRegionInfo hri = REGIONINFO;
1355     regionStates.createRegionState(hri);
1356     assertFalse(regionStates.isRegionInTransition(hri));
1357     RegionPlan plan = new RegionPlan(hri, SERVERNAME_A, SERVERNAME_B);
1358     // Fake table is deleted
1359     regionStates.tableDeleted(hri.getTable());
1360     am.balance(plan);
1361     assertFalse("The region should not in transition",
1362       regionStates.isRegionInTransition(hri));
1363   }
1364 }