View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotSame;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.hbase.CellScannable;
34  import org.apache.hadoop.hbase.CellUtil;
35  import org.apache.hadoop.hbase.DoNotRetryIOException;
36  import org.apache.hadoop.hbase.HBaseConfiguration;
37  import org.apache.hadoop.hbase.HBaseTestingUtility;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.testclassification.MediumTests;
41  import org.apache.hadoop.hbase.RegionException;
42  import org.apache.hadoop.hbase.RegionTransition;
43  import org.apache.hadoop.hbase.Server;
44  import org.apache.hadoop.hbase.ServerLoad;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.TableName;
47  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
48  import org.apache.hadoop.hbase.catalog.CatalogTracker;
49  import org.apache.hadoop.hbase.catalog.MetaMockingUtil;
50  import org.apache.hadoop.hbase.client.HConnection;
51  import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
52  import org.apache.hadoop.hbase.client.Result;
53  import org.apache.hadoop.hbase.exceptions.DeserializationException;
54  import org.apache.hadoop.hbase.executor.EventType;
55  import org.apache.hadoop.hbase.executor.ExecutorService;
56  import org.apache.hadoop.hbase.executor.ExecutorType;
57  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
58  import org.apache.hadoop.hbase.master.RegionState.State;
59  import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager;
60  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
61  import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
62  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
63  import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
64  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
65  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
66  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
67  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
68  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
69  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
70  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
71  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
72  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
73  import org.apache.hadoop.hbase.util.Bytes;
74  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
75  import org.apache.hadoop.hbase.util.Threads;
76  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
77  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
78  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
79  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
80  import org.apache.zookeeper.KeeperException;
81  import org.apache.zookeeper.KeeperException.NodeExistsException;
82  import org.apache.zookeeper.Watcher;
83  import org.junit.After;
84  import org.junit.AfterClass;
85  import org.junit.Before;
86  import org.junit.BeforeClass;
87  import org.junit.Test;
88  import org.junit.experimental.categories.Category;
89  import org.mockito.Mockito;
90  import org.mockito.internal.util.reflection.Whitebox;
91  import org.mockito.invocation.InvocationOnMock;
92  import org.mockito.stubbing.Answer;
93  
94  import com.google.protobuf.RpcController;
95  import com.google.protobuf.ServiceException;
96  
97  
98  /**
99   * Test {@link AssignmentManager}
100  */
101 @Category(MediumTests.class)
102 public class TestAssignmentManager {
103   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
104   private static final ServerName SERVERNAME_A =
105       ServerName.valueOf("example.org", 1234, 5678);
106   private static final ServerName SERVERNAME_B =
107       ServerName.valueOf("example.org", 0, 5678);
108   private static final HRegionInfo REGIONINFO =
109     new HRegionInfo(TableName.valueOf("t"),
110       HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW);
111   private static int assignmentCount;
112   private static boolean enabling = false;
113 
114   // Mocked objects or; get redone for each test.
115   private Server server;
116   private ServerManager serverManager;
117   private ZooKeeperWatcher watcher;
118   private LoadBalancer balancer;
119   private HMaster master;
120 
121   @BeforeClass
122   public static void beforeClass() throws Exception {
123     HTU.getConfiguration().setBoolean("hbase.assignment.usezk", true);
124     HTU.startMiniZKCluster();
125   }
126 
127   @AfterClass
128   public static void afterClass() throws IOException {
129     HTU.shutdownMiniZKCluster();
130   }
131 
132   @Before
133   public void before() throws ZooKeeperConnectionException, IOException {
134     // TODO: Make generic versions of what we do below and put up in a mocking
135     // utility class or move up into HBaseTestingUtility.
136 
137     // Mock a Server.  Have it return a legit Configuration and ZooKeeperWatcher.
138     // If abort is called, be sure to fail the test (don't just swallow it
139     // silently as is mockito default).
140     this.server = Mockito.mock(Server.class);
141     Mockito.when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
142     Mockito.when(server.getConfiguration()).thenReturn(HTU.getConfiguration());
143     Mockito.when(server.getCatalogTracker()).thenReturn(null);
144     this.watcher =
145       new ZooKeeperWatcher(HTU.getConfiguration(), "mockedServer", this.server, true);
146     Mockito.when(server.getZooKeeper()).thenReturn(this.watcher);
147     Mockito.doThrow(new RuntimeException("Aborted")).
148       when(server).abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
149 
150     // Mock a ServerManager.  Say server SERVERNAME_{A,B} are online.  Also
151     // make it so if close or open, we return 'success'.
152     this.serverManager = Mockito.mock(ServerManager.class);
153     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
154     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
155     Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer());
156     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
157     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
158     onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
159     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
160         new ArrayList<ServerName>(onlineServers.keySet()));
161     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
162 
163     List<ServerName> avServers = new ArrayList<ServerName>();
164     avServers.addAll(onlineServers.keySet());
165     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
166     Mockito.when(this.serverManager.createDestinationServersList(null)).thenReturn(avServers);
167 
168     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)).
169       thenReturn(true);
170     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
171       thenReturn(true);
172     // Ditto on open.
173     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
174       thenReturn(RegionOpeningState.OPENED);
175     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
176       thenReturn(RegionOpeningState.OPENED);
177     this.master = Mockito.mock(HMaster.class);
178 
179     Mockito.when(this.master.getServerManager()).thenReturn(serverManager);
180   }
181 
182   @After
183     public void after() throws KeeperException {
184     if (this.watcher != null) {
185       // Clean up all znodes
186       ZKAssign.deleteAllNodes(this.watcher);
187       this.watcher.close();
188     }
189   }
190 
191   /**
192    * Test a balance going on at same time as a master failover
193    *
194    * @throws IOException
195    * @throws KeeperException
196    * @throws InterruptedException
197    * @throws DeserializationException
198    */
199   @Test(timeout = 60000)
200   public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
201   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
202     AssignmentManagerWithExtrasForTesting am =
203       setUpMockedAssignmentManager(this.server, this.serverManager);
204     try {
205       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
206       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
207       while (!am.processRITInvoked) Thread.sleep(1);
208       // As part of the failover cleanup, the balancing region plan is removed.
209       // So a random server will be used to open the region. For testing purpose,
210       // let's assume it is going to open on server b:
211       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
212 
213       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
214 
215       // Now fake the region closing successfully over on the regionserver; the
216       // regionserver will have set the region in CLOSED state. This will
217       // trigger callback into AM. The below zk close call is from the RS close
218       // region handler duplicated here because its down deep in a private
219       // method hard to expose.
220       int versionid =
221         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
222       assertNotSame(versionid, -1);
223       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
224 
225       // Get current versionid else will fail on transition from OFFLINE to
226       // OPENING below
227       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
228       assertNotSame(-1, versionid);
229       // This uglyness below is what the openregionhandler on RS side does.
230       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
231         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
232         EventType.RS_ZK_REGION_OPENING, versionid);
233       assertNotSame(-1, versionid);
234       // Move znode from OPENING to OPENED as RS does on successful open.
235       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
236         SERVERNAME_B, versionid);
237       assertNotSame(-1, versionid);
238       am.gate.set(false);
239       // Block here until our znode is cleared or until this test times out.
240       ZKAssign.blockUntilNoRIT(watcher);
241     } finally {
242       am.getExecutorService().shutdown();
243       am.shutdown();
244     }
245   }
246 
247   @Test(timeout = 60000)
248   public void testBalanceOnMasterFailoverScenarioWithClosedNode()
249   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
250     AssignmentManagerWithExtrasForTesting am =
251       setUpMockedAssignmentManager(this.server, this.serverManager);
252     try {
253       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
254       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
255       while (!am.processRITInvoked) Thread.sleep(1);
256       // As part of the failover cleanup, the balancing region plan is removed.
257       // So a random server will be used to open the region. For testing purpose,
258       // let's assume it is going to open on server b:
259       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
260 
261       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
262 
263       // Now fake the region closing successfully over on the regionserver; the
264       // regionserver will have set the region in CLOSED state. This will
265       // trigger callback into AM. The below zk close call is from the RS close
266       // region handler duplicated here because its down deep in a private
267       // method hard to expose.
268       int versionid =
269         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
270       assertNotSame(versionid, -1);
271       am.gate.set(false);
272       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
273 
274       // Get current versionid else will fail on transition from OFFLINE to
275       // OPENING below
276       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
277       assertNotSame(-1, versionid);
278       // This uglyness below is what the openregionhandler on RS side does.
279       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
280           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
281           EventType.RS_ZK_REGION_OPENING, versionid);
282       assertNotSame(-1, versionid);
283       // Move znode from OPENING to OPENED as RS does on successful open.
284       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
285           SERVERNAME_B, versionid);
286       assertNotSame(-1, versionid);
287 
288       // Block here until our znode is cleared or until this test timesout.
289       ZKAssign.blockUntilNoRIT(watcher);
290     } finally {
291       am.getExecutorService().shutdown();
292       am.shutdown();
293     }
294   }
295 
296   @Test(timeout = 60000)
297   public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
298   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
299     AssignmentManagerWithExtrasForTesting am =
300       setUpMockedAssignmentManager(this.server, this.serverManager);
301     try {
302       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
303       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
304       while (!am.processRITInvoked) Thread.sleep(1);
305       // As part of the failover cleanup, the balancing region plan is removed.
306       // So a random server will be used to open the region. For testing purpose,
307       // let's assume it is going to open on server b:
308       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
309 
310       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
311 
312       // Now fake the region closing successfully over on the regionserver; the
313       // regionserver will have set the region in CLOSED state. This will
314       // trigger callback into AM. The below zk close call is from the RS close
315       // region handler duplicated here because its down deep in a private
316       // method hard to expose.
317       int versionid =
318         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
319       assertNotSame(versionid, -1);
320       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
321 
322       am.gate.set(false);
323       // Get current versionid else will fail on transition from OFFLINE to
324       // OPENING below
325       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
326       assertNotSame(-1, versionid);
327       // This uglyness below is what the openregionhandler on RS side does.
328       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
329           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
330           EventType.RS_ZK_REGION_OPENING, versionid);
331       assertNotSame(-1, versionid);
332       // Move znode from OPENING to OPENED as RS does on successful open.
333       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
334           SERVERNAME_B, versionid);
335       assertNotSame(-1, versionid);
336       // Block here until our znode is cleared or until this test timesout.
337       ZKAssign.blockUntilNoRIT(watcher);
338     } finally {
339       am.getExecutorService().shutdown();
340       am.shutdown();
341     }
342   }
343 
344   private void createRegionPlanAndBalance(
345       final AssignmentManager am, final ServerName from,
346       final ServerName to, final HRegionInfo hri) throws RegionException {
347     // Call the balance function but fake the region being online first at
348     // servername from.
349     am.regionOnline(hri, from);
350     // Balance region from 'from' to 'to'. It calls unassign setting CLOSING state
351     // up in zk.  Create a plan and balance
352     am.balance(new RegionPlan(hri, from, to));
353   }
354 
355   /**
356    * Tests AssignmentManager balance function.  Runs a balance moving a region
357    * from one server to another mocking regionserver responding over zk.
358    * @throws IOException
359    * @throws KeeperException
360    * @throws DeserializationException
361    */
362   @Test (timeout=180000)
363   public void testBalance()
364     throws IOException, KeeperException, DeserializationException, InterruptedException {
365     // Create and startup an executor.  This is used by AssignmentManager
366     // handling zk callbacks.
367     ExecutorService executor = startupMasterExecutor("testBalanceExecutor");
368 
369     // We need a mocked catalog tracker.
370     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
371     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
372         .getConfiguration());
373     // Create an AM.
374     AssignmentManager am = new AssignmentManager(this.server,
375       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
376     am.failoverCleanupDone.set(true);
377     try {
378       // Make sure our new AM gets callbacks; once registered, can't unregister.
379       // Thats ok because we make a new zk watcher for each test.
380       this.watcher.registerListenerFirst(am);
381       // Call the balance function but fake the region being online first at
382       // SERVERNAME_A.  Create a balance plan.
383       am.regionOnline(REGIONINFO, SERVERNAME_A);
384       // Balance region from A to B.
385       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
386       am.balance(plan);
387 
388       RegionStates regionStates = am.getRegionStates();
389       // Must be failed to close since the server is fake
390       assertTrue(regionStates.isRegionInTransition(REGIONINFO)
391         && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
392       // Move it back to pending_close
393       regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
394 
395       // Now fake the region closing successfully over on the regionserver; the
396       // regionserver will have set the region in CLOSED state.  This will
397       // trigger callback into AM. The below zk close call is from the RS close
398       // region handler duplicated here because its down deep in a private
399       // method hard to expose.
400       int versionid =
401         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
402       assertNotSame(versionid, -1);
403       // AM is going to notice above CLOSED and queue up a new assign.  The
404       // assign will go to open the region in the new location set by the
405       // balancer.  The zk node will be OFFLINE waiting for regionserver to
406       // transition it through OPENING, OPENED.  Wait till we see the OFFLINE
407       // zk node before we proceed.
408       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
409 
410       // Get current versionid else will fail on transition from OFFLINE to OPENING below
411       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
412       assertNotSame(-1, versionid);
413       // This uglyness below is what the openregionhandler on RS side does.
414       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
415         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
416         EventType.RS_ZK_REGION_OPENING, versionid);
417       assertNotSame(-1, versionid);
418       // Move znode from OPENING to OPENED as RS does on successful open.
419       versionid =
420         ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
421       assertNotSame(-1, versionid);
422       // Wait on the handler removing the OPENED znode.
423       while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
424     } finally {
425       executor.shutdown();
426       am.shutdown();
427       // Clean up all znodes
428       ZKAssign.deleteAllNodes(this.watcher);
429     }
430   }
431 
432   /**
433    * Run a simple server shutdown handler.
434    * @throws KeeperException
435    * @throws IOException
436    */
437   @Test (timeout=180000)
438   public void testShutdownHandler()
439       throws KeeperException, IOException, ServiceException {
440     // Create and startup an executor.  This is used by AssignmentManager
441     // handling zk callbacks.
442     ExecutorService executor = startupMasterExecutor("testShutdownHandler");
443 
444     // We need a mocked catalog tracker.
445     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
446     // Create an AM.
447     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
448         this.server, this.serverManager);
449     try {
450       processServerShutdownHandler(ct, am, false);
451     } finally {
452       executor.shutdown();
453       am.shutdown();
454       // Clean up all znodes
455       ZKAssign.deleteAllNodes(this.watcher);
456     }
457   }
458 
459   /**
460    * To test closed region handler to remove rit and delete corresponding znode
461    * if region in pending close or closing while processing shutdown of a region
462    * server.(HBASE-5927).
463    *
464    * @throws KeeperException
465    * @throws IOException
466    * @throws ServiceException
467    */
468   @Test (timeout=180000)
469   public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException,
470       ServiceException {
471     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
472     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
473   }
474 
475 
476   /**
477    * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
478    * has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also HBASE-5806
479    *
480    * @throws KeeperException
481    * @throws IOException
482    */
483   @Test (timeout=180000)
484   public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
485     // true indicates the region is split but still in RIT
486     testCaseWithSplitRegionPartial(true);
487     // false indicate the region is not split
488     testCaseWithSplitRegionPartial(false);
489   }
490 
491   private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
492       IOException, NodeExistsException, InterruptedException, ServiceException {
493     // Create and startup an executor. This is used by AssignmentManager
494     // handling zk callbacks.
495     ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
496     // We need a mocked catalog tracker.
497     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
498     ZKAssign.deleteAllNodes(this.watcher);
499 
500     // Create an AM.
501     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
502       this.server, this.serverManager);
503     // adding region to regions and servers maps.
504     am.regionOnline(REGIONINFO, SERVERNAME_A);
505     // adding region in pending close.
506     am.getRegionStates().updateRegionState(
507       REGIONINFO, State.SPLITTING, SERVERNAME_A);
508     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
509     RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
510         REGIONINFO.getRegionName(), SERVERNAME_A);
511     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
512     // create znode in M_ZK_REGION_CLOSING state.
513     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
514 
515     try {
516       processServerShutdownHandler(ct, am, regionSplitDone);
517       // check znode deleted or not.
518       // In both cases the znode should be deleted.
519 
520       if (regionSplitDone) {
521         assertFalse("Region state of region in SPLITTING should be removed from rit.",
522             am.getRegionStates().isRegionsInTransition());
523       } else {
524         while (!am.assignInvoked) {
525           Thread.sleep(1);
526         }
527         assertTrue("Assign should be invoked.", am.assignInvoked);
528       }
529     } finally {
530       REGIONINFO.setOffline(false);
531       REGIONINFO.setSplit(false);
532       executor.shutdown();
533       am.shutdown();
534       // Clean up all znodes
535       ZKAssign.deleteAllNodes(this.watcher);
536     }
537   }
538 
539   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
540       IOException, NodeExistsException, ServiceException {
541     // Create and startup an executor. This is used by AssignmentManager
542     // handling zk callbacks.
543     ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
544     // We need a mocked catalog tracker.
545     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
546     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
547     ZKAssign.deleteAllNodes(this.watcher);
548 
549     // Create an AM.
550     AssignmentManager am = new AssignmentManager(this.server,
551       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
552     // adding region to regions and servers maps.
553     am.regionOnline(REGIONINFO, SERVERNAME_A);
554     // adding region in pending close.
555     am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
556     if (state == Table.State.DISABLING) {
557       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
558     } else {
559       am.getZKTable().setDisabledTable(REGIONINFO.getTable());
560     }
561     RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
562         REGIONINFO.getRegionName(), SERVERNAME_A);
563     // RegionTransitionData data = new
564     // RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
565     // REGIONINFO.getRegionName(), SERVERNAME_A);
566     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
567     // create znode in M_ZK_REGION_CLOSING state.
568     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
569 
570     try {
571       processServerShutdownHandler(ct, am, false);
572       // check znode deleted or not.
573       // In both cases the znode should be deleted.
574       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
575       // check whether in rit or not. In the DISABLING case also the below
576       // assert will be true but the piece of code added for HBASE-5927 will not
577       // do that.
578       if (state == Table.State.DISABLED) {
579         assertFalse("Region state of region in pending close should be removed from rit.",
580             am.getRegionStates().isRegionsInTransition());
581       }
582     } finally {
583       am.setEnabledTable(REGIONINFO.getTable());
584       executor.shutdown();
585       am.shutdown();
586       // Clean up all znodes
587       ZKAssign.deleteAllNodes(this.watcher);
588     }
589   }
590 
591   private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion)
592       throws IOException, ServiceException {
593     // Make sure our new AM gets callbacks; once registered, can't unregister.
594     // Thats ok because we make a new zk watcher for each test.
595     this.watcher.registerListenerFirst(am);
596 
597     // Need to set up a fake scan of meta for the servershutdown handler
598     // Make an RS Interface implementation.  Make it so a scanner can go against it.
599     ClientProtos.ClientService.BlockingInterface implementation =
600       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
601     // Get a meta row result that has region up on SERVERNAME_A
602 
603     Result r;
604     if (splitRegion) {
605       r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
606     } else {
607       r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
608     }
609 
610     final ScanResponse.Builder builder = ScanResponse.newBuilder();
611     builder.setMoreResults(true);
612     builder.addCellsPerResult(r.size());
613     final List<CellScannable> cellScannables = new ArrayList<CellScannable>(1);
614     cellScannables.add(r);
615     Mockito.when(implementation.scan(
616       (RpcController)Mockito.any(), (ScanRequest)Mockito.any())).
617       thenAnswer(new Answer<ScanResponse>() {
618           @Override
619           public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
620             PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
621                 .getArguments()[0];
622             if (controller != null) {
623               controller.setCellScanner(CellUtil.createCellScanner(cellScannables));
624             }
625             return builder.build();
626           }
627       });
628 
629     // Get a connection w/ mocked up common methods.
630     HConnection connection =
631       HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(),
632         null, implementation, SERVERNAME_B, REGIONINFO);
633 
634     // Make it so we can get a catalogtracker from servermanager.. .needed
635     // down in guts of server shutdown handler.
636     Mockito.when(ct.getConnection()).thenReturn(connection);
637     Mockito.when(this.server.getCatalogTracker()).thenReturn(ct);
638 
639     // Now make a server shutdown handler instance and invoke process.
640     // Have it that SERVERNAME_A died.
641     DeadServer deadServers = new DeadServer();
642     deadServers.add(SERVERNAME_A);
643     // I need a services instance that will return the AM
644     MasterFileSystem fs = Mockito.mock(MasterFileSystem.class);
645     Mockito.doNothing().when(fs).setLogRecoveryMode();
646     Mockito.when(fs.getLogRecoveryMode()).thenReturn(RecoveryMode.LOG_REPLAY);
647     MasterServices services = Mockito.mock(MasterServices.class);
648     Mockito.when(services.getAssignmentManager()).thenReturn(am);
649     Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
650     Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
651     Mockito.when(services.getMasterFileSystem()).thenReturn(fs);
652     ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
653       services, deadServers, SERVERNAME_A, false);
654     am.failoverCleanupDone.set(true);
655     handler.process();
656     // The region in r will have been assigned.  It'll be up in zk as unassigned.
657   }
658 
659   /**
660    * Create and startup executor pools. Start same set as master does (just
661    * run a few less).
662    * @param name Name to give our executor
663    * @return Created executor (be sure to call shutdown when done).
664    */
665   private ExecutorService startupMasterExecutor(final String name) {
666     // TODO: Move up into HBaseTestingUtility?  Generally useful.
667     ExecutorService executor = new ExecutorService(name);
668     executor.startExecutorService(ExecutorType.MASTER_OPEN_REGION, 3);
669     executor.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, 3);
670     executor.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, 3);
671     executor.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, 3);
672     return executor;
673   }
674 
675   @Test (timeout=180000)
676   public void testUnassignWithSplitAtSameTime() throws KeeperException, IOException {
677     // Region to use in test.
678     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
679     // First amend the servermanager mock so that when we do send close of the
680     // first meta region on SERVERNAME_A, it will return true rather than
681     // default null.
682     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true);
683     // Need a mocked catalog tracker.
684     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
685     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
686         .getConfiguration());
687     // Create an AM.
688     AssignmentManager am = new AssignmentManager(this.server,
689       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
690     try {
691       // First make sure my mock up basically works.  Unassign a region.
692       unassign(am, SERVERNAME_A, hri);
693       // This delete will fail if the previous unassign did wrong thing.
694       ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_A);
695       // Now put a SPLITTING region in the way.  I don't have to assert it
696       // go put in place.  This method puts it in place then asserts it still
697       // owns it by moving state from SPLITTING to SPLITTING.
698       int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A);
699       // Now, retry the unassign with the SPLTTING in place.  It should just
700       // complete without fail; a sort of 'silent' recognition that the
701       // region to unassign has been split and no longer exists: TOOD: what if
702       // the split fails and the parent region comes back to life?
703       unassign(am, SERVERNAME_A, hri);
704       // This transition should fail if the znode has been messed with.
705       ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A,
706         EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
707       assertFalse(am.getRegionStates().isRegionInTransition(hri));
708     } finally {
709       am.shutdown();
710     }
711   }
712 
713   /**
714    * Tests the processDeadServersAndRegionsInTransition should not fail with NPE
715    * when it failed to get the children. Let's abort the system in this
716    * situation
717    * @throws ServiceException
718    */
719   @Test(timeout = 60000)
720   public void testProcessDeadServersAndRegionsInTransitionShouldNotFailWithNPE()
721       throws IOException, KeeperException, InterruptedException, ServiceException {
722     final RecoverableZooKeeper recoverableZk = Mockito
723         .mock(RecoverableZooKeeper.class);
724     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
725       this.server, this.serverManager);
726     Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest",
727         null) {
728       @Override
729       public RecoverableZooKeeper getRecoverableZooKeeper() {
730         return recoverableZk;
731       }
732     };
733     ((ZooKeeperWatcher) zkw).registerListener(am);
734     Mockito.doThrow(new InterruptedException()).when(recoverableZk)
735         .getChildren("/hbase/region-in-transition", null);
736     am.setWatcher((ZooKeeperWatcher) zkw);
737     try {
738       am.processDeadServersAndRegionsInTransition(null);
739       fail("Expected to abort");
740     } catch (NullPointerException e) {
741       fail("Should not throw NPE");
742     } catch (RuntimeException e) {
743       assertEquals("Aborted", e.getLocalizedMessage());
744     }
745   }
746   /**
747    * TestCase verifies that the regionPlan is updated whenever a region fails to open
748    * and the master tries to process RS_ZK_FAILED_OPEN state.(HBASE-5546).
749    */
750   @Test(timeout = 60000)
751   public void testRegionPlanIsUpdatedWhenRegionFailsToOpen() throws IOException, KeeperException,
752       ServiceException, InterruptedException {
753     this.server.getConfiguration().setClass(
754       HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockedLoadBalancer.class,
755       LoadBalancer.class);
756     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
757       this.server, this.serverManager);
758     try {
759       // Boolean variable used for waiting until randomAssignment is called and
760       // new
761       // plan is generated.
762       AtomicBoolean gate = new AtomicBoolean(false);
763       if (balancer instanceof MockedLoadBalancer) {
764         ((MockedLoadBalancer) balancer).setGateVariable(gate);
765       }
766       ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
767       int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
768       ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A,
769           EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
770       String path = ZKAssign.getNodeName(this.watcher, REGIONINFO
771           .getEncodedName());
772       am.getRegionStates().updateRegionState(
773         REGIONINFO, State.OPENING, SERVERNAME_A);
774       // a dummy plan inserted into the regionPlans. This plan is cleared and
775       // new one is formed
776       am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(
777           REGIONINFO, null, SERVERNAME_A));
778       RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
779       List<ServerName> serverList = new ArrayList<ServerName>(2);
780       serverList.add(SERVERNAME_B);
781       Mockito.when(
782           this.serverManager.createDestinationServersList(SERVERNAME_A))
783           .thenReturn(serverList);
784       am.nodeDataChanged(path);
785       // here we are waiting until the random assignment in the load balancer is
786       // called.
787       while (!gate.get()) {
788         Thread.sleep(10);
789       }
790       // new region plan may take some time to get updated after random
791       // assignment is called and
792       // gate is set to true.
793       RegionPlan newRegionPlan = am.regionPlans
794           .get(REGIONINFO.getEncodedName());
795       while (newRegionPlan == null) {
796         Thread.sleep(10);
797         newRegionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
798       }
799       // the new region plan created may contain the same RS as destination but
800       // it should
801       // be new plan.
802       assertNotSame("Same region plan should not come", regionPlan,
803           newRegionPlan);
804       assertTrue("Destination servers should be different.", !(regionPlan
805           .getDestination().equals(newRegionPlan.getDestination())));
806 
807       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
808     } finally {
809       this.server.getConfiguration().setClass(
810           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
811           LoadBalancer.class);
812       am.getExecutorService().shutdown();
813       am.shutdown();
814     }
815   }
816 
817   /**
818    * Mocked load balancer class used in the testcase to make sure that the testcase waits until
819    * random assignment is called and the gate variable is set to true.
820    */
821   public static class MockedLoadBalancer extends SimpleLoadBalancer {
822     private AtomicBoolean gate;
823 
824     public void setGateVariable(AtomicBoolean gate) {
825       this.gate = gate;
826     }
827 
828     @Override
829     public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
830       ServerName randomServerName = super.randomAssignment(regionInfo, servers);
831       this.gate.set(true);
832       return randomServerName;
833     }
834 
835     @Override
836     public Map<ServerName, List<HRegionInfo>> retainAssignment(
837         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
838       this.gate.set(true);
839       return super.retainAssignment(regions, servers);
840     }
841   }
842 
843   /**
844    * Test the scenario when the master is in failover and trying to process a
845    * region which is in Opening state on a dead RS. Master will force offline the
846    * region and put it in transition. AM relies on SSH to reassign it.
847    */
848   @Test(timeout = 60000)
849   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
850       KeeperException, ServiceException, InterruptedException {
851     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
852       this.server, this.serverManager);
853     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
854     int version = ZKAssign.getVersion(this.watcher, REGIONINFO);
855     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
856         EventType.RS_ZK_REGION_OPENING, version);
857     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING,
858         REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY);
859     version = ZKAssign.getVersion(this.watcher, REGIONINFO);
860     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
861     am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done
862     am.getRegionStates().createRegionState(REGIONINFO);
863     am.gate.set(false);
864     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
865     assertFalse(am.processRegionsInTransition(rt, REGIONINFO, version));
866     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
867     processServerShutdownHandler(ct, am, false);
868     // Waiting for the assignment to get completed.
869     while (!am.gate.get()) {
870       Thread.sleep(10);
871     }
872     assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO
873         .getEncodedName()));
874   }
875 
876   /**
877    * Test verifies whether assignment is skipped for regions of tables in DISABLING state during
878    * clean cluster startup. See HBASE-6281.
879    *
880    * @throws KeeperException
881    * @throws IOException
882    * @throws Exception
883    */
884   @Test(timeout = 60000)
885   public void testDisablingTableRegionsAssignmentDuringCleanClusterStartup()
886       throws KeeperException, IOException, Exception {
887     this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
888         MockedLoadBalancer.class, LoadBalancer.class);
889     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(
890         new HashMap<ServerName, ServerLoad>(0));
891     List<ServerName> destServers = new ArrayList<ServerName>(1);
892     destServers.add(SERVERNAME_A);
893     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
894     // To avoid cast exception in DisableTableHandler process.
895     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
896     Server server = new HMaster(HTU.getConfiguration());
897     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
898         this.serverManager);
899     AtomicBoolean gate = new AtomicBoolean(false);
900     if (balancer instanceof MockedLoadBalancer) {
901       ((MockedLoadBalancer) balancer).setGateVariable(gate);
902     }
903     try{
904       // set table in disabling state.
905       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
906       am.joinCluster();
907       // should not call retainAssignment if we get empty regions in assignAllUserRegions.
908       assertFalse(
909           "Assign should not be invoked for disabling table regions during clean cluster startup.",
910           gate.get());
911       // need to change table state from disabling to disabled.
912       assertTrue("Table should be disabled.",
913           am.getZKTable().isDisabledTable(REGIONINFO.getTable()));
914     } finally {
915       this.server.getConfiguration().setClass(
916         HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
917         LoadBalancer.class);
918       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
919       am.shutdown();
920     }
921   }
922 
923   /**
924    * Test verifies whether all the enabling table regions assigned only once during master startup.
925    *
926    * @throws KeeperException
927    * @throws IOException
928    * @throws Exception
929    */
930   @Test (timeout=180000)
931   public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception {
932     enabling = true;
933     List<ServerName> destServers = new ArrayList<ServerName>(1);
934     destServers.add(SERVERNAME_A);
935     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
936     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
937     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
938     Server server = new HMaster(HTU.getConfiguration());
939     Whitebox.setInternalState(server, "serverManager", this.serverManager);
940     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
941         this.serverManager);
942     try {
943       // set table in enabling state.
944       am.getZKTable().setEnablingTable(REGIONINFO.getTable());
945       new EnableTableHandler(server, REGIONINFO.getTable(),
946           am.getCatalogTracker(), am, new NullTableLockManager(), true).prepare()
947           .process();
948       assertEquals("Number of assignments should be 1.", 1, assignmentCount);
949       assertTrue("Table should be enabled.",
950           am.getZKTable().isEnabledTable(REGIONINFO.getTable()));
951     } finally {
952       enabling = false;
953       assignmentCount = 0;
954       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
955       am.shutdown();
956       ZKAssign.deleteAllNodes(this.watcher);
957     }
958   }
959 
960   /**
961    * Test verifies whether stale znodes of unknown tables as for the hbase:meta will be removed or
962    * not.
963    * @throws KeeperException
964    * @throws IOException
965    * @throws Exception
966    */
967   @Test (timeout=180000)
968   public void testMasterRestartShouldRemoveStaleZnodesOfUnknownTableAsForMeta()
969       throws KeeperException, IOException, Exception {
970     List<ServerName> destServers = new ArrayList<ServerName>(1);
971     destServers.add(SERVERNAME_A);
972     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
973     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
974     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
975     Server server = new HMaster(HTU.getConfiguration());
976     Whitebox.setInternalState(server, "serverManager", this.serverManager);
977     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
978         this.serverManager);
979     try {
980       TableName tableName = TableName.valueOf("dummyTable");
981       // set table in enabling state.
982       am.getZKTable().setEnablingTable(tableName);
983       am.joinCluster();
984       assertFalse("Table should not be present in zookeeper.",
985         am.getZKTable().isTablePresent(tableName));
986     } finally {
987     }
988   }
989   /**
990    * When a region is in transition, if the region server opening the region goes down,
991    * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign).
992    * This test is to make sure SSH reassigns it right away.
993    */
994   @Test (timeout=180000)
995   public void testSSHTimesOutOpeningRegionTransition()
996       throws KeeperException, IOException, ServiceException {
997     // We need a mocked catalog tracker.
998     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
999     // Create an AM.
1000     AssignmentManagerWithExtrasForTesting am =
1001       setUpMockedAssignmentManager(this.server, this.serverManager);
1002     // adding region in pending open.
1003     RegionState state = new RegionState(REGIONINFO,
1004       State.OPENING, System.currentTimeMillis(), SERVERNAME_A);
1005     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B);
1006     am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state);
1007     // adding region plan
1008     am.regionPlans.put(REGIONINFO.getEncodedName(),
1009       new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A));
1010     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
1011 
1012     try {
1013       am.assignInvoked = false;
1014       processServerShutdownHandler(ct, am, false);
1015       assertTrue(am.assignInvoked);
1016     } finally {
1017       am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName());
1018       am.regionPlans.remove(REGIONINFO.getEncodedName());
1019     }
1020   }
1021 
1022   /**
1023    * Scenario:<ul>
1024    *  <li> master starts a close, and creates a znode</li>
1025    *  <li> it fails just at this moment, before contacting the RS</li>
1026    *  <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
1027    *    we don't know, and we have an exception.</li>
1028    *  <li> the master must handle this nicely and reassign.
1029    *  </ul>
1030    */
1031   @Test (timeout=180000)
1032   public void testClosingFailureDuringRecovery() throws Exception {
1033 
1034     AssignmentManagerWithExtrasForTesting am =
1035         setUpMockedAssignmentManager(this.server, this.serverManager);
1036     ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
1037     am.getRegionStates().createRegionState(REGIONINFO);
1038 
1039     assertFalse( am.getRegionStates().isRegionsInTransition() );
1040 
1041     am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
1042 
1043     assertTrue( am.getRegionStates().isRegionsInTransition() );
1044   }
1045 
1046   /**
1047    * Creates a new ephemeral node in the SPLITTING state for the specified region.
1048    * Create it ephemeral in case regionserver dies mid-split.
1049    *
1050    * <p>Does not transition nodes from other states.  If a node already exists
1051    * for this region, a {@link NodeExistsException} will be thrown.
1052    *
1053    * @param zkw zk reference
1054    * @param region region to be created as offline
1055    * @param serverName server event originates from
1056    * @return Version of znode created.
1057    * @throws KeeperException
1058    * @throws IOException
1059    */
1060   // Copied from SplitTransaction rather than open the method over there in
1061   // the regionserver package.
1062   private static int createNodeSplitting(final ZooKeeperWatcher zkw,
1063       final HRegionInfo region, final ServerName serverName)
1064   throws KeeperException, IOException {
1065     RegionTransition rt =
1066       RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
1067         region.getRegionName(), serverName);
1068 
1069     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1070     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1071       throw new IOException("Failed create of ephemeral " + node);
1072     }
1073     // Transition node from SPLITTING to SPLITTING and pick up version so we
1074     // can be sure this znode is ours; version is needed deleting.
1075     return transitionNodeSplitting(zkw, region, serverName, -1);
1076   }
1077 
1078   // Copied from SplitTransaction rather than open the method over there in
1079   // the regionserver package.
1080   private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
1081       final HRegionInfo parent,
1082       final ServerName serverName, final int version)
1083   throws KeeperException, IOException {
1084     return ZKAssign.transitionNode(zkw, parent, serverName,
1085       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
1086   }
1087 
1088   private void unassign(final AssignmentManager am, final ServerName sn,
1089       final HRegionInfo hri) throws RegionException {
1090     // Before I can unassign a region, I need to set it online.
1091     am.regionOnline(hri, sn);
1092     // Unassign region.
1093     am.unassign(hri);
1094   }
1095 
1096   /**
1097    * Create an {@link AssignmentManagerWithExtrasForTesting} that has mocked
1098    * {@link CatalogTracker} etc.
1099    * @param server
1100    * @param manager
1101    * @return An AssignmentManagerWithExtras with mock connections, etc.
1102    * @throws IOException
1103    * @throws KeeperException
1104    */
1105   private AssignmentManagerWithExtrasForTesting setUpMockedAssignmentManager(final Server server,
1106       final ServerManager manager) throws IOException, KeeperException, ServiceException {
1107     // We need a mocked catalog tracker. Its used by our AM instance.
1108     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1109     // Make an RS Interface implementation. Make it so a scanner can go against
1110     // it and a get to return the single region, REGIONINFO, this test is
1111     // messing with. Needed when "new master" joins cluster. AM will try and
1112     // rebuild its list of user regions and it will also get the HRI that goes
1113     // with an encoded name by doing a Get on hbase:meta
1114     ClientProtos.ClientService.BlockingInterface ri =
1115       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
1116     // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO
1117     Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
1118     final ScanResponse.Builder builder = ScanResponse.newBuilder();
1119     builder.setMoreResults(true);
1120     builder.addCellsPerResult(r.size());
1121     final List<CellScannable> rows = new ArrayList<CellScannable>(1);
1122     rows.add(r);
1123     Answer<ScanResponse> ans = new Answer<ClientProtos.ScanResponse>() {
1124       @Override
1125       public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
1126         PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
1127             .getArguments()[0];
1128         if (controller != null) {
1129           controller.setCellScanner(CellUtil.createCellScanner(rows));
1130         }
1131         return builder.build();
1132       }
1133     };
1134     if (enabling) {
1135       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any()))
1136           .thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans)
1137           .thenReturn(ScanResponse.newBuilder().setMoreResults(false).build());
1138     } else {
1139       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any())).thenAnswer(
1140           ans);
1141     }
1142     // If a get, return the above result too for REGIONINFO
1143     GetResponse.Builder getBuilder = GetResponse.newBuilder();
1144     getBuilder.setResult(ProtobufUtil.toResult(r));
1145     Mockito.when(ri.get((RpcController)Mockito.any(), (GetRequest) Mockito.any())).
1146       thenReturn(getBuilder.build());
1147     // Get a connection w/ mocked up common methods.
1148     HConnection connection = HConnectionTestingUtility.
1149       getMockedConnectionAndDecorate(HTU.getConfiguration(), null,
1150         ri, SERVERNAME_B, REGIONINFO);
1151     // Make it so we can get the connection from our mocked catalogtracker
1152     Mockito.when(ct.getConnection()).thenReturn(connection);
1153     // Create and startup an executor. Used by AM handling zk callbacks.
1154     ExecutorService executor = startupMasterExecutor("mockedAMExecutor");
1155     this.balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
1156     AssignmentManagerWithExtrasForTesting am = new AssignmentManagerWithExtrasForTesting(
1157       server, manager, ct, this.balancer, executor, new NullTableLockManager());
1158     return am;
1159   }
1160 
1161   /**
1162    * An {@link AssignmentManager} with some extra facility used testing
1163    */
1164   class AssignmentManagerWithExtrasForTesting extends AssignmentManager {
1165     // Keep a reference so can give it out below in {@link #getExecutorService}
1166     private final ExecutorService es;
1167     // Ditto for ct
1168     private final CatalogTracker ct;
1169     boolean processRITInvoked = false;
1170     boolean assignInvoked = false;
1171     AtomicBoolean gate = new AtomicBoolean(true);
1172 
1173     public AssignmentManagerWithExtrasForTesting(
1174         final Server master, final ServerManager serverManager,
1175         final CatalogTracker catalogTracker, final LoadBalancer balancer,
1176         final ExecutorService service, final TableLockManager tableLockManager)
1177             throws KeeperException, IOException {
1178       super(master, serverManager, catalogTracker, balancer, service, null, tableLockManager);
1179       this.es = service;
1180       this.ct = catalogTracker;
1181     }
1182 
1183     @Override
1184     boolean processRegionInTransition(String encodedRegionName,
1185         HRegionInfo regionInfo) throws KeeperException, IOException {
1186       this.processRITInvoked = true;
1187       return super.processRegionInTransition(encodedRegionName, regionInfo);
1188     }
1189 
1190     @Override
1191     public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) {
1192       if (enabling) {
1193         assignmentCount++;
1194         this.regionOnline(region, SERVERNAME_A);
1195       } else {
1196         super.assign(region, setOfflineInZK, forceNewPlan);
1197         this.gate.set(true);
1198       }
1199     }
1200 
1201     @Override
1202     boolean assign(ServerName destination, List<HRegionInfo> regions) {
1203       if (enabling) {
1204         for (HRegionInfo region : regions) {
1205           assignmentCount++;
1206           this.regionOnline(region, SERVERNAME_A);
1207         }
1208         return true;
1209       }
1210       return super.assign(destination, regions);
1211     }
1212 
1213     @Override
1214     public void assign(List<HRegionInfo> regions)
1215         throws IOException, InterruptedException {
1216       assignInvoked = (regions != null && regions.size() > 0);
1217       super.assign(regions);
1218       this.gate.set(true);
1219     }
1220 
1221     /** reset the watcher */
1222     void setWatcher(ZooKeeperWatcher watcher) {
1223       this.watcher = watcher;
1224     }
1225 
1226     /**
1227      * @return ExecutorService used by this instance.
1228      */
1229     ExecutorService getExecutorService() {
1230       return this.es;
1231     }
1232 
1233     /**
1234      * @return CatalogTracker used by this AM (Its a mock).
1235      */
1236     CatalogTracker getCatalogTracker() {
1237       return this.ct;
1238     }
1239   }
1240 
1241   /**
1242    * Call joinCluster on the passed AssignmentManager.  Do it in a thread
1243    * so it runs independent of what all else is going on.  Try to simulate
1244    * an AM running insided a failed over master by clearing all in-memory
1245    * AM state first.
1246   */
1247   private void startFakeFailedOverMasterAssignmentManager(final AssignmentManager am,
1248       final ZooKeeperWatcher watcher) {
1249     // Make sure our new AM gets callbacks; once registered, we can't unregister.
1250     // Thats ok because we make a new zk watcher for each test.
1251     watcher.registerListenerFirst(am);
1252     Thread t = new Thread("RunAmJoinCluster") {
1253       @Override
1254       public void run() {
1255         // Call the joinCluster function as though we were doing a master
1256         // failover at this point. It will stall just before we go to add
1257         // the RIT region to our RIT Map in AM at processRegionsInTransition.
1258         // First clear any inmemory state from AM so it acts like a new master
1259         // coming on line.
1260         am.getRegionStates().regionsInTransition.clear();
1261         am.regionPlans.clear();
1262         try {
1263           am.joinCluster();
1264         } catch (IOException e) {
1265           throw new RuntimeException(e);
1266         } catch (KeeperException e) {
1267           throw new RuntimeException(e);
1268         } catch (InterruptedException e) {
1269           throw new RuntimeException(e);
1270         }
1271       }
1272     };
1273     t.start();
1274     while (!t.isAlive()) Threads.sleep(1);
1275   }
1276 
1277   @Test (timeout=180000)
1278   public void testForceAssignMergingRegion() throws Exception {
1279     // Region to use in test.
1280     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1281     // Need a mocked catalog tracker.
1282     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1283     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1284       server.getConfiguration());
1285     // Create an AM.
1286     AssignmentManager am = new AssignmentManager(this.server,
1287       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1288     RegionStates regionStates = am.getRegionStates();
1289     try {
1290       // First set the state of the region to merging
1291       regionStates.updateRegionState(hri, RegionState.State.MERGING);
1292       // Now, try to assign it with force new plan
1293       am.assign(hri, true, true);
1294       assertEquals("The region should be still in merging state",
1295         RegionState.State.MERGING, regionStates.getRegionState(hri).getState());
1296     } finally {
1297       am.shutdown();
1298     }
1299   }
1300 
1301   /**
1302    * Test assignment related ZK events are ignored by AM if the region is not known
1303    * by AM to be in transition. During normal operation, all assignments are started
1304    * by AM (not considering split/merge), if an event is received but the region
1305    * is not in transition, the event must be a very late one. So it can be ignored.
1306    * During master failover, since AM watches assignment znodes after failover cleanup
1307    * is completed, when an event comes in, AM should already have the region in transition
1308    * if ZK is used during the assignment action (only hbck doesn't use ZK for region
1309    * assignment). So during master failover, we can ignored such events too.
1310    */
1311   @Test (timeout=180000)
1312   public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException {
1313     // Region to use in test.
1314     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1315     // Need a mocked catalog tracker.
1316     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1317     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1318       server.getConfiguration());
1319     final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
1320     // Create an AM.
1321     AssignmentManager am = new AssignmentManager(this.server,
1322       this.serverManager, ct, balancer, null, null, master.getTableLockManager()) {
1323 
1324       @Override
1325       void handleRegion(final RegionTransition rt, int expectedVersion) {
1326         super.handleRegion(rt, expectedVersion);
1327         if (rt != null && Bytes.equals(hri.getRegionName(),
1328           rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
1329           zkEventProcessed.set(true);
1330         }
1331       }
1332     };
1333     try {
1334       // First make sure the region is not in transition
1335       am.getRegionStates().regionOffline(hri);
1336       zkEventProcessed.set(false); // Reset it before faking zk transition
1337       this.watcher.registerListenerFirst(am);
1338       assertFalse("The region should not be in transition",
1339         am.getRegionStates().isRegionInTransition(hri));
1340       ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
1341       // Trigger a transition event
1342       ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
1343       long startTime = EnvironmentEdgeManager.currentTimeMillis();
1344       while (!zkEventProcessed.get()) {
1345         assertTrue("Timed out in waiting for ZK event to be processed",
1346           EnvironmentEdgeManager.currentTimeMillis() - startTime < 30000);
1347         Threads.sleepWithoutInterrupt(100);
1348       }
1349       assertFalse(am.getRegionStates().isRegionInTransition(hri));
1350     } finally {
1351       am.shutdown();
1352     }
1353   }
1354 
1355   /**
1356    * If a table is deleted, we should not be able to balance it anymore.
1357    * Otherwise, the region will be brought back.
1358    * @throws Exception
1359    */
1360   @Test (timeout=180000)
1361   public void testBalanceRegionOfDeletedTable() throws Exception {
1362     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1363     AssignmentManager am = new AssignmentManager(this.server, this.serverManager,
1364       ct, balancer, null, null, master.getTableLockManager());
1365     RegionStates regionStates = am.getRegionStates();
1366     HRegionInfo hri = REGIONINFO;
1367     regionStates.createRegionState(hri);
1368     assertFalse(regionStates.isRegionInTransition(hri));
1369     RegionPlan plan = new RegionPlan(hri, SERVERNAME_A, SERVERNAME_B);
1370     // Fake table is deleted
1371     regionStates.tableDeleted(hri.getTable());
1372     am.balance(plan);
1373     assertFalse("The region should not in transition",
1374       regionStates.isRegionInTransition(hri));
1375   }
1376 
1377   /**
1378    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
1379    * for openRegion. AM should assign this somewhere else. (HBASE-9721)
1380    */
1381   @SuppressWarnings("unchecked")
1382   @Test (timeout=180000)
1383   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
1384     Mockito.when(this.serverManager.sendRegionOpen(Mockito.eq(SERVERNAME_B), Mockito.eq(REGIONINFO),
1385       Mockito.anyInt(), (List<ServerName>)Mockito.any()))
1386       .thenThrow(new DoNotRetryIOException());
1387     this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 100);
1388 
1389     HRegionInfo hri = REGIONINFO;
1390     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1391     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1392       server.getConfiguration());
1393     // Create an AM.
1394     AssignmentManager am = new AssignmentManager(this.server,
1395       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1396     RegionStates regionStates = am.getRegionStates();
1397     try {
1398       am.regionPlans.put(REGIONINFO.getEncodedName(),
1399         new RegionPlan(REGIONINFO, null, SERVERNAME_B));
1400 
1401       // Should fail once, but succeed on the second attempt for the SERVERNAME_A
1402       am.assign(hri, true, false);
1403     } finally {
1404       assertEquals(SERVERNAME_A, regionStates.getRegionState(REGIONINFO).getServerName());
1405     }
1406   }
1407 }