View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotSame;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.hbase.CellScannable;
34  import org.apache.hadoop.hbase.CellUtil;
35  import org.apache.hadoop.hbase.DoNotRetryIOException;
36  import org.apache.hadoop.hbase.HBaseConfiguration;
37  import org.apache.hadoop.hbase.HBaseTestingUtility;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.MediumTests;
41  import org.apache.hadoop.hbase.RegionException;
42  import org.apache.hadoop.hbase.RegionTransition;
43  import org.apache.hadoop.hbase.Server;
44  import org.apache.hadoop.hbase.ServerLoad;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.TableName;
47  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
48  import org.apache.hadoop.hbase.catalog.CatalogTracker;
49  import org.apache.hadoop.hbase.catalog.MetaMockingUtil;
50  import org.apache.hadoop.hbase.client.HConnection;
51  import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
52  import org.apache.hadoop.hbase.client.Result;
53  import org.apache.hadoop.hbase.exceptions.DeserializationException;
54  import org.apache.hadoop.hbase.executor.EventType;
55  import org.apache.hadoop.hbase.executor.ExecutorService;
56  import org.apache.hadoop.hbase.executor.ExecutorType;
57  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
58  import org.apache.hadoop.hbase.master.RegionState.State;
59  import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager;
60  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
61  import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
62  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
63  import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
64  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
65  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
66  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
67  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
68  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
69  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
70  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
71  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
72  import org.apache.hadoop.hbase.util.Bytes;
73  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
74  import org.apache.hadoop.hbase.util.Threads;
75  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
76  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
77  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
78  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
79  import org.apache.zookeeper.KeeperException;
80  import org.apache.zookeeper.KeeperException.NodeExistsException;
81  import org.apache.zookeeper.Watcher;
82  import org.junit.After;
83  import org.junit.AfterClass;
84  import org.junit.Before;
85  import org.junit.BeforeClass;
86  import org.junit.Test;
87  import org.junit.experimental.categories.Category;
88  import org.mockito.Mockito;
89  import org.mockito.internal.util.reflection.Whitebox;
90  import org.mockito.invocation.InvocationOnMock;
91  import org.mockito.stubbing.Answer;
92  
93  import com.google.protobuf.RpcController;
94  import com.google.protobuf.ServiceException;
95  
96  
97  /**
98   * Test {@link AssignmentManager}
99   */
100 @Category(MediumTests.class)
101 public class TestAssignmentManager {
102   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
103   private static final ServerName SERVERNAME_A =
104       ServerName.valueOf("example.org", 1234, 5678);
105   private static final ServerName SERVERNAME_B =
106       ServerName.valueOf("example.org", 0, 5678);
107   private static final HRegionInfo REGIONINFO =
108     new HRegionInfo(TableName.valueOf("t"),
109       HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW);
110   private static int assignmentCount;
111   private static boolean enabling = false;
112 
  // Mocked objects; these get redone for each test.
114   private Server server;
115   private ServerManager serverManager;
116   private ZooKeeperWatcher watcher;
117   private LoadBalancer balancer;
118   private HMaster master;
119 
120   @BeforeClass
121   public static void beforeClass() throws Exception {
122     HTU.startMiniZKCluster();
123   }
124 
125   @AfterClass
126   public static void afterClass() throws IOException {
127     HTU.shutdownMiniZKCluster();
128   }
129 
130   @Before
131   public void before() throws ZooKeeperConnectionException, IOException {
132     // TODO: Make generic versions of what we do below and put up in a mocking
133     // utility class or move up into HBaseTestingUtility.
134 
135     // Mock a Server.  Have it return a legit Configuration and ZooKeeperWatcher.
136     // If abort is called, be sure to fail the test (don't just swallow it
137     // silently as is mockito default).
138     this.server = Mockito.mock(Server.class);
139     Mockito.when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
140     Mockito.when(server.getConfiguration()).thenReturn(HTU.getConfiguration());
141     this.watcher =
142       new ZooKeeperWatcher(HTU.getConfiguration(), "mockedServer", this.server, true);
143     Mockito.when(server.getZooKeeper()).thenReturn(this.watcher);
144     Mockito.doThrow(new RuntimeException("Aborted")).
145       when(server).abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
146 
147     // Mock a ServerManager.  Say server SERVERNAME_{A,B} are online.  Also
148     // make it so if close or open, we return 'success'.
149     this.serverManager = Mockito.mock(ServerManager.class);
150     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
151     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
152     Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer());
153     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
154     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
155     onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
156     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
157         new ArrayList<ServerName>(onlineServers.keySet()));
158     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
159 
160     List<ServerName> avServers = new ArrayList<ServerName>();
161     avServers.addAll(onlineServers.keySet());
162     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
163     Mockito.when(this.serverManager.createDestinationServersList(null)).thenReturn(avServers);
164 
165     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)).
166       thenReturn(true);
167     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
168       thenReturn(true);
169     // Ditto on open.
170     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
171       thenReturn(RegionOpeningState.OPENED);
172     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
173       thenReturn(RegionOpeningState.OPENED);
174     this.master = Mockito.mock(HMaster.class);
175 
176     Mockito.when(this.master.getServerManager()).thenReturn(serverManager);
177   }
178 
179   @After
180     public void after() throws KeeperException {
181     if (this.watcher != null) {
182       // Clean up all znodes
183       ZKAssign.deleteAllNodes(this.watcher);
184       this.watcher.close();
185     }
186   }
187 
188   /**
189    * Test a balance going on at same time as a master failover
190    *
191    * @throws IOException
192    * @throws KeeperException
193    * @throws InterruptedException
194    * @throws DeserializationException
195    */
196   @Test(timeout = 60000)
197   public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
198   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
199     AssignmentManagerWithExtrasForTesting am =
200       setUpMockedAssignmentManager(this.server, this.serverManager);
201     try {
202       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
203       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
204       while (!am.processRITInvoked) Thread.sleep(1);
205       // As part of the failover cleanup, the balancing region plan is removed.
206       // So a random server will be used to open the region. For testing purpose,
207       // let's assume it is going to open on server b:
208       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
209 
210       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
211 
212       // Now fake the region closing successfully over on the regionserver; the
213       // regionserver will have set the region in CLOSED state. This will
214       // trigger callback into AM. The below zk close call is from the RS close
215       // region handler duplicated here because its down deep in a private
216       // method hard to expose.
217       int versionid =
218         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
219       assertNotSame(versionid, -1);
220       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
221 
222       // Get current versionid else will fail on transition from OFFLINE to
223       // OPENING below
224       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
225       assertNotSame(-1, versionid);
226       // This uglyness below is what the openregionhandler on RS side does.
227       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
228         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
229         EventType.RS_ZK_REGION_OPENING, versionid);
230       assertNotSame(-1, versionid);
231       // Move znode from OPENING to OPENED as RS does on successful open.
232       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
233         SERVERNAME_B, versionid);
234       assertNotSame(-1, versionid);
235       am.gate.set(false);
236       // Block here until our znode is cleared or until this test times out.
237       ZKAssign.blockUntilNoRIT(watcher);
238     } finally {
239       am.getExecutorService().shutdown();
240       am.shutdown();
241     }
242   }
243 
244   @Test(timeout = 60000)
245   public void testBalanceOnMasterFailoverScenarioWithClosedNode()
246   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
247     AssignmentManagerWithExtrasForTesting am =
248       setUpMockedAssignmentManager(this.server, this.serverManager);
249     try {
250       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
251       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
252       while (!am.processRITInvoked) Thread.sleep(1);
253       // As part of the failover cleanup, the balancing region plan is removed.
254       // So a random server will be used to open the region. For testing purpose,
255       // let's assume it is going to open on server b:
256       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
257 
258       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
259 
260       // Now fake the region closing successfully over on the regionserver; the
261       // regionserver will have set the region in CLOSED state. This will
262       // trigger callback into AM. The below zk close call is from the RS close
263       // region handler duplicated here because its down deep in a private
264       // method hard to expose.
265       int versionid =
266         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
267       assertNotSame(versionid, -1);
268       am.gate.set(false);
269       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
270 
271       // Get current versionid else will fail on transition from OFFLINE to
272       // OPENING below
273       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
274       assertNotSame(-1, versionid);
275       // This uglyness below is what the openregionhandler on RS side does.
276       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
277           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
278           EventType.RS_ZK_REGION_OPENING, versionid);
279       assertNotSame(-1, versionid);
280       // Move znode from OPENING to OPENED as RS does on successful open.
281       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
282           SERVERNAME_B, versionid);
283       assertNotSame(-1, versionid);
284 
285       // Block here until our znode is cleared or until this test timesout.
286       ZKAssign.blockUntilNoRIT(watcher);
287     } finally {
288       am.getExecutorService().shutdown();
289       am.shutdown();
290     }
291   }
292 
293   @Test(timeout = 60000)
294   public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
295   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
296     AssignmentManagerWithExtrasForTesting am =
297       setUpMockedAssignmentManager(this.server, this.serverManager);
298     try {
299       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
300       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
301       while (!am.processRITInvoked) Thread.sleep(1);
302       // As part of the failover cleanup, the balancing region plan is removed.
303       // So a random server will be used to open the region. For testing purpose,
304       // let's assume it is going to open on server b:
305       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
306 
307       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
308 
309       // Now fake the region closing successfully over on the regionserver; the
310       // regionserver will have set the region in CLOSED state. This will
311       // trigger callback into AM. The below zk close call is from the RS close
312       // region handler duplicated here because its down deep in a private
313       // method hard to expose.
314       int versionid =
315         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
316       assertNotSame(versionid, -1);
317       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
318 
319       am.gate.set(false);
320       // Get current versionid else will fail on transition from OFFLINE to
321       // OPENING below
322       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
323       assertNotSame(-1, versionid);
324       // This uglyness below is what the openregionhandler on RS side does.
325       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
326           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
327           EventType.RS_ZK_REGION_OPENING, versionid);
328       assertNotSame(-1, versionid);
329       // Move znode from OPENING to OPENED as RS does on successful open.
330       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
331           SERVERNAME_B, versionid);
332       assertNotSame(-1, versionid);
333       // Block here until our znode is cleared or until this test timesout.
334       ZKAssign.blockUntilNoRIT(watcher);
335     } finally {
336       am.getExecutorService().shutdown();
337       am.shutdown();
338     }
339   }
340 
341   private void createRegionPlanAndBalance(
342       final AssignmentManager am, final ServerName from,
343       final ServerName to, final HRegionInfo hri) throws RegionException {
344     // Call the balance function but fake the region being online first at
345     // servername from.
346     am.regionOnline(hri, from);
347     // Balance region from 'from' to 'to'. It calls unassign setting CLOSING state
348     // up in zk.  Create a plan and balance
349     am.balance(new RegionPlan(hri, from, to));
350   }
351 
352   /**
353    * Tests AssignmentManager balance function.  Runs a balance moving a region
354    * from one server to another mocking regionserver responding over zk.
355    * @throws IOException
356    * @throws KeeperException
357    * @throws DeserializationException
358    */
359   @Test
360   public void testBalance()
361     throws IOException, KeeperException, DeserializationException, InterruptedException {
362     // Create and startup an executor.  This is used by AssignmentManager
363     // handling zk callbacks.
364     ExecutorService executor = startupMasterExecutor("testBalanceExecutor");
365 
366     // We need a mocked catalog tracker.
367     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
368     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
369         .getConfiguration());
370     // Create an AM.
371     AssignmentManager am = new AssignmentManager(this.server,
372       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
373     am.failoverCleanupDone.set(true);
374     try {
375       // Make sure our new AM gets callbacks; once registered, can't unregister.
376       // Thats ok because we make a new zk watcher for each test.
377       this.watcher.registerListenerFirst(am);
378       // Call the balance function but fake the region being online first at
379       // SERVERNAME_A.  Create a balance plan.
380       am.regionOnline(REGIONINFO, SERVERNAME_A);
381       // Balance region from A to B.
382       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
383       am.balance(plan);
384 
385       RegionStates regionStates = am.getRegionStates();
386       // Must be failed to close since the server is fake
387       assertTrue(regionStates.isRegionInTransition(REGIONINFO)
388         && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
389       // Move it back to pending_close
390       regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
391 
392       // Now fake the region closing successfully over on the regionserver; the
393       // regionserver will have set the region in CLOSED state.  This will
394       // trigger callback into AM. The below zk close call is from the RS close
395       // region handler duplicated here because its down deep in a private
396       // method hard to expose.
397       int versionid =
398         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
399       assertNotSame(versionid, -1);
400       // AM is going to notice above CLOSED and queue up a new assign.  The
401       // assign will go to open the region in the new location set by the
402       // balancer.  The zk node will be OFFLINE waiting for regionserver to
403       // transition it through OPENING, OPENED.  Wait till we see the OFFLINE
404       // zk node before we proceed.
405       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
406 
407       // Get current versionid else will fail on transition from OFFLINE to OPENING below
408       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
409       assertNotSame(-1, versionid);
410       // This uglyness below is what the openregionhandler on RS side does.
411       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
412         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
413         EventType.RS_ZK_REGION_OPENING, versionid);
414       assertNotSame(-1, versionid);
415       // Move znode from OPENING to OPENED as RS does on successful open.
416       versionid =
417         ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
418       assertNotSame(-1, versionid);
419       // Wait on the handler removing the OPENED znode.
420       while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
421     } finally {
422       executor.shutdown();
423       am.shutdown();
424       // Clean up all znodes
425       ZKAssign.deleteAllNodes(this.watcher);
426     }
427   }
428 
429   /**
430    * Run a simple server shutdown handler.
431    * @throws KeeperException
432    * @throws IOException
433    */
434   @Test
435   public void testShutdownHandler()
436       throws KeeperException, IOException, ServiceException {
437     // Create and startup an executor.  This is used by AssignmentManager
438     // handling zk callbacks.
439     ExecutorService executor = startupMasterExecutor("testShutdownHandler");
440 
441     // We need a mocked catalog tracker.
442     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
443     // Create an AM.
444     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
445         this.server, this.serverManager);
446     try {
447       processServerShutdownHandler(ct, am, false);
448     } finally {
449       executor.shutdown();
450       am.shutdown();
451       // Clean up all znodes
452       ZKAssign.deleteAllNodes(this.watcher);
453     }
454   }
455 
456   /**
457    * To test closed region handler to remove rit and delete corresponding znode
458    * if region in pending close or closing while processing shutdown of a region
459    * server.(HBASE-5927).
460    *
461    * @throws KeeperException
462    * @throws IOException
463    * @throws ServiceException
464    */
465   @Test
466   public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException,
467       ServiceException {
468     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
469     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
470   }
471 
472 
473   /**
474    * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
475    * has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also HBASE-5806
476    *
477    * @throws KeeperException
478    * @throws IOException
479    */
480   @Test
481   public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
482     // true indicates the region is split but still in RIT
483     testCaseWithSplitRegionPartial(true);
484     // false indicate the region is not split
485     testCaseWithSplitRegionPartial(false);
486   }
487 
488   private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
489       IOException, NodeExistsException, InterruptedException, ServiceException {
490     // Create and startup an executor. This is used by AssignmentManager
491     // handling zk callbacks.
492     ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
493     // We need a mocked catalog tracker.
494     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
495     ZKAssign.deleteAllNodes(this.watcher);
496 
497     // Create an AM.
498     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
499       this.server, this.serverManager);
500     // adding region to regions and servers maps.
501     am.regionOnline(REGIONINFO, SERVERNAME_A);
502     // adding region in pending close.
503     am.getRegionStates().updateRegionState(
504       REGIONINFO, State.SPLITTING, SERVERNAME_A);
505     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
506     RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
507         REGIONINFO.getRegionName(), SERVERNAME_A);
508     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
509     // create znode in M_ZK_REGION_CLOSING state.
510     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
511 
512     try {
513       processServerShutdownHandler(ct, am, regionSplitDone);
514       // check znode deleted or not.
515       // In both cases the znode should be deleted.
516 
517       if (regionSplitDone) {
518         assertFalse("Region state of region in SPLITTING should be removed from rit.",
519             am.getRegionStates().isRegionsInTransition());
520       } else {
521         while (!am.assignInvoked) {
522           Thread.sleep(1);
523         }
524         assertTrue("Assign should be invoked.", am.assignInvoked);
525       }
526     } finally {
527       REGIONINFO.setOffline(false);
528       REGIONINFO.setSplit(false);
529       executor.shutdown();
530       am.shutdown();
531       // Clean up all znodes
532       ZKAssign.deleteAllNodes(this.watcher);
533     }
534   }
535 
536   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
537       IOException, NodeExistsException, ServiceException {
538     // Create and startup an executor. This is used by AssignmentManager
539     // handling zk callbacks.
540     ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
541     // We need a mocked catalog tracker.
542     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
543     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
544     ZKAssign.deleteAllNodes(this.watcher);
545 
546     // Create an AM.
547     AssignmentManager am = new AssignmentManager(this.server,
548       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
549     // adding region to regions and servers maps.
550     am.regionOnline(REGIONINFO, SERVERNAME_A);
551     // adding region in pending close.
552     am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
553     if (state == Table.State.DISABLING) {
554       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
555     } else {
556       am.getZKTable().setDisabledTable(REGIONINFO.getTable());
557     }
558     RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
559         REGIONINFO.getRegionName(), SERVERNAME_A);
560     // RegionTransitionData data = new
561     // RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
562     // REGIONINFO.getRegionName(), SERVERNAME_A);
563     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
564     // create znode in M_ZK_REGION_CLOSING state.
565     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
566 
567     try {
568       processServerShutdownHandler(ct, am, false);
569       // check znode deleted or not.
570       // In both cases the znode should be deleted.
571       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
572       // check whether in rit or not. In the DISABLING case also the below
573       // assert will be true but the piece of code added for HBASE-5927 will not
574       // do that.
575       if (state == Table.State.DISABLED) {
576         assertFalse("Region state of region in pending close should be removed from rit.",
577             am.getRegionStates().isRegionsInTransition());
578       }
579     } finally {
580       am.setEnabledTable(REGIONINFO.getTable());
581       executor.shutdown();
582       am.shutdown();
583       // Clean up all znodes
584       ZKAssign.deleteAllNodes(this.watcher);
585     }
586   }
587 
588   private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion)
589       throws IOException, ServiceException {
590     // Make sure our new AM gets callbacks; once registered, can't unregister.
591     // Thats ok because we make a new zk watcher for each test.
592     this.watcher.registerListenerFirst(am);
593 
594     // Need to set up a fake scan of meta for the servershutdown handler
595     // Make an RS Interface implementation.  Make it so a scanner can go against it.
596     ClientProtos.ClientService.BlockingInterface implementation =
597       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
598     // Get a meta row result that has region up on SERVERNAME_A
599 
600     Result r;
601     if (splitRegion) {
602       r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
603     } else {
604       r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
605     }
606 
607     final ScanResponse.Builder builder = ScanResponse.newBuilder();
608     builder.setMoreResults(true);
609     builder.addCellsPerResult(r.size());
610     final List<CellScannable> cellScannables = new ArrayList<CellScannable>(1);
611     cellScannables.add(r);
612     Mockito.when(implementation.scan(
613       (RpcController)Mockito.any(), (ScanRequest)Mockito.any())).
614       thenAnswer(new Answer<ScanResponse>() {
615           @Override
616           public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
617             PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
618                 .getArguments()[0];
619             if (controller != null) {
620               controller.setCellScanner(CellUtil.createCellScanner(cellScannables));
621             }
622             return builder.build();
623           }
624       });
625 
626     // Get a connection w/ mocked up common methods.
627     HConnection connection =
628       HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(),
629         null, implementation, SERVERNAME_B, REGIONINFO);
630 
631     // Make it so we can get a catalogtracker from servermanager.. .needed
632     // down in guts of server shutdown handler.
633     Mockito.when(ct.getConnection()).thenReturn(connection);
634     Mockito.when(this.server.getCatalogTracker()).thenReturn(ct);
635 
636     // Now make a server shutdown handler instance and invoke process.
637     // Have it that SERVERNAME_A died.
638     DeadServer deadServers = new DeadServer();
639     deadServers.add(SERVERNAME_A);
640     // I need a services instance that will return the AM
641     MasterServices services = Mockito.mock(MasterServices.class);
642     Mockito.when(services.getAssignmentManager()).thenReturn(am);
643     Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
644     Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
645     ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
646       services, deadServers, SERVERNAME_A, false);
647     am.failoverCleanupDone.set(true);
648     handler.process();
649     // The region in r will have been assigned.  It'll be up in zk as unassigned.
650   }
651 
652   /**
653    * Create and startup executor pools. Start same set as master does (just
654    * run a few less).
655    * @param name Name to give our executor
656    * @return Created executor (be sure to call shutdown when done).
657    */
658   private ExecutorService startupMasterExecutor(final String name) {
659     // TODO: Move up into HBaseTestingUtility?  Generally useful.
660     ExecutorService executor = new ExecutorService(name);
661     executor.startExecutorService(ExecutorType.MASTER_OPEN_REGION, 3);
662     executor.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, 3);
663     executor.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, 3);
664     executor.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, 3);
665     return executor;
666   }
667 
668   @Test
669   public void testUnassignWithSplitAtSameTime() throws KeeperException, IOException {
670     // Region to use in test.
671     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
672     // First amend the servermanager mock so that when we do send close of the
673     // first meta region on SERVERNAME_A, it will return true rather than
674     // default null.
675     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true);
676     // Need a mocked catalog tracker.
677     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
678     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
679         .getConfiguration());
680     // Create an AM.
681     AssignmentManager am = new AssignmentManager(this.server,
682       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
683     try {
684       // First make sure my mock up basically works.  Unassign a region.
685       unassign(am, SERVERNAME_A, hri);
686       // This delete will fail if the previous unassign did wrong thing.
687       ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_A);
688       // Now put a SPLITTING region in the way.  I don't have to assert it
689       // go put in place.  This method puts it in place then asserts it still
690       // owns it by moving state from SPLITTING to SPLITTING.
691       int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A);
692       // Now, retry the unassign with the SPLTTING in place.  It should just
693       // complete without fail; a sort of 'silent' recognition that the
694       // region to unassign has been split and no longer exists: TOOD: what if
695       // the split fails and the parent region comes back to life?
696       unassign(am, SERVERNAME_A, hri);
697       // This transition should fail if the znode has been messed with.
698       ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A,
699         EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
700       assertFalse(am.getRegionStates().isRegionInTransition(hri));
701     } finally {
702       am.shutdown();
703     }
704   }
705 
706   /**
707    * Tests the processDeadServersAndRegionsInTransition should not fail with NPE
708    * when it failed to get the children. Let's abort the system in this
709    * situation
710    * @throws ServiceException
711    */
712   @Test(timeout = 60000)
713   public void testProcessDeadServersAndRegionsInTransitionShouldNotFailWithNPE()
714       throws IOException, KeeperException, InterruptedException, ServiceException {
715     final RecoverableZooKeeper recoverableZk = Mockito
716         .mock(RecoverableZooKeeper.class);
717     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
718       this.server, this.serverManager);
719     Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest",
720         null) {
721       @Override
722       public RecoverableZooKeeper getRecoverableZooKeeper() {
723         return recoverableZk;
724       }
725     };
726     ((ZooKeeperWatcher) zkw).registerListener(am);
727     Mockito.doThrow(new InterruptedException()).when(recoverableZk)
728         .getChildren("/hbase/region-in-transition", null);
729     am.setWatcher((ZooKeeperWatcher) zkw);
730     try {
731       am.processDeadServersAndRegionsInTransition(null);
732       fail("Expected to abort");
733     } catch (NullPointerException e) {
734       fail("Should not throw NPE");
735     } catch (RuntimeException e) {
736       assertEquals("Aborted", e.getLocalizedMessage());
737     }
738   }
739   /**
740    * TestCase verifies that the regionPlan is updated whenever a region fails to open
741    * and the master tries to process RS_ZK_FAILED_OPEN state.(HBASE-5546).
742    */
743   @Test(timeout = 60000)
744   public void testRegionPlanIsUpdatedWhenRegionFailsToOpen() throws IOException, KeeperException,
745       ServiceException, InterruptedException {
746     this.server.getConfiguration().setClass(
747       HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockedLoadBalancer.class,
748       LoadBalancer.class);
749     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
750       this.server, this.serverManager);
751     try {
752       // Boolean variable used for waiting until randomAssignment is called and
753       // new
754       // plan is generated.
755       AtomicBoolean gate = new AtomicBoolean(false);
756       if (balancer instanceof MockedLoadBalancer) {
757         ((MockedLoadBalancer) balancer).setGateVariable(gate);
758       }
759       ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
760       int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
761       ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A,
762           EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
763       String path = ZKAssign.getNodeName(this.watcher, REGIONINFO
764           .getEncodedName());
765       am.getRegionStates().updateRegionState(
766         REGIONINFO, State.OPENING, SERVERNAME_A);
767       // a dummy plan inserted into the regionPlans. This plan is cleared and
768       // new one is formed
769       am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(
770           REGIONINFO, null, SERVERNAME_A));
771       RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
772       List<ServerName> serverList = new ArrayList<ServerName>(2);
773       serverList.add(SERVERNAME_B);
774       Mockito.when(
775           this.serverManager.createDestinationServersList(SERVERNAME_A))
776           .thenReturn(serverList);
777       am.nodeDataChanged(path);
778       // here we are waiting until the random assignment in the load balancer is
779       // called.
780       while (!gate.get()) {
781         Thread.sleep(10);
782       }
783       // new region plan may take some time to get updated after random
784       // assignment is called and
785       // gate is set to true.
786       RegionPlan newRegionPlan = am.regionPlans
787           .get(REGIONINFO.getEncodedName());
788       while (newRegionPlan == null) {
789         Thread.sleep(10);
790         newRegionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
791       }
792       // the new region plan created may contain the same RS as destination but
793       // it should
794       // be new plan.
795       assertNotSame("Same region plan should not come", regionPlan,
796           newRegionPlan);
797       assertTrue("Destination servers should be different.", !(regionPlan
798           .getDestination().equals(newRegionPlan.getDestination())));
799 
800       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
801     } finally {
802       this.server.getConfiguration().setClass(
803           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
804           LoadBalancer.class);
805       am.getExecutorService().shutdown();
806       am.shutdown();
807     }
808   }
809 
810   /**
811    * Mocked load balancer class used in the testcase to make sure that the testcase waits until
812    * random assignment is called and the gate variable is set to true.
813    */
814   public static class MockedLoadBalancer extends SimpleLoadBalancer {
815     private AtomicBoolean gate;
816 
817     public void setGateVariable(AtomicBoolean gate) {
818       this.gate = gate;
819     }
820 
821     @Override
822     public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
823       ServerName randomServerName = super.randomAssignment(regionInfo, servers);
824       this.gate.set(true);
825       return randomServerName;
826     }
827 
828     @Override
829     public Map<ServerName, List<HRegionInfo>> retainAssignment(
830         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
831       this.gate.set(true);
832       return super.retainAssignment(regions, servers);
833     }
834   }
835 
836   /**
837    * Test the scenario when the master is in failover and trying to process a
838    * region which is in Opening state on a dead RS. Master will force offline the
839    * region and put it in transition. AM relies on SSH to reassign it.
840    */
841   @Test(timeout = 60000)
842   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
843       KeeperException, ServiceException, InterruptedException {
844     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
845       this.server, this.serverManager);
846     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
847     int version = ZKAssign.getVersion(this.watcher, REGIONINFO);
848     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
849         EventType.RS_ZK_REGION_OPENING, version);
850     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING,
851         REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY);
852     version = ZKAssign.getVersion(this.watcher, REGIONINFO);
853     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
854     am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done
855     am.getRegionStates().createRegionState(REGIONINFO);
856     am.gate.set(false);
857     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
858     assertFalse(am.processRegionsInTransition(rt, REGIONINFO, version));
859     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
860     processServerShutdownHandler(ct, am, false);
861     // Waiting for the assignment to get completed.
862     while (!am.gate.get()) {
863       Thread.sleep(10);
864     }
865     assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO
866         .getEncodedName()));
867   }
868 
869   /**
870    * Test verifies whether assignment is skipped for regions of tables in DISABLING state during
871    * clean cluster startup. See HBASE-6281.
872    *
873    * @throws KeeperException
874    * @throws IOException
875    * @throws Exception
876    */
877   @Test(timeout = 60000)
878   public void testDisablingTableRegionsAssignmentDuringCleanClusterStartup()
879       throws KeeperException, IOException, Exception {
880     this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
881         MockedLoadBalancer.class, LoadBalancer.class);
882     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(
883         new HashMap<ServerName, ServerLoad>(0));
884     List<ServerName> destServers = new ArrayList<ServerName>(1);
885     destServers.add(SERVERNAME_A);
886     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
887     // To avoid cast exception in DisableTableHandler process.
888     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
889     Server server = new HMaster(HTU.getConfiguration());
890     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
891         this.serverManager);
892     AtomicBoolean gate = new AtomicBoolean(false);
893     if (balancer instanceof MockedLoadBalancer) {
894       ((MockedLoadBalancer) balancer).setGateVariable(gate);
895     }
896     try{
897       // set table in disabling state.
898       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
899       am.joinCluster();
900       // should not call retainAssignment if we get empty regions in assignAllUserRegions.
901       assertFalse(
902           "Assign should not be invoked for disabling table regions during clean cluster startup.",
903           gate.get());
904       // need to change table state from disabling to disabled.
905       assertTrue("Table should be disabled.",
906           am.getZKTable().isDisabledTable(REGIONINFO.getTable()));
907     } finally {
908       this.server.getConfiguration().setClass(
909         HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
910         LoadBalancer.class);
911       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
912       am.shutdown();
913     }
914   }
915 
916   /**
917    * Test verifies whether all the enabling table regions assigned only once during master startup.
918    *
919    * @throws KeeperException
920    * @throws IOException
921    * @throws Exception
922    */
923   @Test
924   public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception {
925     enabling = true;
926     List<ServerName> destServers = new ArrayList<ServerName>(1);
927     destServers.add(SERVERNAME_A);
928     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
929     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
930     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
931     Server server = new HMaster(HTU.getConfiguration());
932     Whitebox.setInternalState(server, "serverManager", this.serverManager);
933     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
934         this.serverManager);
935     try {
936       // set table in enabling state.
937       am.getZKTable().setEnablingTable(REGIONINFO.getTable());
938       new EnableTableHandler(server, REGIONINFO.getTable(),
939           am.getCatalogTracker(), am, new NullTableLockManager(), true).prepare()
940           .process();
941       assertEquals("Number of assignments should be 1.", 1, assignmentCount);
942       assertTrue("Table should be enabled.",
943           am.getZKTable().isEnabledTable(REGIONINFO.getTable()));
944     } finally {
945       enabling = false;
946       assignmentCount = 0;
947       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
948       am.shutdown();
949       ZKAssign.deleteAllNodes(this.watcher);
950     }
951   }
952 
953   /**
954    * Test verifies whether stale znodes of unknown tables as for the hbase:meta will be removed or
955    * not.
956    * @throws KeeperException
957    * @throws IOException
958    * @throws Exception
959    */
960   @Test
961   public void testMasterRestartShouldRemoveStaleZnodesOfUnknownTableAsForMeta()
962       throws KeeperException, IOException, Exception {
963     List<ServerName> destServers = new ArrayList<ServerName>(1);
964     destServers.add(SERVERNAME_A);
965     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
966     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
967     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
968     Server server = new HMaster(HTU.getConfiguration());
969     Whitebox.setInternalState(server, "serverManager", this.serverManager);
970     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
971         this.serverManager);
972     try {
973       TableName tableName = TableName.valueOf("dummyTable");
974       // set table in enabling state.
975       am.getZKTable().setEnablingTable(tableName);
976       am.joinCluster();
977       assertFalse("Table should not be present in zookeeper.",
978         am.getZKTable().isTablePresent(tableName));
979     } finally {
980     }
981   }
982   /**
983    * When a region is in transition, if the region server opening the region goes down,
984    * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign).
985    * This test is to make sure SSH reassigns it right away.
986    */
987   @Test
988   public void testSSHTimesOutOpeningRegionTransition()
989       throws KeeperException, IOException, ServiceException {
990     // We need a mocked catalog tracker.
991     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
992     // Create an AM.
993     AssignmentManagerWithExtrasForTesting am =
994       setUpMockedAssignmentManager(this.server, this.serverManager);
995     // adding region in pending open.
996     RegionState state = new RegionState(REGIONINFO,
997       State.OPENING, System.currentTimeMillis(), SERVERNAME_A);
998     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B);
999     am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state);
1000     // adding region plan
1001     am.regionPlans.put(REGIONINFO.getEncodedName(),
1002       new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A));
1003     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
1004 
1005     try {
1006       am.assignInvoked = false;
1007       processServerShutdownHandler(ct, am, false);
1008       assertTrue(am.assignInvoked);
1009     } finally {
1010       am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName());
1011       am.regionPlans.remove(REGIONINFO.getEncodedName());
1012     }
1013   }
1014 
1015   /**
1016    * Scenario:<ul>
1017    *  <li> master starts a close, and creates a znode</li>
1018    *  <li> it fails just at this moment, before contacting the RS</li>
1019    *  <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
1020    *    we don't know, and we have an exception.</li>
1021    *  <li> the master must handle this nicely and reassign.
1022    *  </ul>
1023    */
1024   @Test
1025   public void testClosingFailureDuringRecovery() throws Exception {
1026 
1027     AssignmentManagerWithExtrasForTesting am =
1028         setUpMockedAssignmentManager(this.server, this.serverManager);
1029     ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
1030     am.getRegionStates().createRegionState(REGIONINFO);
1031 
1032     assertFalse( am.getRegionStates().isRegionsInTransition() );
1033 
1034     am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
1035 
1036     assertTrue( am.getRegionStates().isRegionsInTransition() );
1037   }
1038 
1039   /**
1040    * Creates a new ephemeral node in the SPLITTING state for the specified region.
1041    * Create it ephemeral in case regionserver dies mid-split.
1042    *
1043    * <p>Does not transition nodes from other states.  If a node already exists
1044    * for this region, a {@link NodeExistsException} will be thrown.
1045    *
1046    * @param zkw zk reference
1047    * @param region region to be created as offline
1048    * @param serverName server event originates from
1049    * @return Version of znode created.
1050    * @throws KeeperException
1051    * @throws IOException
1052    */
1053   // Copied from SplitTransaction rather than open the method over there in
1054   // the regionserver package.
1055   private static int createNodeSplitting(final ZooKeeperWatcher zkw,
1056       final HRegionInfo region, final ServerName serverName)
1057   throws KeeperException, IOException {
1058     RegionTransition rt =
1059       RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
1060         region.getRegionName(), serverName);
1061 
1062     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1063     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1064       throw new IOException("Failed create of ephemeral " + node);
1065     }
1066     // Transition node from SPLITTING to SPLITTING and pick up version so we
1067     // can be sure this znode is ours; version is needed deleting.
1068     return transitionNodeSplitting(zkw, region, serverName, -1);
1069   }
1070 
1071   // Copied from SplitTransaction rather than open the method over there in
1072   // the regionserver package.
1073   private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
1074       final HRegionInfo parent,
1075       final ServerName serverName, final int version)
1076   throws KeeperException, IOException {
1077     return ZKAssign.transitionNode(zkw, parent, serverName,
1078       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
1079   }
1080 
1081   private void unassign(final AssignmentManager am, final ServerName sn,
1082       final HRegionInfo hri) throws RegionException {
1083     // Before I can unassign a region, I need to set it online.
1084     am.regionOnline(hri, sn);
1085     // Unassign region.
1086     am.unassign(hri);
1087   }
1088 
1089   /**
1090    * Create an {@link AssignmentManagerWithExtrasForTesting} that has mocked
1091    * {@link CatalogTracker} etc.
1092    * @param server
1093    * @param manager
1094    * @return An AssignmentManagerWithExtras with mock connections, etc.
1095    * @throws IOException
1096    * @throws KeeperException
1097    */
1098   private AssignmentManagerWithExtrasForTesting setUpMockedAssignmentManager(final Server server,
1099       final ServerManager manager) throws IOException, KeeperException, ServiceException {
1100     // We need a mocked catalog tracker. Its used by our AM instance.
1101     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1102     // Make an RS Interface implementation. Make it so a scanner can go against
1103     // it and a get to return the single region, REGIONINFO, this test is
1104     // messing with. Needed when "new master" joins cluster. AM will try and
1105     // rebuild its list of user regions and it will also get the HRI that goes
1106     // with an encoded name by doing a Get on hbase:meta
1107     ClientProtos.ClientService.BlockingInterface ri =
1108       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
1109     // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO
1110     Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
1111     final ScanResponse.Builder builder = ScanResponse.newBuilder();
1112     builder.setMoreResults(true);
1113     builder.addCellsPerResult(r.size());
1114     final List<CellScannable> rows = new ArrayList<CellScannable>(1);
1115     rows.add(r);
1116     Answer<ScanResponse> ans = new Answer<ClientProtos.ScanResponse>() {
1117       @Override
1118       public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
1119         PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
1120             .getArguments()[0];
1121         if (controller != null) {
1122           controller.setCellScanner(CellUtil.createCellScanner(rows));
1123         }
1124         return builder.build();
1125       }
1126     };
1127     if (enabling) {
1128       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any()))
1129           .thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans)
1130           .thenReturn(ScanResponse.newBuilder().setMoreResults(false).build());
1131     } else {
1132       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any())).thenAnswer(
1133           ans);
1134     }
1135     // If a get, return the above result too for REGIONINFO
1136     GetResponse.Builder getBuilder = GetResponse.newBuilder();
1137     getBuilder.setResult(ProtobufUtil.toResult(r));
1138     Mockito.when(ri.get((RpcController)Mockito.any(), (GetRequest) Mockito.any())).
1139       thenReturn(getBuilder.build());
1140     // Get a connection w/ mocked up common methods.
1141     HConnection connection = HConnectionTestingUtility.
1142       getMockedConnectionAndDecorate(HTU.getConfiguration(), null,
1143         ri, SERVERNAME_B, REGIONINFO);
1144     // Make it so we can get the connection from our mocked catalogtracker
1145     Mockito.when(ct.getConnection()).thenReturn(connection);
1146     // Create and startup an executor. Used by AM handling zk callbacks.
1147     ExecutorService executor = startupMasterExecutor("mockedAMExecutor");
1148     this.balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
1149     AssignmentManagerWithExtrasForTesting am = new AssignmentManagerWithExtrasForTesting(
1150       server, manager, ct, this.balancer, executor, new NullTableLockManager());
1151     return am;
1152   }
1153 
1154   /**
1155    * An {@link AssignmentManager} with some extra facility used testing
1156    */
1157   class AssignmentManagerWithExtrasForTesting extends AssignmentManager {
1158     // Keep a reference so can give it out below in {@link #getExecutorService}
1159     private final ExecutorService es;
1160     // Ditto for ct
1161     private final CatalogTracker ct;
1162     boolean processRITInvoked = false;
1163     boolean assignInvoked = false;
1164     AtomicBoolean gate = new AtomicBoolean(true);
1165 
1166     public AssignmentManagerWithExtrasForTesting(
1167         final Server master, final ServerManager serverManager,
1168         final CatalogTracker catalogTracker, final LoadBalancer balancer,
1169         final ExecutorService service, final TableLockManager tableLockManager)
1170             throws KeeperException, IOException {
1171       super(master, serverManager, catalogTracker, balancer, service, null, tableLockManager);
1172       this.es = service;
1173       this.ct = catalogTracker;
1174     }
1175 
1176     @Override
1177     boolean processRegionInTransition(String encodedRegionName,
1178         HRegionInfo regionInfo) throws KeeperException, IOException {
1179       this.processRITInvoked = true;
1180       return super.processRegionInTransition(encodedRegionName, regionInfo);
1181     }
1182 
1183     @Override
1184     public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) {
1185       if (enabling) {
1186         assignmentCount++;
1187         this.regionOnline(region, SERVERNAME_A);
1188       } else {
1189         super.assign(region, setOfflineInZK, forceNewPlan);
1190         this.gate.set(true);
1191       }
1192     }
1193 
1194     @Override
1195     boolean assign(ServerName destination, List<HRegionInfo> regions) {
1196       if (enabling) {
1197         for (HRegionInfo region : regions) {
1198           assignmentCount++;
1199           this.regionOnline(region, SERVERNAME_A);
1200         }
1201         return true;
1202       }
1203       return super.assign(destination, regions);
1204     }
1205 
1206     @Override
1207     public void assign(List<HRegionInfo> regions)
1208         throws IOException, InterruptedException {
1209       assignInvoked = (regions != null && regions.size() > 0);
1210       super.assign(regions);
1211       this.gate.set(true);
1212     }
1213 
1214     /** reset the watcher */
1215     void setWatcher(ZooKeeperWatcher watcher) {
1216       this.watcher = watcher;
1217     }
1218 
1219     /**
1220      * @return ExecutorService used by this instance.
1221      */
1222     ExecutorService getExecutorService() {
1223       return this.es;
1224     }
1225 
1226     /**
1227      * @return CatalogTracker used by this AM (Its a mock).
1228      */
1229     CatalogTracker getCatalogTracker() {
1230       return this.ct;
1231     }
1232   }
1233 
1234   /**
1235    * Call joinCluster on the passed AssignmentManager.  Do it in a thread
1236    * so it runs independent of what all else is going on.  Try to simulate
1237    * an AM running insided a failed over master by clearing all in-memory
1238    * AM state first.
1239   */
1240   private void startFakeFailedOverMasterAssignmentManager(final AssignmentManager am,
1241       final ZooKeeperWatcher watcher) {
1242     // Make sure our new AM gets callbacks; once registered, we can't unregister.
1243     // Thats ok because we make a new zk watcher for each test.
1244     watcher.registerListenerFirst(am);
1245     Thread t = new Thread("RunAmJoinCluster") {
1246       @Override
1247       public void run() {
1248         // Call the joinCluster function as though we were doing a master
1249         // failover at this point. It will stall just before we go to add
1250         // the RIT region to our RIT Map in AM at processRegionsInTransition.
1251         // First clear any inmemory state from AM so it acts like a new master
1252         // coming on line.
1253         am.getRegionStates().regionsInTransition.clear();
1254         am.regionPlans.clear();
1255         try {
1256           am.joinCluster();
1257         } catch (IOException e) {
1258           throw new RuntimeException(e);
1259         } catch (KeeperException e) {
1260           throw new RuntimeException(e);
1261         } catch (InterruptedException e) {
1262           throw new RuntimeException(e);
1263         }
1264       }
1265     };
1266     t.start();
1267     while (!t.isAlive()) Threads.sleep(1);
1268   }
1269 
1270   @Test
1271   public void testForceAssignMergingRegion() throws Exception {
1272     // Region to use in test.
1273     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1274     // Need a mocked catalog tracker.
1275     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1276     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1277       server.getConfiguration());
1278     // Create an AM.
1279     AssignmentManager am = new AssignmentManager(this.server,
1280       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1281     RegionStates regionStates = am.getRegionStates();
1282     try {
1283       // First set the state of the region to merging
1284       regionStates.updateRegionState(hri, RegionState.State.MERGING);
1285       // Now, try to assign it with force new plan
1286       am.assign(hri, true, true);
1287       assertEquals("The region should be still in merging state",
1288         RegionState.State.MERGING, regionStates.getRegionState(hri).getState());
1289     } finally {
1290       am.shutdown();
1291     }
1292   }
1293 
1294   /**
1295    * Test assignment related ZK events are ignored by AM if the region is not known
1296    * by AM to be in transition. During normal operation, all assignments are started
1297    * by AM (not considering split/merge), if an event is received but the region
1298    * is not in transition, the event must be a very late one. So it can be ignored.
1299    * During master failover, since AM watches assignment znodes after failover cleanup
1300    * is completed, when an event comes in, AM should already have the region in transition
1301    * if ZK is used during the assignment action (only hbck doesn't use ZK for region
1302    * assignment). So during master failover, we can ignored such events too.
1303    */
1304   @Test
1305   public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException {
1306     // Region to use in test.
1307     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1308     // Need a mocked catalog tracker.
1309     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1310     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1311       server.getConfiguration());
1312     final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
1313     // Create an AM.
1314     AssignmentManager am = new AssignmentManager(this.server,
1315       this.serverManager, ct, balancer, null, null, master.getTableLockManager()) {
1316 
1317       @Override
1318       void handleRegion(final RegionTransition rt, int expectedVersion) {
1319         super.handleRegion(rt, expectedVersion);
1320         if (rt != null && Bytes.equals(hri.getRegionName(),
1321           rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
1322           zkEventProcessed.set(true);
1323         }
1324       }
1325     };
1326     try {
1327       // First make sure the region is not in transition
1328       am.getRegionStates().regionOffline(hri);
1329       zkEventProcessed.set(false); // Reset it before faking zk transition
1330       this.watcher.registerListenerFirst(am);
1331       assertFalse("The region should not be in transition",
1332         am.getRegionStates().isRegionInTransition(hri));
1333       ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
1334       // Trigger a transition event
1335       ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
1336       long startTime = EnvironmentEdgeManager.currentTimeMillis();
1337       while (!zkEventProcessed.get()) {
1338         assertTrue("Timed out in waiting for ZK event to be processed",
1339           EnvironmentEdgeManager.currentTimeMillis() - startTime < 30000);
1340         Threads.sleepWithoutInterrupt(100);
1341       }
1342       assertFalse(am.getRegionStates().isRegionInTransition(hri));
1343     } finally {
1344       am.shutdown();
1345     }
1346   }
1347 
1348   /**
1349    * If a table is deleted, we should not be able to balance it anymore.
1350    * Otherwise, the region will be brought back.
1351    * @throws Exception
1352    */
1353   @Test
1354   public void testBalanceRegionOfDeletedTable() throws Exception {
1355     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1356     AssignmentManager am = new AssignmentManager(this.server, this.serverManager,
1357       ct, balancer, null, null, master.getTableLockManager());
1358     RegionStates regionStates = am.getRegionStates();
1359     HRegionInfo hri = REGIONINFO;
1360     regionStates.createRegionState(hri);
1361     assertFalse(regionStates.isRegionInTransition(hri));
1362     RegionPlan plan = new RegionPlan(hri, SERVERNAME_A, SERVERNAME_B);
1363     // Fake table is deleted
1364     regionStates.tableDeleted(hri.getTable());
1365     am.balance(plan);
1366     assertFalse("The region should not in transition",
1367       regionStates.isRegionInTransition(hri));
1368   }
1369 
1370   /**
1371    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
1372    * for openRegion. AM should assign this somewhere else. (HBASE-9721)
1373    */
1374   @SuppressWarnings("unchecked")
1375   @Test
1376   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
1377     Mockito.when(this.serverManager.sendRegionOpen(Mockito.eq(SERVERNAME_B), Mockito.eq(REGIONINFO),
1378       Mockito.anyInt(), (List<ServerName>)Mockito.any()))
1379       .thenThrow(new DoNotRetryIOException());
1380     this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 100);
1381 
1382     HRegionInfo hri = REGIONINFO;
1383     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1384     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1385       server.getConfiguration());
1386     // Create an AM.
1387     AssignmentManager am = new AssignmentManager(this.server,
1388       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1389     RegionStates regionStates = am.getRegionStates();
1390     try {
1391       am.regionPlans.put(REGIONINFO.getEncodedName(),
1392         new RegionPlan(REGIONINFO, null, SERVERNAME_B));
1393 
1394       // Should fail once, but succeed on the second attempt for the SERVERNAME_A
1395       am.assign(hri, true, false);
1396     } finally {
1397       assertEquals(SERVERNAME_A, regionStates.getRegionState(REGIONINFO).getServerName());
1398     }
1399   }
1400 }