View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotSame;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.*;
28  import java.util.concurrent.atomic.AtomicBoolean;
29  
30  import org.apache.hadoop.hbase.CellScannable;
31  import org.apache.hadoop.hbase.CellUtil;
32  import org.apache.hadoop.hbase.DoNotRetryIOException;
33  import org.apache.hadoop.hbase.HBaseConfiguration;
34  import org.apache.hadoop.hbase.HBaseTestingUtility;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.testclassification.MediumTests;
38  import org.apache.hadoop.hbase.RegionException;
39  import org.apache.hadoop.hbase.RegionTransition;
40  import org.apache.hadoop.hbase.Server;
41  import org.apache.hadoop.hbase.ServerLoad;
42  import org.apache.hadoop.hbase.ServerName;
43  import org.apache.hadoop.hbase.TableName;
44  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
45  import org.apache.hadoop.hbase.catalog.CatalogTracker;
46  import org.apache.hadoop.hbase.catalog.MetaMockingUtil;
47  import org.apache.hadoop.hbase.client.HConnection;
48  import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
49  import org.apache.hadoop.hbase.client.Result;
50  import org.apache.hadoop.hbase.exceptions.DeserializationException;
51  import org.apache.hadoop.hbase.executor.EventType;
52  import org.apache.hadoop.hbase.executor.ExecutorService;
53  import org.apache.hadoop.hbase.executor.ExecutorType;
54  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
55  import org.apache.hadoop.hbase.master.RegionState.State;
56  import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager;
57  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
58  import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
59  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
60  import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
61  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
62  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
63  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
64  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
65  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
66  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
67  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
68  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
69  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
70  import org.apache.hadoop.hbase.util.Bytes;
71  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
72  import org.apache.hadoop.hbase.util.Threads;
73  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
74  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
75  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
76  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
77  import org.apache.zookeeper.KeeperException;
78  import org.apache.zookeeper.KeeperException.NodeExistsException;
79  import org.apache.zookeeper.Watcher;
80  import org.junit.After;
81  import org.junit.AfterClass;
82  import org.junit.Before;
83  import org.junit.BeforeClass;
84  import org.junit.Test;
85  import org.junit.experimental.categories.Category;
86  import org.mockito.Mockito;
87  import org.mockito.internal.util.reflection.Whitebox;
88  import org.mockito.invocation.InvocationOnMock;
89  import org.mockito.stubbing.Answer;
90  
91  import com.google.protobuf.RpcController;
92  import com.google.protobuf.ServiceException;
93  
94  
95  /**
96   * Test {@link AssignmentManager}
97   */
98  @Category(MediumTests.class)
99  public class TestAssignmentManager {
100   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
101   private static final ServerName SERVERNAME_A =
102       ServerName.valueOf("example.org", 1234, 5678);
103   private static final ServerName SERVERNAME_B =
104       ServerName.valueOf("example.org", 0, 5678);
105   private static final ServerName SERVERNAME_C =
106       ServerName.valueOf("example.org", 0, 5678);
107   private static final HRegionInfo REGIONINFO =
108     new HRegionInfo(TableName.valueOf("t"),
109       HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW);
110   private static int assignmentCount;
111   private static boolean enabling = false;
112 
113   // Mocked objects or; get redone for each test.
114   private Server server;
115   private ServerManager serverManager;
116   private ZooKeeperWatcher watcher;
117   private LoadBalancer balancer;
118   private HMaster master;
119 
120   @BeforeClass
121   public static void beforeClass() throws Exception {
122     HTU.getConfiguration().setBoolean("hbase.assignment.usezk", true);
123     HTU.startMiniZKCluster();
124   }
125 
126   @AfterClass
127   public static void afterClass() throws IOException {
128     HTU.shutdownMiniZKCluster();
129   }
130 
131   @Before
132   public void before() throws ZooKeeperConnectionException, IOException {
133     // TODO: Make generic versions of what we do below and put up in a mocking
134     // utility class or move up into HBaseTestingUtility.
135 
136     // Mock a Server.  Have it return a legit Configuration and ZooKeeperWatcher.
137     // If abort is called, be sure to fail the test (don't just swallow it
138     // silently as is mockito default).
139     this.server = Mockito.mock(Server.class);
140     Mockito.when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
141     Mockito.when(server.getConfiguration()).thenReturn(HTU.getConfiguration());
142     Mockito.when(server.getCatalogTracker()).thenReturn(null);
143     this.watcher =
144       new ZooKeeperWatcher(HTU.getConfiguration(), "mockedServer", this.server, true);
145     Mockito.when(server.getZooKeeper()).thenReturn(this.watcher);
146     Mockito.doThrow(new RuntimeException("Aborted")).
147       when(server).abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
148 
149     // Mock a ServerManager.  Say server SERVERNAME_{A,B} are online.  Also
150     // make it so if close or open, we return 'success'.
151     this.serverManager = Mockito.mock(ServerManager.class);
152     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
153     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
154     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_C)).thenReturn(true);
155     Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer());
156     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
157     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
158     onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
159     onlineServers.put(SERVERNAME_C, ServerLoad.EMPTY_SERVERLOAD);
160     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
161         new ArrayList<ServerName>(onlineServers.keySet()));
162     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
163 
164     List<ServerName> avServers = new ArrayList<ServerName>();
165     avServers.addAll(onlineServers.keySet());
166     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
167     Mockito.when(this.serverManager.createDestinationServersList(null)).thenReturn(avServers);
168 
169     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)).
170       thenReturn(true);
171     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
172       thenReturn(true);
173     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_C, REGIONINFO, -1)).
174         thenReturn(true);
175     // Ditto on open.
176     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
177       thenReturn(RegionOpeningState.OPENED);
178     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
179       thenReturn(RegionOpeningState.OPENED);
180     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_C, REGIONINFO, -1, null)).
181         thenReturn(RegionOpeningState.OPENED);
182     this.master = Mockito.mock(HMaster.class);
183     Mockito.when(this.serverManager.getDrainingServersList())
184         .thenReturn(Arrays.asList(SERVERNAME_C));
185 
186     Mockito.when(this.master.getServerManager()).thenReturn(serverManager);
187   }
188 
189   @After
190     public void after() throws KeeperException {
191     if (this.watcher != null) {
192       // Clean up all znodes
193       ZKAssign.deleteAllNodes(this.watcher);
194       this.watcher.close();
195     }
196   }
197 
198   /**
199    * Test a balance going on at same time as a master failover
200    *
201    * @throws IOException
202    * @throws KeeperException
203    * @throws InterruptedException
204    * @throws DeserializationException
205    */
206   @Test(timeout = 60000)
207   public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
208   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
209     AssignmentManagerWithExtrasForTesting am =
210       setUpMockedAssignmentManager(this.server, this.serverManager);
211     try {
212       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
213       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
214       while (!am.processRITInvoked) Thread.sleep(1);
215       // As part of the failover cleanup, the balancing region plan is removed.
216       // So a random server will be used to open the region. For testing purpose,
217       // let's assume it is going to open on server b:
218       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
219 
220       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
221 
222       // Now fake the region closing successfully over on the regionserver; the
223       // regionserver will have set the region in CLOSED state. This will
224       // trigger callback into AM. The below zk close call is from the RS close
225       // region handler duplicated here because its down deep in a private
226       // method hard to expose.
227       int versionid =
228         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
229       assertNotSame(versionid, -1);
230       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
231 
232       // Get current versionid else will fail on transition from OFFLINE to
233       // OPENING below
234       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
235       assertNotSame(-1, versionid);
236       // This uglyness below is what the openregionhandler on RS side does.
237       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
238         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
239         EventType.RS_ZK_REGION_OPENING, versionid);
240       assertNotSame(-1, versionid);
241       // Move znode from OPENING to OPENED as RS does on successful open.
242       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
243         SERVERNAME_B, versionid);
244       assertNotSame(-1, versionid);
245       am.gate.set(false);
246       // Block here until our znode is cleared or until this test times out.
247       ZKAssign.blockUntilNoRIT(watcher);
248     } finally {
249       am.getExecutorService().shutdown();
250       am.shutdown();
251     }
252   }
253 
254   @Test(timeout = 60000)
255   public void testGettingAssignmentsExcludesDrainingServers() throws Exception {
256     AssignmentManagerWithExtrasForTesting am =
257         setUpMockedAssignmentManager(this.server, this.serverManager);
258 
259     Map<TableName, Map<ServerName, List<HRegionInfo>>>
260         result = am.getRegionStates().getAssignmentsByTable();
261     for (Map<ServerName, List<HRegionInfo>> map : result.values()) {
262       System.out.println(map.keySet());
263       assertFalse(map.containsKey(SERVERNAME_C));
264     }
265   }
266 
267   @Test(timeout = 60000)
268   public void testBalanceOnMasterFailoverScenarioWithClosedNode()
269   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
270     AssignmentManagerWithExtrasForTesting am =
271       setUpMockedAssignmentManager(this.server, this.serverManager);
272     try {
273       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
274       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
275       while (!am.processRITInvoked) Thread.sleep(1);
276       // As part of the failover cleanup, the balancing region plan is removed.
277       // So a random server will be used to open the region. For testing purpose,
278       // let's assume it is going to open on server b:
279       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
280 
281       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
282 
283       // Now fake the region closing successfully over on the regionserver; the
284       // regionserver will have set the region in CLOSED state. This will
285       // trigger callback into AM. The below zk close call is from the RS close
286       // region handler duplicated here because its down deep in a private
287       // method hard to expose.
288       int versionid =
289         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
290       assertNotSame(versionid, -1);
291       am.gate.set(false);
292       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
293 
294       // Get current versionid else will fail on transition from OFFLINE to
295       // OPENING below
296       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
297       assertNotSame(-1, versionid);
298       // This uglyness below is what the openregionhandler on RS side does.
299       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
300           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
301           EventType.RS_ZK_REGION_OPENING, versionid);
302       assertNotSame(-1, versionid);
303       // Move znode from OPENING to OPENED as RS does on successful open.
304       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
305           SERVERNAME_B, versionid);
306       assertNotSame(-1, versionid);
307 
308       // Block here until our znode is cleared or until this test timesout.
309       ZKAssign.blockUntilNoRIT(watcher);
310     } finally {
311       am.getExecutorService().shutdown();
312       am.shutdown();
313     }
314   }
315 
316   @Test(timeout = 60000)
317   public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
318   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
319     AssignmentManagerWithExtrasForTesting am =
320       setUpMockedAssignmentManager(this.server, this.serverManager);
321     try {
322       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
323       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
324       while (!am.processRITInvoked) Thread.sleep(1);
325       // As part of the failover cleanup, the balancing region plan is removed.
326       // So a random server will be used to open the region. For testing purpose,
327       // let's assume it is going to open on server b:
328       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
329 
330       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
331 
332       // Now fake the region closing successfully over on the regionserver; the
333       // regionserver will have set the region in CLOSED state. This will
334       // trigger callback into AM. The below zk close call is from the RS close
335       // region handler duplicated here because its down deep in a private
336       // method hard to expose.
337       int versionid =
338         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
339       assertNotSame(versionid, -1);
340       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
341 
342       am.gate.set(false);
343       // Get current versionid else will fail on transition from OFFLINE to
344       // OPENING below
345       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
346       assertNotSame(-1, versionid);
347       // This uglyness below is what the openregionhandler on RS side does.
348       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
349           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
350           EventType.RS_ZK_REGION_OPENING, versionid);
351       assertNotSame(-1, versionid);
352       // Move znode from OPENING to OPENED as RS does on successful open.
353       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
354           SERVERNAME_B, versionid);
355       assertNotSame(-1, versionid);
356       // Block here until our znode is cleared or until this test timesout.
357       ZKAssign.blockUntilNoRIT(watcher);
358     } finally {
359       am.getExecutorService().shutdown();
360       am.shutdown();
361     }
362   }
363 
364   private void createRegionPlanAndBalance(
365       final AssignmentManager am, final ServerName from,
366       final ServerName to, final HRegionInfo hri) throws RegionException {
367     // Call the balance function but fake the region being online first at
368     // servername from.
369     am.regionOnline(hri, from);
370     // Balance region from 'from' to 'to'. It calls unassign setting CLOSING state
371     // up in zk.  Create a plan and balance
372     am.balance(new RegionPlan(hri, from, to));
373   }
374 
375   /**
376    * Tests AssignmentManager balance function.  Runs a balance moving a region
377    * from one server to another mocking regionserver responding over zk.
378    * @throws IOException
379    * @throws KeeperException
380    * @throws DeserializationException
381    */
382   @Test (timeout=180000)
383   public void testBalance()
384     throws IOException, KeeperException, DeserializationException, InterruptedException {
385     // Create and startup an executor.  This is used by AssignmentManager
386     // handling zk callbacks.
387     ExecutorService executor = startupMasterExecutor("testBalanceExecutor");
388 
389     // We need a mocked catalog tracker.
390     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
391     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
392         .getConfiguration());
393     // Create an AM.
394     AssignmentManager am = new AssignmentManager(this.server,
395       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
396     am.failoverCleanupDone.set(true);
397     try {
398       // Make sure our new AM gets callbacks; once registered, can't unregister.
399       // Thats ok because we make a new zk watcher for each test.
400       this.watcher.registerListenerFirst(am);
401       // Call the balance function but fake the region being online first at
402       // SERVERNAME_A.  Create a balance plan.
403       am.regionOnline(REGIONINFO, SERVERNAME_A);
404       // Balance region from A to B.
405       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
406       am.balance(plan);
407 
408       RegionStates regionStates = am.getRegionStates();
409       // Must be failed to close since the server is fake
410       assertTrue(regionStates.isRegionInTransition(REGIONINFO)
411         && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
412       // Move it back to pending_close
413       regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
414 
415       // Now fake the region closing successfully over on the regionserver; the
416       // regionserver will have set the region in CLOSED state.  This will
417       // trigger callback into AM. The below zk close call is from the RS close
418       // region handler duplicated here because its down deep in a private
419       // method hard to expose.
420       int versionid =
421         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
422       assertNotSame(versionid, -1);
423       // AM is going to notice above CLOSED and queue up a new assign.  The
424       // assign will go to open the region in the new location set by the
425       // balancer.  The zk node will be OFFLINE waiting for regionserver to
426       // transition it through OPENING, OPENED.  Wait till we see the OFFLINE
427       // zk node before we proceed.
428       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
429 
430       // Get current versionid else will fail on transition from OFFLINE to OPENING below
431       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
432       assertNotSame(-1, versionid);
433       // This uglyness below is what the openregionhandler on RS side does.
434       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
435         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
436         EventType.RS_ZK_REGION_OPENING, versionid);
437       assertNotSame(-1, versionid);
438       // Move znode from OPENING to OPENED as RS does on successful open.
439       versionid =
440         ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
441       assertNotSame(-1, versionid);
442       // Wait on the handler removing the OPENED znode.
443       while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
444     } finally {
445       executor.shutdown();
446       am.shutdown();
447       // Clean up all znodes
448       ZKAssign.deleteAllNodes(this.watcher);
449     }
450   }
451 
452   /**
453    * Run a simple server shutdown handler.
454    * @throws KeeperException
455    * @throws IOException
456    */
457   @Test (timeout=180000)
458   public void testShutdownHandler()
459       throws KeeperException, IOException, ServiceException {
460     // Create and startup an executor.  This is used by AssignmentManager
461     // handling zk callbacks.
462     ExecutorService executor = startupMasterExecutor("testShutdownHandler");
463 
464     // We need a mocked catalog tracker.
465     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
466     // Create an AM.
467     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
468         this.server, this.serverManager);
469     try {
470       processServerShutdownHandler(ct, am, false);
471     } finally {
472       executor.shutdown();
473       am.shutdown();
474       // Clean up all znodes
475       ZKAssign.deleteAllNodes(this.watcher);
476     }
477   }
478 
479   /**
480    * To test closed region handler to remove rit and delete corresponding znode
481    * if region in pending close or closing while processing shutdown of a region
482    * server.(HBASE-5927).
483    *
484    * @throws KeeperException
485    * @throws IOException
486    * @throws ServiceException
487    */
488   @Test (timeout=180000)
489   public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException,
490       ServiceException {
491     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
492     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
493   }
494 
495 
496   /**
497    * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
498    * has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also HBASE-5806
499    *
500    * @throws KeeperException
501    * @throws IOException
502    */
503   @Test (timeout=180000)
504   public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
505     // true indicates the region is split but still in RIT
506     testCaseWithSplitRegionPartial(true);
507     // false indicate the region is not split
508     testCaseWithSplitRegionPartial(false);
509   }
510 
511   private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
512       IOException, NodeExistsException, InterruptedException, ServiceException {
513     // Create and startup an executor. This is used by AssignmentManager
514     // handling zk callbacks.
515     ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
516     // We need a mocked catalog tracker.
517     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
518     ZKAssign.deleteAllNodes(this.watcher);
519 
520     // Create an AM.
521     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
522       this.server, this.serverManager);
523     // adding region to regions and servers maps.
524     am.regionOnline(REGIONINFO, SERVERNAME_A);
525     // adding region in pending close.
526     am.getRegionStates().updateRegionState(
527       REGIONINFO, State.SPLITTING, SERVERNAME_A);
528     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
529     RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
530         REGIONINFO.getRegionName(), SERVERNAME_A);
531     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
532     // create znode in M_ZK_REGION_CLOSING state.
533     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
534 
535     try {
536       processServerShutdownHandler(ct, am, regionSplitDone);
537       // check znode deleted or not.
538       // In both cases the znode should be deleted.
539 
540       if (regionSplitDone) {
541         assertFalse("Region state of region in SPLITTING should be removed from rit.",
542             am.getRegionStates().isRegionsInTransition());
543       } else {
544         while (!am.assignInvoked) {
545           Thread.sleep(1);
546         }
547         assertTrue("Assign should be invoked.", am.assignInvoked);
548       }
549     } finally {
550       REGIONINFO.setOffline(false);
551       REGIONINFO.setSplit(false);
552       executor.shutdown();
553       am.shutdown();
554       // Clean up all znodes
555       ZKAssign.deleteAllNodes(this.watcher);
556     }
557   }
558 
559   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
560       IOException, NodeExistsException, ServiceException {
561     // Create and startup an executor. This is used by AssignmentManager
562     // handling zk callbacks.
563     ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
564     // We need a mocked catalog tracker.
565     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
566     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
567     ZKAssign.deleteAllNodes(this.watcher);
568 
569     // Create an AM.
570     AssignmentManager am = new AssignmentManager(this.server,
571       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
572     // adding region to regions and servers maps.
573     am.regionOnline(REGIONINFO, SERVERNAME_A);
574     // adding region in pending close.
575     am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
576     if (state == Table.State.DISABLING) {
577       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
578     } else {
579       am.getZKTable().setDisabledTable(REGIONINFO.getTable());
580     }
581     RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
582         REGIONINFO.getRegionName(), SERVERNAME_A);
583     // RegionTransitionData data = new
584     // RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
585     // REGIONINFO.getRegionName(), SERVERNAME_A);
586     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
587     // create znode in M_ZK_REGION_CLOSING state.
588     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
589 
590     try {
591       processServerShutdownHandler(ct, am, false);
592       // check znode deleted or not.
593       // In both cases the znode should be deleted.
594       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
595       // check whether in rit or not. In the DISABLING case also the below
596       // assert will be true but the piece of code added for HBASE-5927 will not
597       // do that.
598       if (state == Table.State.DISABLED) {
599         assertFalse("Region state of region in pending close should be removed from rit.",
600             am.getRegionStates().isRegionsInTransition());
601       }
602     } finally {
603       am.setEnabledTable(REGIONINFO.getTable());
604       executor.shutdown();
605       am.shutdown();
606       // Clean up all znodes
607       ZKAssign.deleteAllNodes(this.watcher);
608     }
609   }
610 
611   private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion)
612       throws IOException, ServiceException {
613     // Make sure our new AM gets callbacks; once registered, can't unregister.
614     // Thats ok because we make a new zk watcher for each test.
615     this.watcher.registerListenerFirst(am);
616 
617     // Need to set up a fake scan of meta for the servershutdown handler
618     // Make an RS Interface implementation.  Make it so a scanner can go against it.
619     ClientProtos.ClientService.BlockingInterface implementation =
620       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
621     // Get a meta row result that has region up on SERVERNAME_A
622 
623     Result r;
624     if (splitRegion) {
625       r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
626     } else {
627       r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
628     }
629 
630     final ScanResponse.Builder builder = ScanResponse.newBuilder();
631     builder.setMoreResults(true);
632     builder.addCellsPerResult(r.size());
633     final List<CellScannable> cellScannables = new ArrayList<CellScannable>(1);
634     cellScannables.add(r);
635     Mockito.when(implementation.scan(
636       (RpcController)Mockito.any(), (ScanRequest)Mockito.any())).
637       thenAnswer(new Answer<ScanResponse>() {
638           @Override
639           public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
640             PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
641                 .getArguments()[0];
642             if (controller != null) {
643               controller.setCellScanner(CellUtil.createCellScanner(cellScannables));
644             }
645             return builder.build();
646           }
647       });
648 
649     // Get a connection w/ mocked up common methods.
650     HConnection connection =
651       HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(),
652         null, implementation, SERVERNAME_B, REGIONINFO);
653 
654     // Make it so we can get a catalogtracker from servermanager.. .needed
655     // down in guts of server shutdown handler.
656     Mockito.when(ct.getConnection()).thenReturn(connection);
657     Mockito.when(this.server.getCatalogTracker()).thenReturn(ct);
658 
659     // Now make a server shutdown handler instance and invoke process.
660     // Have it that SERVERNAME_A died.
661     DeadServer deadServers = new DeadServer();
662     deadServers.add(SERVERNAME_A);
663     // I need a services instance that will return the AM
664     MasterFileSystem fs = Mockito.mock(MasterFileSystem.class);
665     Mockito.doNothing().when(fs).setLogRecoveryMode();
666     Mockito.when(fs.getLogRecoveryMode()).thenReturn(RecoveryMode.LOG_REPLAY);
667     MasterServices services = Mockito.mock(MasterServices.class);
668     Mockito.when(services.getAssignmentManager()).thenReturn(am);
669     Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
670     Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
671     Mockito.when(services.getMasterFileSystem()).thenReturn(fs);
672     ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
673       services, deadServers, SERVERNAME_A, false);
674     am.failoverCleanupDone.set(true);
675     handler.process();
676     // The region in r will have been assigned.  It'll be up in zk as unassigned.
677   }
678 
679   /**
680    * Create and startup executor pools. Start same set as master does (just
681    * run a few less).
682    * @param name Name to give our executor
683    * @return Created executor (be sure to call shutdown when done).
684    */
685   private ExecutorService startupMasterExecutor(final String name) {
686     // TODO: Move up into HBaseTestingUtility?  Generally useful.
687     ExecutorService executor = new ExecutorService(name);
688     executor.startExecutorService(ExecutorType.MASTER_OPEN_REGION, 3);
689     executor.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, 3);
690     executor.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, 3);
691     executor.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, 3);
692     return executor;
693   }
694 
695   @Test (timeout=180000)
696   public void testUnassignWithSplitAtSameTime() throws KeeperException, IOException {
697     // Region to use in test.
698     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
699     // First amend the servermanager mock so that when we do send close of the
700     // first meta region on SERVERNAME_A, it will return true rather than
701     // default null.
702     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true);
703     // Need a mocked catalog tracker.
704     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
705     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
706         .getConfiguration());
707     // Create an AM.
708     AssignmentManager am = new AssignmentManager(this.server,
709       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
710     try {
711       // First make sure my mock up basically works.  Unassign a region.
712       unassign(am, SERVERNAME_A, hri);
713       // This delete will fail if the previous unassign did wrong thing.
714       ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_A);
715       // Now put a SPLITTING region in the way.  I don't have to assert it
716       // go put in place.  This method puts it in place then asserts it still
717       // owns it by moving state from SPLITTING to SPLITTING.
718       int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A);
719       // Now, retry the unassign with the SPLTTING in place.  It should just
720       // complete without fail; a sort of 'silent' recognition that the
721       // region to unassign has been split and no longer exists: TOOD: what if
722       // the split fails and the parent region comes back to life?
723       unassign(am, SERVERNAME_A, hri);
724       // This transition should fail if the znode has been messed with.
725       ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A,
726         EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
727       assertFalse(am.getRegionStates().isRegionInTransition(hri));
728     } finally {
729       am.shutdown();
730     }
731   }
732 
733   /**
734    * Tests the processDeadServersAndRegionsInTransition should not fail with NPE
735    * when it failed to get the children. Let's abort the system in this
736    * situation
737    * @throws ServiceException
738    */
739   @Test(timeout = 60000)
740   public void testProcessDeadServersAndRegionsInTransitionShouldNotFailWithNPE()
741       throws IOException, KeeperException, InterruptedException, ServiceException {
742     final RecoverableZooKeeper recoverableZk = Mockito
743         .mock(RecoverableZooKeeper.class);
744     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
745       this.server, this.serverManager);
746     Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest",
747         null) {
748       @Override
749       public RecoverableZooKeeper getRecoverableZooKeeper() {
750         return recoverableZk;
751       }
752     };
753     ((ZooKeeperWatcher) zkw).registerListener(am);
754     Mockito.doThrow(new InterruptedException()).when(recoverableZk)
755         .getChildren("/hbase/region-in-transition", null);
756     am.setWatcher((ZooKeeperWatcher) zkw);
757     try {
758       am.processDeadServersAndRegionsInTransition(null);
759       fail("Expected to abort");
760     } catch (NullPointerException e) {
761       fail("Should not throw NPE");
762     } catch (RuntimeException e) {
763       assertEquals("Aborted", e.getLocalizedMessage());
764     }
765   }
766   /**
767    * TestCase verifies that the regionPlan is updated whenever a region fails to open
768    * and the master tries to process RS_ZK_FAILED_OPEN state.(HBASE-5546).
769    */
770   @Test(timeout = 60000)
771   public void testRegionPlanIsUpdatedWhenRegionFailsToOpen() throws IOException, KeeperException,
772       ServiceException, InterruptedException {
773     this.server.getConfiguration().setClass(
774       HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockedLoadBalancer.class,
775       LoadBalancer.class);
776     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
777       this.server, this.serverManager);
778     try {
779       // Boolean variable used for waiting until randomAssignment is called and
780       // new
781       // plan is generated.
782       AtomicBoolean gate = new AtomicBoolean(false);
783       if (balancer instanceof MockedLoadBalancer) {
784         ((MockedLoadBalancer) balancer).setGateVariable(gate);
785       }
786       ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
787       int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
788       ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A,
789           EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
790       String path = ZKAssign.getNodeName(this.watcher, REGIONINFO
791           .getEncodedName());
792       am.getRegionStates().updateRegionState(
793         REGIONINFO, State.OPENING, SERVERNAME_A);
794       // a dummy plan inserted into the regionPlans. This plan is cleared and
795       // new one is formed
796       am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(
797           REGIONINFO, null, SERVERNAME_A));
798       RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
799       List<ServerName> serverList = new ArrayList<ServerName>(2);
800       serverList.add(SERVERNAME_B);
801       Mockito.when(
802           this.serverManager.createDestinationServersList(SERVERNAME_A))
803           .thenReturn(serverList);
804       am.nodeDataChanged(path);
805       // here we are waiting until the random assignment in the load balancer is
806       // called.
807       while (!gate.get()) {
808         Thread.sleep(10);
809       }
810       // new region plan may take some time to get updated after random
811       // assignment is called and
812       // gate is set to true.
813       RegionPlan newRegionPlan = am.regionPlans
814           .get(REGIONINFO.getEncodedName());
815       while (newRegionPlan == null) {
816         Thread.sleep(10);
817         newRegionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
818       }
819       // the new region plan created may contain the same RS as destination but
820       // it should
821       // be new plan.
822       assertNotSame("Same region plan should not come", regionPlan,
823           newRegionPlan);
824       assertTrue("Destination servers should be different.", !(regionPlan
825           .getDestination().equals(newRegionPlan.getDestination())));
826 
827       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
828     } finally {
829       this.server.getConfiguration().setClass(
830           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
831           LoadBalancer.class);
832       am.getExecutorService().shutdown();
833       am.shutdown();
834     }
835   }
836 
837   /**
838    * Mocked load balancer class used in the testcase to make sure that the testcase waits until
839    * random assignment is called and the gate variable is set to true.
840    */
841   public static class MockedLoadBalancer extends SimpleLoadBalancer {
842     private AtomicBoolean gate;
843 
844     public void setGateVariable(AtomicBoolean gate) {
845       this.gate = gate;
846     }
847 
848     @Override
849     public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
850       ServerName randomServerName = super.randomAssignment(regionInfo, servers);
851       this.gate.set(true);
852       return randomServerName;
853     }
854 
855     @Override
856     public Map<ServerName, List<HRegionInfo>> retainAssignment(
857         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
858       this.gate.set(true);
859       return super.retainAssignment(regions, servers);
860     }
861   }
862 
863   /*
864    * Tests the scenario
865    * - a regionserver (SERVERNAME_A) owns a region (hence the meta would have
866    *   the SERVERNAME_A as the host for the region),
867    * - SERVERNAME_A goes down
868    * - one of the affected regions is assigned to a live regionserver (SERVERNAME_B) but that
869    *   assignment somehow fails. The region ends up in the FAILED_OPEN state on ZK
870    * - [Issue that the patch on HBASE-13330 fixes] when the master is restarted,
871    *   the SSH for SERVERNAME_A rightly thinks that the region is now on transition on
872    *   SERVERNAME_B. But the owner for the region is still SERVERNAME_A in the AM's states.
873    *   The AM thinks that the SSH for SERVERNAME_A will assign the region. The region remains
874    *   unassigned for ever.
875    */
876   @Test(timeout = 60000)
877   public void testAssignmentOfRegionInSSHAndInFailedOpenState() throws IOException,
878   KeeperException, ServiceException, InterruptedException {
879     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
880         this.server, this.serverManager);
881     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_B);
882     int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
883     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_B,
884         EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
885     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
886     Mockito.when(this.serverManager.isServerReachable(SERVERNAME_B)).thenReturn(true);
887     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
888     DeadServer deadServers = new DeadServer();
889     deadServers.add(SERVERNAME_A);
890     Mockito.when(this.serverManager.getDeadServers()).thenReturn(deadServers);
891     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
892     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
893     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
894         new ArrayList<ServerName>(onlineServers.keySet()));
895     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
896     am.gate.set(false);
897     // join the cluster - that's when the AM is really kicking in after a restart
898     am.joinCluster();
899     while (!am.gate.get()) {
900       Thread.sleep(10);
901     }
902     assertTrue(am.getRegionStates().getRegionState(REGIONINFO).getState()
903         == RegionState.State.PENDING_OPEN);
904     am.shutdown();
905   }
906 
907   /**
908    * Test the scenario when the master is in failover and trying to process a
909    * region which is in Opening state on a dead RS. Master will force offline the
910    * region and put it in transition. AM relies on SSH to reassign it.
911    */
912   @Test(timeout = 60000)
913   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
914       KeeperException, ServiceException, InterruptedException {
915     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
916       this.server, this.serverManager);
917     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
918     int version = ZKAssign.getVersion(this.watcher, REGIONINFO);
919     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
920         EventType.RS_ZK_REGION_OPENING, version);
921     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING,
922         REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY);
923     version = ZKAssign.getVersion(this.watcher, REGIONINFO);
924     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
925     am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done
926     am.getRegionStates().createRegionState(REGIONINFO);
927     am.gate.set(false);
928     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
929     assertFalse(am.processRegionsInTransition(rt, REGIONINFO, version));
930     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
931     processServerShutdownHandler(ct, am, false);
932     // Waiting for the assignment to get completed.
933     while (!am.gate.get()) {
934       Thread.sleep(10);
935     }
936     assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO
937         .getEncodedName()));
938   }
939 
940   /**
941    * Test verifies whether assignment is skipped for regions of tables in DISABLING state during
942    * clean cluster startup. See HBASE-6281.
943    *
944    * @throws KeeperException
945    * @throws IOException
946    * @throws Exception
947    */
948   @Test(timeout = 60000)
949   public void testDisablingTableRegionsAssignmentDuringCleanClusterStartup()
950       throws KeeperException, IOException, Exception {
951     this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
952         MockedLoadBalancer.class, LoadBalancer.class);
953     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(
954         new HashMap<ServerName, ServerLoad>(0));
955     List<ServerName> destServers = new ArrayList<ServerName>(1);
956     destServers.add(SERVERNAME_A);
957     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
958     // To avoid cast exception in DisableTableHandler process.
959     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
960     Server server = new HMaster(HTU.getConfiguration());
961     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
962         this.serverManager);
963     AtomicBoolean gate = new AtomicBoolean(false);
964     if (balancer instanceof MockedLoadBalancer) {
965       ((MockedLoadBalancer) balancer).setGateVariable(gate);
966     }
967     try{
968       // set table in disabling state.
969       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
970       am.joinCluster();
971       // should not call retainAssignment if we get empty regions in assignAllUserRegions.
972       assertFalse(
973           "Assign should not be invoked for disabling table regions during clean cluster startup.",
974           gate.get());
975       // need to change table state from disabling to disabled.
976       assertTrue("Table should be disabled.",
977           am.getZKTable().isDisabledTable(REGIONINFO.getTable()));
978     } finally {
979       this.server.getConfiguration().setClass(
980         HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
981         LoadBalancer.class);
982       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
983       am.shutdown();
984     }
985   }
986 
987   /**
988    * Test verifies whether all the enabling table regions assigned only once during master startup.
989    *
990    * @throws KeeperException
991    * @throws IOException
992    * @throws Exception
993    */
994   @Test (timeout=180000)
995   public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception {
996     enabling = true;
997     List<ServerName> destServers = new ArrayList<ServerName>(1);
998     destServers.add(SERVERNAME_A);
999     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
1000     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
1001     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
1002     Server server = new HMaster(HTU.getConfiguration());
1003     Whitebox.setInternalState(server, "serverManager", this.serverManager);
1004     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
1005         this.serverManager);
1006     try {
1007       // set table in enabling state.
1008       am.getZKTable().setEnablingTable(REGIONINFO.getTable());
1009       new EnableTableHandler(server, REGIONINFO.getTable(),
1010           am.getCatalogTracker(), am, new NullTableLockManager(), true).prepare()
1011           .process();
1012       assertEquals("Number of assignments should be 1.", 1, assignmentCount);
1013       assertTrue("Table should be enabled.",
1014           am.getZKTable().isEnabledTable(REGIONINFO.getTable()));
1015     } finally {
1016       enabling = false;
1017       assignmentCount = 0;
1018       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
1019       am.shutdown();
1020       ZKAssign.deleteAllNodes(this.watcher);
1021     }
1022   }
1023 
1024   /**
1025    * Test verifies whether stale znodes of unknown tables as for the hbase:meta will be removed or
1026    * not.
1027    * @throws KeeperException
1028    * @throws IOException
1029    * @throws Exception
1030    */
1031   @Test (timeout=180000)
1032   public void testMasterRestartShouldRemoveStaleZnodesOfUnknownTableAsForMeta()
1033       throws KeeperException, IOException, Exception {
1034     List<ServerName> destServers = new ArrayList<ServerName>(1);
1035     destServers.add(SERVERNAME_A);
1036     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
1037     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
1038     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
1039     Server server = new HMaster(HTU.getConfiguration());
1040     Whitebox.setInternalState(server, "serverManager", this.serverManager);
1041     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
1042         this.serverManager);
1043     try {
1044       TableName tableName = TableName.valueOf("dummyTable");
1045       // set table in enabling state.
1046       am.getZKTable().setEnablingTable(tableName);
1047       am.joinCluster();
1048       assertFalse("Table should not be present in zookeeper.",
1049         am.getZKTable().isTablePresent(tableName));
1050     } finally {
1051     }
1052   }
1053   /**
1054    * When a region is in transition, if the region server opening the region goes down,
1055    * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign).
1056    * This test is to make sure SSH reassigns it right away.
1057    */
1058   @Test (timeout=180000)
1059   public void testSSHTimesOutOpeningRegionTransition()
1060       throws KeeperException, IOException, ServiceException {
1061     // We need a mocked catalog tracker.
1062     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1063     // Create an AM.
1064     AssignmentManagerWithExtrasForTesting am =
1065       setUpMockedAssignmentManager(this.server, this.serverManager);
1066     // adding region in pending open.
1067     RegionState state = new RegionState(REGIONINFO,
1068       State.OPENING, System.currentTimeMillis(), SERVERNAME_A);
1069     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B);
1070     am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state);
1071     // adding region plan
1072     am.regionPlans.put(REGIONINFO.getEncodedName(),
1073       new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A));
1074     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
1075 
1076     try {
1077       am.assignInvoked = false;
1078       processServerShutdownHandler(ct, am, false);
1079       assertTrue(am.assignInvoked);
1080     } finally {
1081       am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName());
1082       am.regionPlans.remove(REGIONINFO.getEncodedName());
1083     }
1084   }
1085 
1086   /**
1087    * Scenario:<ul>
1088    *  <li> master starts a close, and creates a znode</li>
1089    *  <li> it fails just at this moment, before contacting the RS</li>
1090    *  <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
1091    *    we don't know, and we have an exception.</li>
1092    *  <li> the master must handle this nicely and reassign.
1093    *  </ul>
1094    */
1095   @Test (timeout=180000)
1096   public void testClosingFailureDuringRecovery() throws Exception {
1097 
1098     AssignmentManagerWithExtrasForTesting am =
1099         setUpMockedAssignmentManager(this.server, this.serverManager);
1100     ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
1101     am.getRegionStates().createRegionState(REGIONINFO);
1102 
1103     assertFalse( am.getRegionStates().isRegionsInTransition() );
1104 
1105     am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
1106 
1107     assertTrue( am.getRegionStates().isRegionsInTransition() );
1108   }
1109 
1110   /**
1111    * Creates a new ephemeral node in the SPLITTING state for the specified region.
1112    * Create it ephemeral in case regionserver dies mid-split.
1113    *
1114    * <p>Does not transition nodes from other states.  If a node already exists
1115    * for this region, a {@link NodeExistsException} will be thrown.
1116    *
1117    * @param zkw zk reference
1118    * @param region region to be created as offline
1119    * @param serverName server event originates from
1120    * @return Version of znode created.
1121    * @throws KeeperException
1122    * @throws IOException
1123    */
1124   // Copied from SplitTransaction rather than open the method over there in
1125   // the regionserver package.
1126   private static int createNodeSplitting(final ZooKeeperWatcher zkw,
1127       final HRegionInfo region, final ServerName serverName)
1128   throws KeeperException, IOException {
1129     RegionTransition rt =
1130       RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
1131         region.getRegionName(), serverName);
1132 
1133     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1134     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1135       throw new IOException("Failed create of ephemeral " + node);
1136     }
1137     // Transition node from SPLITTING to SPLITTING and pick up version so we
1138     // can be sure this znode is ours; version is needed deleting.
1139     return transitionNodeSplitting(zkw, region, serverName, -1);
1140   }
1141 
1142   // Copied from SplitTransaction rather than open the method over there in
1143   // the regionserver package.
1144   private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
1145       final HRegionInfo parent,
1146       final ServerName serverName, final int version)
1147   throws KeeperException, IOException {
1148     return ZKAssign.transitionNode(zkw, parent, serverName,
1149       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
1150   }
1151 
1152   private void unassign(final AssignmentManager am, final ServerName sn,
1153       final HRegionInfo hri) throws RegionException {
1154     // Before I can unassign a region, I need to set it online.
1155     am.regionOnline(hri, sn);
1156     // Unassign region.
1157     am.unassign(hri);
1158   }
1159 
1160   /**
1161    * Create an {@link AssignmentManagerWithExtrasForTesting} that has mocked
1162    * {@link CatalogTracker} etc.
1163    * @param server
1164    * @param manager
1165    * @return An AssignmentManagerWithExtras with mock connections, etc.
1166    * @throws IOException
1167    * @throws KeeperException
1168    */
1169   private AssignmentManagerWithExtrasForTesting setUpMockedAssignmentManager(final Server server,
1170       final ServerManager manager) throws IOException, KeeperException, ServiceException {
1171     // We need a mocked catalog tracker. Its used by our AM instance.
1172     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1173     // Make an RS Interface implementation. Make it so a scanner can go against
1174     // it and a get to return the single region, REGIONINFO, this test is
1175     // messing with. Needed when "new master" joins cluster. AM will try and
1176     // rebuild its list of user regions and it will also get the HRI that goes
1177     // with an encoded name by doing a Get on hbase:meta
1178     ClientProtos.ClientService.BlockingInterface ri =
1179       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
1180     // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO
1181     Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
1182     final ScanResponse.Builder builder = ScanResponse.newBuilder();
1183     builder.setMoreResults(true);
1184     builder.addCellsPerResult(r.size());
1185     final List<CellScannable> rows = new ArrayList<CellScannable>(1);
1186     rows.add(r);
1187     Answer<ScanResponse> ans = new Answer<ClientProtos.ScanResponse>() {
1188       @Override
1189       public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
1190         PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
1191             .getArguments()[0];
1192         if (controller != null) {
1193           controller.setCellScanner(CellUtil.createCellScanner(rows));
1194         }
1195         return builder.build();
1196       }
1197     };
1198     if (enabling) {
1199       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any()))
1200           .thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans)
1201           .thenReturn(ScanResponse.newBuilder().setMoreResults(false).build());
1202     } else {
1203       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any())).thenAnswer(
1204           ans);
1205     }
1206     // If a get, return the above result too for REGIONINFO
1207     GetResponse.Builder getBuilder = GetResponse.newBuilder();
1208     getBuilder.setResult(ProtobufUtil.toResult(r));
1209     Mockito.when(ri.get((RpcController)Mockito.any(), (GetRequest) Mockito.any())).
1210       thenReturn(getBuilder.build());
1211     // Get a connection w/ mocked up common methods.
1212     HConnection connection = HConnectionTestingUtility.
1213       getMockedConnectionAndDecorate(HTU.getConfiguration(), null,
1214         ri, SERVERNAME_B, REGIONINFO);
1215     // Make it so we can get the connection from our mocked catalogtracker
1216     Mockito.when(ct.getConnection()).thenReturn(connection);
1217     // Create and startup an executor. Used by AM handling zk callbacks.
1218     ExecutorService executor = startupMasterExecutor("mockedAMExecutor");
1219     this.balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
1220     AssignmentManagerWithExtrasForTesting am = new AssignmentManagerWithExtrasForTesting(
1221       server, manager, ct, this.balancer, executor, new NullTableLockManager());
1222     return am;
1223   }
1224 
1225   /**
1226    * An {@link AssignmentManager} with some extra facility used testing
1227    */
1228   class AssignmentManagerWithExtrasForTesting extends AssignmentManager {
1229     // Keep a reference so can give it out below in {@link #getExecutorService}
1230     private final ExecutorService es;
1231     // Ditto for ct
1232     private final CatalogTracker ct;
1233     boolean processRITInvoked = false;
1234     boolean assignInvoked = false;
1235     AtomicBoolean gate = new AtomicBoolean(true);
1236 
1237     public AssignmentManagerWithExtrasForTesting(
1238         final Server master, final ServerManager serverManager,
1239         final CatalogTracker catalogTracker, final LoadBalancer balancer,
1240         final ExecutorService service, final TableLockManager tableLockManager)
1241             throws KeeperException, IOException {
1242       super(master, serverManager, catalogTracker, balancer, service, null, tableLockManager);
1243       this.es = service;
1244       this.ct = catalogTracker;
1245     }
1246 
1247     @Override
1248     boolean processRegionInTransition(String encodedRegionName,
1249         HRegionInfo regionInfo) throws KeeperException, IOException {
1250       this.processRITInvoked = true;
1251       return super.processRegionInTransition(encodedRegionName, regionInfo);
1252     }
1253 
1254     @Override
1255     public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) {
1256       if (enabling) {
1257         assignmentCount++;
1258         this.regionOnline(region, SERVERNAME_A);
1259       } else {
1260         super.assign(region, setOfflineInZK, forceNewPlan);
1261         this.gate.set(true);
1262       }
1263     }
1264 
1265     @Override
1266     boolean assign(ServerName destination, List<HRegionInfo> regions) {
1267       if (enabling) {
1268         for (HRegionInfo region : regions) {
1269           assignmentCount++;
1270           this.regionOnline(region, SERVERNAME_A);
1271         }
1272         return true;
1273       }
1274       return super.assign(destination, regions);
1275     }
1276 
1277     @Override
1278     public void assign(List<HRegionInfo> regions)
1279         throws IOException, InterruptedException {
1280       assignInvoked = (regions != null && regions.size() > 0);
1281       super.assign(regions);
1282       this.gate.set(true);
1283     }
1284 
1285     /** reset the watcher */
1286     void setWatcher(ZooKeeperWatcher watcher) {
1287       this.watcher = watcher;
1288     }
1289 
1290     /**
1291      * @return ExecutorService used by this instance.
1292      */
1293     ExecutorService getExecutorService() {
1294       return this.es;
1295     }
1296 
1297     /**
1298      * @return CatalogTracker used by this AM (Its a mock).
1299      */
1300     CatalogTracker getCatalogTracker() {
1301       return this.ct;
1302     }
1303   }
1304 
1305   /**
1306    * Call joinCluster on the passed AssignmentManager.  Do it in a thread
1307    * so it runs independent of what all else is going on.  Try to simulate
1308    * an AM running insided a failed over master by clearing all in-memory
1309    * AM state first.
1310   */
1311   private void startFakeFailedOverMasterAssignmentManager(final AssignmentManager am,
1312       final ZooKeeperWatcher watcher) {
1313     // Make sure our new AM gets callbacks; once registered, we can't unregister.
1314     // Thats ok because we make a new zk watcher for each test.
1315     watcher.registerListenerFirst(am);
1316     Thread t = new Thread("RunAmJoinCluster") {
1317       @Override
1318       public void run() {
1319         // Call the joinCluster function as though we were doing a master
1320         // failover at this point. It will stall just before we go to add
1321         // the RIT region to our RIT Map in AM at processRegionsInTransition.
1322         // First clear any inmemory state from AM so it acts like a new master
1323         // coming on line.
1324         am.getRegionStates().regionsInTransition.clear();
1325         am.regionPlans.clear();
1326         try {
1327           am.joinCluster();
1328         } catch (IOException e) {
1329           throw new RuntimeException(e);
1330         } catch (KeeperException e) {
1331           throw new RuntimeException(e);
1332         } catch (InterruptedException e) {
1333           throw new RuntimeException(e);
1334         }
1335       }
1336     };
1337     t.start();
1338     while (!t.isAlive()) Threads.sleep(1);
1339   }
1340 
1341   @Test (timeout=180000)
1342   public void testForceAssignMergingRegion() throws Exception {
1343     // Region to use in test.
1344     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1345     // Need a mocked catalog tracker.
1346     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1347     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1348       server.getConfiguration());
1349     // Create an AM.
1350     AssignmentManager am = new AssignmentManager(this.server,
1351       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1352     RegionStates regionStates = am.getRegionStates();
1353     try {
1354       // First set the state of the region to merging
1355       regionStates.updateRegionState(hri, RegionState.State.MERGING);
1356       // Now, try to assign it with force new plan
1357       am.assign(hri, true, true);
1358       assertEquals("The region should be still in merging state",
1359         RegionState.State.MERGING, regionStates.getRegionState(hri).getState());
1360     } finally {
1361       am.shutdown();
1362     }
1363   }
1364 
1365   /**
1366    * Test assignment related ZK events are ignored by AM if the region is not known
1367    * by AM to be in transition. During normal operation, all assignments are started
1368    * by AM (not considering split/merge), if an event is received but the region
1369    * is not in transition, the event must be a very late one. So it can be ignored.
1370    * During master failover, since AM watches assignment znodes after failover cleanup
1371    * is completed, when an event comes in, AM should already have the region in transition
1372    * if ZK is used during the assignment action (only hbck doesn't use ZK for region
1373    * assignment). So during master failover, we can ignored such events too.
1374    */
1375   @Test (timeout=180000)
1376   public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException {
1377     // Region to use in test.
1378     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1379     // Need a mocked catalog tracker.
1380     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1381     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1382       server.getConfiguration());
1383     final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
1384     // Create an AM.
1385     AssignmentManager am = new AssignmentManager(this.server,
1386       this.serverManager, ct, balancer, null, null, master.getTableLockManager()) {
1387 
1388       @Override
1389       void handleRegion(final RegionTransition rt, int expectedVersion) {
1390         super.handleRegion(rt, expectedVersion);
1391         if (rt != null && Bytes.equals(hri.getRegionName(),
1392           rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
1393           zkEventProcessed.set(true);
1394         }
1395       }
1396     };
1397     try {
1398       // First make sure the region is not in transition
1399       am.getRegionStates().regionOffline(hri);
1400       zkEventProcessed.set(false); // Reset it before faking zk transition
1401       this.watcher.registerListenerFirst(am);
1402       assertFalse("The region should not be in transition",
1403         am.getRegionStates().isRegionInTransition(hri));
1404       ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
1405       // Trigger a transition event
1406       ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
1407       long startTime = EnvironmentEdgeManager.currentTimeMillis();
1408       while (!zkEventProcessed.get()) {
1409         assertTrue("Timed out in waiting for ZK event to be processed",
1410           EnvironmentEdgeManager.currentTimeMillis() - startTime < 30000);
1411         Threads.sleepWithoutInterrupt(100);
1412       }
1413       assertFalse(am.getRegionStates().isRegionInTransition(hri));
1414     } finally {
1415       am.shutdown();
1416     }
1417   }
1418 
1419   /**
1420    * If a table is deleted, we should not be able to balance it anymore.
1421    * Otherwise, the region will be brought back.
1422    * @throws Exception
1423    */
1424   @Test (timeout=180000)
1425   public void testBalanceRegionOfDeletedTable() throws Exception {
1426     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1427     AssignmentManager am = new AssignmentManager(this.server, this.serverManager,
1428       ct, balancer, null, null, master.getTableLockManager());
1429     RegionStates regionStates = am.getRegionStates();
1430     HRegionInfo hri = REGIONINFO;
1431     regionStates.createRegionState(hri);
1432     assertFalse(regionStates.isRegionInTransition(hri));
1433     RegionPlan plan = new RegionPlan(hri, SERVERNAME_A, SERVERNAME_B);
1434     // Fake table is deleted
1435     regionStates.tableDeleted(hri.getTable());
1436     am.balance(plan);
1437     assertFalse("The region should not in transition",
1438       regionStates.isRegionInTransition(hri));
1439   }
1440 
1441   /**
1442    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
1443    * for openRegion. AM should assign this somewhere else. (HBASE-9721)
1444    */
1445   @SuppressWarnings("unchecked")
1446   @Test (timeout=180000)
1447   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
1448     Mockito.when(this.serverManager.sendRegionOpen(Mockito.eq(SERVERNAME_B), Mockito.eq(REGIONINFO),
1449       Mockito.anyInt(), (List<ServerName>)Mockito.any()))
1450       .thenThrow(new DoNotRetryIOException());
1451     this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 100);
1452 
1453     HRegionInfo hri = REGIONINFO;
1454     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1455     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1456       server.getConfiguration());
1457     // Create an AM.
1458     AssignmentManager am = new AssignmentManager(this.server,
1459       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1460     RegionStates regionStates = am.getRegionStates();
1461     try {
1462       am.regionPlans.put(REGIONINFO.getEncodedName(),
1463         new RegionPlan(REGIONINFO, null, SERVERNAME_B));
1464 
1465       // Should fail once, but succeed on the second attempt for the SERVERNAME_A
1466       am.assign(hri, true, false);
1467     } finally {
1468       assertEquals(SERVERNAME_A, regionStates.getRegionState(REGIONINFO).getServerName());
1469     }
1470   }
1471 }