1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.master;
21  
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertFalse;
24  import static org.junit.Assert.assertTrue;
25  
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.Set;
29  import java.util.TreeSet;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.hbase.Abortable;
35  import org.apache.hadoop.hbase.HBaseConfiguration;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HColumnDescriptor;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.HServerInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.MiniHBaseCluster;
42  import org.apache.hadoop.hbase.executor.RegionTransitionData;
43  import org.apache.hadoop.hbase.executor.EventHandler.EventType;
44  import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
45  import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
46  import org.apache.hadoop.hbase.regionserver.HRegionServer;
47  import org.apache.hadoop.hbase.util.Bytes;
48  import org.apache.hadoop.hbase.util.JVMClusterUtil;
49  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
50  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
51  import org.apache.hadoop.hbase.zookeeper.ZKTable;
52  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
53  import org.junit.Test;
54  
public class TestMasterFailover {
  // Shared logger for all master-failover tests in this class.
  private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
57  
58    /**
59     * Simple test of master failover.
60     * <p>
61     * Starts with three masters.  Kills a backup master.  Then kills the active
62     * master.  Ensures the final master becomes active and we can still contact
63     * the cluster.
64     * @throws Exception
65     */
66    @Test (timeout=180000)
67    public void testSimpleMasterFailover() throws Exception {
68  
69      final int NUM_MASTERS = 3;
70      final int NUM_RS = 3;
71  
72      // Create config to use for this cluster
73      Configuration conf = HBaseConfiguration.create();
74      conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
75      conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
76  
77      // Start the cluster
78      HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
79      TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
80      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
81  
82      // get all the master threads
83      List<MasterThread> masterThreads = cluster.getMasterThreads();
84  
85      // wait for each to come online
86      for (MasterThread mt : masterThreads) {
87        assertTrue(mt.isAlive());
88      }
89  
90      // verify only one is the active master and we have right number
91      int numActive = 0;
92      int activeIndex = -1;
93      String activeName = null;
94      for (int i = 0; i < masterThreads.size(); i++) {
95        if (masterThreads.get(i).getMaster().isActiveMaster()) {
96          numActive++;
97          activeIndex = i;
98          activeName = masterThreads.get(i).getMaster().getServerName();
99        }
100     }
101     assertEquals(1, numActive);
102     assertEquals(NUM_MASTERS, masterThreads.size());
103 
104     // attempt to stop one of the inactive masters
105     LOG.debug("\n\nStopping a backup master\n");
106     int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
107     cluster.stopMaster(backupIndex, false);
108     cluster.waitOnMaster(backupIndex);
109 
110     // verify still one active master and it's the same
111     for (int i = 0; i < masterThreads.size(); i++) {
112       if (masterThreads.get(i).getMaster().isActiveMaster()) {
113         assertTrue(activeName.equals(
114             masterThreads.get(i).getMaster().getServerName()));
115         activeIndex = i;
116       }
117     }
118     assertEquals(1, numActive);
119     assertEquals(2, masterThreads.size());
120 
121     // kill the active master
122     LOG.debug("\n\nStopping the active master\n");
123     cluster.stopMaster(activeIndex, false);
124     cluster.waitOnMaster(activeIndex);
125 
126     // wait for an active master to show up and be ready
127     assertTrue(cluster.waitForActiveAndReadyMaster());
128 
129     LOG.debug("\n\nVerifying backup master is now active\n");
130     // should only have one master now
131     assertEquals(1, masterThreads.size());
132     // and he should be active
133     assertTrue(masterThreads.get(0).getMaster().isActiveMaster());
134 
135     // Stop the cluster
136     TEST_UTIL.shutdownMiniCluster();
137   }
138 
139   /**
140    * Complex test of master failover that tests as many permutations of the
141    * different possible states that regions in transition could be in within ZK.
142    * <p>
143    * This tests the proper handling of these states by the failed-over master
144    * and includes a thorough testing of the timeout code as well.
145    * <p>
146    * Starts with a single master and three regionservers.
147    * <p>
 * Creates two tables, enabledTable and disabledTable, each pre-split into
 * multiple regions (one per entry in the test's SPLIT_KEYS array).  The
 * disabledTable is then disabled.
150    * <p>
151    * After reaching steady-state, the master is killed.  We then mock several
152    * states in ZK.
153    * <p>
154    * After mocking them, we will startup a new master which should become the
155    * active master and also detect that it is a failover.  The primary test
156    * passing condition will be that all regions of the enabled table are
157    * assigned and all the regions of the disabled table are not assigned.
158    * <p>
159    * The different scenarios to be tested are below:
160    * <p>
161    * <b>ZK State:  OFFLINE</b>
162    * <p>A node can get into OFFLINE state if</p>
163    * <ul>
164    * <li>An RS fails to open a region, so it reverts the state back to OFFLINE
165    * <li>The Master is assigning the region to a RS before it sends RPC
166    * </ul>
167    * <p>We will mock the scenarios</p>
168    * <ul>
169    * <li>Master has assigned an enabled region but RS failed so a region is
170    *     not assigned anywhere and is sitting in ZK as OFFLINE</li>
 * <li>NOTE(review): this single OFFLINE mock appears to exercise both causes
 *     above; confirm whether the master-side pre-RPC case needs its own mock</li>
172    * </ul>
173    * <p>
174    * <b>ZK State:  CLOSING</b>
175    * <p>A node can get into CLOSING state if</p>
176    * <ul>
177    * <li>An RS has begun to close a region
178    * </ul>
179    * <p>We will mock the scenarios</p>
180    * <ul>
181    * <li>Region of enabled table was being closed but did not complete
182    * <li>Region of disabled table was being closed but did not complete
183    * </ul>
184    * <p>
185    * <b>ZK State:  CLOSED</b>
186    * <p>A node can get into CLOSED state if</p>
187    * <ul>
188    * <li>An RS has completed closing a region but not acknowledged by master yet
189    * </ul>
190    * <p>We will mock the scenarios</p>
191    * <ul>
192    * <li>Region of a table that should be enabled was closed on an RS
193    * <li>Region of a table that should be disabled was closed on an RS
194    * </ul>
195    * <p>
196    * <b>ZK State:  OPENING</b>
197    * <p>A node can get into OPENING state if</p>
198    * <ul>
199    * <li>An RS has begun to open a region
200    * </ul>
201    * <p>We will mock the scenarios</p>
202    * <ul>
203    * <li>RS was opening a region of enabled table but never finishes
204    * </ul>
205    * <p>
206    * <b>ZK State:  OPENED</b>
207    * <p>A node can get into OPENED state if</p>
208    * <ul>
209    * <li>An RS has finished opening a region but not acknowledged by master yet
210    * </ul>
211    * <p>We will mock the scenarios</p>
212    * <ul>
213    * <li>Region of a table that should be enabled was opened on an RS
214    * <li>Region of a table that should be disabled was opened on an RS
215    * </ul>
216    * @throws Exception
217    */
  @Test (timeout=180000)
  public void testMasterFailoverWithMockedRIT() throws Exception {

    final int NUM_MASTERS = 1;
    final int NUM_RS = 3;

    // Create config to use for this cluster
    Configuration conf = HBaseConfiguration.create();
    // Drop the assignment timeout monitor intervals much lower so the
    // failed-over master's timeout handling fires within this test's budget
    conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
    conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
    conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
    conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);

    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Cluster started");

    // Create a ZKW to use in the test; any ZK fault is fatal to the test,
    // so the Abortable just rethrows
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
      "unittest", new Abortable() {
        @Override
        public void abort(String why, Throwable e) {
          throw new RuntimeException("Fatal ZK error, why=" + why, e);
        }
    });

    // get all the master threads
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    assertEquals(1, masterThreads.size());

    // only one master thread, let's wait for it to be initialized
    assertTrue(cluster.waitForActiveAndReadyMaster());
    HMaster master = masterThreads.get(0).getMaster();
    assertTrue(master.isActiveMaster());
    assertTrue(master.isInitialized());

    // disable load balancing on this master so the balancer cannot move the
    // hand-placed regions out from under the mocked ZK state below
    master.balanceSwitch(false);

    // create two tables in META, pre-split into one region per SPLIT_KEYS
    // entry (11 entries, so 11 regions each — not 10 as previously stated)
    byte [] FAMILY = Bytes.toBytes("family");
    byte [][] SPLIT_KEYS = new byte [][] {
        new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
        Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
        Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
        Bytes.toBytes("iii"), Bytes.toBytes("jjj")
    };

    byte [] enabledTable = Bytes.toBytes("enabledTable");
    HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
    htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
    List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
        TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);

    byte [] disabledTable = Bytes.toBytes("disabledTable");
    HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
    htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
    List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
        TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);

    log("Regions in META have been created");

    // at this point we only expect 2 regions to be assigned out (catalogs);
    // the user regions above exist only as META rows, not as open regions
    assertEquals(2, cluster.countServedRegions());

    // Let's just assign everything to first RS
    HRegionServer hrs = cluster.getRegionServer(0);
    String serverName = hrs.getServerName();
    HServerInfo hsiAlive = hrs.getServerInfo();

    // we'll need some regions to already be assigned out properly on live RS
    List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
    enabledAndAssignedRegions.add(enabledRegions.remove(0));
    enabledAndAssignedRegions.add(enabledRegions.remove(0));
    List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
    disabledAndAssignedRegions.add(disabledRegions.remove(0));
    disabledAndAssignedRegions.add(disabledRegions.remove(0));

    // now actually assign them: seed a region plan targeting the live RS so
    // assignRegion sends the region there rather than balancing freely
    for (HRegionInfo hri : enabledAndAssignedRegions) {
      master.assignmentManager.regionPlans.put(hri.getEncodedName(),
          new RegionPlan(hri, null, hsiAlive));
      master.assignRegion(hri);
    }
    for (HRegionInfo hri : disabledAndAssignedRegions) {
      master.assignmentManager.regionPlans.put(hri.getEncodedName(),
          new RegionPlan(hri, null, hsiAlive));
      master.assignRegion(hri);
    }

    // wait for no more RIT (regions-in-transition znodes in ZK)
    log("Waiting for assignment to finish");
    ZKAssign.blockUntilNoRIT(zkw);
    log("Assignment completed");

    // Stop the master so the mocked ZK states below are created while no
    // master is around to react to them
    log("Aborting master");
    cluster.abortMaster(0);
    cluster.waitOnMaster(0);
    log("Master has aborted");

    /*
     * Now, let's start mocking up some weird states as described in the method
     * javadoc.
     */

    List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
    List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();

    log("Beginning to mock scenarios");

    // Disable the disabledTable in ZK
    ZKTable zktable = new ZKTable(zkw);
    zktable.setDisabledTable(Bytes.toString(disabledTable));

    /*
     *  ZK = OFFLINE
     */

    // Region that should be assigned but is not and is in ZK as OFFLINE
    HRegionInfo region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);

    /*
     * ZK = CLOSING
     */

//    Disabled test of CLOSING.  This case is invalid after HBASE-3181.
//    How can an RS stop a CLOSING w/o deleting the node?  If it did ever fail
//    and left the node in CLOSING, the RS would have aborted and we'd process
//    these regions in server shutdown
//
//    // Region of enabled table being closed but not complete
//    // Region is already assigned, don't say anything to RS but set ZK closing
//    region = enabledAndAssignedRegions.remove(0);
//    regionsThatShouldBeOnline.add(region);
//    ZKAssign.createNodeClosing(zkw, region, serverName);
//
//    // Region of disabled table being closed but not complete
//    // Region is already assigned, don't say anything to RS but set ZK closing
//    region = disabledAndAssignedRegions.remove(0);
//    regionsThatShouldBeOffline.add(region);
//    ZKAssign.createNodeClosing(zkw, region, serverName);

    /*
     * ZK = CLOSED
     */

    // Region of enabled table closed but not ack'd: CLOSING node created then
    // transitioned to CLOSED using the version returned by the create
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    int version = ZKAssign.createNodeClosing(zkw, region, serverName);
    ZKAssign.transitionNodeClosed(zkw, region, serverName, version);

    // Region of disabled table closed but not ack
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    version = ZKAssign.createNodeClosing(zkw, region, serverName);
    ZKAssign.transitionNodeClosed(zkw, region, serverName, version);

    /*
     * ZK = OPENING
     */

    // RS was opening a region of enabled table but never finishes
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);
    ZKAssign.transitionNodeOpening(zkw, region, serverName);

    /*
     * ZK = OPENED
     */

    // Region of enabled table was opened on RS: ask the RS to open it for
    // real, then spin until the RS transitions the znode to OPENED
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);
    hrs.openRegion(region);
    while (true) {
      RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
      if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
        break;
      }
      Thread.sleep(100);
    }

    // Region of disable table was opened on RS; same spin-wait as above
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);
    hrs.openRegion(region);
    while (true) {
      RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
      if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
        break;
      }
      Thread.sleep(100);
    }

    /*
     * ZK = NONE
     */

    /*
     * DONE MOCKING
     */

    log("Done mocking data up in ZK");

    // Start up a new master; it should detect the failover and process the
    // mocked transition nodes left behind above
    log("Starting up a new master");
    master = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    // Failover should be completed, now wait for no RIT
    log("Waiting for no more RIT");
    ZKAssign.blockUntilNoRIT(zkw);
    log("No more RIT in ZK, now doing final test verification");

    // Grab all the regions that are online across RSs
    Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
    for (JVMClusterUtil.RegionServerThread rst :
      cluster.getRegionServerThreads()) {
      onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
    }

    // Now, everything that should be online should be online
    for (HRegionInfo hri : regionsThatShouldBeOnline) {
      assertTrue(onlineRegions.contains(hri));
    }

    // Everything that should be offline should not be online
    for (HRegionInfo hri : regionsThatShouldBeOffline) {
      assertFalse(onlineRegions.contains(hri));
    }

    log("Done with verification, all passed, shutting down cluster");

    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
  }
466 
467 
468   /**
469    * Complex test of master failover that tests as many permutations of the
470    * different possible states that regions in transition could be in within ZK
471    * pointing to an RS that has died while no master is around to process it.
472    * <p>
473    * This tests the proper handling of these states by the failed-over master
474    * and includes a thorough testing of the timeout code as well.
475    * <p>
476    * Starts with a single master and two regionservers.
477    * <p>
 * Creates two tables, enabledTable and disabledTable, each pre-split into
 * multiple regions (one per entry in the test's SPLIT_KEYS array).  The
 * disabledTable is then disabled.
480    * <p>
481    * After reaching steady-state, the master is killed.  We then mock several
482    * states in ZK.  And one of the RS will be killed.
483    * <p>
484    * After mocking them and killing an RS, we will startup a new master which
485    * should become the active master and also detect that it is a failover.  The
486    * primary test passing condition will be that all regions of the enabled
487    * table are assigned and all the regions of the disabled table are not
488    * assigned.
489    * <p>
490    * The different scenarios to be tested are below:
491    * <p>
492    * <b>ZK State:  CLOSING</b>
493    * <p>A node can get into CLOSING state if</p>
494    * <ul>
495    * <li>An RS has begun to close a region
496    * </ul>
497    * <p>We will mock the scenarios</p>
498    * <ul>
499    * <li>Region was being closed but the RS died before finishing the close
500    * </ul>
501    * <b>ZK State:  OPENED</b>
502    * <p>A node can get into OPENED state if</p>
503    * <ul>
504    * <li>An RS has finished opening a region but not acknowledged by master yet
505    * </ul>
506    * <p>We will mock the scenarios</p>
507    * <ul>
508    * <li>Region of a table that should be enabled was opened by a now-dead RS
509    * <li>Region of a table that should be disabled was opened by a now-dead RS
510    * </ul>
511    * <p>
512    * <b>ZK State:  NONE</b>
513    * <p>A region could not have a transition node if</p>
514    * <ul>
515    * <li>The server hosting the region died and no master processed it
516    * </ul>
517    * <p>We will mock the scenarios</p>
518    * <ul>
519    * <li>Region of enabled table was on a dead RS that was not yet processed
520    * <li>Region of disabled table was on a dead RS that was not yet processed
521    * </ul>
522    * @throws Exception
523    */
524   @Test (timeout=180000)
525   public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
526 
527     final int NUM_MASTERS = 1;
528     final int NUM_RS = 2;
529 
530     // Create config to use for this cluster
531     Configuration conf = HBaseConfiguration.create();
532     // Need to drop the timeout much lower
533     conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
534     conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
535     conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1);
536     conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 2);
537 
538     // Create and start the cluster
539     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
540     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
541     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
542     log("Cluster started");
543 
544     // Create a ZKW to use in the test
545     ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
546         "unittest", new Abortable() {
547           @Override
548           public void abort(String why, Throwable e) {
549             LOG.error("Fatal ZK Error: " + why, e);
550             org.junit.Assert.assertFalse("Fatal ZK error", true);
551           }
552     });
553 
554     // get all the master threads
555     List<MasterThread> masterThreads = cluster.getMasterThreads();
556     assertEquals(1, masterThreads.size());
557 
558     // only one master thread, let's wait for it to be initialized
559     assertTrue(cluster.waitForActiveAndReadyMaster());
560     HMaster master = masterThreads.get(0).getMaster();
561     assertTrue(master.isActiveMaster());
562     assertTrue(master.isInitialized());
563 
564     // disable load balancing on this master
565     master.balanceSwitch(false);
566 
567     // create two tables in META, each with 10 regions
568     byte [] FAMILY = Bytes.toBytes("family");
569     byte [][] SPLIT_KEYS = new byte [][] {
570         new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
571         Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
572         Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
573         Bytes.toBytes("iii"), Bytes.toBytes("jjj")
574     };
575 
576     byte [] enabledTable = Bytes.toBytes("enabledTable");
577     HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
578     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
579     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
580         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
581 
582     byte [] disabledTable = Bytes.toBytes("disabledTable");
583     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
584     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
585     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
586         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
587 
588     log("Regions in META have been created");
589 
590     // at this point we only expect 2 regions to be assigned out (catalogs)
591     assertEquals(2, cluster.countServedRegions());
592 
593     // The first RS will stay online
594     HRegionServer hrs = cluster.getRegionServer(0);
595     HServerInfo hsiAlive = hrs.getServerInfo();
596 
597     // The second RS is going to be hard-killed
598     HRegionServer hrsDead = cluster.getRegionServer(1);
599     String deadServerName = hrsDead.getServerName();
600     HServerInfo hsiDead = hrsDead.getServerInfo();
601 
602     // we'll need some regions to already be assigned out properly on live RS
603     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
604     enabledAndAssignedRegions.add(enabledRegions.remove(0));
605     enabledAndAssignedRegions.add(enabledRegions.remove(0));
606     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
607     disabledAndAssignedRegions.add(disabledRegions.remove(0));
608     disabledAndAssignedRegions.add(disabledRegions.remove(0));
609 
610     // now actually assign them
611     for (HRegionInfo hri : enabledAndAssignedRegions) {
612       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
613           new RegionPlan(hri, null, hsiAlive));
614       master.assignRegion(hri);
615     }
616     for (HRegionInfo hri : disabledAndAssignedRegions) {
617       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
618           new RegionPlan(hri, null, hsiAlive));
619       master.assignRegion(hri);
620     }
621 
622     // we also need regions assigned out on the dead server
623     List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
624     enabledAndOnDeadRegions.add(enabledRegions.remove(0));
625     enabledAndOnDeadRegions.add(enabledRegions.remove(0));
626     List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
627     disabledAndOnDeadRegions.add(disabledRegions.remove(0));
628     disabledAndOnDeadRegions.add(disabledRegions.remove(0));
629 
630     // set region plan to server to be killed and trigger assign
631     for (HRegionInfo hri : enabledAndOnDeadRegions) {
632       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
633           new RegionPlan(hri, null, hsiDead));
634       master.assignRegion(hri);
635     }
636     for (HRegionInfo hri : disabledAndOnDeadRegions) {
637       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
638           new RegionPlan(hri, null, hsiDead));
639       master.assignRegion(hri);
640     }
641 
642     // wait for no more RIT
643     log("Waiting for assignment to finish");
644     ZKAssign.blockUntilNoRIT(zkw);
645     log("Assignment completed");
646 
647     // Stop the master
648     log("Aborting master");
649     cluster.abortMaster(0);
650     cluster.waitOnMaster(0);
651     log("Master has aborted");
652 
653     /*
654      * Now, let's start mocking up some weird states as described in the method
655      * javadoc.
656      */
657 
658     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
659     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
660 
661     log("Beginning to mock scenarios");
662 
663     // Disable the disabledTable in ZK
664     ZKTable zktable = new ZKTable(zkw);
665     zktable.setDisabledTable(Bytes.toString(disabledTable));
666 
667     /*
668      * ZK = CLOSING
669      */
670 
671     // Region of enabled table being closed on dead RS but not finished
672     HRegionInfo region = enabledAndOnDeadRegions.remove(0);
673     regionsThatShouldBeOnline.add(region);
674     ZKAssign.createNodeClosing(zkw, region, deadServerName);
675     LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
676         region + "\n\n");
677 
678     // Region of disabled table being closed on dead RS but not finished
679     region = disabledAndOnDeadRegions.remove(0);
680     regionsThatShouldBeOffline.add(region);
681     ZKAssign.createNodeClosing(zkw, region, deadServerName);
682     LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
683         region + "\n\n");
684 
685     /*
686      * ZK = CLOSED
687      */
688 
689     // Region of enabled on dead server gets closed but not ack'd by master
690     region = enabledAndOnDeadRegions.remove(0);
691     regionsThatShouldBeOnline.add(region);
692     int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
693     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
694     LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
695         region + "\n\n");
696 
697     // Region of disabled on dead server gets closed but not ack'd by master
698     region = disabledAndOnDeadRegions.remove(0);
699     regionsThatShouldBeOffline.add(region);
700     version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
701     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
702     LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
703         region + "\n\n");
704 
705     /*
706      * ZK = OPENING
707      */
708 
709     // RS was opening a region of enabled table then died
710     region = enabledRegions.remove(0);
711     regionsThatShouldBeOnline.add(region);
712     ZKAssign.createNodeOffline(zkw, region, deadServerName);
713     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
714     LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
715         region + "\n\n");
716 
717     // RS was opening a region of disabled table then died
718     region = disabledRegions.remove(0);
719     regionsThatShouldBeOffline.add(region);
720     ZKAssign.createNodeOffline(zkw, region, deadServerName);
721     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
722     LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
723         region + "\n\n");
724 
725     /*
726      * ZK = OPENED
727      */
728 
729     // Region of enabled table was opened on dead RS
730     region = enabledRegions.remove(0);
731     regionsThatShouldBeOnline.add(region);
732     ZKAssign.createNodeOffline(zkw, region, deadServerName);
733     hrsDead.openRegion(region);
734     while (true) {
735       RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
736       if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
737         break;
738       }
739       Thread.sleep(100);
740     }
741     LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
742         region + "\n\n");
743 
744     // Region of disabled table was opened on dead RS
745     region = disabledRegions.remove(0);
746     regionsThatShouldBeOffline.add(region);
747     ZKAssign.createNodeOffline(zkw, region, deadServerName);
748     hrsDead.openRegion(region);
749     while (true) {
750       RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
751       if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
752         break;
753       }
754       Thread.sleep(100);
755     }
756     LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
757         region + "\n\n");
758 
759     /*
760      * ZK = NONE
761      */
762 
763     // Region of enabled table was open at steady-state on dead RS
764     region = enabledRegions.remove(0);
765     regionsThatShouldBeOnline.add(region);
766     ZKAssign.createNodeOffline(zkw, region, deadServerName);
767     hrsDead.openRegion(region);
768     while (true) {
769       RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
770       if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
771         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
772         break;
773       }
774       Thread.sleep(100);
775     }
776     LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
777         + "\n" + region + "\n\n");
778 
779     // Region of disabled table was open at steady-state on dead RS
780     region = disabledRegions.remove(0);
781     regionsThatShouldBeOffline.add(region);
782     ZKAssign.createNodeOffline(zkw, region, deadServerName);
783     hrsDead.openRegion(region);
784     while (true) {
785       RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
786       if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
787         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
788         break;
789       }
790       Thread.sleep(100);
791     }
792     LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
793         + "\n" + region + "\n\n");
794 
795     /*
796      * DONE MOCKING
797      */
798 
799     log("Done mocking data up in ZK");
800 
801     // Kill the RS that had a hard death
802     log("Killing RS " + deadServerName);
803     hrsDead.abort("Killing for unit test");
804     log("RS " + deadServerName + " killed");
805 
806     // Start up a new master
807     log("Starting up a new master");
808     master = cluster.startMaster().getMaster();
809     log("Waiting for master to be ready");
810     cluster.waitForActiveAndReadyMaster();
811     log("Master is ready");
812 
813     // Let's add some weird states to master in-memory state
814 
815     // After HBASE-3181, we need to have some ZK state if we're PENDING_OPEN
816     // b/c it is impossible for us to get into this state w/o a zk node
817     // this is not true of PENDING_CLOSE
818 
819     // PENDING_OPEN and enabled
820     region = enabledRegions.remove(0);
821     regionsThatShouldBeOnline.add(region);
822     master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
823         new RegionState(region, RegionState.State.PENDING_OPEN, 0));
824     ZKAssign.createNodeOffline(zkw, region, master.getServerName());
825     // PENDING_OPEN and disabled
826     region = disabledRegions.remove(0);
827     regionsThatShouldBeOffline.add(region);
828     master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
829         new RegionState(region, RegionState.State.PENDING_OPEN, 0));
830     ZKAssign.createNodeOffline(zkw, region, master.getServerName());
831     // This test is bad.  It puts up a PENDING_CLOSE but doesn't say what
832     // server we were PENDING_CLOSE against -- i.e. an entry in
833     // AssignmentManager#regions.  W/o a server, we NPE trying to resend close.
834     // In past, there was wonky logic that had us reassign region if no server
835     // at tail of the unassign.  This was removed.  Commenting out for now.
836     // TODO: Remove completely.
837     /*
838     // PENDING_CLOSE and enabled
839     region = enabledRegions.remove(0);
840     LOG.info("Setting PENDING_CLOSE enabled " + region.getEncodedName());
841     regionsThatShouldBeOnline.add(region);
842     master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
843       new RegionState(region, RegionState.State.PENDING_CLOSE, 0));
844     // PENDING_CLOSE and disabled
845     region = disabledRegions.remove(0);
846     LOG.info("Setting PENDING_CLOSE disabled " + region.getEncodedName());
847     regionsThatShouldBeOffline.add(region);
848     master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
849       new RegionState(region, RegionState.State.PENDING_CLOSE, 0));
850       */
851 
852     // Failover should be completed, now wait for no RIT
853     log("Waiting for no more RIT");
854     ZKAssign.blockUntilNoRIT(zkw);
855     log("No more RIT in ZK");
856     long now = System.currentTimeMillis();
857     final long maxTime = 120000;
858     boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
859     if (!done) {
860       LOG.info("rit=" + master.assignmentManager.getRegionsInTransition());
861     }
862     long elapsed = System.currentTimeMillis() - now;
863     assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
864       elapsed < maxTime);
865     log("No more RIT in RIT map, doing final test verification");
866 
867     // Grab all the regions that are online across RSs
868     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
869     for (JVMClusterUtil.RegionServerThread rst :
870       cluster.getRegionServerThreads()) {
871       onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
872     }
873 
874     // Now, everything that should be online should be online
875     for (HRegionInfo hri : regionsThatShouldBeOnline) {
876       assertTrue("region=" + hri.getRegionNameAsString(), onlineRegions.contains(hri));
877     }
878 
879     // Everything that should be offline should not be online
880     for (HRegionInfo hri : regionsThatShouldBeOffline) {
881       assertFalse(onlineRegions.contains(hri));
882     }
883 
884     log("Done with verification, all passed, shutting down cluster");
885 
886     // Done, shutdown the cluster
887     TEST_UTIL.shutdownMiniCluster();
888   }
889 
  // TODO: Next test to add: permutations in which the regions in transition,
  //       or the regions on the killed RS, include the ROOT and META regions.
892 
893   private void log(String string) {
894     LOG.info("\n\n" + string + " \n\n");
895   }
896 }