1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertTrue;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.TreeSet;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FileSystem;
37  import org.apache.hadoop.fs.Path;
38  import org.apache.hadoop.hbase.Abortable;
39  import org.apache.hadoop.hbase.ClusterStatus;
40  import org.apache.hadoop.hbase.HBaseConfiguration;
41  import org.apache.hadoop.hbase.HBaseTestingUtility;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.testclassification.LargeTests;
47  import org.apache.hadoop.hbase.MetaTableAccessor;
48  import org.apache.hadoop.hbase.MiniHBaseCluster;
49  import org.apache.hadoop.hbase.RegionTransition;
50  import org.apache.hadoop.hbase.ServerName;
51  import org.apache.hadoop.hbase.TableName;
52  import org.apache.hadoop.hbase.TableStateManager;
53  import org.apache.hadoop.hbase.client.RegionLocator;
54  import org.apache.hadoop.hbase.client.Table;
55  import org.apache.hadoop.hbase.executor.EventType;
56  import org.apache.hadoop.hbase.master.RegionState.State;
57  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
58  import org.apache.hadoop.hbase.protobuf.RequestConverter;
59  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
60  import org.apache.hadoop.hbase.regionserver.HRegion;
61  import org.apache.hadoop.hbase.regionserver.HRegionServer;
62  import org.apache.hadoop.hbase.regionserver.RegionMergeTransaction;
63  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
64  import org.apache.hadoop.hbase.util.Bytes;
65  import org.apache.hadoop.hbase.util.FSTableDescriptors;
66  import org.apache.hadoop.hbase.util.FSUtils;
67  import org.apache.hadoop.hbase.util.JVMClusterUtil;
68  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
69  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
70  import org.apache.hadoop.hbase.util.Threads;
71  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
72  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
73  import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
74  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
75  import org.apache.zookeeper.data.Stat;
76  import org.junit.Test;
77  import org.junit.experimental.categories.Category;
78  
79  @Category(LargeTests.class)
80  public class TestMasterFailover {
81    private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
82  
83    /**
84     * Complex test of master failover that covers as many permutations as possible
85     * of the different states that regions in transition can be in within ZK.
86     * <p>
87     * This tests the proper handling of these states by the failed-over master
88     * and includes a thorough testing of the timeout code as well.
89     * <p>
90     * Starts with a single master and three regionservers.
91     * <p>
92     * Creates two tables, enabledTable and disabledTable, each split into multiple
93     * regions.  The disabledTable is then disabled.
94     * <p>
95     * After reaching steady-state, the master is killed.  We then mock several
96     * states in ZK.
97     * <p>
98     * After mocking them, we start up a new master, which should become the
99     * active master and detect that it is a failover.  The primary passing
100    * condition is that all regions of the enabled table are assigned and all
101    * regions of the disabled table are not assigned.
102    * <p>
103    * The different scenarios to be tested are below:
104    * <p>
105    * <b>ZK State:  OFFLINE</b>
106    * <p>A node can get into OFFLINE state if</p>
107    * <ul>
108    * <li>An RS fails to open a region, so it reverts the state back to OFFLINE
109    * <li>The master is assigning the region to an RS and has not yet sent the open RPC
110    * </ul>
111    * <p>We will mock the scenarios</p>
112    * <ul>
113    * <li>Master has assigned an enabled region but the RS failed, so the region is
114    *     not assigned anywhere and is sitting in ZK as OFFLINE (this single
115    *     scenario covers both causes above)</li>
116    * </ul>
117    * <p>
118    * <b>ZK State:  CLOSING</b>
119    * <p>A node can get into CLOSING state if</p>
120    * <ul>
121    * <li>An RS has begun to close a region
122    * </ul>
123    * <p>We will mock the scenarios</p>
124    * <ul>
125    * <li>Region of enabled table was being closed but did not complete
126    * <li>Region of disabled table was being closed but did not complete
127    * </ul>
128    * <p>
129    * <b>ZK State:  CLOSED</b>
130    * <p>A node can get into CLOSED state if</p>
131    * <ul>
132    * <li>An RS has completed closing a region but the master has not acknowledged it yet
133    * </ul>
134    * <p>We will mock the scenarios</p>
135    * <ul>
136    * <li>Region of a table that should be enabled was closed on an RS
137    * <li>Region of a table that should be disabled was closed on an RS
138    * </ul>
139    * <p>
140    * <b>ZK State:  OPENING</b>
141    * <p>A node can get into OPENING state if</p>
142    * <ul>
143    * <li>An RS has begun to open a region
144    * </ul>
145    * <p>We will mock the scenarios</p>
146    * <ul>
147    * <li>RS was opening a region of enabled table but never finishes
148    * </ul>
149    * <p>
150    * <b>ZK State:  OPENED</b>
151    * <p>A node can get into OPENED state if</p>
152    * <ul>
153    * <li>An RS has finished opening a region but the master has not acknowledged it yet
154    * </ul>
155    * <p>We will mock the scenarios</p>
156    * <ul>
157    * <li>Region of a table that should be enabled was opened on an RS
158    * <li>Region of a table that should be disabled was opened on an RS
159    * </ul>
160    * @throws Exception
161    */
162   @Test (timeout=240000)
163   public void testMasterFailoverWithMockedRIT() throws Exception {
164 
165     final int NUM_MASTERS = 1;
166     final int NUM_RS = 3;
167 
168     // Create config to use for this cluster
169     Configuration conf = HBaseConfiguration.create();
170     conf.setBoolean("hbase.assignment.usezk", true);
171 
172     // Start the cluster
173     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
174     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
175     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
176     log("Cluster started");
177 
178     // Create a ZKW to use in the test
179     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
180 
181     // get all the master threads
182     List<MasterThread> masterThreads = cluster.getMasterThreads();
183     assertEquals(1, masterThreads.size());
184 
185     // only one master thread, let's wait for it to be initialized
186     assertTrue(cluster.waitForActiveAndReadyMaster());
187     HMaster master = masterThreads.get(0).getMaster();
188     assertTrue(master.isActiveMaster());
189     assertTrue(master.isInitialized());
190 
191     // disable load balancing on this master
192     master.balanceSwitch(false);
193 
194     // create two tables in META, each split into multiple regions via SPLIT_KEYS
195     byte [] FAMILY = Bytes.toBytes("family");
196     byte [][] SPLIT_KEYS = new byte [][] {
197         new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
198         Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
199         Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
200         Bytes.toBytes("iii"), Bytes.toBytes("jjj")
201     };
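        // Each entry above becomes the start key of one region when passed to
        // createMultiRegionsInMeta below, so each table presumably ends up with one
        // region per split key (including the empty first key).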
202 
203     byte [] enabledTable = Bytes.toBytes("enabledTable");
204     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
205     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
206 
207     FileSystem filesystem = FileSystem.get(conf);
208     Path rootdir = FSUtils.getRootDir(conf);
209     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
210     // Write the .tableinfo
211     fstd.createTableDescriptor(htdEnabled);
212 
213     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
214     createRegion(hriEnabled, rootdir, conf, htdEnabled);
215 
216     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
217         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
218 
219     TableName disabledTable = TableName.valueOf("disabledTable");
220     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
221     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
222     // Write the .tableinfo
223     fstd.createTableDescriptor(htdDisabled);
224     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
225     createRegion(hriDisabled, rootdir, conf, htdDisabled);
226     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
227         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
228 
229     TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
230     TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
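        // This table is split at 'm', so it comes up with two regions; after the master
        // is killed, they are the pair we put into a mocked MERGING state below.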
231 
232     log("Regions in hbase:meta and namespace have been created");
233 
234     // at this point we only expect 4 regions to be assigned out
235     // (hbase:meta and namespace, plus the 2 merging regions)
236     assertEquals(4, cluster.countServedRegions());
237 
238     // Move merging regions to the same region server
239     AssignmentManager am = master.getAssignmentManager();
240     RegionStates regionStates = am.getRegionStates();
241     List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
242     assertEquals(2, mergingRegions.size());
243     HRegionInfo a = mergingRegions.get(0);
244     HRegionInfo b = mergingRegions.get(1);
245     HRegionInfo newRegion = RegionMergeTransaction.getMergedRegionInfo(a, b);
246     ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
247     ServerName serverB = regionStates.getRegionServerOfRegion(b);
248     if (!serverB.equals(mergingServer)) {
249       RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
250       am.balance(plan);
251       assertTrue(am.waitForAssignment(b));
252     }
253 
254     // Let's just assign everything to first RS
255     HRegionServer hrs = cluster.getRegionServer(0);
256     ServerName serverName = hrs.getServerName();
257     HRegionInfo closingRegion = enabledRegions.remove(0);
258     // we'll need some regions to already be assigned out properly on live RS
259     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
260     enabledAndAssignedRegions.add(enabledRegions.remove(0));
261     enabledAndAssignedRegions.add(enabledRegions.remove(0));
262     enabledAndAssignedRegions.add(closingRegion);
263 
264     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
265     disabledAndAssignedRegions.add(disabledRegions.remove(0));
266     disabledAndAssignedRegions.add(disabledRegions.remove(0));
267 
268     // now actually assign them
269     for (HRegionInfo hri : enabledAndAssignedRegions) {
270       master.assignmentManager.addPlan(hri.getEncodedName(),
271           new RegionPlan(hri, null, serverName));
272       master.assignRegion(hri);
273     }
274 
275     for (HRegionInfo hri : disabledAndAssignedRegions) {
276       master.assignmentManager.addPlan(hri.getEncodedName(),
277           new RegionPlan(hri, null, serverName));
278       master.assignRegion(hri);
279     }
280 
281     // wait for no more RIT
282     log("Waiting for assignment to finish");
283     ZKAssign.blockUntilNoRIT(zkw);
284     log("Assignment completed");
285 
286     // Stop the master
287     log("Aborting master");
288     cluster.abortMaster(0);
289     cluster.waitOnMaster(0);
290     log("Master has aborted");
291 
292     /*
293      * Now, let's start mocking up some weird states as described in the method
294      * javadoc.
295      */
296 
297     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
298     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
299 
300     log("Beginning to mock scenarios");
301 
302     // Disable the disabledTable in ZK
303     TableStateManager zktable = new ZKTableStateManager(zkw);
304     zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
305 
306     /*
307      *  ZK = OFFLINE
308      */
309 
310     // Region that should be assigned but is not and is in ZK as OFFLINE
311     // Cause: This can happen if the master crashed after creating the znode but before sending the
312     //  request to the region server
313     HRegionInfo region = enabledRegions.remove(0);
314     regionsThatShouldBeOnline.add(region);
315     ZKAssign.createNodeOffline(zkw, region, serverName);
316 
317     /*
318      * ZK = CLOSING
319      */
320     // Cause: Same as offline.
321     regionsThatShouldBeOnline.add(closingRegion);
322     ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
323 
324     /*
325      * ZK = CLOSED
326      */
327 
328     // Region of enabled table closed but not acked
329     // Cause: Master was down while the region server updated the ZK status.
330     region = enabledRegions.remove(0);
331     regionsThatShouldBeOnline.add(region);
332     int version = ZKAssign.createNodeClosing(zkw, region, serverName);
333     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
334 
335     // Region of disabled table closed but not acked
336     region = disabledRegions.remove(0);
337     regionsThatShouldBeOffline.add(region);
338     version = ZKAssign.createNodeClosing(zkw, region, serverName);
339     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
340 
341     /*
342      * ZK = OPENED
343      */
344 
345     // Region of enabled table was opened on RS
346     // Cause: same as OFFLINE above
347     region = enabledRegions.remove(0);
348     regionsThatShouldBeOnline.add(region);
349     ZKAssign.createNodeOffline(zkw, region, serverName);
350     ProtobufUtil.openRegion(hrs.getRSRpcServices(), hrs.getServerName(), region);
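        // Poll the region's unassigned znode until the RS transitions it to
        // RS_ZK_REGION_OPENED, leaving an OPENED node that no master has acknowledged.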
351     while (true) {
352       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
353       RegionTransition rt = RegionTransition.parseFrom(bytes);
354       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
355         break;
356       }
357       Thread.sleep(100);
358     }
359 
360     // Region of disabled table was opened on RS
361     // Cause: Master failed while updating the status for this region server.
362     region = disabledRegions.remove(0);
363     regionsThatShouldBeOffline.add(region);
364     ZKAssign.createNodeOffline(zkw, region, serverName);
365     ProtobufUtil.openRegion(hrs.getRSRpcServices(), hrs.getServerName(), region);
366     while (true) {
367       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
368       RegionTransition rt = RegionTransition.parseFrom(bytes);
369       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
370         break;
371       }
372       Thread.sleep(100);
373     }
374 
375     /*
376      * ZK = MERGING
377      */
378 
379     // Regions of table of merging regions
380     // Cause: Master was down while merging was going on
381     hrs.getCoordinatedStateManager().
382       getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);
383 
384     /*
385      * ZK = NONE
386      */
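        // Nothing is mocked for this state here; regions with no transition node at all
        // are exercised in the dead-RS failover test below.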
387 
388     /*
389      * DONE MOCKING
390      */
391 
392     log("Done mocking data up in ZK");
393 
394     // Start up a new master
395     log("Starting up a new master");
396     master = cluster.startMaster().getMaster();
397     log("Waiting for master to be ready");
398     cluster.waitForActiveAndReadyMaster();
399     log("Master is ready");
400 
401     // Get new region states since master restarted
402     regionStates = master.getAssignmentManager().getRegionStates();
403     // Merging region should remain merging
404     assertTrue(regionStates.isRegionInState(a, State.MERGING));
405     assertTrue(regionStates.isRegionInState(b, State.MERGING));
406     assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
407     // Now remove the faked merging znode; the merging regions should be
408     // offlined automatically, otherwise it is a bug in the AM.
409     ZKAssign.deleteNodeFailSilent(zkw, newRegion);
410 
411     // Failover should be completed, now wait for no RIT
412     log("Waiting for no more RIT");
413     ZKAssign.blockUntilNoRIT(zkw);
414     log("No more RIT in ZK, now doing final test verification");
415 
416     // Grab all the regions that are online across RSs
417     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
418     for (JVMClusterUtil.RegionServerThread rst :
419       cluster.getRegionServerThreads()) {
420       onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
421         rst.getRegionServer().getRSRpcServices()));
422     }
423 
424     // Now, everything that should be online should be online
425     for (HRegionInfo hri : regionsThatShouldBeOnline) {
426       assertTrue(onlineRegions.contains(hri));
427     }
428 
429     // Everything that should be offline should not be online
430     for (HRegionInfo hri : regionsThatShouldBeOffline) {
431       if (onlineRegions.contains(hri)) {
432        LOG.debug(hri);
433       }
434       assertFalse(onlineRegions.contains(hri));
435     }
436 
437     log("Done with verification, all passed, shutting down cluster");
438 
439     // Done, shutdown the cluster
440     TEST_UTIL.shutdownMiniCluster();
441   }
442 
443   /**
444    * Complex test of master failover that covers as many permutations as possible
445    * of the different states that regions in transition can be in within ZK,
446    * pointing to an RS that has died while no master was around to process it.
447    * <p>
448    * This tests the proper handling of these states by the failed-over master
449    * and includes a thorough testing of the timeout code as well.
450    * <p>
451    * Starts with a single master and two regionservers.
452    * <p>
453    * Creates two tables, enabledTable and disabledTable, each split into 30
454    * regions.  The disabledTable is then disabled.
455    * <p>
456    * After reaching steady-state, the master is killed.  We then mock several
457    * states in ZK, and one of the RSs is hard-killed.
458    * <p>
459    * After mocking them and killing an RS, we start up a new master, which
460    * should become the active master and detect that it is a failover.  The
461    * primary passing condition is that all regions of the enabled table are
462    * assigned and all regions of the disabled table are not
463    * assigned.
464    * <p>
465    * The different scenarios to be tested are below:
466    * <p>
467    * <b>ZK State:  CLOSING</b>
468    * <p>A node can get into CLOSING state if</p>
469    * <ul>
470    * <li>An RS has begun to close a region
471    * </ul>
472    * <p>We will mock the scenarios</p>
473    * <ul>
474    * <li>Region was being closed but the RS died before finishing the close
475    * </ul>
476    * <b>ZK State:  OPENED</b>
477    * <p>A node can get into OPENED state if</p>
478    * <ul>
479    * <li>An RS has finished opening a region but the master has not acknowledged it yet
480    * </ul>
481    * <p>We will mock the scenarios</p>
482    * <ul>
483    * <li>Region of a table that should be enabled was opened by a now-dead RS
484    * <li>Region of a table that should be disabled was opened by a now-dead RS
485    * </ul>
486    * <p>
487    * <b>ZK State:  NONE</b>
488    * <p>A region can be left without a transition node if</p>
489    * <ul>
490    * <li>The server hosting the region died and no master processed it
491    * </ul>
492    * <p>We will mock the scenarios</p>
493    * <ul>
494    * <li>Region of enabled table was on a dead RS that was not yet processed
495    * <li>Region of disabled table was on a dead RS that was not yet processed
496    * </ul>
497    * @throws Exception
498    */
499   @Test (timeout=180000)
500   public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
501 
502     final int NUM_MASTERS = 1;
503     final int NUM_RS = 2;
504 
505     // Create and start the cluster
506     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
507     Configuration conf = TEST_UTIL.getConfiguration();
508     conf.setBoolean("hbase.assignment.usezk", true);
509 
510     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
511     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
512     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
513     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
514     log("Cluster started");
515 
516     // Create a ZKW to use in the test
517     ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
518         "unittest", new Abortable() {
519 
520           @Override
521           public void abort(String why, Throwable e) {
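                // Any fatal ZK error should fail the test immediately.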
522             LOG.error("Fatal ZK Error: " + why, e);
523             org.junit.Assert.fail("Fatal ZK error");
524           }
525 
526           @Override
527           public boolean isAborted() {
528             return false;
529           }
530 
531     });
532 
533     // get all the master threads
534     List<MasterThread> masterThreads = cluster.getMasterThreads();
535     assertEquals(1, masterThreads.size());
536 
537     // only one master thread, let's wait for it to be initialized
538     assertTrue(cluster.waitForActiveAndReadyMaster());
539     HMaster master = masterThreads.get(0).getMaster();
540     assertTrue(master.isActiveMaster());
541     assertTrue(master.isInitialized());
542 
543     // disable load balancing on this master
544     master.balanceSwitch(false);
545 
546     // create two tables in META, each with 30 regions
547     byte [] FAMILY = Bytes.toBytes("family");
548     byte[][] SPLIT_KEYS =
549         TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
550 
551     byte [] enabledTable = Bytes.toBytes("enabledTable");
552     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
553     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
554     FileSystem filesystem = FileSystem.get(conf);
555     Path rootdir = FSUtils.getRootDir(conf);
556     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
557     // Write the .tableinfo
558     fstd.createTableDescriptor(htdEnabled);
559     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
560         null, null);
561     createRegion(hriEnabled, rootdir, conf, htdEnabled);
562 
563     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
564         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
565 
566     TableName disabledTable =
567         TableName.valueOf("disabledTable");
568     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
569     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
570     // Write the .tableinfo
571     fstd.createTableDescriptor(htdDisabled);
572     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
573     createRegion(hriDisabled, rootdir, conf, htdDisabled);
574 
575     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
576         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
577 
578     log("Regions in hbase:meta and Namespace have been created");
579 
580     // at this point we only expect 2 regions to be assigned out (hbase:meta and namespace)
581     assertEquals(2, cluster.countServedRegions());
582 
583     // The first RS will stay online
584     List<RegionServerThread> regionservers =
585       cluster.getRegionServerThreads();
586     HRegionServer hrs = regionservers.get(0).getRegionServer();
587 
588     // The second RS is going to be hard-killed
589     RegionServerThread hrsDeadThread = regionservers.get(1);
590     HRegionServer hrsDead = hrsDeadThread.getRegionServer();
591     ServerName deadServerName = hrsDead.getServerName();
592 
593     // we'll need some regions to already be assigned out properly on live RS
594     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
595     enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
596     enabledRegions.removeAll(enabledAndAssignedRegions);
597     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
598     disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
599     disabledRegions.removeAll(disabledAndAssignedRegions);
600 
601     // now actually assign them
602     for (HRegionInfo hri : enabledAndAssignedRegions) {
603       master.assignmentManager.addPlan(hri.getEncodedName(),
604           new RegionPlan(hri, null, hrs.getServerName()));
605       master.assignRegion(hri);
606     }
607     for (HRegionInfo hri : disabledAndAssignedRegions) {
608       master.assignmentManager.addPlan(hri.getEncodedName(),
609           new RegionPlan(hri, null, hrs.getServerName()));
610       master.assignRegion(hri);
611     }
612 
613     log("Waiting for assignment to finish");
614     ZKAssign.blockUntilNoRIT(zkw);
615     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
616     log("Assignment completed");
617 
618     assertTrue(" Table must be enabled.", master.getAssignmentManager()
619         .getTableStateManager().isTableState(TableName.valueOf("enabledTable"),
620         ZooKeeperProtos.Table.State.ENABLED));
621     // we also need regions assigned out on the dead server
622     List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
623     enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
624     enabledRegions.removeAll(enabledAndOnDeadRegions);
625     List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
626     disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
627     disabledRegions.removeAll(disabledAndOnDeadRegions);
628 
629     // set region plans to the server that will be killed and trigger assignment
630     for (HRegionInfo hri : enabledAndOnDeadRegions) {
631       master.assignmentManager.addPlan(hri.getEncodedName(),
632           new RegionPlan(hri, null, deadServerName));
633       master.assignRegion(hri);
634     }
635     for (HRegionInfo hri : disabledAndOnDeadRegions) {
636       master.assignmentManager.addPlan(hri.getEncodedName(),
637           new RegionPlan(hri, null, deadServerName));
638       master.assignRegion(hri);
639     }
640 
641     // wait for no more RIT
642     log("Waiting for assignment to finish");
643     ZKAssign.blockUntilNoRIT(zkw);
644     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
645     log("Assignment completed");
646 
647     // Because master.assignRegion(hri) may fail to assign a region to the specified RS,
648     // we need to make sure the regions really are on the expected RS
649     verifyRegionLocation(hrs, enabledAndAssignedRegions);
650     verifyRegionLocation(hrs, disabledAndAssignedRegions);
651     verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
652     verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
653 
654     assertTrue(" Didn't get enough regions of enabledTalbe on live rs.",
655       enabledAndAssignedRegions.size() >= 2);
656     assertTrue(" Didn't get enough regions of disalbedTable on live rs.",
657       disabledAndAssignedRegions.size() >= 2);
658     assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.",
659       enabledAndOnDeadRegions.size() >= 2);
660     assertTrue(" Didn't get enough regions of disalbedTable on dead rs.",
661       disabledAndOnDeadRegions.size() >= 2);
662 
663     // Stop the master
664     log("Aborting master");
665     cluster.abortMaster(0);
666     cluster.waitOnMaster(0);
667     log("Master has aborted");
668 
669     /*
670      * Now, let's start mocking up some weird states as described in the method
671      * javadoc.
672      */
673 
674     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
675     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
676 
677     log("Beginning to mock scenarios");
678 
679     // Disable the disabledTable in ZK
680     TableStateManager zktable = new ZKTableStateManager(zkw);
681     zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
682 
683     assertTrue(" The enabled table should be identified on master fail over.",
684         zktable.isTableState(TableName.valueOf("enabledTable"),
685           ZooKeeperProtos.Table.State.ENABLED));
686 
687     /*
688      * ZK = CLOSING
689      */
690 
691     // Region of enabled table being closed on dead RS but not finished
692     HRegionInfo region = enabledAndOnDeadRegions.remove(0);
693     regionsThatShouldBeOnline.add(region);
694     ZKAssign.createNodeClosing(zkw, region, deadServerName);
695     LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
696         region + "\n\n");
697 
698     // Region of disabled table being closed on dead RS but not finished
699     region = disabledAndOnDeadRegions.remove(0);
700     regionsThatShouldBeOffline.add(region);
701     ZKAssign.createNodeClosing(zkw, region, deadServerName);
702     LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
703         region + "\n\n");
704 
705     /*
706      * ZK = CLOSED
707      */
708 
709     // Region of enabled table on dead server gets closed but not ack'd by master
710     region = enabledAndOnDeadRegions.remove(0);
711     regionsThatShouldBeOnline.add(region);
712     int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
713     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
714     LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
715         region + "\n\n");
716 
717     // Region of disabled table on dead server gets closed but not ack'd by master
718     region = disabledAndOnDeadRegions.remove(0);
719     regionsThatShouldBeOffline.add(region);
720     version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
721     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
722     LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
723         region + "\n\n");
724 
725     /*
726      * ZK = OPENING
727      */
728 
729     // RS was opening a region of enabled table then died
730     region = enabledRegions.remove(0);
731     regionsThatShouldBeOnline.add(region);
732     ZKAssign.createNodeOffline(zkw, region, deadServerName);
733     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
734     LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
735         region + "\n\n");
736 
737     // RS was opening a region of disabled table then died
738     region = disabledRegions.remove(0);
739     regionsThatShouldBeOffline.add(region);
740     ZKAssign.createNodeOffline(zkw, region, deadServerName);
741     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
742     LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
743         region + "\n\n");
744 
745     /*
746      * ZK = OPENED
747      */
748 
749     // Region of enabled table was opened on dead RS
750     region = enabledRegions.remove(0);
751     regionsThatShouldBeOnline.add(region);
752     ZKAssign.createNodeOffline(zkw, region, deadServerName);
753     ProtobufUtil.openRegion(hrsDead.getRSRpcServices(),
754       hrsDead.getServerName(), region);
755     while (true) {
756       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
757       RegionTransition rt = RegionTransition.parseFrom(bytes);
758       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
759         break;
760       }
761       Thread.sleep(100);
762     }
763     LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
764         region + "\n\n");
765 
766     // Region of disabled table was opened on dead RS
767     region = disabledRegions.remove(0);
768     regionsThatShouldBeOffline.add(region);
769     ZKAssign.createNodeOffline(zkw, region, deadServerName);
770     ProtobufUtil.openRegion(hrsDead.getRSRpcServices(),
771       hrsDead.getServerName(), region);
772     while (true) {
773       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
774       RegionTransition rt = RegionTransition.parseFrom(bytes);
775       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
776         break;
777       }
778       Thread.sleep(100);
779     }
780     LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
781         region + "\n\n");
782 
783     /*
784      * ZK = NONE
785      */
786 
787     // Region of enabled table was open at steady-state on dead RS
788     region = enabledRegions.remove(0);
789     regionsThatShouldBeOnline.add(region);
790     ZKAssign.createNodeOffline(zkw, region, deadServerName);
791     ProtobufUtil.openRegion(hrsDead.getRSRpcServices(),
792       hrsDead.getServerName(), region);
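        // Wait for the soon-to-be-dead RS to mark the region OPENED, then delete the znode
        // so the region is left open at steady-state with no transition node at all.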
793     while (true) {
794       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
795       RegionTransition rt = RegionTransition.parseFrom(bytes);
796       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
797         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
798         LOG.debug("DELETED " + rt);
799         break;
800       }
801       Thread.sleep(100);
802     }
803     LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
804         + "\n" + region + "\n\n");
805 
806     // Region of disabled table was open at steady-state on dead RS
807     region = disabledRegions.remove(0);
808     regionsThatShouldBeOffline.add(region);
809     ZKAssign.createNodeOffline(zkw, region, deadServerName);
810     ProtobufUtil.openRegion(hrsDead.getRSRpcServices(),
811       hrsDead.getServerName(), region);
812     while (true) {
813       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
814       RegionTransition rt = RegionTransition.parseFrom(bytes);
815       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
816         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
817         break;
818       }
819       Thread.sleep(100);
820     }
821     LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
822       + "\n" + region + "\n\n");
823 
824     /*
825      * DONE MOCKING
826      */
827 
828     log("Done mocking data up in ZK");
829 
830     // Kill the RS that had a hard death
831     log("Killing RS " + deadServerName);
832     hrsDead.abort("Killing for unit test");
833     log("RS " + deadServerName + " killed");
834 
835     // Start up a new master.  Wait until regionserver is completely down
836     // before starting new master because of hbase-4511.
837     while (hrsDeadThread.isAlive()) {
838       Threads.sleep(10);
839     }
840     log("Starting up a new master");
841     master = cluster.startMaster().getMaster();
842     log("Waiting for master to be ready");
843     assertTrue(cluster.waitForActiveAndReadyMaster());
844     log("Master is ready");
845 
846     // Wait until SSH (ServerShutdownHandler) processing has completed for the dead server.
847     while (master.getServerManager().areDeadServersInProgress()) {
848       Thread.sleep(10);
849     }
850 
851     // Failover should be completed, now wait for no RIT
852     log("Waiting for no more RIT");
853     ZKAssign.blockUntilNoRIT(zkw);
854     log("No more RIT in ZK");
855     long now = System.currentTimeMillis();
856     long maxTime = 120000;
857     boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
858     if (!done) {
859       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
860       LOG.info("rit=" + regionStates.getRegionsInTransition());
861     }
862     long elapsed = System.currentTimeMillis() - now;
863     assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
864       elapsed < maxTime);
865     log("No more RIT in RIT map, doing final test verification");
866 
867     // Grab all the regions that are online across RSs
868     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
869     now = System.currentTimeMillis();
870     maxTime = 30000;
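        // Give each live RS up to maxTime to drain its in-RS transitions before
        // collecting the regions it reports online.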
871     for (JVMClusterUtil.RegionServerThread rst :
872         cluster.getRegionServerThreads()) {
873       try {
874         HRegionServer rs = rst.getRegionServer();
875         while (!rs.getRegionsInTransitionInRS().isEmpty()) {
876           elapsed = System.currentTimeMillis() - now;
877           assertTrue("Test timed out in getting online regions", elapsed < maxTime);
878           if (rs.isAborted() || rs.isStopped()) {
879             // This region server is stopped, skip it.
880             break;
881           }
882           Thread.sleep(100);
883         }
884         onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
885       } catch (RegionServerStoppedException e) {
886         LOG.info("Got RegionServerStoppedException", e);
887       }
888     }
889 
890     // Now, everything that should be online should be online
891     for (HRegionInfo hri : regionsThatShouldBeOnline) {
892       assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
893         onlineRegions.contains(hri));
894     }
895 
896     // Everything that should be offline should not be online
897     for (HRegionInfo hri : regionsThatShouldBeOffline) {
898       assertFalse(onlineRegions.contains(hri));
899     }
900 
901     log("Done with verification, all passed, shutting down cluster");
902 
903     // Done, shutdown the cluster
904     TEST_UTIL.shutdownMiniCluster();
905   }
906 
907   /**
908    * Retain only the regions actually online on the given region server; any others are removed from the list.
909    */
910   private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
911       throws IOException {
912     List<HRegionInfo> tmpOnlineRegions =
913       ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
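        // Keep only the regions this RS actually reports online; callers then assert
        // that enough regions remain in the list.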
914     Iterator<HRegionInfo> itr = regions.iterator();
915     while (itr.hasNext()) {
916       HRegionInfo tmp = itr.next();
917       if (!tmpOnlineRegions.contains(tmp)) {
918         itr.remove();
919       }
920     }
921   }
922 
923   HRegion createRegion(final HRegionInfo  hri, final Path rootdir, final Configuration c,
924       final HTableDescriptor htd)
925   throws IOException {
926     HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
927     // The above call to create a region will create a WAL file.  Each
928     // log file creation also starts a running thread to do syncing.  We need
929     // to close out this log, else we will have a running thread trying to sync
930     // the file system continuously, which is ugly when DFS is taken away at the
931     // end of the test.
932     HRegion.closeHRegion(r);
933     return r;
934   }
935 
936   // TODO: Next test to add is one testing permutations where the regions in transition,
937   //       or the RS that is killed, are hosting the ROOT and hbase:meta regions.
938 
939   private void log(String string) {
940     LOG.info("\n\n" + string + " \n\n");
941   }
942 
943   @Test (timeout=180000)
944   public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
945       throws Exception {
946     LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
947     final int NUM_MASTERS = 1;
948     final int NUM_RS = 2;
949 
950     // Start the cluster
951     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
952     Configuration conf = TEST_UTIL.getConfiguration();
953     conf.setInt("hbase.master.info.port", -1);
954     conf.setBoolean("hbase.assignment.usezk", true);
955 
956     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
957     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
958 
959     // Find regionserver carrying meta.
960     List<RegionServerThread> regionServerThreads =
961       cluster.getRegionServerThreads();
962     HRegion metaRegion = null;
963     HRegionServer metaRegionServer = null;
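        // Note that every region server inspected here is aborted (including the one found
        // to be carrying hbase:meta) before the mini cluster is shut down below.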
964     for (RegionServerThread regionServerThread : regionServerThreads) {
965       HRegionServer regionServer = regionServerThread.getRegionServer();
966       metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
967       regionServer.abort("");
968       if (null != metaRegion) {
969         metaRegionServer = regionServer;
970         break;
971       }
972     }
973 
974     TEST_UTIL.shutdownMiniHBaseCluster();
975 
976     // Create a ZKW to use in the test
977     ZooKeeperWatcher zkw =
978       HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
979           metaRegion, metaRegionServer.getServerName());
980 
981     LOG.info("Staring cluster for second time");
982     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
983 
984     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
985     while (!master.isInitialized()) {
986       Thread.sleep(100);
987     }
988     // Failover should be completed, now wait for no RIT
989     log("Waiting for no more RIT");
990     ZKAssign.blockUntilNoRIT(zkw);
991 
992     zkw.close();
993     // Stop the cluster
994     TEST_UTIL.shutdownMiniCluster();
995   }
996 
997   /**
998    * This tests that a region in transition in OFFLINE state gets re-assigned after a master restart
999    */
1000   @Test(timeout=240000)
1001   public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
1002     final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
1003     final int NUM_MASTERS = 1;
1004     final int NUM_RS = 2;
1005 
1006     // Create config to use for this cluster
1007     Configuration conf = HBaseConfiguration.create();
1008     conf.setBoolean("hbase.assignment.usezk", true);
1009 
1010     // Start the cluster
1011     final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1012     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1013     log("Cluster started");
1014 
1015     TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1016     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1017     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1018     HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1019     ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1020     TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1021 
1022     ServerName dstName = null;
1023     for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1024       if (!tmpServer.equals(serverName)) {
1025         dstName = tmpServer;
1026         break;
1027       }
1028     }
1029     // find a different server
1030     assertTrue(dstName != null);
1031     // shutdown HBase cluster
1032     TEST_UTIL.shutdownMiniHBaseCluster();
1033     // create a RIT node in offline state
1034     ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1035     ZKAssign.createNodeOffline(zkw, hri, dstName);
1036     Stat stat = new Stat();
1037     byte[] data =
1038         ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1039     assertTrue(data != null);
1040     RegionTransition rt = RegionTransition.parseFrom(data);
1041     assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1042 
1043     LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1044         + " and dst server=" + dstName);
1045 
1046     // start HBase cluster
1047     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1048 
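         // Wait for the restarted master to initialize and finish processing dead servers
         // before verifying that the region got re-assigned.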
1049     while (true) {
1050       master = TEST_UTIL.getHBaseCluster().getMaster();
1051       if (master != null && master.isInitialized()) {
1052         ServerManager serverManager = master.getServerManager();
1053         if (!serverManager.areDeadServersInProgress()) {
1054           break;
1055         }
1056       }
1057       Thread.sleep(200);
1058     }
1059 
1060     // verify the region is assigned
1061     master = TEST_UTIL.getHBaseCluster().getMaster();
1062     master.getAssignmentManager().waitForAssignment(hri);
1063     regionStates = master.getAssignmentManager().getRegionStates();
1064     RegionState newState = regionStates.getRegionState(hri);
1065     assertTrue(newState.isOpened());
1066   }
1067   
1068   /**
1069    * Simple test of master failover.
1070    * <p>
1071    * Starts with three masters.  Kills a backup master.  Then kills the active
1072    * master.  Ensures the final master becomes active and we can still contact
1073    * the cluster.
1074    * @throws Exception
1075    */
1076   @Test (timeout=240000)
1077   public void testSimpleMasterFailover() throws Exception {
1078 
1079     final int NUM_MASTERS = 3;
1080     final int NUM_RS = 3;
1081 
1082     // Start the cluster
1083     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1084 
1085     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1086     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1087 
1088     // get all the master threads
1089     List<MasterThread> masterThreads = cluster.getMasterThreads();
1090 
1091     // wait for each to come online
1092     for (MasterThread mt : masterThreads) {
1093       assertTrue(mt.isAlive());
1094     }
1095 
1096     // verify only one is the active master and we have right number
1097     int numActive = 0;
1098     int activeIndex = -1;
1099     ServerName activeName = null;
1100     HMaster active = null;
1101     for (int i = 0; i < masterThreads.size(); i++) {
1102       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1103         numActive++;
1104         activeIndex = i;
1105         active = masterThreads.get(activeIndex).getMaster();
1106         activeName = active.getServerName();
1107       }
1108     }
1109     assertEquals(1, numActive);
1110     assertEquals(NUM_MASTERS, masterThreads.size());
1111     LOG.info("Active master " + activeName);
1112 
1113     // Check that ClusterStatus reports the correct active and backup masters
1114     assertNotNull(active);
1115     ClusterStatus status = active.getClusterStatus();
1116     assertTrue(status.getMaster().equals(activeName));
1117     assertEquals(2, status.getBackupMastersSize());
1118     assertEquals(2, status.getBackupMasters().size());
1119 
1120     // attempt to stop one of the inactive masters
1121     int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1122     HMaster master = cluster.getMaster(backupIndex);
1123     LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1124     cluster.stopMaster(backupIndex, false);
1125     cluster.waitOnMaster(backupIndex);
1126 
1127     // Verify still one active master and it's the same
1128     for (int i = 0; i < masterThreads.size(); i++) {
1129       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1130         assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
1131         activeIndex = i;
1132         active = masterThreads.get(activeIndex).getMaster();
1133       }
1134     }
1135     assertEquals(1, numActive);
1136     assertEquals(2, masterThreads.size());
1137     int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1138     LOG.info("Active master " + active.getServerName() + " managing " + rsCount +  " regions servers");
1139     assertEquals(3, rsCount);
1140 
1141     // Check that ClusterStatus reports the correct active and backup masters
1142     assertNotNull(active);
1143     status = active.getClusterStatus();
1144     assertTrue(status.getMaster().equals(activeName));
1145     assertEquals(1, status.getBackupMastersSize());
1146     assertEquals(1, status.getBackupMasters().size());
1147 
1148     // kill the active master
1149     LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1150     cluster.stopMaster(activeIndex, false);
1151     cluster.waitOnMaster(activeIndex);
1152 
1153     // wait for an active master to show up and be ready
1154     assertTrue(cluster.waitForActiveAndReadyMaster());
1155 
1156     LOG.debug("\n\nVerifying backup master is now active\n");
1157     // should only have one master now
1158     assertEquals(1, masterThreads.size());
1159 
1160    // and it should be active
1161     active = masterThreads.get(0).getMaster();
1162     assertNotNull(active);
1163     status = active.getClusterStatus();
1164     ServerName mastername = status.getMaster();
1165     assertTrue(mastername.equals(active.getServerName()));
1166     assertTrue(active.isActiveMaster());
1167     assertEquals(0, status.getBackupMastersSize());
1168     assertEquals(0, status.getBackupMasters().size());
1169     int rss = status.getServersSize();
1170     LOG.info("Active master " + mastername.getServerName() + " managing " +
1171       rss +  " region servers");
1172     assertEquals(3, rss);
1173 
1174     // Stop the cluster
1175     TEST_UTIL.shutdownMiniCluster();
1176   }
1177 
1178   /**
1179    * Test regions in pending_open/close and failed_open/close states when the master fails over
1180    */
1181   @Test (timeout=180000)
1182   @SuppressWarnings("deprecation")
1183   public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1184     final int NUM_MASTERS = 1;
1185     final int NUM_RS = 1;
1186 
1187     // Create config to use for this cluster
1188     Configuration conf = HBaseConfiguration.create();
1189     conf.setBoolean("hbase.assignment.usezk", false);
1190 
1191     // Start the cluster
1192     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1193     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1194     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1195     log("Cluster started");
1196 
1197     // get all the master threads
1198     List<MasterThread> masterThreads = cluster.getMasterThreads();
1199     assertEquals(1, masterThreads.size());
1200 
1201     // only one master thread, let's wait for it to be initialized
1202     assertTrue(cluster.waitForActiveAndReadyMaster());
1203     HMaster master = masterThreads.get(0).getMaster();
1204     assertTrue(master.isActiveMaster());
1205     assertTrue(master.isInitialized());
1206 
1207     // Create a table with a region online
1208     Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
1209     onlineTable.close();
1210     // Create a table in META, so it has a region offline
1211     HTableDescriptor offlineTable = new HTableDescriptor(
1212       TableName.valueOf(Bytes.toBytes("offlineTable")));
1213     offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1214 
1215     FileSystem filesystem = FileSystem.get(conf);
1216     Path rootdir = FSUtils.getRootDir(conf);
1217     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1218     fstd.createTableDescriptor(offlineTable);
1219 
1220     HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1221     createRegion(hriOffline, rootdir, conf, offlineTable);
1222     MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
1223 
1224     log("Regions in hbase:meta and namespace have been created");
1225 
1226     // at this point we only expect 3 regions to be assigned out
1227    // (hbase:meta and namespace, plus the 1 online region)
1228     assertEquals(3, cluster.countServedRegions());
1229     HRegionInfo hriOnline = null;
1230     try (RegionLocator locator =
1231         TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
1232       hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
1233     }
1234     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1235     RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
1236 
1237     // Put the online region in pending_close. It is actually already opened.
1238     // This is to simulate that the region close RPC is not sent out before failover
1239     RegionState oldState = regionStates.getRegionState(hriOnline);
1240     RegionState newState = new RegionState(
1241       hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1242     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1243 
1244     // Put the offline region in pending_open. It is actually not opened yet.
1245     // This is to simulate that the region open RPC is not sent out before failover
1246     oldState = new RegionState(hriOffline, State.OFFLINE);
1247     newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1248     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1249     
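         // Simulate a region left in FAILED_CLOSE at failover time; the new master is
         // expected to bring it back online.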
1250     HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1251     createRegion(failedClose, rootdir, conf, offlineTable);
1252     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
1253     
1254     oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1255     newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1256     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1257     
1258    
1259     HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1260     createRegion(failedOpen, rootdir, conf, offlineTable);
1261     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
1262     
1263     // Simulate a region transitioning to failed open when the region server reports the
1264     // transition as FAILED_OPEN
1265     oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1266     newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1267     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1268     
1269     HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1270     createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1271     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
1272     
1273     // Simulate a region transitioning to failed open when the master couldn't find a plan for
1274     // the region
1275     oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1276     newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1277     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1278     
1279     
1280 
1281     // Stop the master
1282     log("Aborting master");
1283     cluster.abortMaster(0);
1284     cluster.waitOnMaster(0);
1285     log("Master has aborted");
1286 
1287     // Start up a new master
1288     log("Starting up a new master");
1289     master = cluster.startMaster().getMaster();
1290     log("Waiting for master to be ready");
1291     cluster.waitForActiveAndReadyMaster();
1292     log("Master is ready");
1293 
1294     // Wait till no region in transition any more
1295     master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1296 
1297     // Get new region states since master restarted
1298     regionStates = master.getAssignmentManager().getRegionStates();
1299 
1300     // All of the regions that were in transition (pending or failed open/close) should be online
1301     assertTrue(regionStates.isRegionOnline(hriOffline));
1302     assertTrue(regionStates.isRegionOnline(hriOnline));
1303     assertTrue(regionStates.isRegionOnline(failedClose));
1304     assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1305     assertTrue(regionStates.isRegionOnline(failedOpen));
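         // As an extra sanity check one could also assert that none of these regions
         // is still recorded as being in transition, e.g.:
         //   assertFalse(regionStates.isRegionInTransition(failedClose));
         //   assertFalse(regionStates.isRegionInTransition(failedOpen));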
1306     
1307     log("Done with verification, shutting down cluster");
1308 
1309     // Done, shutdown the cluster
1310     TEST_UTIL.shutdownMiniCluster();
1311   }
1312 
1313   /**
1314    * Test hbase:meta in transition when the master fails over.
1315    */
1316   @Test(timeout = 180000)
1317   public void testMetaInTransitionWhenMasterFailover() throws Exception {
1318     final int NUM_MASTERS = 1;
1319     final int NUM_RS = 1;
1320 
1321     // Start the cluster
1322     Configuration conf = HBaseConfiguration.create();
1323     conf.setBoolean("hbase.assignment.usezk", false);
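         // Run the test with ZK-less region assignment, so region states are
         // tracked through hbase:meta rather than ZooKeeper znodes.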
1324     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1325     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1326     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1327     log("Cluster started");
1328 
1329     log("Moving meta off the master");
1330     HMaster activeMaster = cluster.getMaster();
1331     HRegionServer rs = cluster.getRegionServer(0);
1332     ServerName metaServerName = cluster.getLiveRegionServerThreads()
1333       .get(0).getRegionServer().getServerName();
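         // Move hbase:meta onto the region server so that aborting the master
         // below does not take meta down with it.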
1334     activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1335       Bytes.toBytes(metaServerName.getServerName()));
1336     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1337     assertEquals("Meta should be assigned on expected regionserver",
1338       metaServerName, activeMaster.getMetaTableLocator()
1339         .getMetaRegionLocation(activeMaster.getZooKeeper()));
1340 
1341     // Now kill the master; meta should remain on the RS where we placed it before.
1342     log("Aborting master");
1343     activeMaster.abort("test-kill");
1344     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1345     log("Master has aborted");
1346 
1347     // meta should remain where it was
1348     RegionState metaState =
1349       MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
1350     assertEquals("hbase:meta should be onlined on RS",
1351       metaState.getServerName(), rs.getServerName());
1352     assertEquals("hbase:meta should be onlined on RS",
1353       metaState.getState(), State.OPEN);
1354 
1355     // Start up a new master
1356     log("Starting up a new master");
1357     activeMaster = cluster.startMaster().getMaster();
1358     log("Waiting for master to be ready");
1359     cluster.waitForActiveAndReadyMaster();
1360     log("Master is ready");
1361 
1362     // ensure meta is still deployed on RS
1363     metaState =
1364       MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1365     assertEquals("hbase:meta should be onlined on RS",
1366       metaState.getServerName(), rs.getServerName());
1367     assertEquals("hbase:meta should be onlined on RS",
1368       metaState.getState(), State.OPEN);
1369 
1370     // Update the meta state to PENDING_OPEN, then kill the master.
1371     // This simulates the RS having successfully deployed the region while
1372     // the RPC reporting it was lost right before the failure.
1373     // The region server should expire (how can this be verified?)
1374     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1375       rs.getServerName(), State.PENDING_OPEN);
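         // Actually take meta offline on the RS, so only the stale PENDING_OPEN
         // location recorded above is left behind.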
1376     HRegion meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1377     rs.removeFromOnlineRegions(meta, null);
1378     meta.close();
1379 
1380     log("Aborting master");
1381     activeMaster.abort("test-kill");
1382     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1383     log("Master has aborted");
1384 
1385     // Start up a new master
1386     log("Starting up a new master");
1387     activeMaster = cluster.startMaster().getMaster();
1388     log("Waiting for master to be ready");
1389     cluster.waitForActiveAndReadyMaster();
1390     log("Master is ready");
1391 
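         // The new master should notice that meta is not really open on the RS
         // and re-assign it.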
1392     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1393     log("Meta was assigned");
1394 
1395     metaState =
1396       MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1397     assertEquals("hbase:meta should be onlined on RS",
1398       metaState.getServerName(), rs.getServerName());
1399     assertEquals("hbase:meta should be onlined on RS",
1400       metaState.getState(), State.OPEN);
1401 
1402     // Update the meta state to PENDING_CLOSE, then kill the master.
1403     // This simulates the close RPC having been issued but its outcome
1404     // being lost right before the failure.
1405     // The region server should expire (how can this be verified?)
1406     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1407       rs.getServerName(), State.PENDING_CLOSE);
1408 
1409     log("Aborting master");
1410     activeMaster.abort("test-kill");
1411     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1412     log("Master has aborted");
1413 
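         // With no master running, close meta on the RS directly; the close completes
         // but no master is around to record it, so the new master must finish the
         // transition itself.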
1414     rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
1415       rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1416 
1417     // Start up a new master
1418     log("Starting up a new master");
1419     activeMaster = cluster.startMaster().getMaster();
1420     log("Waiting for master to be ready");
1421     cluster.waitForActiveAndReadyMaster();
1422     log("Master is ready");
1423 
1424     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1425     log("Meta was assigned");
1426 
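         // Close meta on the RS again so that the dummy location written below
         // is the only hint the new master gets about where meta is.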
1427     rs.getRSRpcServices().closeRegion(
1428       null,
1429       RequestConverter.buildCloseRegionRequest(rs.getServerName(),
1430         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1431 
1432     // Set a dummy server to check if master reassigns meta on restart
1433     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1434       ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
1435 
1436     log("Stopping master");
1437     activeMaster.stop("test-kill");
1438 
1439     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1440     log("Master has stopped");
1441 
1442     // Start up a new master
1443     log("Starting up a new master");
1444     activeMaster = cluster.startMaster().getMaster();
1445     log("Waiting for master to be ready");
1446     cluster.waitForActiveAndReadyMaster();
1447     log("Master is ready");
1448 
1449     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1450     log("Meta was assigned");
1451 
1452     // Done, shutdown the cluster
1453     TEST_UTIL.shutdownMiniCluster();
1454   }
1455 }
1456