1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase;
19  
20  
21  import java.io.IOException;
22  import java.util.List;
23  
24  import junit.framework.Assert;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.hbase.client.HBaseAdmin;
30  import org.apache.hadoop.hbase.master.HMaster;
31  import org.apache.hadoop.hbase.regionserver.HRegionServer;
32  import org.apache.hadoop.hbase.util.Bytes;
33  import org.apache.hadoop.hbase.util.FSUtils;
34  import org.apache.hadoop.hbase.util.FSTableDescriptors;
35  import org.apache.hadoop.hbase.util.Threads;
36  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
37  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
38  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
39  import org.apache.zookeeper.KeeperException;
40  import org.junit.AfterClass;
41  import org.junit.BeforeClass;
42  import org.junit.Test;
43  import org.junit.experimental.categories.Category;
44  
45  /**
46   * Test the draining servers feature.
47   * @see <a href="https://issues.apache.org/jira/browse/HBASE-4298">HBASE-4298</a>
48   */
49  @Category(MediumTests.class)
50  public class TestDrainingServer {
51    private static final Log LOG = LogFactory.getLog(TestDrainingServer.class);
52    private static final HBaseTestingUtility TEST_UTIL =
53      new HBaseTestingUtility();
54    private static final byte [] TABLENAME = Bytes.toBytes("t");
55    private static final byte [] FAMILY = Bytes.toBytes("f");
56    private static final int COUNT_OF_REGIONS = HBaseTestingUtility.KEYS.length;
57    private static final int NB_SLAVES = 5;
58  
59    /**
60     * Spin up a cluster with a bunch of regions on it.
61     */
62    @BeforeClass
63    public static void setUpBeforeClass() throws Exception {
64      TEST_UTIL.startMiniCluster(NB_SLAVES);
65      TEST_UTIL.getConfiguration().setBoolean("hbase.master.enabletable.roundrobin", true);
66      ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
67      HTableDescriptor htd = new HTableDescriptor(TABLENAME);
68      htd.addFamily(new HColumnDescriptor(FAMILY));
69      TEST_UTIL.createMultiRegionsInMeta(TEST_UTIL.getConfiguration(), htd,
70          HBaseTestingUtility.KEYS);
71      // Make a mark for the table in the filesystem.
72      FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
73      FSTableDescriptors.
74        createTableDescriptor(fs, FSUtils.getRootDir(TEST_UTIL.getConfiguration()), htd);
75      // Assign out the regions we just created.
76      HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
77      MiniHBaseCluster cluster = TEST_UTIL.getMiniHBaseCluster();
78      admin.disableTable(TABLENAME);
79      admin.enableTable(TABLENAME);
80      boolean ready = false;
81      while (!ready) {
82        ZKAssign.blockUntilNoRIT(zkw);
83        // Assert that every regionserver has some regions on it, else invoke the balancer.
84        ready = true;
85        for (int i = 0; i < NB_SLAVES; i++) {
86          HRegionServer hrs = cluster.getRegionServer(i);
87          if (hrs.getOnlineRegions().isEmpty()) {
88            ready = false;
89            break;
90          }
91        }
92        if (!ready) {
93          admin.balancer();
94          Thread.sleep(100);
95        }
96      }
97    }
98  
99    private static HRegionServer setDrainingServer(final HRegionServer hrs)
100   throws KeeperException {
101     LOG.info("Making " + hrs.getServerName() + " the draining server; " +
102       "it has " + hrs.getNumberOfOnlineRegions() + " online regions");
103     ZooKeeperWatcher zkw = hrs.getZooKeeper();
104     String hrsDrainingZnode =
105       ZKUtil.joinZNode(zkw.drainingZNode, hrs.getServerName().toString());
106     ZKUtil.createWithParents(zkw, hrsDrainingZnode);
107     return hrs;
108   }
109 
110   private static HRegionServer unsetDrainingServer(final HRegionServer hrs)
111   throws KeeperException {
112     ZooKeeperWatcher zkw = hrs.getZooKeeper();
113     String hrsDrainingZnode =
114       ZKUtil.joinZNode(zkw.drainingZNode, hrs.getServerName().toString());
115     ZKUtil.deleteNode(zkw, hrsDrainingZnode);
116     return hrs;
117   }
118 
119   @AfterClass
120   public static void tearDownAfterClass() throws Exception {
121     TEST_UTIL.shutdownMiniCluster();
122   }
123 
124   /**
125    * Test adding server to draining servers and then move regions off it.
126    * Make sure that no regions are moved back to the draining server.
127    * @throws IOException 
128    * @throws KeeperException 
129    */
130   @Test  // (timeout=30000)
131   public void testDrainingServerOffloading()
132   throws IOException, KeeperException {
133     // I need master in the below.
134     HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
135     HRegionInfo hriToMoveBack = null;
136     // Set first server as draining server.
137     HRegionServer drainingServer =
138       setDrainingServer(TEST_UTIL.getMiniHBaseCluster().getRegionServer(0));
139     try {
140       final int regionsOnDrainingServer =
141         drainingServer.getNumberOfOnlineRegions();
142       Assert.assertTrue(regionsOnDrainingServer > 0);
143       List<HRegionInfo> hris = drainingServer.getOnlineRegions();
144       for (HRegionInfo hri : hris) {
145         // Pass null and AssignmentManager will chose a random server BUT it
146         // should exclude draining servers.
147         master.move(hri.getEncodedNameAsBytes(), null);
148         // Save off region to move back.
149         hriToMoveBack = hri;
150       }
151       // Wait for regions to come back on line again.
152       waitForAllRegionsOnline();
153       Assert.assertEquals(0, drainingServer.getNumberOfOnlineRegions());
154     } finally {
155       unsetDrainingServer(drainingServer);
156     }
157     // Now we've unset the draining server, we should be able to move a region
158     // to what was the draining server.
159     master.move(hriToMoveBack.getEncodedNameAsBytes(),
160       Bytes.toBytes(drainingServer.getServerName().toString()));
161     // Wait for regions to come back on line again.
162     waitForAllRegionsOnline();
163     Assert.assertEquals(1, drainingServer.getNumberOfOnlineRegions());
164   }
165 
166   /**
167    * Test that draining servers are ignored even after killing regionserver(s).
168    * Verify that the draining server is not given any of the dead servers regions.
169    * @throws KeeperException
170    * @throws IOException
171    */
172   @Test  (timeout=30000)
173   public void testDrainingServerWithAbort() throws KeeperException, IOException {
174     // Add first server to draining servers up in zk.
175     HRegionServer drainingServer =
176       setDrainingServer(TEST_UTIL.getMiniHBaseCluster().getRegionServer(0));
177     try {
178       final int regionsOnDrainingServer =
179         drainingServer.getNumberOfOnlineRegions();
180       Assert.assertTrue(regionsOnDrainingServer > 0);
181       // Kill a few regionservers.
182       int aborted = 0;
183       final int numberToAbort = 2;
184       for (int i = 1; i < TEST_UTIL.getMiniHBaseCluster().countServedRegions(); i++) {
185         HRegionServer hrs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(i);
186         if (hrs.getServerName().equals(drainingServer.getServerName())) continue;
187         hrs.abort("Aborting");
188         aborted++;
189         if (aborted >= numberToAbort) break;
190       }
191       // Wait for regions to come back on line again.
192       waitForAllRegionsOnline();
193       // Assert the draining server still has the same number of regions.
194       Assert.assertEquals(regionsOnDrainingServer,
195         drainingServer.getNumberOfOnlineRegions());
196     } finally {
197       unsetDrainingServer(drainingServer);
198     }
199   }
200 
201   private void waitForAllRegionsOnline() {
202     while (TEST_UTIL.getMiniHBaseCluster().getMaster().
203         getAssignmentManager().isRegionsInTransition()) {
204       Threads.sleep(10);
205     }
206     // Wait for regions to come back on line again.
207     while (!isAllRegionsOnline()) {
208       Threads.sleep(10);
209     }
210   }
211 
212   private boolean isAllRegionsOnline() {
213     return TEST_UTIL.getMiniHBaseCluster().countServedRegions() ==
214       (COUNT_OF_REGIONS + 2 /*catalog regions*/);
215   }
216 
217   @org.junit.Rule
218   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
219     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
220 }
221