View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.fail;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.client.Admin;
27  import org.apache.hadoop.hbase.client.Connection;
28  import org.apache.hadoop.hbase.client.ConnectionFactory;
29  import org.apache.hadoop.hbase.client.HBaseAdmin;
30  import org.apache.hadoop.hbase.client.HTable;
31  import org.apache.hadoop.hbase.client.RegionLocator;
32  import org.apache.hadoop.hbase.master.RegionStates;
33  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
34  import org.apache.hadoop.hbase.regionserver.HRegionServer;
35  import org.apache.hadoop.hbase.testclassification.LargeTests;
36  import org.apache.hadoop.hbase.util.Bytes;
37  import org.apache.hadoop.hbase.util.JVMClusterUtil;
38  import org.apache.hadoop.hbase.util.Threads;
39  import org.junit.After;
40  import org.junit.Before;
41  import org.junit.Test;
42  import org.junit.experimental.categories.Category;
43  import org.junit.runner.RunWith;
44  import org.junit.runners.Parameterized;
45  import org.junit.runners.Parameterized.Parameters;
46  
47  import java.io.IOException;
48  import java.util.ArrayList;
49  import java.util.Arrays;
50  import java.util.Collection;
51  import java.util.List;
52  
53  /**
54   * Test whether region re-balancing works. (HBASE-71)
55   */
56  @Category(LargeTests.class)
57  @RunWith(value = Parameterized.class)
58  public class TestRegionRebalancing {
59  
60    @Parameters
61    public static Collection<Object[]> data() {
62      Object[][] balancers =
63          new String[][] { { "org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer" },
64              { "org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer" } };
65      return Arrays.asList(balancers);
66    }
67  
68    private static final byte[] FAMILY_NAME = Bytes.toBytes("col");
69    public static final Log LOG = LogFactory.getLog(TestRegionRebalancing.class);
70    private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
71    private RegionLocator table;
72    private HTableDescriptor desc;
73    private String balancerName;
74  
75    public TestRegionRebalancing(String balancerName) {
76      this.balancerName = balancerName;
77  
78    }
79  
80    @After
81    public void after() throws Exception {
82      UTIL.shutdownMiniCluster();
83    }
84  
85    @Before
86    public void before() throws Exception {
87      UTIL.getConfiguration().set("hbase.master.loadbalancer.class", this.balancerName);
88      UTIL.startMiniCluster(1);
89      this.desc = new HTableDescriptor(TableName.valueOf("test"));
90      this.desc.addFamily(new HColumnDescriptor(FAMILY_NAME));
91    }
92  
93    /**
94     * For HBASE-71. Try a few different configurations of starting and stopping
95     * region servers to see if the assignment or regions is pretty balanced.
96     * @throws IOException
97     * @throws InterruptedException
98     */
99    @Test (timeout=300000)
100   @SuppressWarnings("deprecation")
101   public void testRebalanceOnRegionServerNumberChange()
102   throws IOException, InterruptedException {
103     Connection connection = ConnectionFactory.createConnection(UTIL.getConfiguration());
104     Admin admin = connection.getAdmin();
105     admin.createTable(this.desc, Arrays.copyOfRange(HBaseTestingUtility.KEYS,
106         1, HBaseTestingUtility.KEYS.length));
107     this.table = new HTable(UTIL.getConfiguration(), this.desc.getTableName());
108 
109     MetaTableAccessor.fullScanMetaAndPrint(admin.getConnection());
110 
111     assertEquals("Test table should have right number of regions",
112       HBaseTestingUtility.KEYS.length,
113       this.table.getStartKeys().length);
114 
115     // verify that the region assignments are balanced to start out
116     assertRegionsAreBalanced();
117 
118     // add a region server - total of 2
119     LOG.info("Started second server=" +
120       UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
121     UTIL.getHBaseCluster().getMaster().balance();
122     assertRegionsAreBalanced();
123 
124     // On a balanced cluster, calling balance() should return true
125     assert(UTIL.getHBaseCluster().getMaster().balance() == true);
126 
127     // if we add a server, then the balance() call should return true
128     // add a region server - total of 3
129     LOG.info("Started third server=" +
130         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
131     assert(UTIL.getHBaseCluster().getMaster().balance() == true);
132     assertRegionsAreBalanced();
133 
134     // kill a region server - total of 2
135     LOG.info("Stopped third server=" + UTIL.getHBaseCluster().stopRegionServer(2, false));
136     UTIL.getHBaseCluster().waitOnRegionServer(2);
137     UTIL.getHBaseCluster().getMaster().balance();
138     assertRegionsAreBalanced();
139 
140     // start two more region servers - total of 4
141     LOG.info("Readding third server=" +
142         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
143     LOG.info("Added fourth server=" +
144         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
145     assert(UTIL.getHBaseCluster().getMaster().balance() == true);
146     assertRegionsAreBalanced();
147 
148     for (int i = 0; i < 6; i++){
149       LOG.info("Adding " + (i + 5) + "th region server");
150       UTIL.getHBaseCluster().startRegionServer();
151     }
152     assert(UTIL.getHBaseCluster().getMaster().balance() == true);
153     assertRegionsAreBalanced();
154     table.close();
155     admin.close();
156   }
157 
158   /**
159    * Determine if regions are balanced. Figure out the total, divide by the
160    * number of online servers, then test if each server is +/- 1 of average
161    * rounded up.
162    */
163   private void assertRegionsAreBalanced() throws IOException {
164     // TODO: Fix this test.  Old balancer used to run with 'slop'.  New
165     // balancer does not.
166     boolean success = false;
167     float slop = (float)UTIL.getConfiguration().getFloat("hbase.regions.slop", 0.1f);
168     if (slop <= 0) slop = 1;
169 
170     for (int i = 0; i < 5; i++) {
171       success = true;
172       // make sure all the regions are reassigned before we test balance
173       waitForAllRegionsAssigned();
174 
175       long regionCount = UTIL.getMiniHBaseCluster().countServedRegions();
176       List<HRegionServer> servers = getOnlineRegionServers();
177       double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad();
178       int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
179       int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
180       LOG.debug("There are " + servers.size() + " servers and " + regionCount
181         + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
182         + ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
183 
184       for (HRegionServer server : servers) {
185         int serverLoad =
186           ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
187         LOG.debug(server.getServerName() + " Avg: " + avg + " actual: " + serverLoad);
188         if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
189             && serverLoad >= avgLoadMinusSlop)) {
190           for (HRegionInfo hri :
191               ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
192             if (hri.isMetaRegion()) serverLoad--;
193             // LOG.debug(hri.getRegionNameAsString());
194           }
195           if (!(serverLoad <= avgLoadPlusSlop && serverLoad >= avgLoadMinusSlop)) {
196             LOG.debug(server.getServerName() + " Isn't balanced!!! Avg: " + avg +
197                 " actual: " + serverLoad + " slop: " + slop);
198             success = false;
199             break;
200           }
201         }
202       }
203 
204       if (!success) {
205         // one or more servers are not balanced. sleep a little to give it a
206         // chance to catch up. then, go back to the retry loop.
207         try {
208           Thread.sleep(10000);
209         } catch (InterruptedException e) {}
210 
211         UTIL.getHBaseCluster().getMaster().balance();
212         continue;
213       }
214 
215       // if we get here, all servers were balanced, so we should just return.
216       return;
217     }
218     // if we get here, we tried 5 times and never got to short circuit out of
219     // the retry loop, so this is a failure.
220     fail("After 5 attempts, region assignments were not balanced.");
221   }
222 
223   private List<HRegionServer> getOnlineRegionServers() {
224     List<HRegionServer> list = new ArrayList<HRegionServer>();
225     for (JVMClusterUtil.RegionServerThread rst :
226         UTIL.getHBaseCluster().getRegionServerThreads()) {
227       if (rst.getRegionServer().isOnline()) {
228         list.add(rst.getRegionServer());
229       }
230     }
231     return list;
232   }
233 
234   /**
235    * Wait until all the regions are assigned.
236    */
237   private void waitForAllRegionsAssigned() throws IOException {
238     int totalRegions = HBaseTestingUtility.KEYS.length;
239     while (UTIL.getMiniHBaseCluster().countServedRegions() < totalRegions) {
240     // while (!cluster.getMaster().allRegionsAssigned()) {
241       LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are "
242         + UTIL.getMiniHBaseCluster().countServedRegions() + " right now.");
243       try {
244         Thread.sleep(200);
245       } catch (InterruptedException e) {}
246     }
247     RegionStates regionStates = UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
248     while (!regionStates.getRegionsInTransition().isEmpty()) {
249       Threads.sleep(100);
250     }
251   }
252 
253 }
254