1   /**
2    * Copyright 2008 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase;
21  
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.fail;
24  
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.Arrays;
28  import java.util.List;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.catalog.CatalogTracker;
33  import org.apache.hadoop.hbase.catalog.MetaReader;
34  import org.apache.hadoop.hbase.client.HBaseAdmin;
35  import org.apache.hadoop.hbase.client.HTable;
36  import org.apache.hadoop.hbase.regionserver.HRegionServer;
37  import org.apache.hadoop.hbase.util.Bytes;
38  import org.apache.hadoop.hbase.util.JVMClusterUtil;
39  import org.junit.AfterClass;
40  import org.junit.Before;
41  import org.junit.BeforeClass;
42  import org.junit.Test;
43  import org.junit.experimental.categories.Category;
44  
45  /**
46   * Test whether region rebalancing works. (HBASE-71)
47   */
48  @Category(LargeTests.class)
49  public class TestRegionRebalancing {
50    final Log LOG = LogFactory.getLog(this.getClass().getName());
51    private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
52    HTable table;
53    HTableDescriptor desc;
54    private static final byte [] FAMILY_NAME = Bytes.toBytes("col");
55  
56    @BeforeClass
57    public static void beforeClass() throws Exception {
58      UTIL.startMiniCluster(1);
59    }
60  
61    @AfterClass
62    public static void afterClass() throws Exception {
63      UTIL.shutdownMiniCluster();
64    }
65  
66    @Before
67    public void before() {
68      this.desc = new HTableDescriptor("test");
69      this.desc.addFamily(new HColumnDescriptor(FAMILY_NAME));
70    }
71  
72    /**
73     * For HBASE-71. Try a few different configurations of starting and stopping
74     * region servers to see if the assignment or regions is pretty balanced.
75     * @throws IOException
76     * @throws InterruptedException
77     */
78    @Test
79    public void testRebalanceOnRegionServerNumberChange()
80    throws IOException, InterruptedException {
81      HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
82      admin.createTable(this.desc, Arrays.copyOfRange(HBaseTestingUtility.KEYS,
83          1, HBaseTestingUtility.KEYS.length));
84      this.table = new HTable(UTIL.getConfiguration(), this.desc.getName());
85      CatalogTracker ct = new CatalogTracker(UTIL.getConfiguration());
86      ct.start();
87      try {
88        MetaReader.fullScanMetaAndPrint(ct);
89      } finally {
90        ct.stop();
91      }
92      assertEquals("Test table should have right number of regions",
93        HBaseTestingUtility.KEYS.length,
94        this.table.getStartKeys().length);
95  
96      // verify that the region assignments are balanced to start out
97      assertRegionsAreBalanced();
98  
99      // add a region server - total of 2
100     LOG.info("Started second server=" +
101       UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
102     UTIL.getHBaseCluster().getMaster().balance();
103     assertRegionsAreBalanced();
104 
105     // add a region server - total of 3
106     LOG.info("Started third server=" +
107         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
108     UTIL.getHBaseCluster().getMaster().balance();
109     assertRegionsAreBalanced();
110 
111     // kill a region server - total of 2
112     LOG.info("Stopped third server=" + UTIL.getHBaseCluster().stopRegionServer(2, false));
113     UTIL.getHBaseCluster().waitOnRegionServer(2);
114     UTIL.getHBaseCluster().getMaster().balance();
115     assertRegionsAreBalanced();
116 
117     // start two more region servers - total of 4
118     LOG.info("Readding third server=" +
119         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
120     LOG.info("Added fourth server=" +
121         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
122     UTIL.getHBaseCluster().getMaster().balance();
123     assertRegionsAreBalanced();
124 
125     for (int i = 0; i < 6; i++){
126       LOG.info("Adding " + (i + 5) + "th region server");
127       UTIL.getHBaseCluster().startRegionServer();
128     }
129     UTIL.getHBaseCluster().getMaster().balance();
130     assertRegionsAreBalanced();
131     table.close();
132   }
133 
134   /** figure out how many regions are currently being served. */
135   private int getRegionCount() throws IOException {
136     int total = 0;
137     for (HRegionServer server : getOnlineRegionServers()) {
138       total += server.getOnlineRegions().size();
139     }
140     return total;
141   }
142 
143   /**
144    * Determine if regions are balanced. Figure out the total, divide by the
145    * number of online servers, then test if each server is +/- 1 of average
146    * rounded up.
147    */
148   private void assertRegionsAreBalanced() throws IOException {
149     // TODO: Fix this test.  Old balancer used to run with 'slop'.  New
150     // balancer does not.
151     boolean success = false;
152     float slop = (float)UTIL.getConfiguration().getFloat("hbase.regions.slop", 0.1f);
153     if (slop <= 0) slop = 1;
154 
155     for (int i = 0; i < 5; i++) {
156       success = true;
157       // make sure all the regions are reassigned before we test balance
158       waitForAllRegionsAssigned();
159 
160       int regionCount = getRegionCount();
161       List<HRegionServer> servers = getOnlineRegionServers();
162       double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad();
163       int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
164       int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
165       LOG.debug("There are " + servers.size() + " servers and " + regionCount
166         + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
167         + ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
168 
169       for (HRegionServer server : servers) {
170         int serverLoad = server.getOnlineRegions().size();
171         LOG.debug(server.getServerName() + " Avg: " + avg + " actual: " + serverLoad);
172         if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
173             && serverLoad >= avgLoadMinusSlop)) {
174           for (HRegionInfo hri : server.getOnlineRegions()) {
175             if (hri.isMetaRegion() || hri.isRootRegion()) serverLoad--;
176             // LOG.debug(hri.getRegionNameAsString());
177           }
178           if (!(serverLoad <= avgLoadPlusSlop && serverLoad >= avgLoadMinusSlop)) {
179             LOG.debug(server.getServerName() + " Isn't balanced!!! Avg: " + avg +
180                 " actual: " + serverLoad + " slop: " + slop);
181             success = false;            
182             break;
183           }
184         }
185       }
186 
187       if (!success) {
188         // one or more servers are not balanced. sleep a little to give it a
189         // chance to catch up. then, go back to the retry loop.
190         try {
191           Thread.sleep(10000);
192         } catch (InterruptedException e) {}
193 
194         UTIL.getHBaseCluster().getMaster().balance();
195         continue;
196       }
197 
198       // if we get here, all servers were balanced, so we should just return.
199       return;
200     }
201     // if we get here, we tried 5 times and never got to short circuit out of
202     // the retry loop, so this is a failure.
203     fail("After 5 attempts, region assignments were not balanced.");
204   }
205 
206   private List<HRegionServer> getOnlineRegionServers() {
207     List<HRegionServer> list = new ArrayList<HRegionServer>();
208     for (JVMClusterUtil.RegionServerThread rst :
209         UTIL.getHBaseCluster().getRegionServerThreads()) {
210       if (rst.getRegionServer().isOnline()) {
211         list.add(rst.getRegionServer());
212       }
213     }
214     return list;
215   }
216 
217   /**
218    * Wait until all the regions are assigned.
219    */
220   private void waitForAllRegionsAssigned() throws IOException {
221     int totalRegions = HBaseTestingUtility.KEYS.length+2;
222     while (getRegionCount() < totalRegions) {
223     // while (!cluster.getMaster().allRegionsAssigned()) {
224       LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are " + getRegionCount() + " right now.");
225       try {
226         Thread.sleep(200);
227       } catch (InterruptedException e) {}
228     }
229   }
230 
231   @org.junit.Rule
232   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
233     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
234 }
235