View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.List;
23  import java.util.Map;
24  import java.util.Queue;
25  import java.util.TreeMap;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.hbase.ClusterStatus;
31  import org.apache.hadoop.hbase.HBaseConfiguration;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.testclassification.MediumTests;
34  import org.apache.hadoop.hbase.RegionLoad;
35  import org.apache.hadoop.hbase.ServerLoad;
36  import org.apache.hadoop.hbase.ServerName;
37  import org.apache.hadoop.hbase.master.RegionPlan;
38  import org.apache.hadoop.hbase.util.Bytes;
39  import org.junit.BeforeClass;
40  import org.junit.Test;
41  import org.junit.experimental.categories.Category;
42  
43  import static org.junit.Assert.assertEquals;
44  import static org.junit.Assert.assertNotNull;
45  import static org.junit.Assert.assertNull;
46  import static org.junit.Assert.assertTrue;
47  import static org.mockito.Mockito.mock;
48  import static org.mockito.Mockito.when;
49  
50  @Category(MediumTests.class)
51  public class TestStochasticLoadBalancer extends BalancerTestBase {
52    public static final String REGION_KEY = "testRegion";
53    private static StochasticLoadBalancer loadBalancer;
54    private static final Log LOG = LogFactory.getLog(TestStochasticLoadBalancer.class);
55  
56    @BeforeClass
57    public static void beforeAllTests() throws Exception {
58      Configuration conf = HBaseConfiguration.create();
59      conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
60      conf.setFloat("hbase.regions.slop", 0.0f);
61      loadBalancer = new StochasticLoadBalancer();
62      loadBalancer.setConf(conf);
63    }
64  
65    int[] largeCluster = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
66        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
67        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
68        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
73        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
74        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
76        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
77        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78        0, 0, 0, 0, 0, 56 };
79  
80    // int[testnum][servernumber] -> numregions
81    int[][] clusterStateMocks = new int[][]{
82        // 1 node
83        new int[]{0},
84        new int[]{1},
85        new int[]{10},
86        // 2 node
87        new int[]{0, 0},
88        new int[]{2, 0},
89        new int[]{2, 1},
90        new int[]{2, 2},
91        new int[]{2, 3},
92        new int[]{2, 4},
93        new int[]{1, 1},
94        new int[]{0, 1},
95        new int[]{10, 1},
96        new int[]{514, 1432},
97        new int[]{48, 53},
98        // 3 node
99        new int[]{0, 1, 2},
100       new int[]{1, 2, 3},
101       new int[]{0, 2, 2},
102       new int[]{0, 3, 0},
103       new int[]{0, 4, 0},
104       new int[]{20, 20, 0},
105       // 4 node
106       new int[]{0, 1, 2, 3},
107       new int[]{4, 0, 0, 0},
108       new int[]{5, 0, 0, 0},
109       new int[]{6, 6, 0, 0},
110       new int[]{6, 2, 0, 0},
111       new int[]{6, 1, 0, 0},
112       new int[]{6, 0, 0, 0},
113       new int[]{4, 4, 4, 7},
114       new int[]{4, 4, 4, 8},
115       new int[]{0, 0, 0, 7},
116       // 5 node
117       new int[]{1, 1, 1, 1, 4},
118       // more nodes
119       new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
120       new int[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 10},
121       new int[]{6, 6, 5, 6, 6, 6, 6, 6, 6, 1},
122       new int[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 54},
123       new int[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 55},
124       new int[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 56},
125       new int[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 16},
126       new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 8},
127       new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 9},
128       new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 10},
129       new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 123},
130       new int[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 155},
131       new int[]{10, 7, 12, 8, 11, 10, 9, 14},
132       new int[]{13, 14, 6, 10, 10, 10, 8, 10},
133       new int[]{130, 14, 60, 10, 100, 10, 80, 10},
134       new int[]{130, 140, 60, 100, 100, 100, 80, 100},
135       largeCluster,
136 
137   };
138 
139   @Test
140   public void testKeepRegionLoad() throws Exception {
141 
142     ServerName sn = ServerName.valueOf("test:8080", 100);
143     int numClusterStatusToAdd = 20000;
144     for (int i = 0; i < numClusterStatusToAdd; i++) {
145       ServerLoad sl = mock(ServerLoad.class);
146 
147       RegionLoad rl = mock(RegionLoad.class);
148       when(rl.getStores()).thenReturn(i);
149 
150       Map<byte[], RegionLoad> regionLoadMap =
151           new TreeMap<byte[], RegionLoad>(Bytes.BYTES_COMPARATOR);
152       regionLoadMap.put(Bytes.toBytes(REGION_KEY), rl);
153       when(sl.getRegionsLoad()).thenReturn(regionLoadMap);
154 
155       ClusterStatus clusterStatus = mock(ClusterStatus.class);
156       when(clusterStatus.getServers()).thenReturn(Arrays.asList(sn));
157       when(clusterStatus.getLoad(sn)).thenReturn(sl);
158 
159       loadBalancer.setClusterStatus(clusterStatus);
160     }
161     assertTrue(loadBalancer.loads.get(REGION_KEY) != null);
162     assertTrue(loadBalancer.loads.get(REGION_KEY).size() == 15);
163 
164     Queue<RegionLoad> loads = loadBalancer.loads.get(REGION_KEY);
165     int i = 0;
166     while(loads.size() > 0) {
167       RegionLoad rl = loads.remove();
168       assertEquals(i + (numClusterStatusToAdd - 15), rl.getStores());
169       i ++;
170     }
171   }
172 
173   /**
174    * Test the load balancing algorithm.
175    *
176    * Invariant is that all servers should be hosting either floor(average) or
177    * ceiling(average)
178    *
179    * @throws Exception
180    */
181   @Test
182   public void testBalanceCluster() throws Exception {
183 
184     for (int[] mockCluster : clusterStateMocks) {
185       Map<ServerName, List<HRegionInfo>> servers = mockClusterServers(mockCluster);
186       List<ServerAndLoad> list = convertToList(servers);
187       LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list));
188       List<RegionPlan> plans = loadBalancer.balanceCluster(servers);
189       List<ServerAndLoad> balancedCluster = reconcile(list, plans, servers);
190       LOG.info("Mock Balance : " + printMock(balancedCluster));
191       assertClusterAsBalanced(balancedCluster);
192       List<RegionPlan> secondPlans =  loadBalancer.balanceCluster(servers);
193       assertNull(secondPlans);
194       for (Map.Entry<ServerName, List<HRegionInfo>> entry : servers.entrySet()) {
195         returnRegions(entry.getValue());
196         returnServer(entry.getKey());
197       }
198     }
199 
200   }
201 
202   @Test
203   public void testMoveCost() throws Exception {
204     Configuration conf = HBaseConfiguration.create();
205     StochasticLoadBalancer.CostFunction
206         costFunction = new StochasticLoadBalancer.MoveCostFunction(conf);
207     for (int[] mockCluster : clusterStateMocks) {
208       BaseLoadBalancer.Cluster cluster = mockCluster(mockCluster);
209       double cost = costFunction.cost(cluster);
210       assertEquals(0.0f, cost, 0.001);
211 
212       // cluster region number is smaller than maxMoves=600
213       cluster.setNumRegions(190);
214       cluster.setNumMovedRegions(10);
215       cost = costFunction.cost(cluster);
216       assertEquals(0.05f, cost, 0.001);
217       cluster.setNumMovedRegions(100);
218       cost = costFunction.cost(cluster);
219       assertEquals(0.5f, cost, 0.001);
220 
221       // cluster region number is bigger than maxMoves=2500
222       cluster.setNumRegions(10000);
223       cluster.setNumMovedRegions(250);
224       cost = costFunction.cost(cluster);
225       assertEquals(0.1f, cost, 0.01);
226       cluster.setNumMovedRegions(1250);
227       cost = costFunction.cost(cluster);
228       assertEquals(0.5f, cost, 0.01);
229       cluster.setNumMovedRegions(2500);
230       cluster.setNumMovedMetaRegions(1);
231       cost = costFunction.cost(cluster);
232       assertEquals(1.0f, cost, 0.01);
233     }
234   }
235 
236   @Test
237   public void testSkewCost() {
238     Configuration conf = HBaseConfiguration.create();
239     StochasticLoadBalancer.CostFunction
240         costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf);
241     for (int[] mockCluster : clusterStateMocks) {
242       double cost = costFunction.cost(mockCluster(mockCluster));
243       assertTrue(cost >= 0);
244       assertTrue(cost <= 1.01);
245     }
246 
247     assertEquals(0,
248         costFunction.cost(mockCluster(new int[]{0, 0, 0, 0, 1})), 0.01);
249     assertEquals(0,
250         costFunction.cost(mockCluster(new int[]{0, 0, 0, 1, 1})), 0.01);
251     assertEquals(0,
252         costFunction.cost(mockCluster(new int[]{0, 0, 1, 1, 1})), 0.01);
253     assertEquals(0,
254         costFunction.cost(mockCluster(new int[]{0, 1, 1, 1, 1})), 0.01);
255     assertEquals(0,
256         costFunction.cost(mockCluster(new int[]{1, 1, 1, 1, 1})), 0.01);
257     assertEquals(0,
258         costFunction.cost(mockCluster(new int[]{10, 10, 10, 10, 10})), 0.01);
259     assertEquals(1,
260         costFunction.cost(mockCluster(new int[]{10000, 0, 0, 0, 0})), 0.01);
261   }
262 
263   @Test
264   public void testTableSkewCost() {
265     Configuration conf = HBaseConfiguration.create();
266     StochasticLoadBalancer.CostFunction
267         costFunction = new StochasticLoadBalancer.TableSkewCostFunction(conf);
268     for (int[] mockCluster : clusterStateMocks) {
269       BaseLoadBalancer.Cluster cluster = mockCluster(mockCluster);
270       double cost = costFunction.cost(cluster);
271       assertTrue(cost >= 0);
272       assertTrue(cost <= 1.01);
273     }
274   }
275 
276   @Test
277   public void testCostFromArray() {
278     Configuration conf = HBaseConfiguration.create();
279     StochasticLoadBalancer.CostFromRegionLoadFunction
280         costFunction = new StochasticLoadBalancer.MemstoreSizeCostFunction(conf);
281 
282     double[] statOne = new double[100];
283     for (int i =0; i < 100; i++) {
284       statOne[i] = 10;
285     }
286     assertEquals(0, costFunction.costFromArray(statOne), 0.01);
287 
288     double[] statTwo= new double[101];
289     for (int i =0; i < 100; i++) {
290       statTwo[i] = 0;
291     }
292     statTwo[100] = 101;
293     assertEquals(1, costFunction.costFromArray(statTwo), 0.01);
294 
295     double[] statThree = new double[200];
296     for (int i =0; i < 100; i++) {
297       statThree[i] = (0);
298       statThree[i+100] = 100;
299     }
300     assertEquals(0.5, costFunction.costFromArray(statThree), 0.01);
301   }
302 
303   @Test(timeout =  60000)
304   public void testLosingRs() throws Exception {
305     int numNodes = 3;
306     int numRegions = 20;
307     int numRegionsPerServer = 3; //all servers except one
308     int numTables = 2;
309 
310     Map<ServerName, List<HRegionInfo>> serverMap =
311         createServerMap(numNodes, numRegions, numRegionsPerServer, numTables);
312     List<ServerAndLoad> list = convertToList(serverMap);
313 
314 
315     List<RegionPlan> plans = loadBalancer.balanceCluster(serverMap);
316     assertNotNull(plans);
317 
318     // Apply the plan to the mock cluster.
319     List<ServerAndLoad> balancedCluster = reconcile(list, plans, serverMap);
320 
321     assertClusterAsBalanced(balancedCluster);
322 
323     ServerName sn = serverMap.keySet().toArray(new ServerName[serverMap.size()])[0];
324 
325     ServerName deadSn = ServerName.valueOf(sn.getHostname(), sn.getPort(), sn.getStartcode() - 100);
326 
327     serverMap.put(deadSn, new ArrayList<HRegionInfo>(0));
328 
329     plans = loadBalancer.balanceCluster(serverMap);
330     assertNull(plans);
331   }
332 
333   @Test (timeout = 60000)
334   public void testSmallCluster() {
335     int numNodes = 10;
336     int numRegions = 1000;
337     int numRegionsPerServer = 40; //all servers except one
338     int numTables = 10;
339     testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, true);
340   }
341 
342   @Test (timeout = 60000)
343   public void testSmallCluster2() {
344     int numNodes = 20;
345     int numRegions = 2000;
346     int numRegionsPerServer = 40; //all servers except one
347     int numTables = 10;
348     testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, true);
349   }
350 
351   @Test (timeout = 60000)
352   public void testSmallCluster3() {
353     int numNodes = 20;
354     int numRegions = 2000;
355     int numRegionsPerServer = 1; // all servers except one
356     int numTables = 10;
357     testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, false /* max moves */);
358   }
359 
360   @Test (timeout = 800000)
361   public void testMidCluster() {
362     int numNodes = 100;
363     int numRegions = 10000;
364     int numRegionsPerServer = 60; // all servers except one
365     int numTables = 40;
366     testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, true);
367   }
368 
369   @Test (timeout = 800000)
370   public void testMidCluster2() {
371     int numNodes = 200;
372     int numRegions = 100000;
373     int numRegionsPerServer = 40; // all servers except one
374     int numTables = 400;
375     testWithCluster(numNodes,
376         numRegions,
377         numRegionsPerServer,
378         numTables,
379         false /* num large num regions means may not always get to best balance with one run */);
380   }
381 
382 
383   @Test (timeout = 800000)
384   public void testMidCluster3() {
385     int numNodes = 100;
386     int numRegions = 2000;
387     int numRegionsPerServer = 9; // all servers except one
388     int numTables = 110;
389     testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, true);
390     // TODO(eclark): Make sure that the tables are well distributed.
391   }
392 
393   @Test
394   public void testLargeCluster() {
395     int numNodes = 1000;
396     int numRegions = 100000; //100 regions per RS
397     int numRegionsPerServer = 80; //all servers except one
398     int numTables = 100;
399     testWithCluster(numNodes, numRegions, numRegionsPerServer, numTables, true);
400   }
401 
402   protected void testWithCluster(int numNodes,
403                                  int numRegions,
404                                  int numRegionsPerServer,
405                                  int numTables,
406                                  boolean assertFullyBalanced) {
407     Map<ServerName, List<HRegionInfo>> serverMap =
408         createServerMap(numNodes, numRegions, numRegionsPerServer, numTables);
409 
410     List<ServerAndLoad> list = convertToList(serverMap);
411     LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list));
412 
413     // Run the balancer.
414     List<RegionPlan> plans = loadBalancer.balanceCluster(serverMap);
415     assertNotNull(plans);
416 
417     // Check to see that this actually got to a stable place.
418     if (assertFullyBalanced) {
419       // Apply the plan to the mock cluster.
420       List<ServerAndLoad> balancedCluster = reconcile(list, plans, serverMap);
421 
422       // Print out the cluster loads to make debugging easier.
423       LOG.info("Mock Balance : " + printMock(balancedCluster));
424       assertClusterAsBalanced(balancedCluster);
425       List<RegionPlan> secondPlans =  loadBalancer.balanceCluster(serverMap);
426       assertNull(secondPlans);
427     }
428   }
429 
430   private Map<ServerName, List<HRegionInfo>> createServerMap(int numNodes,
431                                                              int numRegions,
432                                                              int numRegionsPerServer,
433                                                              int numTables) {
434     //construct a cluster of numNodes, having  a total of numRegions. Each RS will hold
435     //numRegionsPerServer many regions except for the last one, which will host all the
436     //remaining regions
437     int[] cluster = new int[numNodes];
438     for (int i =0; i < numNodes; i++) {
439       cluster[i] = numRegionsPerServer;
440     }
441     cluster[cluster.length - 1] = numRegions - ((cluster.length - 1) * numRegionsPerServer);
442     return mockClusterServers(cluster, numTables);
443   }
444 }