1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.balancer;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.NavigableMap;
27 import java.util.Random;
28 import java.util.TreeMap;
29
30 import org.apache.commons.logging.Log;
31 import org.apache.commons.logging.LogFactory;
32 import org.apache.hadoop.classification.InterfaceAudience;
33 import org.apache.hadoop.hbase.HRegionInfo;
34 import org.apache.hadoop.hbase.ServerName;
35 import org.apache.hadoop.hbase.master.AssignmentManager;
36 import org.apache.hadoop.hbase.master.RegionPlan;
37
38 import com.google.common.collect.MinMaxPriorityQueue;
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56 @InterfaceAudience.Private
57 public class DefaultLoadBalancer extends BaseLoadBalancer {
58 private static final Log LOG = LogFactory.getLog(DefaultLoadBalancer.class);
59 private static final Random RANDOM = new Random(System.currentTimeMillis());
60
61 private RegionInfoComparator riComparator = new RegionInfoComparator();
62 private RegionPlan.RegionPlanComparator rpComparator = new RegionPlan.RegionPlanComparator();
63
64
65
66
67
68
69
70
71
72
73 static class BalanceInfo {
74
75 private final int nextRegionForUnload;
76 private int numRegionsAdded;
77
78 public BalanceInfo(int nextRegionForUnload, int numRegionsAdded) {
79 this.nextRegionForUnload = nextRegionForUnload;
80 this.numRegionsAdded = numRegionsAdded;
81 }
82
83 int getNextRegionForUnload() {
84 return nextRegionForUnload;
85 }
86
87 int getNumRegionsAdded() {
88 return numRegionsAdded;
89 }
90
91 void setNumRegionsAdded(int numAdded) {
92 this.numRegionsAdded = numAdded;
93 }
94 }
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181 public List<RegionPlan> balanceCluster(
182 Map<ServerName, List<HRegionInfo>> clusterMap) {
183 boolean emptyRegionServerPresent = false;
184 long startTime = System.currentTimeMillis();
185
186
187 ClusterLoadState cs = new ClusterLoadState(clusterMap);
188
189 int numServers = cs.getNumServers();
190 if (numServers == 0) {
191 LOG.debug("numServers=0 so skipping load balancing");
192 return null;
193 }
194 NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
195
196 int numRegions = cs.getNumRegions();
197
198 if (!this.needsBalance(cs)) {
199
200 float average = cs.getLoadAverage();
201 LOG.info("Skipping load balancing because balanced cluster; " +
202 "servers=" + numServers + " " +
203 "regions=" + numRegions + " average=" + average + " " +
204 "mostloaded=" + serversByLoad.lastKey().getLoad() +
205 " leastloaded=" + serversByLoad.firstKey().getLoad());
206 return null;
207 }
208
209 int min = numRegions / numServers;
210 int max = numRegions % numServers == 0 ? min : min + 1;
211
212
213 StringBuilder strBalanceParam = new StringBuilder();
214 strBalanceParam.append("Balance parameter: numRegions=").append(numRegions)
215 .append(", numServers=").append(numServers).append(", max=").append(max)
216 .append(", min=").append(min);
217 LOG.debug(strBalanceParam.toString());
218
219
220
221 MinMaxPriorityQueue<RegionPlan> regionsToMove =
222 MinMaxPriorityQueue.orderedBy(rpComparator).create();
223 List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>();
224
225
226 int serversOverloaded = 0;
227
228 boolean fetchFromTail = false;
229 Map<ServerName, BalanceInfo> serverBalanceInfo =
230 new TreeMap<ServerName, BalanceInfo>();
231 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server:
232 serversByLoad.descendingMap().entrySet()) {
233 ServerAndLoad sal = server.getKey();
234 int regionCount = sal.getLoad();
235 if (regionCount <= max) {
236 serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
237 break;
238 }
239 serversOverloaded++;
240 List<HRegionInfo> regions = server.getValue();
241 int numToOffload = Math.min(regionCount - max, regions.size());
242
243
244 Collections.sort(regions, riComparator);
245 int numTaken = 0;
246 for (int i = 0; i <= numToOffload; ) {
247 HRegionInfo hri = regions.get(i);
248 if (fetchFromTail) {
249 hri = regions.get(regions.size() - 1 - i);
250 }
251 i++;
252
253 if (hri.isMetaRegion()) continue;
254 regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
255 numTaken++;
256 if (numTaken >= numToOffload) break;
257
258 if (emptyRegionServerPresent) {
259 fetchFromTail = !fetchFromTail;
260 }
261 }
262 serverBalanceInfo.put(sal.getServerName(),
263 new BalanceInfo(numToOffload, (-1)*numTaken));
264 }
265 int totalNumMoved = regionsToMove.size();
266
267
268 int neededRegions = 0;
269 fetchFromTail = false;
270
271 Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
272 float average = (float)numRegions / numServers;
273 int maxToTake = numRegions - (int)average;
274 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server:
275 serversByLoad.entrySet()) {
276 if (maxToTake == 0) break;
277 int regionCount = server.getKey().getLoad();
278 if (regionCount >= min && regionCount > 0) {
279 continue;
280 }
281 int regionsToPut = min - regionCount;
282 if (regionsToPut == 0)
283 {
284 regionsToPut = 1;
285 maxToTake--;
286 }
287 underloadedServers.put(server.getKey().getServerName(), regionsToPut);
288 }
289
290 int serversUnderloaded = underloadedServers.size();
291 int incr = 1;
292 List<ServerName> sns =
293 Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
294 Collections.shuffle(sns, RANDOM);
295 while (regionsToMove.size() > 0) {
296 int cnt = 0;
297 int i = incr > 0 ? 0 : underloadedServers.size()-1;
298 for (; i >= 0 && i < underloadedServers.size(); i += incr) {
299 if (regionsToMove.isEmpty()) break;
300 ServerName si = sns.get(i);
301 int numToTake = underloadedServers.get(si);
302 if (numToTake == 0) continue;
303
304 addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
305 if (emptyRegionServerPresent) {
306 fetchFromTail = !fetchFromTail;
307 }
308
309 underloadedServers.put(si, numToTake-1);
310 cnt++;
311 BalanceInfo bi = serverBalanceInfo.get(si);
312 if (bi == null) {
313 bi = new BalanceInfo(0, 0);
314 serverBalanceInfo.put(si, bi);
315 }
316 bi.setNumRegionsAdded(bi.getNumRegionsAdded()+1);
317 }
318 if (cnt == 0) break;
319
320 incr = -incr;
321 }
322 for (Integer i : underloadedServers.values()) {
323
324 neededRegions += i;
325 }
326
327
328
329 if (neededRegions == 0 && regionsToMove.isEmpty()) {
330 long endTime = System.currentTimeMillis();
331 LOG.info("Calculated a load balance in " + (endTime-startTime) + "ms. " +
332 "Moving " + totalNumMoved + " regions off of " +
333 serversOverloaded + " overloaded servers onto " +
334 serversUnderloaded + " less loaded servers");
335 return regionsToReturn;
336 }
337
338
339
340
341
342 if (neededRegions != 0) {
343
344 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
345 serversByLoad.descendingMap().entrySet()) {
346 BalanceInfo balanceInfo =
347 serverBalanceInfo.get(server.getKey().getServerName());
348 int idx =
349 balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
350 if (idx >= server.getValue().size()) break;
351 HRegionInfo region = server.getValue().get(idx);
352 if (region.isMetaRegion()) continue;
353 regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
354 totalNumMoved++;
355 if (--neededRegions == 0) {
356
357 break;
358 }
359 }
360 }
361
362
363
364
365
366 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
367 serversByLoad.entrySet()) {
368 int regionCount = server.getKey().getLoad();
369 if (regionCount >= min) break;
370 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
371 if(balanceInfo != null) {
372 regionCount += balanceInfo.getNumRegionsAdded();
373 }
374 if(regionCount >= min) {
375 continue;
376 }
377 int numToTake = min - regionCount;
378 int numTaken = 0;
379 while(numTaken < numToTake && 0 < regionsToMove.size()) {
380 addRegionPlan(regionsToMove, fetchFromTail,
381 server.getKey().getServerName(), regionsToReturn);
382 numTaken++;
383 if (emptyRegionServerPresent) {
384 fetchFromTail = !fetchFromTail;
385 }
386 }
387 }
388
389
390 if (0 < regionsToMove.size()) {
391 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
392 serversByLoad.entrySet()) {
393 int regionCount = server.getKey().getLoad();
394 if(regionCount >= max) {
395 break;
396 }
397 addRegionPlan(regionsToMove, fetchFromTail,
398 server.getKey().getServerName(), regionsToReturn);
399 if (emptyRegionServerPresent) {
400 fetchFromTail = !fetchFromTail;
401 }
402 if (regionsToMove.isEmpty()) {
403 break;
404 }
405 }
406 }
407
408 long endTime = System.currentTimeMillis();
409
410 if (!regionsToMove.isEmpty() || neededRegions != 0) {
411
412 LOG.warn("regionsToMove=" + totalNumMoved +
413 ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded +
414 ", serversUnderloaded=" + serversUnderloaded);
415 StringBuilder sb = new StringBuilder();
416 for (Map.Entry<ServerName, List<HRegionInfo>> e: clusterMap.entrySet()) {
417 if (sb.length() > 0) sb.append(", ");
418 sb.append(e.getKey().toString());
419 sb.append(" ");
420 sb.append(e.getValue().size());
421 }
422 LOG.warn("Input " + sb.toString());
423 }
424
425
426 LOG.info("Done. Calculated a load balance in " + (endTime-startTime) + "ms. " +
427 "Moving " + totalNumMoved + " regions off of " +
428 serversOverloaded + " overloaded servers onto " +
429 serversUnderloaded + " less loaded servers");
430
431 return regionsToReturn;
432 }
433
434
435
436
437 private void addRegionPlan(final MinMaxPriorityQueue<RegionPlan> regionsToMove,
438 final boolean fetchFromTail, final ServerName sn, List<RegionPlan> regionsToReturn) {
439 RegionPlan rp = null;
440 if (!fetchFromTail) rp = regionsToMove.remove();
441 else rp = regionsToMove.removeLast();
442 rp.setDestination(sn);
443 regionsToReturn.add(rp);
444 }
445 }