1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Collections;
25 import java.util.Comparator;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.NavigableSet;
29 import java.util.Random;
30 import java.util.TreeMap;
31 import java.util.TreeSet;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.BlockLocation;
37 import org.apache.hadoop.fs.FileStatus;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.hbase.HRegionInfo;
41 import org.apache.hadoop.hbase.HServerAddress;
42 import org.apache.hadoop.hbase.HServerInfo;
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60 public class LoadBalancer {
/** Logger shared by every method of this class. */
private static final Log LOG = LogFactory.getLog(LoadBalancer.class);

/** Source of randomness for shuffling regions and picking servers; seeded from the wall clock. */
private static final Random RANDOM = new Random(System.currentTimeMillis());

/** Tolerated deviation from the average load, kept within [0, 1] by the constructor. */
private float slop;

/**
 * Creates a balancer, reading the slop factor from configuration.
 *
 * @param conf configuration consulted for {@code hbase.regions.slop}; the
 *        default is 0 and values outside [0, 1] are clamped to the nearest bound
 */
LoadBalancer(Configuration conf) {
  float configured = conf.getFloat("hbase.regions.slop", 0.0f);
  if (configured < 0) {
    configured = 0;
  } else if (configured > 1) {
    configured = 1;
  }
  this.slop = configured;
}
71
72 static class RegionPlanComparator implements Comparator<RegionPlan> {
73 @Override
74 public int compare(RegionPlan l, RegionPlan r) {
75 long diff = r.getRegionInfo().getRegionId() - l.getRegionInfo().getRegionId();
76 if (diff < 0) return -1;
77 if (diff > 0) return 1;
78 return 0;
79 }
80 }
81 static RegionPlanComparator rpComparator = new RegionPlanComparator();
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153 public List<RegionPlan> balanceCluster(
154 Map<HServerInfo,List<HRegionInfo>> clusterState) {
155 long startTime = System.currentTimeMillis();
156
157
158 TreeMap<HServerInfo,List<HRegionInfo>> serversByLoad =
159 new TreeMap<HServerInfo,List<HRegionInfo>>(
160 new HServerInfo.LoadComparator());
161 int numServers = clusterState.size();
162 if (numServers == 0) {
163 LOG.debug("numServers=0 so skipping load balancing");
164 return null;
165 }
166 int numRegions = 0;
167
168 for(Map.Entry<HServerInfo, List<HRegionInfo>> server:
169 clusterState.entrySet()) {
170 server.getKey().getLoad().setNumberOfRegions(server.getValue().size());
171 numRegions += server.getKey().getLoad().getNumberOfRegions();
172 serversByLoad.put(server.getKey(), server.getValue());
173 }
174
175
176 float average = (float)numRegions / numServers;
177
178 int floor = (int) Math.floor(average * (1 - slop));
179 int ceiling = (int) Math.ceil(average * (1 + slop));
180 if(serversByLoad.lastKey().getLoad().getNumberOfRegions() <= ceiling &&
181 serversByLoad.firstKey().getLoad().getNumberOfRegions() >= floor) {
182
183 LOG.info("Skipping load balancing. servers=" + numServers + " " +
184 "regions=" + numRegions + " average=" + average + " " +
185 "mostloaded=" + serversByLoad.lastKey().getLoad().getNumberOfRegions() +
186 " leastloaded=" + serversByLoad.lastKey().getLoad().getNumberOfRegions());
187 return null;
188 }
189 int min = numRegions / numServers;
190 int max = numRegions % numServers == 0 ? min : min + 1;
191
192
193
194 List<RegionPlan> regionsToMove = new ArrayList<RegionPlan>();
195 int regionidx = 0;
196
197
198 int serversOverloaded = 0;
199 Map<HServerInfo,BalanceInfo> serverBalanceInfo =
200 new TreeMap<HServerInfo,BalanceInfo>();
201 for(Map.Entry<HServerInfo, List<HRegionInfo>> server :
202 serversByLoad.descendingMap().entrySet()) {
203 HServerInfo serverInfo = server.getKey();
204 int regionCount = serverInfo.getLoad().getNumberOfRegions();
205 if(regionCount <= max) {
206 serverBalanceInfo.put(serverInfo, new BalanceInfo(0, 0));
207 break;
208 }
209 serversOverloaded++;
210 List<HRegionInfo> regions = randomize(server.getValue());
211 int numToOffload = Math.min(regionCount - max, regions.size());
212 int numTaken = 0;
213 for (int i = regions.size() - 1; i >= 0; i--) {
214 HRegionInfo hri = regions.get(i);
215
216 if (hri.isMetaRegion()) continue;
217 regionsToMove.add(new RegionPlan(hri, serverInfo, null));
218 numTaken++;
219 if (numTaken >= numToOffload) break;
220 }
221 serverBalanceInfo.put(serverInfo,
222 new BalanceInfo(numToOffload, (-1)*numTaken));
223 }
224
225
226 int serversUnderloaded = 0;
227 int neededRegions = 0;
228 for(Map.Entry<HServerInfo, List<HRegionInfo>> server :
229 serversByLoad.entrySet()) {
230 int regionCount = server.getKey().getLoad().getNumberOfRegions();
231 if(regionCount >= min) {
232 break;
233 }
234 serversUnderloaded++;
235 int numToTake = min - regionCount;
236 int numTaken = 0;
237 while(numTaken < numToTake && regionidx < regionsToMove.size()) {
238 regionsToMove.get(regionidx).setDestination(server.getKey());
239 numTaken++;
240 regionidx++;
241 }
242 serverBalanceInfo.put(server.getKey(), new BalanceInfo(0, numTaken));
243
244 if(numTaken < numToTake) {
245 neededRegions += (numToTake - numTaken);
246 }
247 }
248
249
250
251 if(neededRegions == 0 && regionidx == regionsToMove.size()) {
252 long endTime = System.currentTimeMillis();
253 LOG.info("Calculated a load balance in " + (endTime-startTime) + "ms. " +
254 "Moving " + regionsToMove.size() + " regions off of " +
255 serversOverloaded + " overloaded servers onto " +
256 serversUnderloaded + " less loaded servers");
257 return regionsToMove;
258 }
259
260
261
262
263
264 if (neededRegions != 0) {
265
266 for(Map.Entry<HServerInfo, List<HRegionInfo>> server :
267 serversByLoad.descendingMap().entrySet()) {
268 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey());
269 int idx =
270 balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
271 if (idx >= server.getValue().size()) break;
272 HRegionInfo region = server.getValue().get(idx);
273 if (region.isMetaRegion()) continue;
274 regionsToMove.add(new RegionPlan(region, server.getKey(), null));
275 if(--neededRegions == 0) {
276
277 break;
278 }
279 }
280 }
281
282
283
284
285
286 for(Map.Entry<HServerInfo, List<HRegionInfo>> server :
287 serversByLoad.entrySet()) {
288 int regionCount = server.getKey().getLoad().getNumberOfRegions();
289 if (regionCount >= min) break;
290 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey());
291 if(balanceInfo != null) {
292 regionCount += balanceInfo.getNumRegionsAdded();
293 }
294 if(regionCount >= min) {
295 continue;
296 }
297 int numToTake = min - regionCount;
298 int numTaken = 0;
299 while(numTaken < numToTake && regionidx < regionsToMove.size()) {
300 regionsToMove.get(regionidx).setDestination(server.getKey());
301 numTaken++;
302 regionidx++;
303 }
304 }
305
306
307 if(regionidx != regionsToMove.size()) {
308 for(Map.Entry<HServerInfo, List<HRegionInfo>> server :
309 serversByLoad.entrySet()) {
310 int regionCount = server.getKey().getLoad().getNumberOfRegions();
311 if(regionCount >= max) {
312 break;
313 }
314 regionsToMove.get(regionidx).setDestination(server.getKey());
315 regionidx++;
316 if(regionidx == regionsToMove.size()) {
317 break;
318 }
319 }
320 }
321
322 long endTime = System.currentTimeMillis();
323
324 if (regionidx != regionsToMove.size() || neededRegions != 0) {
325
326 LOG.warn("regionidx=" + regionidx + ", regionsToMove=" + regionsToMove.size() +
327 ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded +
328 ", serversUnderloaded=" + serversUnderloaded);
329 StringBuilder sb = new StringBuilder();
330 for (Map.Entry<HServerInfo, List<HRegionInfo>> e: clusterState.entrySet()) {
331 if (sb.length() > 0) sb.append(", ");
332 sb.append(e.getKey().getServerName());
333 sb.append(" ");
334 sb.append(e.getValue().size());
335 }
336 LOG.warn("Input " + sb.toString());
337 }
338
339
340 LOG.info("Calculated a load balance in " + (endTime-startTime) + "ms. " +
341 "Moving " + regionsToMove.size() + " regions off of " +
342 serversOverloaded + " overloaded servers onto " +
343 serversUnderloaded + " less loaded servers");
344
345 return regionsToMove;
346 }
347
348
349
350
351
352 static List<HRegionInfo> randomize(final List<HRegionInfo> regions) {
353 Collections.shuffle(regions, RANDOM);
354 return regions;
355 }
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370 private static class BalanceInfo {
371
372 private final int nextRegionForUnload;
373 private final int numRegionsAdded;
374
375 public BalanceInfo(int nextRegionForUnload, int numRegionsAdded) {
376 this.nextRegionForUnload = nextRegionForUnload;
377 this.numRegionsAdded = numRegionsAdded;
378 }
379
380 public int getNextRegionForUnload() {
381 return nextRegionForUnload;
382 }
383
384 public int getNumRegionsAdded() {
385 return numRegionsAdded;
386 }
387 }
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406 public static Map<HServerInfo,List<HRegionInfo>> roundRobinAssignment(
407 List<HRegionInfo> regions, List<HServerInfo> servers) {
408 if(regions.size() == 0 || servers.size() == 0) {
409 return null;
410 }
411 Map<HServerInfo,List<HRegionInfo>> assignments =
412 new TreeMap<HServerInfo,List<HRegionInfo>>();
413 int numRegions = regions.size();
414 int numServers = servers.size();
415 int max = (int)Math.ceil((float)numRegions/numServers);
416 int serverIdx = 0;
417 if (numServers > 1) {
418 serverIdx = RANDOM.nextInt(numServers);
419 }
420 int regionIdx = 0;
421 for (int j = 0; j < numServers; j++) {
422 HServerInfo server = servers.get((j+serverIdx) % numServers);
423 List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
424 for (int i=regionIdx; i<numRegions; i += numServers) {
425 serverRegions.add(regions.get(i % numRegions));
426 }
427 assignments.put(server, serverRegions);
428 regionIdx++;
429 }
430 return assignments;
431 }
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449 public static Map<HServerInfo, List<HRegionInfo>> retainAssignment(
450 Map<HRegionInfo, HServerAddress> regions, List<HServerInfo> servers) {
451 Map<HServerInfo, List<HRegionInfo>> assignments =
452 new TreeMap<HServerInfo, List<HRegionInfo>>();
453
454 Map<HServerAddress, HServerInfo> serverMap =
455 new TreeMap<HServerAddress, HServerInfo>();
456 for (HServerInfo server : servers) {
457 serverMap.put(server.getServerAddress(), server);
458 assignments.put(server, new ArrayList<HRegionInfo>());
459 }
460 for (Map.Entry<HRegionInfo, HServerAddress> region : regions.entrySet()) {
461 HServerAddress hsa = region.getValue();
462 HServerInfo server = hsa == null? null: serverMap.get(hsa);
463 if (server != null) {
464 assignments.get(server).add(region.getKey());
465 } else {
466 assignments.get(servers.get(RANDOM.nextInt(assignments.size()))).add(
467 region.getKey());
468 }
469 }
470 return assignments;
471 }
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491 @SuppressWarnings("unused")
492 private List<String> getTopBlockLocations(FileSystem fs, HRegionInfo region)
493 throws IOException {
494 String encodedName = region.getEncodedName();
495 Path path = new Path("/hbase/table/" + encodedName);
496 FileStatus status = fs.getFileStatus(path);
497 BlockLocation [] blockLocations =
498 fs.getFileBlockLocations(status, 0, status.getLen());
499 Map<HostAndWeight,HostAndWeight> hostWeights =
500 new TreeMap<HostAndWeight,HostAndWeight>(new HostAndWeight.HostComparator());
501 for(BlockLocation bl : blockLocations) {
502 String [] hosts = bl.getHosts();
503 long len = bl.getLength();
504 for(String host : hosts) {
505 HostAndWeight haw = hostWeights.get(host);
506 if(haw == null) {
507 haw = new HostAndWeight(host, len);
508 hostWeights.put(haw, haw);
509 } else {
510 haw.addWeight(len);
511 }
512 }
513 }
514 NavigableSet<HostAndWeight> orderedHosts = new TreeSet<HostAndWeight>(
515 new HostAndWeight.WeightComparator());
516 orderedHosts.addAll(hostWeights.values());
517 List<String> topHosts = new ArrayList<String>(orderedHosts.size());
518 for(HostAndWeight haw : orderedHosts.descendingSet()) {
519 topHosts.add(haw.getHost());
520 }
521 return topHosts;
522 }
523
524
525
526
527
528
529
530
531
532
533
534 private static class HostAndWeight {
535
536 private final String host;
537 private long weight;
538
539 public HostAndWeight(String host, long weight) {
540 this.host = host;
541 this.weight = weight;
542 }
543
544 public void addWeight(long weight) {
545 this.weight += weight;
546 }
547
548 public String getHost() {
549 return host;
550 }
551
552 public long getWeight() {
553 return weight;
554 }
555
556 private static class HostComparator implements Comparator<HostAndWeight> {
557 @Override
558 public int compare(HostAndWeight l, HostAndWeight r) {
559 return l.getHost().compareTo(r.getHost());
560 }
561 }
562
563 private static class WeightComparator implements Comparator<HostAndWeight> {
564 @Override
565 public int compare(HostAndWeight l, HostAndWeight r) {
566 if(l.getWeight() == r.getWeight()) {
567 return l.getHost().compareTo(r.getHost());
568 }
569 return l.getWeight() < r.getWeight() ? -1 : 1;
570 }
571 }
572 }
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592 public static Map<HRegionInfo,HServerInfo> immediateAssignment(
593 List<HRegionInfo> regions, List<HServerInfo> servers) {
594 Map<HRegionInfo,HServerInfo> assignments =
595 new TreeMap<HRegionInfo,HServerInfo>();
596 for(HRegionInfo region : regions) {
597 assignments.put(region, servers.get(RANDOM.nextInt(servers.size())));
598 }
599 return assignments;
600 }
601
602 public static HServerInfo randomAssignment(List<HServerInfo> servers) {
603 if (servers == null || servers.isEmpty()) {
604 LOG.warn("Wanted to do random assignment but no servers to assign to");
605 return null;
606 }
607 return servers.get(RANDOM.nextInt(servers.size()));
608 }
609
610
611
612
613
614
615
616
617
618
619
620 public static class RegionPlan implements Comparable<RegionPlan> {
621 private final HRegionInfo hri;
622 private final HServerInfo source;
623 private HServerInfo dest;
624
625
626
627
628
629
630
631
632
633
634
635
636 public RegionPlan(final HRegionInfo hri, HServerInfo source, HServerInfo dest) {
637 this.hri = hri;
638 this.source = source;
639 this.dest = dest;
640 }
641
642
643
644
645 public void setDestination(HServerInfo dest) {
646 this.dest = dest;
647 }
648
649
650
651
652
653 public HServerInfo getSource() {
654 return source;
655 }
656
657
658
659
660
661 public HServerInfo getDestination() {
662 return dest;
663 }
664
665
666
667
668
669 public String getRegionName() {
670 return this.hri.getEncodedName();
671 }
672
673 public HRegionInfo getRegionInfo() {
674 return this.hri;
675 }
676
677
678
679
680
681 @Override
682 public int compareTo(RegionPlan o) {
683 return getRegionName().compareTo(o.getRegionName());
684 }
685
686 @Override
687 public String toString() {
688 return "hri=" + this.hri.getRegionNameAsString() + ", src=" +
689 (this.source == null? "": this.source.getServerName()) +
690 ", dest=" + (this.dest == null? "": this.dest.getServerName());
691 }
692 }
693 }