1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import java.io.FileNotFoundException;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Collection;
27 import java.util.Collections;
28 import java.util.Comparator;
29 import java.util.HashMap;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.NavigableMap;
33 import java.util.Random;
34 import java.util.Set;
35 import java.util.TreeMap;
36
37 import org.apache.commons.logging.Log;
38 import org.apache.commons.logging.LogFactory;
39 import org.apache.hadoop.conf.Configuration;
40 import org.apache.hadoop.fs.FileSystem;
41 import org.apache.hadoop.hbase.ClusterStatus;
42 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
43 import org.apache.hadoop.hbase.HRegionInfo;
44 import org.apache.hadoop.hbase.HTableDescriptor;
45 import org.apache.hadoop.hbase.ServerName;
46 import org.apache.hadoop.hbase.TableExistsException;
47 import org.apache.hadoop.hbase.regionserver.HRegion;
48 import org.apache.hadoop.hbase.util.Bytes;
49
50 import com.google.common.base.Joiner;
51 import com.google.common.collect.ArrayListMultimap;
52 import com.google.common.collect.MinMaxPriorityQueue;
53 import com.google.common.collect.Sets;
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71 public class DefaultLoadBalancer implements LoadBalancer {
72 private static final Log LOG = LogFactory.getLog(LoadBalancer.class);
73 private static final Random RANDOM = new Random(System.currentTimeMillis());
74
75 private float slop;
76 private Configuration config;
77 private ClusterStatus status;
78 private MasterServices services;
79
80 public void setClusterStatus(ClusterStatus st) {
81 this.status = st;
82 }
83
84 public void setMasterServices(MasterServices masterServices) {
85 this.services = masterServices;
86 }
87
88 @Override
89 public void setConf(Configuration conf) {
90 this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
91 if (slop < 0) slop = 0;
92 else if (slop > 1) slop = 1;
93 this.config = conf;
94 }
95
96 @Override
97 public Configuration getConf() {
98 return this.config;
99 }
100
101
102
103
104
105
106
107
108
109 private static class RegionInfoComparator implements Comparator<HRegionInfo> {
110 @Override
111 public int compare(HRegionInfo l, HRegionInfo r) {
112 long diff = r.getRegionId() - l.getRegionId();
113 if (diff < 0) return -1;
114 if (diff > 0) return 1;
115 return 0;
116 }
117 }
118
119
120 RegionInfoComparator riComparator = new RegionInfoComparator();
121
122 private class RegionPlanComparator implements Comparator<RegionPlan> {
123 @Override
124 public int compare(RegionPlan l, RegionPlan r) {
125 long diff = r.getRegionInfo().getRegionId() - l.getRegionInfo().getRegionId();
126 if (diff < 0) return -1;
127 if (diff > 0) return 1;
128 return 0;
129 }
130 }
131
132 RegionPlanComparator rpComparator = new RegionPlanComparator();
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219 public List<RegionPlan> balanceCluster(
220 Map<ServerName, List<HRegionInfo>> clusterState) {
221 boolean emptyRegionServerPresent = false;
222 long startTime = System.currentTimeMillis();
223
224 int numServers = clusterState.size();
225 if (numServers == 0) {
226 LOG.debug("numServers=0 so skipping load balancing");
227 return null;
228 }
229 NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad =
230 new TreeMap<ServerAndLoad, List<HRegionInfo>>();
231 int numRegions = 0;
232 int maxRegionCountPerServer = 0;
233
234 for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterState.entrySet()) {
235 List<HRegionInfo> regions = server.getValue();
236 int sz = regions.size();
237 if (sz == 0) emptyRegionServerPresent = true;
238 numRegions += sz;
239 if (maxRegionCountPerServer < sz) maxRegionCountPerServer = sz;
240 serversByLoad.put(new ServerAndLoad(server.getKey(), sz), regions);
241 }
242
243 float average = (float)numRegions / numServers;
244
245 int floor = (int) Math.floor(average * (1 - slop));
246 int ceiling = (int) Math.ceil(average * (1 + slop));
247 if (serversByLoad.lastKey().getLoad() <= ceiling &&
248 serversByLoad.firstKey().getLoad() >= floor) {
249
250 LOG.info("Skipping load balancing because balanced cluster; " +
251 "servers=" + numServers + " " +
252 "regions=" + numRegions + " average=" + average + " " +
253 "mostloaded=" + serversByLoad.lastKey().getLoad() +
254 " leastloaded=" + serversByLoad.firstKey().getLoad());
255 return null;
256 }
257 int min = numRegions / numServers;
258 int max = numRegions % numServers == 0 ? min : min + 1;
259 if (maxRegionCountPerServer == 1) return null;
260
261
262 StringBuilder strBalanceParam = new StringBuilder();
263 strBalanceParam.append("Balance parameter: numRegions=").append(numRegions)
264 .append(", numServers=").append(numServers).append(", max=").append(max)
265 .append(", min=").append(min);
266 LOG.debug(strBalanceParam.toString());
267
268
269
270 MinMaxPriorityQueue<RegionPlan> regionsToMove =
271 MinMaxPriorityQueue.orderedBy(rpComparator).create();
272 List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>();
273
274
275 int serversOverloaded = 0;
276
277 boolean fetchFromTail = false;
278 Map<ServerName, BalanceInfo> serverBalanceInfo =
279 new TreeMap<ServerName, BalanceInfo>();
280 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server:
281 serversByLoad.descendingMap().entrySet()) {
282 ServerAndLoad sal = server.getKey();
283 int regionCount = sal.getLoad();
284 if (regionCount <= max) {
285 serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
286 break;
287 }
288 serversOverloaded++;
289 List<HRegionInfo> regions = server.getValue();
290 int numToOffload = Math.min(regionCount - max, regions.size());
291
292
293 Collections.sort(regions, riComparator);
294 int numTaken = 0;
295 for (int i = 0; i <= numToOffload; ) {
296 HRegionInfo hri = regions.get(i);
297 if (fetchFromTail) {
298 hri = regions.get(regions.size() - 1 - i);
299 }
300 i++;
301
302 if (hri.isMetaRegion()) continue;
303 regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
304 numTaken++;
305 if (numTaken >= numToOffload) break;
306
307 if (emptyRegionServerPresent) {
308 fetchFromTail = !fetchFromTail;
309 }
310 }
311 serverBalanceInfo.put(sal.getServerName(),
312 new BalanceInfo(numToOffload, (-1)*numTaken));
313 }
314 int totalNumMoved = regionsToMove.size();
315
316
317 int neededRegions = 0;
318 fetchFromTail = false;
319
320 Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
321 int maxToTake = numRegions - (int)average;
322 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server:
323 serversByLoad.entrySet()) {
324 if (maxToTake == 0) break;
325 int regionCount = server.getKey().getLoad();
326 if (regionCount >= min && regionCount > 0) {
327 continue;
328 }
329 int regionsToPut = min - regionCount;
330 if (regionsToPut == 0)
331 {
332 regionsToPut = 1;
333 }
334 maxToTake -= regionsToPut;
335 underloadedServers.put(server.getKey().getServerName(), regionsToPut);
336 }
337
338 int serversUnderloaded = underloadedServers.size();
339 int incr = 1;
340 List<ServerName> sns =
341 Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
342 Collections.shuffle(sns, RANDOM);
343 while (regionsToMove.size() > 0) {
344 int cnt = 0;
345 int i = incr > 0 ? 0 : underloadedServers.size()-1;
346 for (; i >= 0 && i < underloadedServers.size(); i += incr) {
347 if (regionsToMove.isEmpty()) break;
348 ServerName si = sns.get(i);
349 int numToTake = underloadedServers.get(si);
350 if (numToTake == 0) continue;
351
352 addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
353 if (emptyRegionServerPresent) {
354 fetchFromTail = !fetchFromTail;
355 }
356
357 underloadedServers.put(si, numToTake-1);
358 cnt++;
359 BalanceInfo bi = serverBalanceInfo.get(si);
360 if (bi == null) {
361 bi = new BalanceInfo(0, 0);
362 serverBalanceInfo.put(si, bi);
363 }
364 bi.setNumRegionsAdded(bi.getNumRegionsAdded()+1);
365 }
366 if (cnt == 0) break;
367
368 incr = -incr;
369 }
370 for (Integer i : underloadedServers.values()) {
371
372 neededRegions += i;
373 }
374
375
376
377 if (neededRegions == 0 && regionsToMove.isEmpty()) {
378 long endTime = System.currentTimeMillis();
379 LOG.info("Calculated a load balance in " + (endTime-startTime) + "ms. " +
380 "Moving " + totalNumMoved + " regions off of " +
381 serversOverloaded + " overloaded servers onto " +
382 serversUnderloaded + " less loaded servers");
383 return regionsToReturn;
384 }
385
386
387
388
389
390 if (neededRegions != 0) {
391
392 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
393 serversByLoad.descendingMap().entrySet()) {
394 BalanceInfo balanceInfo =
395 serverBalanceInfo.get(server.getKey().getServerName());
396 int idx =
397 balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
398 if (idx >= server.getValue().size()) break;
399 HRegionInfo region = server.getValue().get(idx);
400 if (region.isMetaRegion()) continue;
401 regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
402 totalNumMoved++;
403 if (--neededRegions == 0) {
404
405 break;
406 }
407 }
408 }
409
410
411
412
413
414 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
415 serversByLoad.entrySet()) {
416 int regionCount = server.getKey().getLoad();
417 if (regionCount >= min) break;
418 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
419 if(balanceInfo != null) {
420 regionCount += balanceInfo.getNumRegionsAdded();
421 }
422 if(regionCount >= min) {
423 continue;
424 }
425 int numToTake = min - regionCount;
426 int numTaken = 0;
427 while(numTaken < numToTake && 0 < regionsToMove.size()) {
428 addRegionPlan(regionsToMove, fetchFromTail,
429 server.getKey().getServerName(), regionsToReturn);
430 numTaken++;
431 if (emptyRegionServerPresent) {
432 fetchFromTail = !fetchFromTail;
433 }
434 }
435 }
436
437
438 if (0 < regionsToMove.size()) {
439 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
440 serversByLoad.entrySet()) {
441 int regionCount = server.getKey().getLoad();
442 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
443 if(balanceInfo != null) {
444 regionCount += balanceInfo.getNumRegionsAdded();
445 }
446 if(regionCount >= max) {
447 break;
448 }
449 addRegionPlan(regionsToMove, fetchFromTail,
450 server.getKey().getServerName(), regionsToReturn);
451 if (emptyRegionServerPresent) {
452 fetchFromTail = !fetchFromTail;
453 }
454 if (regionsToMove.isEmpty()) {
455 break;
456 }
457 }
458 }
459
460 long endTime = System.currentTimeMillis();
461
462 if (!regionsToMove.isEmpty() || neededRegions != 0) {
463
464 LOG.warn("regionsToMove=" + totalNumMoved +
465 ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded +
466 ", serversUnderloaded=" + serversUnderloaded);
467 StringBuilder sb = new StringBuilder();
468 for (Map.Entry<ServerName, List<HRegionInfo>> e: clusterState.entrySet()) {
469 if (sb.length() > 0) sb.append(", ");
470 sb.append(e.getKey().toString());
471 sb.append(" ");
472 sb.append(e.getValue().size());
473 }
474 LOG.warn("Input " + sb.toString());
475 }
476
477
478 LOG.info("Done. Calculated a load balance in " + (endTime-startTime) + "ms. " +
479 "Moving " + totalNumMoved + " regions off of " +
480 serversOverloaded + " overloaded servers onto " +
481 serversUnderloaded + " less loaded servers");
482
483 return regionsToReturn;
484 }
485
486
487
488
489 void addRegionPlan(final MinMaxPriorityQueue<RegionPlan> regionsToMove,
490 final boolean fetchFromTail, final ServerName sn, List<RegionPlan> regionsToReturn) {
491 RegionPlan rp = null;
492 if (!fetchFromTail) rp = regionsToMove.remove();
493 else rp = regionsToMove.removeLast();
494 rp.setDestination(sn);
495 regionsToReturn.add(rp);
496 }
497
498
499
500
501
502
503
504
505
506 private static class BalanceInfo {
507
508 private final int nextRegionForUnload;
509 private int numRegionsAdded;
510
511 public BalanceInfo(int nextRegionForUnload, int numRegionsAdded) {
512 this.nextRegionForUnload = nextRegionForUnload;
513 this.numRegionsAdded = numRegionsAdded;
514 }
515
516 public int getNextRegionForUnload() {
517 return nextRegionForUnload;
518 }
519
520 public int getNumRegionsAdded() {
521 return numRegionsAdded;
522 }
523
524 public void setNumRegionsAdded(int numAdded) {
525 this.numRegionsAdded = numAdded;
526 }
527 }
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546 public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(
547 List<HRegionInfo> regions, List<ServerName> servers) {
548 if (regions.isEmpty() || servers.isEmpty()) {
549 return null;
550 }
551 Map<ServerName, List<HRegionInfo>> assignments =
552 new TreeMap<ServerName,List<HRegionInfo>>();
553 int numRegions = regions.size();
554 int numServers = servers.size();
555 int max = (int)Math.ceil((float)numRegions/numServers);
556 int serverIdx = 0;
557 if (numServers > 1) {
558 serverIdx = RANDOM.nextInt(numServers);
559 }
560 int regionIdx = 0;
561 for (int j = 0; j < numServers; j++) {
562 ServerName server = servers.get((j + serverIdx) % numServers);
563 List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
564 for (int i=regionIdx; i<numRegions; i += numServers) {
565 serverRegions.add(regions.get(i % numRegions));
566 }
567 assignments.put(server, serverRegions);
568 regionIdx++;
569 }
570 return assignments;
571 }
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589 public Map<ServerName, List<HRegionInfo>> retainAssignment(
590 Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
591
592
593
594
595
596
597 ArrayListMultimap<String, ServerName> serversByHostname =
598 ArrayListMultimap.create();
599 for (ServerName server : servers) {
600 serversByHostname.put(server.getHostname(), server);
601 }
602
603
604 Map<ServerName, List<HRegionInfo>> assignments =
605 new TreeMap<ServerName, List<HRegionInfo>>();
606
607 for (ServerName server : servers) {
608 assignments.put(server, new ArrayList<HRegionInfo>());
609 }
610
611
612
613
614 Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
615
616 int numRandomAssignments = 0;
617 int numRetainedAssigments = 0;
618 for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
619 HRegionInfo region = entry.getKey();
620 ServerName oldServerName = entry.getValue();
621 List<ServerName> localServers = new ArrayList<ServerName>();
622 if (oldServerName != null) {
623 localServers = serversByHostname.get(oldServerName.getHostname());
624 }
625 if (localServers.isEmpty()) {
626
627
628 ServerName randomServer = servers.get(RANDOM.nextInt(servers.size()));
629 assignments.get(randomServer).add(region);
630 numRandomAssignments++;
631 if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
632 } else if (localServers.size() == 1) {
633
634 assignments.get(localServers.get(0)).add(region);
635 numRetainedAssigments++;
636 } else {
637
638 int size = localServers.size();
639 ServerName target = localServers.get(RANDOM.nextInt(size));
640 assignments.get(target).add(region);
641 numRetainedAssigments++;
642 }
643 }
644
645 String randomAssignMsg = "";
646 if (numRandomAssignments > 0) {
647 randomAssignMsg = numRandomAssignments + " regions were assigned " +
648 "to random hosts, since the old hosts for these regions are no " +
649 "longer present in the cluster. These hosts were:\n " +
650 Joiner.on("\n ").join(oldHostsNoLongerPresent);
651 }
652
653 LOG.info("Reassigned " + regions.size() + " regions. " +
654 numRetainedAssigments + " retained the pre-restart assignment. " +
655 randomAssignMsg);
656 return assignments;
657 }
658
659
660
661
662
663
664
665
666
667
668
669 @SuppressWarnings("unused")
670 private List<ServerName> getTopBlockLocations(FileSystem fs,
671 HRegionInfo region) {
672 List<ServerName> topServerNames = null;
673 try {
674 HTableDescriptor tableDescriptor = getTableDescriptor(
675 region.getTableName());
676 if (tableDescriptor != null) {
677 HDFSBlocksDistribution blocksDistribution =
678 HRegion.computeHDFSBlocksDistribution(config, tableDescriptor,
679 region.getEncodedName());
680 List<String> topHosts = blocksDistribution.getTopHosts();
681 topServerNames = mapHostNameToServerName(topHosts);
682 }
683 } catch (IOException ioe) {
684 LOG.debug("IOException during HDFSBlocksDistribution computation. for " +
685 "region = " + region.getEncodedName() , ioe);
686 }
687
688 return topServerNames;
689 }
690
691
692
693
694
695
696
697 private HTableDescriptor getTableDescriptor(byte[] tableName)
698 throws IOException {
699 HTableDescriptor tableDescriptor = null;
700 try {
701 if ( this.services != null)
702 {
703 tableDescriptor = this.services.getTableDescriptors().
704 get(Bytes.toString(tableName));
705 }
706 } catch (FileNotFoundException fnfe) {
707 LOG.debug("FileNotFoundException during getTableDescriptors." +
708 " Current table name = " + tableName , fnfe);
709 }
710
711 return tableDescriptor;
712 }
713
714
715
716
717
718
719
720 private List<ServerName> mapHostNameToServerName(List<String> hosts) {
721 if ( hosts == null || status == null) {
722 return null;
723 }
724
725 List<ServerName> topServerNames = new ArrayList<ServerName>();
726 Collection<ServerName> regionServers = status.getServers();
727
728
729 HashMap<String, ServerName> hostToServerName =
730 new HashMap<String, ServerName>();
731 for (ServerName sn : regionServers) {
732 hostToServerName.put(sn.getHostname(), sn);
733 }
734
735 for (String host : hosts ) {
736 ServerName sn = hostToServerName.get(host);
737
738
739 if (sn != null) {
740 topServerNames.add(sn);
741 }
742 }
743 return topServerNames;
744 }
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765 public Map<HRegionInfo, ServerName> immediateAssignment(
766 List<HRegionInfo> regions, List<ServerName> servers) {
767 Map<HRegionInfo,ServerName> assignments =
768 new TreeMap<HRegionInfo,ServerName>();
769 for(HRegionInfo region : regions) {
770 assignments.put(region, servers.get(RANDOM.nextInt(servers.size())));
771 }
772 return assignments;
773 }
774
775 public ServerName randomAssignment(List<ServerName> servers) {
776 if (servers == null || servers.isEmpty()) {
777 LOG.warn("Wanted to do random assignment but no servers to assign to");
778 return null;
779 }
780 return servers.get(RANDOM.nextInt(servers.size()));
781 }
782
783 }