1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import java.io.FileNotFoundException;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Collection;
27 import java.util.Collections;
28 import java.util.Comparator;
29 import java.util.HashMap;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.NavigableMap;
33 import java.util.Random;
34 import java.util.Set;
35 import java.util.TreeMap;
36
37 import org.apache.commons.logging.Log;
38 import org.apache.commons.logging.LogFactory;
39 import org.apache.hadoop.conf.Configuration;
40 import org.apache.hadoop.fs.FileSystem;
41 import org.apache.hadoop.hbase.ClusterStatus;
42 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
43 import org.apache.hadoop.hbase.HRegionInfo;
44 import org.apache.hadoop.hbase.HTableDescriptor;
45 import org.apache.hadoop.hbase.ServerName;
46 import org.apache.hadoop.hbase.TableExistsException;
47 import org.apache.hadoop.hbase.regionserver.HRegion;
48 import org.apache.hadoop.hbase.util.Bytes;
49
50 import com.google.common.base.Joiner;
51 import com.google.common.collect.ArrayListMultimap;
52 import com.google.common.collect.MinMaxPriorityQueue;
53 import com.google.common.collect.Sets;
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71 public class DefaultLoadBalancer implements LoadBalancer {
/** Logger for this balancer; note it is created with the LoadBalancer interface class as its name. */
private static final Log LOG = LogFactory.getLog(LoadBalancer.class);
/** Random source shared by the methods of this class; seeded with the current time in milliseconds. */
private static final Random RANDOM = new Random(System.currentTimeMillis());

/** Balancing tolerance read from "hbase.regions.slop" by setConf, which clamps it to [0, 1]. */
private float slop;
/** Configuration handed to setConf and returned unchanged by getConf. */
private Configuration config;
/** Cluster status supplied through setClusterStatus; read when mapping host names to servers. */
private ClusterStatus status;
/** Master services supplied through setMasterServices; used to look up table descriptors. */
private MasterServices services;
79
80 public void setClusterStatus(ClusterStatus st) {
81 this.status = st;
82 }
83
84 public void setMasterServices(MasterServices masterServices) {
85 this.services = masterServices;
86 }
87
88 @Override
89 public void setConf(Configuration conf) {
90 this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
91 if (slop < 0) slop = 0;
92 else if (slop > 1) slop = 1;
93 this.config = conf;
94 }
95
96 @Override
97 public Configuration getConf() {
98 return this.config;
99 }
100
101
102
103
104
105
106
107
108
109 private static class RegionInfoComparator implements Comparator<HRegionInfo> {
110 @Override
111 public int compare(HRegionInfo l, HRegionInfo r) {
112 long diff = r.getRegionId() - l.getRegionId();
113 if (diff < 0) return -1;
114 if (diff > 0) return 1;
115 return 0;
116 }
117 }
118
119
120 RegionInfoComparator riComparator = new RegionInfoComparator();
121
122 private class RegionPlanComparator implements Comparator<RegionPlan> {
123 @Override
124 public int compare(RegionPlan l, RegionPlan r) {
125 long diff = r.getRegionInfo().getRegionId() - l.getRegionInfo().getRegionId();
126 if (diff < 0) return -1;
127 if (diff > 0) return 1;
128 return 0;
129 }
130 }
131
132 RegionPlanComparator rpComparator = new RegionPlanComparator();
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219 public List<RegionPlan> balanceCluster(
220 Map<ServerName, List<HRegionInfo>> clusterState) {
221 boolean emptyRegionServerPresent = false;
222 long startTime = System.currentTimeMillis();
223
224 int numServers = clusterState.size();
225 if (numServers == 0) {
226 LOG.debug("numServers=0 so skipping load balancing");
227 return null;
228 }
229 NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad =
230 new TreeMap<ServerAndLoad, List<HRegionInfo>>();
231 int numRegions = 0;
232 int maxRegionCountPerServer = 0;
233
234 for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterState.entrySet()) {
235 List<HRegionInfo> regions = server.getValue();
236 int sz = regions.size();
237 if (sz == 0) emptyRegionServerPresent = true;
238 numRegions += sz;
239 if (maxRegionCountPerServer < sz) maxRegionCountPerServer = sz;
240 serversByLoad.put(new ServerAndLoad(server.getKey(), sz), regions);
241 }
242
243 float average = (float)numRegions / numServers;
244
245 int floor = (int) Math.floor(average * (1 - slop));
246 int ceiling = (int) Math.ceil(average * (1 + slop));
247 if (serversByLoad.lastKey().getLoad() <= ceiling &&
248 serversByLoad.firstKey().getLoad() >= floor) {
249
250 LOG.info("Skipping load balancing because balanced cluster; " +
251 "servers=" + numServers + " " +
252 "regions=" + numRegions + " average=" + average + " " +
253 "mostloaded=" + serversByLoad.lastKey().getLoad() +
254 " leastloaded=" + serversByLoad.firstKey().getLoad());
255 return null;
256 }
257 int min = numRegions / numServers;
258 int max = numRegions % numServers == 0 ? min : min + 1;
259 if (maxRegionCountPerServer == 1) return null;
260
261
262 StringBuilder strBalanceParam = new StringBuilder();
263 strBalanceParam.append("Balance parameter: numRegions=").append(numRegions)
264 .append(", numServers=").append(numServers).append(", max=").append(max)
265 .append(", min=").append(min);
266 LOG.debug(strBalanceParam.toString());
267
268
269
270 MinMaxPriorityQueue<RegionPlan> regionsToMove =
271 MinMaxPriorityQueue.orderedBy(rpComparator).create();
272 List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>();
273
274
275 int serversOverloaded = 0;
276
277 boolean fetchFromTail = false;
278 Map<ServerName, BalanceInfo> serverBalanceInfo =
279 new TreeMap<ServerName, BalanceInfo>();
280 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server:
281 serversByLoad.descendingMap().entrySet()) {
282 ServerAndLoad sal = server.getKey();
283 int regionCount = sal.getLoad();
284 if (regionCount <= max) {
285 serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
286 break;
287 }
288 serversOverloaded++;
289 List<HRegionInfo> regions = server.getValue();
290 int numToOffload = Math.min(regionCount - max, regions.size());
291
292
293 Collections.sort(regions, riComparator);
294 int numTaken = 0;
295 for (int i = 0; i <= numToOffload; ) {
296 HRegionInfo hri = regions.get(i);
297 if (fetchFromTail) {
298 hri = regions.get(regions.size() - 1 - i);
299 }
300 i++;
301
302 if (hri.isMetaRegion()) continue;
303 regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
304 numTaken++;
305 if (numTaken >= numToOffload) break;
306
307 if (emptyRegionServerPresent) {
308 fetchFromTail = !fetchFromTail;
309 }
310 }
311 serverBalanceInfo.put(sal.getServerName(),
312 new BalanceInfo(numToOffload, (-1)*numTaken));
313 }
314 int totalNumMoved = regionsToMove.size();
315
316
317 int neededRegions = 0;
318 fetchFromTail = false;
319
320 Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
321 int maxToTake = numRegions - (int)average;
322 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server:
323 serversByLoad.entrySet()) {
324 if (maxToTake == 0) break;
325 int regionCount = server.getKey().getLoad();
326 if (regionCount >= min && regionCount > 0) {
327 continue;
328 }
329 int regionsToPut = min - regionCount;
330 if (regionsToPut == 0)
331 {
332 regionsToPut = 1;
333 maxToTake--;
334 }
335 underloadedServers.put(server.getKey().getServerName(), regionsToPut);
336 }
337
338 int serversUnderloaded = underloadedServers.size();
339 int incr = 1;
340 List<ServerName> sns =
341 Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
342 Collections.shuffle(sns, RANDOM);
343 while (regionsToMove.size() > 0) {
344 int cnt = 0;
345 int i = incr > 0 ? 0 : underloadedServers.size()-1;
346 for (; i >= 0 && i < underloadedServers.size(); i += incr) {
347 if (regionsToMove.isEmpty()) break;
348 ServerName si = sns.get(i);
349 int numToTake = underloadedServers.get(si);
350 if (numToTake == 0) continue;
351
352 addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
353 if (emptyRegionServerPresent) {
354 fetchFromTail = !fetchFromTail;
355 }
356
357 underloadedServers.put(si, numToTake-1);
358 cnt++;
359 BalanceInfo bi = serverBalanceInfo.get(si);
360 if (bi == null) {
361 bi = new BalanceInfo(0, 0);
362 serverBalanceInfo.put(si, bi);
363 }
364 bi.setNumRegionsAdded(bi.getNumRegionsAdded()+1);
365 }
366 if (cnt == 0) break;
367
368 incr = -incr;
369 }
370 for (Integer i : underloadedServers.values()) {
371
372 neededRegions += i;
373 }
374
375
376
377 if (neededRegions == 0 && regionsToMove.isEmpty()) {
378 long endTime = System.currentTimeMillis();
379 LOG.info("Calculated a load balance in " + (endTime-startTime) + "ms. " +
380 "Moving " + totalNumMoved + " regions off of " +
381 serversOverloaded + " overloaded servers onto " +
382 serversUnderloaded + " less loaded servers");
383 return regionsToReturn;
384 }
385
386
387
388
389
390 if (neededRegions != 0) {
391
392 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
393 serversByLoad.descendingMap().entrySet()) {
394 BalanceInfo balanceInfo =
395 serverBalanceInfo.get(server.getKey().getServerName());
396 int idx =
397 balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
398 if (idx >= server.getValue().size()) break;
399 HRegionInfo region = server.getValue().get(idx);
400 if (region.isMetaRegion()) continue;
401 regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
402 totalNumMoved++;
403 if (--neededRegions == 0) {
404
405 break;
406 }
407 }
408 }
409
410
411
412
413
414 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
415 serversByLoad.entrySet()) {
416 int regionCount = server.getKey().getLoad();
417 if (regionCount >= min) break;
418 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
419 if(balanceInfo != null) {
420 regionCount += balanceInfo.getNumRegionsAdded();
421 }
422 if(regionCount >= min) {
423 continue;
424 }
425 int numToTake = min - regionCount;
426 int numTaken = 0;
427 while(numTaken < numToTake && 0 < regionsToMove.size()) {
428 addRegionPlan(regionsToMove, fetchFromTail,
429 server.getKey().getServerName(), regionsToReturn);
430 numTaken++;
431 if (emptyRegionServerPresent) {
432 fetchFromTail = !fetchFromTail;
433 }
434 }
435 }
436
437
438 if (0 < regionsToMove.size()) {
439 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
440 serversByLoad.entrySet()) {
441 int regionCount = server.getKey().getLoad();
442 if(regionCount >= max) {
443 break;
444 }
445 addRegionPlan(regionsToMove, fetchFromTail,
446 server.getKey().getServerName(), regionsToReturn);
447 if (emptyRegionServerPresent) {
448 fetchFromTail = !fetchFromTail;
449 }
450 if (regionsToMove.isEmpty()) {
451 break;
452 }
453 }
454 }
455
456 long endTime = System.currentTimeMillis();
457
458 if (!regionsToMove.isEmpty() || neededRegions != 0) {
459
460 LOG.warn("regionsToMove=" + totalNumMoved +
461 ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded +
462 ", serversUnderloaded=" + serversUnderloaded);
463 StringBuilder sb = new StringBuilder();
464 for (Map.Entry<ServerName, List<HRegionInfo>> e: clusterState.entrySet()) {
465 if (sb.length() > 0) sb.append(", ");
466 sb.append(e.getKey().toString());
467 sb.append(" ");
468 sb.append(e.getValue().size());
469 }
470 LOG.warn("Input " + sb.toString());
471 }
472
473
474 LOG.info("Done. Calculated a load balance in " + (endTime-startTime) + "ms. " +
475 "Moving " + totalNumMoved + " regions off of " +
476 serversOverloaded + " overloaded servers onto " +
477 serversUnderloaded + " less loaded servers");
478
479 return regionsToReturn;
480 }
481
482
483
484
485 void addRegionPlan(final MinMaxPriorityQueue<RegionPlan> regionsToMove,
486 final boolean fetchFromTail, final ServerName sn, List<RegionPlan> regionsToReturn) {
487 RegionPlan rp = null;
488 if (!fetchFromTail) rp = regionsToMove.remove();
489 else rp = regionsToMove.removeLast();
490 rp.setDestination(sn);
491 regionsToReturn.add(rp);
492 }
493
494
495
496
497
498
499
500
501
502 private static class BalanceInfo {
503
504 private final int nextRegionForUnload;
505 private int numRegionsAdded;
506
507 public BalanceInfo(int nextRegionForUnload, int numRegionsAdded) {
508 this.nextRegionForUnload = nextRegionForUnload;
509 this.numRegionsAdded = numRegionsAdded;
510 }
511
512 public int getNextRegionForUnload() {
513 return nextRegionForUnload;
514 }
515
516 public int getNumRegionsAdded() {
517 return numRegionsAdded;
518 }
519
520 public void setNumRegionsAdded(int numAdded) {
521 this.numRegionsAdded = numAdded;
522 }
523 }
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542 public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(
543 List<HRegionInfo> regions, List<ServerName> servers) {
544 if (regions.isEmpty() || servers.isEmpty()) {
545 return null;
546 }
547 Map<ServerName, List<HRegionInfo>> assignments =
548 new TreeMap<ServerName,List<HRegionInfo>>();
549 int numRegions = regions.size();
550 int numServers = servers.size();
551 int max = (int)Math.ceil((float)numRegions/numServers);
552 int serverIdx = 0;
553 if (numServers > 1) {
554 serverIdx = RANDOM.nextInt(numServers);
555 }
556 int regionIdx = 0;
557 for (int j = 0; j < numServers; j++) {
558 ServerName server = servers.get((j + serverIdx) % numServers);
559 List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
560 for (int i=regionIdx; i<numRegions; i += numServers) {
561 serverRegions.add(regions.get(i % numRegions));
562 }
563 assignments.put(server, serverRegions);
564 regionIdx++;
565 }
566 return assignments;
567 }
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585 public Map<ServerName, List<HRegionInfo>> retainAssignment(
586 Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
587
588
589
590
591
592
593 ArrayListMultimap<String, ServerName> serversByHostname =
594 ArrayListMultimap.create();
595 for (ServerName server : servers) {
596 serversByHostname.put(server.getHostname(), server);
597 }
598
599
600 Map<ServerName, List<HRegionInfo>> assignments =
601 new TreeMap<ServerName, List<HRegionInfo>>();
602
603 for (ServerName server : servers) {
604 assignments.put(server, new ArrayList<HRegionInfo>());
605 }
606
607
608
609
610 Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
611
612 int numRandomAssignments = 0;
613 int numRetainedAssigments = 0;
614 for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
615 HRegionInfo region = entry.getKey();
616 ServerName oldServerName = entry.getValue();
617 List<ServerName> localServers = new ArrayList<ServerName>();
618 if (oldServerName != null) {
619 localServers = serversByHostname.get(oldServerName.getHostname());
620 }
621 if (localServers.isEmpty()) {
622
623
624 ServerName randomServer = servers.get(RANDOM.nextInt(servers.size()));
625 assignments.get(randomServer).add(region);
626 numRandomAssignments++;
627 if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
628 } else if (localServers.size() == 1) {
629
630 assignments.get(localServers.get(0)).add(region);
631 numRetainedAssigments++;
632 } else {
633
634 int size = localServers.size();
635 ServerName target = localServers.get(RANDOM.nextInt(size));
636 assignments.get(target).add(region);
637 numRetainedAssigments++;
638 }
639 }
640
641 String randomAssignMsg = "";
642 if (numRandomAssignments > 0) {
643 randomAssignMsg = numRandomAssignments + " regions were assigned " +
644 "to random hosts, since the old hosts for these regions are no " +
645 "longer present in the cluster. These hosts were:\n " +
646 Joiner.on("\n ").join(oldHostsNoLongerPresent);
647 }
648
649 LOG.info("Reassigned " + regions.size() + " regions. " +
650 numRetainedAssigments + " retained the pre-restart assignment. " +
651 randomAssignMsg);
652 return assignments;
653 }
654
655
656
657
658
659
660
661
662
663
664
665 @SuppressWarnings("unused")
666 private List<ServerName> getTopBlockLocations(FileSystem fs,
667 HRegionInfo region) {
668 List<ServerName> topServerNames = null;
669 try {
670 HTableDescriptor tableDescriptor = getTableDescriptor(
671 region.getTableName());
672 if (tableDescriptor != null) {
673 HDFSBlocksDistribution blocksDistribution =
674 HRegion.computeHDFSBlocksDistribution(config, tableDescriptor,
675 region.getEncodedName());
676 List<String> topHosts = blocksDistribution.getTopHosts();
677 topServerNames = mapHostNameToServerName(topHosts);
678 }
679 } catch (IOException ioe) {
680 LOG.debug("IOException during HDFSBlocksDistribution computation. for " +
681 "region = " + region.getEncodedName() , ioe);
682 }
683
684 return topServerNames;
685 }
686
687
688
689
690
691
692
693 private HTableDescriptor getTableDescriptor(byte[] tableName)
694 throws IOException {
695 HTableDescriptor tableDescriptor = null;
696 try {
697 if ( this.services != null)
698 {
699 tableDescriptor = this.services.getTableDescriptors().
700 get(Bytes.toString(tableName));
701 }
702 } catch (FileNotFoundException fnfe) {
703 LOG.debug("FileNotFoundException during getTableDescriptors." +
704 " Current table name = " + tableName , fnfe);
705 }
706
707 return tableDescriptor;
708 }
709
710
711
712
713
714
715
716 private List<ServerName> mapHostNameToServerName(List<String> hosts) {
717 if ( hosts == null || status == null) {
718 return null;
719 }
720
721 List<ServerName> topServerNames = new ArrayList<ServerName>();
722 Collection<ServerName> regionServers = status.getServers();
723
724
725 HashMap<String, ServerName> hostToServerName =
726 new HashMap<String, ServerName>();
727 for (ServerName sn : regionServers) {
728 hostToServerName.put(sn.getHostname(), sn);
729 }
730
731 for (String host : hosts ) {
732 ServerName sn = hostToServerName.get(host);
733
734
735 if (sn != null) {
736 topServerNames.add(sn);
737 }
738 }
739 return topServerNames;
740 }
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761 public Map<HRegionInfo, ServerName> immediateAssignment(
762 List<HRegionInfo> regions, List<ServerName> servers) {
763 Map<HRegionInfo,ServerName> assignments =
764 new TreeMap<HRegionInfo,ServerName>();
765 for(HRegionInfo region : regions) {
766 assignments.put(region, servers.get(RANDOM.nextInt(servers.size())));
767 }
768 return assignments;
769 }
770
771 public ServerName randomAssignment(List<ServerName> servers) {
772 if (servers == null || servers.isEmpty()) {
773 LOG.warn("Wanted to do random assignment but no servers to assign to");
774 return null;
775 }
776 return servers.get(RANDOM.nextInt(servers.size()));
777 }
778
779 }