1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.balancer;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Comparator;
23 import java.util.Deque;
24 import java.util.HashMap;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.Map.Entry;
28 import java.util.NavigableMap;
29 import java.util.Random;
30 import java.util.Set;
31 import java.util.TreeMap;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.hbase.ClusterStatus;
37 import org.apache.hadoop.hbase.HBaseIOException;
38 import org.apache.hadoop.hbase.HRegionInfo;
39 import org.apache.hadoop.hbase.RegionLoad;
40 import org.apache.hadoop.hbase.ServerName;
41 import org.apache.hadoop.hbase.master.AssignmentManager;
42 import org.apache.hadoop.hbase.master.LoadBalancer;
43 import org.apache.hadoop.hbase.master.MasterServices;
44
45 import com.google.common.annotations.VisibleForTesting;
46 import com.google.common.base.Joiner;
47 import com.google.common.collect.ArrayListMultimap;
48 import com.google.common.collect.Sets;
49
50
51
52
53
54
55
56 public abstract class BaseLoadBalancer implements LoadBalancer {
// Smallest number of servers for which needsBalance() will consider balancing;
// with fewer servers it logs a debug message and returns false.
57 private static final int MIN_SERVER_BALANCE = 2;
// Set to true by stop(String) and reported by isStopped(); volatile so the
// flag written by one thread is visible to others.
58 private volatile boolean stopped = false;
59
60
61
62
63
64
65 protected static class Cluster {
// Server index -> ServerName; the constructor keeps the entry with the highest
// start code when several entries share the same host:port.
66 ServerName[] servers;
// Table index -> table name, in order of first appearance during construction.
67 ArrayList<String> tables;
// Region index -> region.
68 HRegionInfo[] regions;
// Region index -> load history taken from the constructor's `loads` map
// (looked up by region name, then by encoded name); entries may be null.
69 Deque<RegionLoad>[] regionLoads;
// Region index -> server indices returned by the region finder for that
// region; -1 marks a location that is not a known server. Entries stay null
// when no region finder was supplied.
70 int[][] regionLocations;
71
// Server index -> region indices currently placed on that server.
72 int[][] regionsPerServer;
// Region index -> server index it is currently placed on.
73 int[] regionIndexToServerIndex;
// Region index -> server index it was on when this Cluster was constructed.
74 int[] initialRegionIndexToServerIndex;
// Region index -> table index.
75 int[] regionIndexToTableIndex;
// [server index][table index] -> number of that table's regions on the server.
76 int[][] numRegionsPerServerPerTable;
// Table index -> largest per-server region count for that table.
77 int[] numMaxRegionsPerTable;
78
// Server indices; ordered by region count after sortServersByRegionCount().
79 Integer[] serverIndicesSortedByRegionCount;
80
// host:port -> server index.
81 Map<String, Integer> serversToIndex;
// table name -> table index.
82 Map<String, Integer> tablesToIndex;
83
84 int numRegions;
85 int numServers;
86 int numTables;
87
// Counters maintained by regionMoved(): incremented when a region leaves its
// initial server, decremented when it returns to it.
88 int numMovedRegions = 0;
89 int numMovedMetaRegions = 0;
90
91 protected Cluster(Map<ServerName, List<HRegionInfo>> clusterState, Map<String, Deque<RegionLoad>> loads,
92 RegionLocationFinder regionFinder) {
93
94 serversToIndex = new HashMap<String, Integer>();
95 tablesToIndex = new HashMap<String, Integer>();
96
97
98
99 tables = new ArrayList<String>();
100
101
102 numRegions = 0;
103
104 int serverIndex = 0;
105
106
107
108 for (ServerName sn:clusterState.keySet()) {
109 if (serversToIndex.get(sn.getHostAndPort()) == null) {
110 serversToIndex.put(sn.getHostAndPort(), serverIndex++);
111 }
112 }
113
114
115 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
116 numRegions += entry.getValue().size();
117 }
118
119 numServers = serversToIndex.size();
120 regionsPerServer = new int[serversToIndex.size()][];
121
122 servers = new ServerName[numServers];
123 regions = new HRegionInfo[numRegions];
124 regionIndexToServerIndex = new int[numRegions];
125 initialRegionIndexToServerIndex = new int[numRegions];
126 regionIndexToTableIndex = new int[numRegions];
127 regionLoads = new Deque[numRegions];
128 regionLocations = new int[numRegions][];
129 serverIndicesSortedByRegionCount = new Integer[numServers];
130
131 int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
132
133 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
134 serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
135
136
137
138 if (servers[serverIndex] == null ||
139 servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
140 servers[serverIndex] = entry.getKey();
141 }
142
143 if (regionsPerServer[serverIndex] != null) {
144
145
146 regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
147 } else {
148 regionsPerServer[serverIndex] = new int[entry.getValue().size()];
149 }
150 serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
151 }
152
153 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
154 serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
155 regionPerServerIndex = 0;
156
157 for (HRegionInfo region : entry.getValue()) {
158 String tableName = region.getTable().getNameAsString();
159 Integer idx = tablesToIndex.get(tableName);
160 if (idx == null) {
161 tables.add(tableName);
162 idx = tableIndex;
163 tablesToIndex.put(tableName, tableIndex++);
164 }
165
166 regions[regionIndex] = region;
167 regionIndexToServerIndex[regionIndex] = serverIndex;
168 initialRegionIndexToServerIndex[regionIndex] = serverIndex;
169 regionIndexToTableIndex[regionIndex] = idx;
170 regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
171
172
173 if (loads != null) {
174 Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
175
176 if (rl == null) {
177
178 rl = loads.get(region.getEncodedName());
179 }
180 regionLoads[regionIndex] = rl;
181 }
182
183 if (regionFinder != null) {
184
185 List<ServerName> loc = regionFinder.getTopBlockLocations(region);
186 regionLocations[regionIndex] = new int[loc.size()];
187 for (int i=0; i < loc.size(); i++) {
188 regionLocations[regionIndex][i] =
189 loc.get(i) == null ? -1 :
190 (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1 : serversToIndex.get(loc.get(i).getHostAndPort()));
191 }
192 }
193
194 regionIndex++;
195 }
196 }
197
198 numTables = tables.size();
199 numRegionsPerServerPerTable = new int[numServers][numTables];
200
201 for (int i = 0; i < numServers; i++) {
202 for (int j = 0; j < numTables; j++) {
203 numRegionsPerServerPerTable[i][j] = 0;
204 }
205 }
206
207 for (int i=0; i < regionIndexToServerIndex.length; i++) {
208 numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
209 }
210
211 numMaxRegionsPerTable = new int[numTables];
212 for (serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
213 for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
214 if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
215 numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
216 }
217 }
218 }
219 }
220
221 public void moveOrSwapRegion(int lServer, int rServer, int lRegion, int rRegion) {
222
223 if (rRegion >= 0 && lRegion >= 0) {
224 regionMoved(rRegion, rServer, lServer);
225 regionsPerServer[rServer] = replaceRegion(regionsPerServer[rServer], rRegion, lRegion);
226 regionMoved(lRegion, lServer, rServer);
227 regionsPerServer[lServer] = replaceRegion(regionsPerServer[lServer], lRegion, rRegion);
228 } else if (rRegion >= 0) {
229 regionMoved(rRegion, rServer, lServer);
230 regionsPerServer[rServer] = removeRegion(regionsPerServer[rServer], rRegion);
231 regionsPerServer[lServer] = addRegion(regionsPerServer[lServer], rRegion);
232 } else if (lRegion >= 0) {
233 regionMoved(lRegion, lServer, rServer);
234 regionsPerServer[lServer] = removeRegion(regionsPerServer[lServer], lRegion);
235 regionsPerServer[rServer] = addRegion(regionsPerServer[rServer], lRegion);
236 }
237 }
238
239
240 void regionMoved(int regionIndex, int oldServerIndex, int newServerIndex) {
241 regionIndexToServerIndex[regionIndex] = newServerIndex;
242 if (initialRegionIndexToServerIndex[regionIndex] == newServerIndex) {
243 numMovedRegions--;
244 if (regions[regionIndex].isMetaRegion()) {
245 numMovedMetaRegions--;
246 }
247 } else if (initialRegionIndexToServerIndex[regionIndex] == oldServerIndex) {
248 numMovedRegions++;
249 if (regions[regionIndex].isMetaRegion()) {
250 numMovedMetaRegions++;
251 }
252 }
253 int tableIndex = regionIndexToTableIndex[regionIndex];
254 numRegionsPerServerPerTable[oldServerIndex][tableIndex]--;
255 numRegionsPerServerPerTable[newServerIndex][tableIndex]++;
256
257
258 if (numRegionsPerServerPerTable[newServerIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
259 numRegionsPerServerPerTable[newServerIndex][tableIndex] = numMaxRegionsPerTable[tableIndex];
260 } else if ((numRegionsPerServerPerTable[oldServerIndex][tableIndex] + 1)
261 == numMaxRegionsPerTable[tableIndex]) {
262
263 for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
264 if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
265 numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
266 }
267 }
268 }
269 }
270
271 int[] removeRegion(int[] regions, int regionIndex) {
272
273 int[] newRegions = new int[regions.length - 1];
274 int i = 0;
275 for (i = 0; i < regions.length; i++) {
276 if (regions[i] == regionIndex) {
277 break;
278 }
279 newRegions[i] = regions[i];
280 }
281 System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
282 return newRegions;
283 }
284
285 int[] addRegion(int[] regions, int regionIndex) {
286 int[] newRegions = new int[regions.length + 1];
287 System.arraycopy(regions, 0, newRegions, 0, regions.length);
288 newRegions[newRegions.length - 1] = regionIndex;
289 return newRegions;
290 }
291
292 int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
293 int i = 0;
294 for (i = 0; i < regions.length; i++) {
295 if (regions[i] == regionIndex) {
296 regions[i] = newRegionIndex;
297 break;
298 }
299 }
300 return regions;
301 }
302
/**
 * Sorts serverIndicesSortedByRegionCount in place using numRegionsComparator,
 * i.e. by the number of regions each server index currently holds.
 */
303 void sortServersByRegionCount() {
304 Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
305 }
306
/**
 * @param server server index
 * @return length of that server's entry in regionsPerServer
 */
307 int getNumRegions(int server) {
308 return regionsPerServer[server].length;
309 }
310
311 private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
312 @Override
313 public int compare(Integer integer, Integer integer2) {
314 return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
315 }
316 };
317
/** Test hook: overwrites the numRegions field; no other state is adjusted. */
318 @VisibleForTesting
319 protected void setNumRegions(int numRegions) {
320 this.numRegions = numRegions;
321 }
322
/** Test hook: overwrites the numMovedRegions counter; no other state is adjusted. */
323 @VisibleForTesting
324 protected void setNumMovedRegions(int numMovedRegions) {
325 this.numMovedRegions = numMovedRegions;
326 }
327
/** Test hook: overwrites the numMovedMetaRegions counter; no other state is adjusted. */
328 @VisibleForTesting
329 protected void setNumMovedMetaRegions(int numMovedMetaRegions) {
330 this.numMovedMetaRegions = numMovedMetaRegions;
331 }
332
333 @Override
334 public String toString() {
335 String desc = "Cluster{" +
336 "servers=[";
337 for(ServerName sn:servers) {
338 desc += sn.getHostAndPort() + ", ";
339 }
340 desc +=
341 ", serverIndicesSortedByRegionCount="+
342 Arrays.toString(serverIndicesSortedByRegionCount) +
343 ", regionsPerServer=[";
344
345 for (int[]r:regionsPerServer) {
346 desc += Arrays.toString(r);
347 }
348 desc += "]" +
349 ", numMaxRegionsPerTable=" +
350 Arrays.toString(numMaxRegionsPerTable) +
351 ", numRegions=" +
352 numRegions +
353 ", numServers=" +
354 numServers +
355 ", numTables=" +
356 numTables +
357 ", numMovedRegions=" +
358 numMovedRegions +
359 ", numMovedMetaRegions=" +
360 numMovedMetaRegions +
361 '}';
362 return desc;
363 }
364 }
365
366
// Tolerance around the average load used by needsBalance(); loaded by
// setSlop() and clamped to [0, 1] by setConf().
367 protected float slop;
// Configuration most recently passed to setConf().
368 private Configuration config;
// Shared random source for the assignment methods, seeded with the wall clock.
369 private static final Random RANDOM = new Random(System.currentTimeMillis());
370 private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
371
// Incremented at the start of each assignment method in this class.
372 protected final MetricsBalancer metricsBalancer = new MetricsBalancer();
// Set by setMasterServices(); not otherwise used in this class.
373 protected MasterServices services;
374
375 @Override
376 public void setConf(Configuration conf) {
377 setSlop(conf);
378 if (slop < 0) slop = 0;
379 else if (slop > 1) slop = 1;
380
381 this.config = conf;
382 }
383
384 protected void setSlop(Configuration conf) {
385 this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
386 }
387
/** @return the configuration last passed to setConf, or null if it was never called */
388 @Override
389 public Configuration getConf() {
390 return this.config;
391 }
392
/** No-op in this base class: the supplied cluster status is ignored. */
393 @Override
394 public void setClusterStatus(ClusterStatus st) {
395
396 }
397
/** Stores the given master services reference in the services field. */
398 @Override
399 public void setMasterServices(MasterServices masterServices) {
400 this.services = masterServices;
401 }
402
403 protected boolean needsBalance(ClusterLoadState cs) {
404 if (cs.getNumServers() < MIN_SERVER_BALANCE) {
405 if (LOG.isDebugEnabled()) {
406 LOG.debug("Not running balancer because only " + cs.getNumServers()
407 + " active regionserver(s)");
408 }
409 return false;
410 }
411
412
413 float average = cs.getLoadAverage();
414 int floor = (int) Math.floor(average * (1 - slop));
415 int ceiling = (int) Math.ceil(average * (1 + slop));
416 if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
417 NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
418 if (LOG.isTraceEnabled()) {
419
420 LOG.trace("Skipping load balancing because balanced cluster; " +
421 "servers=" + cs.getNumServers() + " " +
422 "regions=" + cs.getNumRegions() + " average=" + average + " " +
423 "mostloaded=" + serversByLoad.lastKey().getLoad() +
424 " leastloaded=" + serversByLoad.firstKey().getLoad());
425 }
426 return false;
427 }
428 return true;
429 }
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448 @Override
449 public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
450 List<ServerName> servers) {
451 metricsBalancer.incrMiscInvocations();
452
453 if (regions.isEmpty() || servers.isEmpty()) {
454 return null;
455 }
456 Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
457 int numRegions = regions.size();
458 int numServers = servers.size();
459 int max = (int) Math.ceil((float) numRegions / numServers);
460 int serverIdx = 0;
461 if (numServers > 1) {
462 serverIdx = RANDOM.nextInt(numServers);
463 }
464 int regionIdx = 0;
465 for (int j = 0; j < numServers; j++) {
466 ServerName server = servers.get((j + serverIdx) % numServers);
467 List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
468 for (int i = regionIdx; i < numRegions; i += numServers) {
469 serverRegions.add(regions.get(i % numRegions));
470 }
471 assignments.put(server, serverRegions);
472 regionIdx++;
473 }
474 return assignments;
475 }
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494 @Override
495 public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
496 List<ServerName> servers) {
497 metricsBalancer.incrMiscInvocations();
498
499 Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
500 for (HRegionInfo region : regions) {
501 assignments.put(region, randomAssignment(region, servers));
502 }
503 return assignments;
504 }
505
506
507
508
509 @Override
510 public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
511 metricsBalancer.incrMiscInvocations();
512
513 if (servers == null || servers.isEmpty()) {
514 LOG.warn("Wanted to do random assignment but no servers to assign to");
515 return null;
516 }
517 return servers.get(RANDOM.nextInt(servers.size()));
518 }
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537 @Override
538 public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
539 List<ServerName> servers) {
540
541 metricsBalancer.incrMiscInvocations();
542
543
544
545
546
547
548
549 ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
550 for (ServerName server : servers) {
551 serversByHostname.put(server.getHostname(), server);
552 }
553
554
555 Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
556
557 for (ServerName server : servers) {
558 assignments.put(server, new ArrayList<HRegionInfo>());
559 }
560
561
562
563
564 Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
565
566 int numRandomAssignments = 0;
567 int numRetainedAssigments = 0;
568 for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
569 HRegionInfo region = entry.getKey();
570 ServerName oldServerName = entry.getValue();
571 List<ServerName> localServers = new ArrayList<ServerName>();
572 if (oldServerName != null) {
573 localServers = serversByHostname.get(oldServerName.getHostname());
574 }
575 if (localServers.isEmpty()) {
576
577
578 ServerName randomServer = servers.get(RANDOM.nextInt(servers.size()));
579 assignments.get(randomServer).add(region);
580 numRandomAssignments++;
581 if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
582 } else if (localServers.size() == 1) {
583
584 assignments.get(localServers.get(0)).add(region);
585 numRetainedAssigments++;
586 } else {
587
588 int size = localServers.size();
589 ServerName target =
590 localServers.contains(oldServerName) ? oldServerName : localServers.get(RANDOM
591 .nextInt(size));
592 assignments.get(target).add(region);
593 numRetainedAssigments++;
594 }
595 }
596
597 String randomAssignMsg = "";
598 if (numRandomAssignments > 0) {
599 randomAssignMsg =
600 numRandomAssignments + " regions were assigned "
601 + "to random hosts, since the old hosts for these regions are no "
602 + "longer present in the cluster. These hosts were:\n "
603 + Joiner.on("\n ").join(oldHostsNoLongerPresent);
604 }
605
606 LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
607 + " retained the pre-restart assignment. " + randomAssignMsg);
608 return assignments;
609 }
610
/** No-op in this base class. */
611 @Override
612 public void initialize() throws HBaseIOException{
613 }
614
/** No-op in this base class: both arguments are ignored. */
615 @Override
616 public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
617 }
618
/** No-op in this base class: the argument is ignored. */
619 @Override
620 public void regionOffline(HRegionInfo regionInfo) {
621 }
622
/** @return true once stop(String) has been called */
623 @Override
624 public boolean isStopped() {
625 return stopped;
626 }
627
/**
 * Logs the stop request with its reason and sets the stopped flag.
 *
 * @param why reason for stopping; only used in the log message
 */
628 @Override
629 public void stop(String why) {
630 LOG.info("Load Balancer stop requested: "+why);
631 stopped = true;
632 }
633 }