1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.balancer;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Collection;
23 import java.util.Comparator;
24 import java.util.Deque;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Map.Entry;
29 import java.util.Random;
30 import java.util.Set;
31 import java.util.TreeMap;
32 import java.util.NavigableMap;
33
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.hbase.ClusterStatus;
38 import org.apache.hadoop.hbase.HBaseIOException;
39 import org.apache.hadoop.hbase.HRegionInfo;
40 import org.apache.hadoop.hbase.RegionLoad;
41 import org.apache.hadoop.hbase.ServerName;
42 import org.apache.hadoop.hbase.master.AssignmentManager;
43 import org.apache.hadoop.hbase.master.LoadBalancer;
44 import org.apache.hadoop.hbase.master.MasterServices;
45
46 import com.google.common.base.Joiner;
47 import com.google.common.collect.ArrayListMultimap;
48 import com.google.common.collect.Sets;
49
50
51
52
53
54
55
56 public abstract class BaseLoadBalancer implements LoadBalancer {
57 private static final int MIN_SERVER_BALANCE = 2;
58 private volatile boolean stopped = false;
59
60
61
62
63
64
65 protected static class Cluster {
66 ServerName[] servers;
67 ArrayList<String> tables;
68 HRegionInfo[] regions;
69 Deque<RegionLoad>[] regionLoads;
70 int[][] regionLocations;
71
72 int[][] regionsPerServer;
73 int[] regionIndexToServerIndex;
74 int[] initialRegionIndexToServerIndex;
75 int[] regionIndexToTableIndex;
76 int[][] numRegionsPerServerPerTable;
77 int[] numMaxRegionsPerTable;
78
79 Integer[] serverIndicesSortedByRegionCount;
80
81 Map<String, Integer> serversToIndex;
82 Map<String, Integer> tablesToIndex;
83
84 int numRegions;
85 int numServers;
86 int numTables;
87
88 int numMovedRegions = 0;
89 int numMovedMetaRegions = 0;
90
91 protected Cluster(Map<ServerName, List<HRegionInfo>> clusterState, Map<String, Deque<RegionLoad>> loads,
92 RegionLocationFinder regionFinder) {
93
94 serversToIndex = new HashMap<String, Integer>();
95 tablesToIndex = new HashMap<String, Integer>();
96
97
98
99 tables = new ArrayList<String>();
100
101
102 numRegions = 0;
103
104 int serverIndex = 0;
105
106
107
108 for (ServerName sn:clusterState.keySet()) {
109 if (serversToIndex.get(sn.getHostAndPort()) == null) {
110 serversToIndex.put(sn.getHostAndPort(), serverIndex++);
111 }
112 }
113
114
115 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
116 numRegions += entry.getValue().size();
117 }
118
119 numServers = serversToIndex.size();
120 regionsPerServer = new int[serversToIndex.size()][];
121
122 servers = new ServerName[numServers];
123 regions = new HRegionInfo[numRegions];
124 regionIndexToServerIndex = new int[numRegions];
125 initialRegionIndexToServerIndex = new int[numRegions];
126 regionIndexToTableIndex = new int[numRegions];
127 regionLoads = new Deque[numRegions];
128 regionLocations = new int[numRegions][];
129 serverIndicesSortedByRegionCount = new Integer[numServers];
130
131 int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
132
133 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
134 serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
135
136
137
138 if (servers[serverIndex] == null ||
139 servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
140 servers[serverIndex] = entry.getKey();
141 }
142
143 regionsPerServer[serverIndex] = new int[entry.getValue().size()];
144 serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
145 }
146
147 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
148 serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
149 regionPerServerIndex = 0;
150
151 for (HRegionInfo region : entry.getValue()) {
152 String tableName = region.getTable().getNameAsString();
153 Integer idx = tablesToIndex.get(tableName);
154 if (idx == null) {
155 tables.add(tableName);
156 idx = tableIndex;
157 tablesToIndex.put(tableName, tableIndex++);
158 }
159
160 regions[regionIndex] = region;
161 regionIndexToServerIndex[regionIndex] = serverIndex;
162 initialRegionIndexToServerIndex[regionIndex] = serverIndex;
163 regionIndexToTableIndex[regionIndex] = idx;
164 regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
165
166
167 if (loads != null) {
168 Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
169
170 if (rl == null) {
171
172 rl = loads.get(region.getEncodedName());
173 }
174 regionLoads[regionIndex] = rl;
175 }
176
177 if (regionFinder != null) {
178
179 List<ServerName> loc = regionFinder.getTopBlockLocations(region);
180 regionLocations[regionIndex] = new int[loc.size()];
181 for (int i=0; i < loc.size(); i++) {
182 regionLocations[regionIndex][i] =
183 loc.get(i) == null ? -1 :
184 (serversToIndex.get(loc.get(i)) == null ? -1 : serversToIndex.get(loc.get(i)));
185 }
186 }
187
188 regionIndex++;
189 }
190 }
191
192 numTables = tables.size();
193 numRegionsPerServerPerTable = new int[numServers][numTables];
194
195 for (int i = 0; i < numServers; i++) {
196 for (int j = 0; j < numTables; j++) {
197 numRegionsPerServerPerTable[i][j] = 0;
198 }
199 }
200
201 for (int i=0; i < regionIndexToServerIndex.length; i++) {
202 numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
203 }
204
205 numMaxRegionsPerTable = new int[numTables];
206 for (serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
207 for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
208 if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
209 numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
210 }
211 }
212 }
213 }
214
215 public void moveOrSwapRegion(int lServer, int rServer, int lRegion, int rRegion) {
216
217 if (rRegion >= 0 && lRegion >= 0) {
218 regionMoved(rRegion, rServer, lServer);
219 regionsPerServer[rServer] = replaceRegion(regionsPerServer[rServer], rRegion, lRegion);
220 regionMoved(lRegion, lServer, rServer);
221 regionsPerServer[lServer] = replaceRegion(regionsPerServer[lServer], lRegion, rRegion);
222 } else if (rRegion >= 0) {
223 regionMoved(rRegion, rServer, lServer);
224 regionsPerServer[rServer] = removeRegion(regionsPerServer[rServer], rRegion);
225 regionsPerServer[lServer] = addRegion(regionsPerServer[lServer], rRegion);
226 } else if (lRegion >= 0) {
227 regionMoved(lRegion, lServer, rServer);
228 regionsPerServer[lServer] = removeRegion(regionsPerServer[lServer], lRegion);
229 regionsPerServer[rServer] = addRegion(regionsPerServer[rServer], lRegion);
230 }
231 }
232
233
234 void regionMoved(int regionIndex, int oldServerIndex, int newServerIndex) {
235 regionIndexToServerIndex[regionIndex] = newServerIndex;
236 if (initialRegionIndexToServerIndex[regionIndex] == newServerIndex) {
237 numMovedRegions--;
238 if (regions[regionIndex].isMetaRegion()) {
239 numMovedMetaRegions--;
240 }
241 } else if (initialRegionIndexToServerIndex[regionIndex] == oldServerIndex) {
242 numMovedRegions++;
243 if (regions[regionIndex].isMetaRegion()) {
244 numMovedMetaRegions++;
245 }
246 }
247 int tableIndex = regionIndexToTableIndex[regionIndex];
248 numRegionsPerServerPerTable[oldServerIndex][tableIndex]--;
249 numRegionsPerServerPerTable[newServerIndex][tableIndex]++;
250
251
252 if (numRegionsPerServerPerTable[newServerIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
253 numRegionsPerServerPerTable[newServerIndex][tableIndex] = numMaxRegionsPerTable[tableIndex];
254 } else if ((numRegionsPerServerPerTable[oldServerIndex][tableIndex] + 1)
255 == numMaxRegionsPerTable[tableIndex]) {
256
257 for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
258 if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
259 numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
260 }
261 }
262 }
263 }
264
265 int[] removeRegion(int[] regions, int regionIndex) {
266
267 int[] newRegions = new int[regions.length - 1];
268 int i = 0;
269 for (i = 0; i < regions.length; i++) {
270 if (regions[i] == regionIndex) {
271 break;
272 }
273 newRegions[i] = regions[i];
274 }
275 System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
276 return newRegions;
277 }
278
279 int[] addRegion(int[] regions, int regionIndex) {
280 int[] newRegions = new int[regions.length + 1];
281 System.arraycopy(regions, 0, newRegions, 0, regions.length);
282 newRegions[newRegions.length - 1] = regionIndex;
283 return newRegions;
284 }
285
286 int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
287 int i = 0;
288 for (i = 0; i < regions.length; i++) {
289 if (regions[i] == regionIndex) {
290 regions[i] = newRegionIndex;
291 break;
292 }
293 }
294 return regions;
295 }
296
297 void sortServersByRegionCount() {
298 Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
299 }
300
301 int getNumRegions(int server) {
302 return regionsPerServer[server].length;
303 }
304
305 private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
306 @Override
307 public int compare(Integer integer, Integer integer2) {
308 return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
309 }
310 };
311
312 @Override
313 public String toString() {
314 String desc = "Cluster{" +
315 "servers=[";
316 for(ServerName sn:servers) {
317 desc += sn.getHostAndPort() + ", ";
318 }
319 desc +=
320 ", serverIndicesSortedByRegionCount="+
321 Arrays.toString(serverIndicesSortedByRegionCount) +
322 ", regionsPerServer=[";
323
324 for (int[]r:regionsPerServer) {
325 desc += Arrays.toString(r);
326 }
327 desc += "]" +
328 ", numMaxRegionsPerTable=" +
329 Arrays.toString(numMaxRegionsPerTable) +
330 ", numRegions=" +
331 numRegions +
332 ", numServers=" +
333 numServers +
334 ", numTables=" +
335 numTables +
336 ", numMovedRegions=" +
337 numMovedRegions +
338 ", numMovedMetaRegions=" +
339 numMovedMetaRegions +
340 '}';
341 return desc;
342 }
343 }
344
345
346 protected float slop;
347 private Configuration config;
348 private static final Random RANDOM = new Random(System.currentTimeMillis());
349 private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
350
351 protected final MetricsBalancer metricsBalancer = new MetricsBalancer();
352 protected MasterServices services;
353
354 @Override
355 public void setConf(Configuration conf) {
356 setSlop(conf);
357 if (slop < 0) slop = 0;
358 else if (slop > 1) slop = 1;
359
360 this.config = conf;
361 }
362
363 protected void setSlop(Configuration conf) {
364 this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
365 }
366
367 @Override
368 public Configuration getConf() {
369 return this.config;
370 }
371
372 public void setClusterStatus(ClusterStatus st) {
373
374 }
375
376 public void setMasterServices(MasterServices masterServices) {
377 this.services = masterServices;
378 }
379
380 protected boolean needsBalance(ClusterLoadState cs) {
381 if (cs.getNumServers() < MIN_SERVER_BALANCE) {
382 if (LOG.isDebugEnabled()) {
383 LOG.debug("Not running balancer because only " + cs.getNumServers()
384 + " active regionserver(s)");
385 }
386 return false;
387 }
388
389
390 float average = cs.getLoadAverage();
391 int floor = (int) Math.floor(average * (1 - slop));
392 int ceiling = (int) Math.ceil(average * (1 + slop));
393 if (!(cs.getMinLoad() > ceiling || cs.getMaxLoad() < floor)) {
394 NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
395 if (LOG.isTraceEnabled()) {
396
397 LOG.trace("Skipping load balancing because balanced cluster; " +
398 "servers=" + cs.getNumServers() + " " +
399 "regions=" + cs.getNumRegions() + " average=" + average + " " +
400 "mostloaded=" + serversByLoad.lastKey().getLoad() +
401 " leastloaded=" + serversByLoad.firstKey().getLoad());
402 }
403 return false;
404 }
405 return true;
406 }
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425 public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
426 List<ServerName> servers) {
427 metricsBalancer.incrMiscInvocations();
428
429 if (regions.isEmpty() || servers.isEmpty()) {
430 return null;
431 }
432 Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
433 int numRegions = regions.size();
434 int numServers = servers.size();
435 int max = (int) Math.ceil((float) numRegions / numServers);
436 int serverIdx = 0;
437 if (numServers > 1) {
438 serverIdx = RANDOM.nextInt(numServers);
439 }
440 int regionIdx = 0;
441 for (int j = 0; j < numServers; j++) {
442 ServerName server = servers.get((j + serverIdx) % numServers);
443 List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
444 for (int i = regionIdx; i < numRegions; i += numServers) {
445 serverRegions.add(regions.get(i % numRegions));
446 }
447 assignments.put(server, serverRegions);
448 regionIdx++;
449 }
450 return assignments;
451 }
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470 public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
471 List<ServerName> servers) {
472 metricsBalancer.incrMiscInvocations();
473
474 Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
475 for (HRegionInfo region : regions) {
476 assignments.put(region, randomAssignment(region, servers));
477 }
478 return assignments;
479 }
480
481
482
483
484 public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
485 metricsBalancer.incrMiscInvocations();
486
487 if (servers == null || servers.isEmpty()) {
488 LOG.warn("Wanted to do random assignment but no servers to assign to");
489 return null;
490 }
491 return servers.get(RANDOM.nextInt(servers.size()));
492 }
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511 public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
512 List<ServerName> servers) {
513
514 metricsBalancer.incrMiscInvocations();
515
516
517
518
519
520
521
522 ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
523 for (ServerName server : servers) {
524 serversByHostname.put(server.getHostname(), server);
525 }
526
527
528 Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
529
530 for (ServerName server : servers) {
531 assignments.put(server, new ArrayList<HRegionInfo>());
532 }
533
534
535
536
537 Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
538
539 int numRandomAssignments = 0;
540 int numRetainedAssigments = 0;
541 for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
542 HRegionInfo region = entry.getKey();
543 ServerName oldServerName = entry.getValue();
544 List<ServerName> localServers = new ArrayList<ServerName>();
545 if (oldServerName != null) {
546 localServers = serversByHostname.get(oldServerName.getHostname());
547 }
548 if (localServers.isEmpty()) {
549
550
551 ServerName randomServer = servers.get(RANDOM.nextInt(servers.size()));
552 assignments.get(randomServer).add(region);
553 numRandomAssignments++;
554 if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
555 } else if (localServers.size() == 1) {
556
557 assignments.get(localServers.get(0)).add(region);
558 numRetainedAssigments++;
559 } else {
560
561 int size = localServers.size();
562 ServerName target = localServers.get(RANDOM.nextInt(size));
563 assignments.get(target).add(region);
564 numRetainedAssigments++;
565 }
566 }
567
568 String randomAssignMsg = "";
569 if (numRandomAssignments > 0) {
570 randomAssignMsg =
571 numRandomAssignments + " regions were assigned "
572 + "to random hosts, since the old hosts for these regions are no "
573 + "longer present in the cluster. These hosts were:\n "
574 + Joiner.on("\n ").join(oldHostsNoLongerPresent);
575 }
576
577 LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
578 + " retained the pre-restart assignment. " + randomAssignMsg);
579 return assignments;
580 }
581
582 @Override
583 public void initialize() throws HBaseIOException{
584 }
585
586 @Override
587 public boolean isStopped() {
588 return stopped;
589 }
590
591 @Override
592 public void stop(String why) {
593 LOG.info("Load Balancer stop requested: "+why);
594 stopped = true;
595 }
596 }