1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.balancer;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Comparator;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.Map.Entry;
27 import java.util.Random;
28 import java.util.Set;
29 import java.util.TreeMap;
30 import java.util.NavigableMap;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.hadoop.conf.Configuration;
35 import org.apache.hadoop.hbase.ClusterStatus;
36 import org.apache.hadoop.hbase.HRegionInfo;
37 import org.apache.hadoop.hbase.RegionLoad;
38 import org.apache.hadoop.hbase.ServerName;
39 import org.apache.hadoop.hbase.master.AssignmentManager;
40 import org.apache.hadoop.hbase.master.LoadBalancer;
41 import org.apache.hadoop.hbase.master.MasterServices;
42
43 import com.google.common.base.Joiner;
44 import com.google.common.collect.ArrayListMultimap;
45 import com.google.common.collect.Sets;
46
47
48
49
50
51
52
53 public abstract class BaseLoadBalancer implements LoadBalancer {
54
55
56
57
58
59
60 protected static class Cluster {
61 ServerName[] servers;
62 ArrayList<String> tables;
63 HRegionInfo[] regions;
64 List<RegionLoad>[] regionLoads;
65 int[][] regionLocations;
66
67 int[][] regionsPerServer;
68 int[] regionIndexToServerIndex;
69 int[] initialRegionIndexToServerIndex;
70 int[] regionIndexToTableIndex;
71 int[][] numRegionsPerServerPerTable;
72 int[] numMaxRegionsPerTable;
73
74 Integer[] serverIndicesSortedByRegionCount;
75
76 Map<String, Integer> serversToIndex;
77 Map<String, Integer> tablesToIndex;
78
79 int numRegions;
80 int numServers;
81 int numTables;
82
83 int numMovedRegions = 0;
84 int numMovedMetaRegions = 0;
85
86 protected Cluster(Map<ServerName, List<HRegionInfo>> clusterState, Map<String, List<RegionLoad>> loads,
87 RegionLocationFinder regionFinder) {
88
89 serversToIndex = new HashMap<String, Integer>();
90 tablesToIndex = new HashMap<String, Integer>();
91
92
93
94 tables = new ArrayList<String>();
95
96
97 numRegions = 0;
98
99 int serverIndex = 0;
100
101
102
103 for (ServerName sn:clusterState.keySet()) {
104 if (serversToIndex.get(sn.getHostAndPort()) == null) {
105 serversToIndex.put(sn.getHostAndPort(), serverIndex++);
106 }
107 }
108
109
110 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
111 numRegions += entry.getValue().size();
112 }
113
114 numServers = serversToIndex.size();
115 regionsPerServer = new int[serversToIndex.size()][];
116
117 servers = new ServerName[numServers];
118 regions = new HRegionInfo[numRegions];
119 regionIndexToServerIndex = new int[numRegions];
120 initialRegionIndexToServerIndex = new int[numRegions];
121 regionIndexToTableIndex = new int[numRegions];
122 regionLoads = new List[numRegions];
123 regionLocations = new int[numRegions][];
124 serverIndicesSortedByRegionCount = new Integer[numServers];
125
126 int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
127
128 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
129 serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
130
131
132
133 if (servers[serverIndex] == null ||
134 servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
135 servers[serverIndex] = entry.getKey();
136 }
137
138 regionsPerServer[serverIndex] = new int[entry.getValue().size()];
139 serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
140 }
141
142 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
143 serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
144 regionPerServerIndex = 0;
145
146 for (HRegionInfo region : entry.getValue()) {
147 String tableName = region.getTableName().getNameAsString();
148 Integer idx = tablesToIndex.get(tableName);
149 if (idx == null) {
150 tables.add(tableName);
151 idx = tableIndex;
152 tablesToIndex.put(tableName, tableIndex++);
153 }
154
155 regions[regionIndex] = region;
156 regionIndexToServerIndex[regionIndex] = serverIndex;
157 initialRegionIndexToServerIndex[regionIndex] = serverIndex;
158 regionIndexToTableIndex[regionIndex] = idx;
159 regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
160
161
162 if (loads != null) {
163 List<RegionLoad> rl = loads.get(region.getRegionNameAsString());
164
165 if (rl == null) {
166
167 rl = loads.get(region.getEncodedName());
168 }
169 regionLoads[regionIndex] = rl;
170 }
171
172 if (regionFinder != null) {
173
174 List<ServerName> loc = regionFinder.getTopBlockLocations(region);
175 regionLocations[regionIndex] = new int[loc.size()];
176 for (int i=0; i < loc.size(); i++) {
177 regionLocations[regionIndex][i] =
178 loc.get(i) == null ? -1 :
179 (serversToIndex.get(loc.get(i)) == null ? -1 : serversToIndex.get(loc.get(i)));
180 }
181 }
182
183 regionIndex++;
184 }
185 }
186
187 numTables = tables.size();
188 numRegionsPerServerPerTable = new int[numServers][numTables];
189
190 for (int i = 0; i < numServers; i++) {
191 for (int j = 0; j < numTables; j++) {
192 numRegionsPerServerPerTable[i][j] = 0;
193 }
194 }
195
196 for (int i=0; i < regionIndexToServerIndex.length; i++) {
197 numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
198 }
199
200 numMaxRegionsPerTable = new int[numTables];
201 for (serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
202 for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
203 if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
204 numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
205 }
206 }
207 }
208 }
209
210 public void moveOrSwapRegion(int lServer, int rServer, int lRegion, int rRegion) {
211
212 if (rRegion >= 0 && lRegion >= 0) {
213 regionMoved(rRegion, rServer, lServer);
214 regionsPerServer[rServer] = replaceRegion(regionsPerServer[rServer], rRegion, lRegion);
215 regionMoved(lRegion, lServer, rServer);
216 regionsPerServer[lServer] = replaceRegion(regionsPerServer[lServer], lRegion, rRegion);
217 } else if (rRegion >= 0) {
218 regionMoved(rRegion, rServer, lServer);
219 regionsPerServer[rServer] = removeRegion(regionsPerServer[rServer], rRegion);
220 regionsPerServer[lServer] = addRegion(regionsPerServer[lServer], rRegion);
221 } else if (lRegion >= 0) {
222 regionMoved(lRegion, lServer, rServer);
223 regionsPerServer[lServer] = removeRegion(regionsPerServer[lServer], lRegion);
224 regionsPerServer[rServer] = addRegion(regionsPerServer[rServer], lRegion);
225 }
226 }
227
228
229 void regionMoved(int regionIndex, int oldServerIndex, int newServerIndex) {
230 regionIndexToServerIndex[regionIndex] = newServerIndex;
231 if (initialRegionIndexToServerIndex[regionIndex] == newServerIndex) {
232 numMovedRegions--;
233 if (regions[regionIndex].isMetaRegion()) {
234 numMovedMetaRegions--;
235 }
236 } else if (initialRegionIndexToServerIndex[regionIndex] == oldServerIndex) {
237 numMovedRegions++;
238 if (regions[regionIndex].isMetaRegion()) {
239 numMovedMetaRegions++;
240 }
241 }
242 int tableIndex = regionIndexToTableIndex[regionIndex];
243 numRegionsPerServerPerTable[oldServerIndex][tableIndex]--;
244 numRegionsPerServerPerTable[newServerIndex][tableIndex]++;
245
246
247 if (numRegionsPerServerPerTable[newServerIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
248 numRegionsPerServerPerTable[newServerIndex][tableIndex] = numMaxRegionsPerTable[tableIndex];
249 } else if ((numRegionsPerServerPerTable[oldServerIndex][tableIndex] + 1)
250 == numMaxRegionsPerTable[tableIndex]) {
251
252 for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
253 if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
254 numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
255 }
256 }
257 }
258 }
259
260 int[] removeRegion(int[] regions, int regionIndex) {
261
262 int[] newRegions = new int[regions.length - 1];
263 int i = 0;
264 for (i = 0; i < regions.length; i++) {
265 if (regions[i] == regionIndex) {
266 break;
267 }
268 newRegions[i] = regions[i];
269 }
270 System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
271 return newRegions;
272 }
273
274 int[] addRegion(int[] regions, int regionIndex) {
275 int[] newRegions = new int[regions.length + 1];
276 System.arraycopy(regions, 0, newRegions, 0, regions.length);
277 newRegions[newRegions.length - 1] = regionIndex;
278 return newRegions;
279 }
280
281 int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
282 int i = 0;
283 for (i = 0; i < regions.length; i++) {
284 if (regions[i] == regionIndex) {
285 regions[i] = newRegionIndex;
286 break;
287 }
288 }
289 return regions;
290 }
291
292 void sortServersByRegionCount() {
293 Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
294 }
295
296 int getNumRegions(int server) {
297 return regionsPerServer[server].length;
298 }
299
300 private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
301 @Override
302 public int compare(Integer integer, Integer integer2) {
303 return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
304 }
305 };
306
307 @Override
308 public String toString() {
309 String desc = "Cluster{" +
310 "servers=[";
311 for(ServerName sn:servers) {
312 desc += sn.getHostAndPort() + ", ";
313 }
314 desc +=
315 ", serverIndicesSortedByRegionCount="+
316 Arrays.toString(serverIndicesSortedByRegionCount) +
317 ", regionsPerServer=[";
318
319 for (int[]r:regionsPerServer) {
320 desc += Arrays.toString(r);
321 }
322 desc += "]" +
323 ", numMaxRegionsPerTable=" +
324 Arrays.toString(numMaxRegionsPerTable) +
325 ", numRegions=" +
326 numRegions +
327 ", numServers=" +
328 numServers +
329 ", numTables=" +
330 numTables +
331 ", numMovedRegions=" +
332 numMovedRegions +
333 ", numMovedMetaRegions=" +
334 numMovedMetaRegions +
335 '}';
336 return desc;
337 }
338 }
339
340
341 private float slop;
342 private Configuration config;
343 private static final Random RANDOM = new Random(System.currentTimeMillis());
344 private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
345 protected MasterServices services;
346
347 @Override
348 public void setConf(Configuration conf) {
349 this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
350 if (slop < 0) slop = 0;
351 else if (slop > 1) slop = 1;
352 this.config = conf;
353 }
354
355 @Override
356 public Configuration getConf() {
357 return this.config;
358 }
359
360 public void setClusterStatus(ClusterStatus st) {
361
362 }
363
364 public void setMasterServices(MasterServices masterServices) {
365 this.services = masterServices;
366 }
367
368 protected boolean needsBalance(ClusterLoadState cs) {
369 if (cs.getNumServers() == 0) {
370 LOG.debug("numServers=0 so skipping load balancing");
371 return false;
372 }
373
374
375 float average = cs.getLoadAverage();
376 int floor = (int) Math.floor(average * (1 - slop));
377 int ceiling = (int) Math.ceil(average * (1 + slop));
378 if (!(cs.getMinLoad() > ceiling || cs.getMaxLoad() < floor)) {
379 NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
380 if (LOG.isTraceEnabled()) {
381
382 LOG.trace("Skipping load balancing because balanced cluster; " +
383 "servers=" + cs.getNumServers() + " " +
384 "regions=" + cs.getNumRegions() + " average=" + average + " " +
385 "mostloaded=" + serversByLoad.lastKey().getLoad() +
386 " leastloaded=" + serversByLoad.firstKey().getLoad());
387 }
388 return false;
389 }
390 return true;
391 }
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410 public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
411 List<ServerName> servers) {
412 if (regions.isEmpty() || servers.isEmpty()) {
413 return null;
414 }
415 Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
416 int numRegions = regions.size();
417 int numServers = servers.size();
418 int max = (int) Math.ceil((float) numRegions / numServers);
419 int serverIdx = 0;
420 if (numServers > 1) {
421 serverIdx = RANDOM.nextInt(numServers);
422 }
423 int regionIdx = 0;
424 for (int j = 0; j < numServers; j++) {
425 ServerName server = servers.get((j + serverIdx) % numServers);
426 List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
427 for (int i = regionIdx; i < numRegions; i += numServers) {
428 serverRegions.add(regions.get(i % numRegions));
429 }
430 assignments.put(server, serverRegions);
431 regionIdx++;
432 }
433 return assignments;
434 }
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453 public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
454 List<ServerName> servers) {
455 Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
456 for (HRegionInfo region : regions) {
457 assignments.put(region, randomAssignment(region, servers));
458 }
459 return assignments;
460 }
461
462
463
464
465 public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
466 if (servers == null || servers.isEmpty()) {
467 LOG.warn("Wanted to do random assignment but no servers to assign to");
468 return null;
469 }
470 return servers.get(RANDOM.nextInt(servers.size()));
471 }
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490 public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
491 List<ServerName> servers) {
492
493
494
495
496
497
498 ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
499 for (ServerName server : servers) {
500 serversByHostname.put(server.getHostname(), server);
501 }
502
503
504 Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
505
506 for (ServerName server : servers) {
507 assignments.put(server, new ArrayList<HRegionInfo>());
508 }
509
510
511
512
513 Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
514
515 int numRandomAssignments = 0;
516 int numRetainedAssigments = 0;
517 for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
518 HRegionInfo region = entry.getKey();
519 ServerName oldServerName = entry.getValue();
520 List<ServerName> localServers = new ArrayList<ServerName>();
521 if (oldServerName != null) {
522 localServers = serversByHostname.get(oldServerName.getHostname());
523 }
524 if (localServers.isEmpty()) {
525
526
527 ServerName randomServer = servers.get(RANDOM.nextInt(servers.size()));
528 assignments.get(randomServer).add(region);
529 numRandomAssignments++;
530 if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
531 } else if (localServers.size() == 1) {
532
533 assignments.get(localServers.get(0)).add(region);
534 numRetainedAssigments++;
535 } else {
536
537 int size = localServers.size();
538 ServerName target = localServers.get(RANDOM.nextInt(size));
539 assignments.get(target).add(region);
540 numRetainedAssigments++;
541 }
542 }
543
544 String randomAssignMsg = "";
545 if (numRandomAssignments > 0) {
546 randomAssignMsg =
547 numRandomAssignments + " regions were assigned "
548 + "to random hosts, since the old hosts for these regions are no "
549 + "longer present in the cluster. These hosts were:\n "
550 + Joiner.on("\n ").join(oldHostsNoLongerPresent);
551 }
552
553 LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
554 + " retained the pre-restart assignment. " + randomAssignMsg);
555 return assignments;
556 }
557
558 }