1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.balancer;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Collection;
23 import java.util.Collections;
24 import java.util.Comparator;
25 import java.util.Deque;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Map.Entry;
32 import java.util.NavigableMap;
33 import java.util.Random;
34 import java.util.Set;
35 import java.util.TreeMap;
36
37 import org.apache.commons.lang.NotImplementedException;
38 import org.apache.commons.logging.Log;
39 import org.apache.commons.logging.LogFactory;
40 import org.apache.hadoop.conf.Configuration;
41 import org.apache.hadoop.hbase.ClusterStatus;
42 import org.apache.hadoop.hbase.HBaseIOException;
43 import org.apache.hadoop.hbase.HRegionInfo;
44 import org.apache.hadoop.hbase.RegionLoad;
45 import org.apache.hadoop.hbase.ServerName;
46 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
47 import org.apache.hadoop.hbase.conf.ConfigurationObserver;
48 import org.apache.hadoop.hbase.master.LoadBalancer;
49 import org.apache.hadoop.hbase.master.MasterServices;
50 import org.apache.hadoop.hbase.master.RackManager;
51 import org.apache.hadoop.hbase.master.RegionPlan;
52 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
53 import org.apache.hadoop.util.StringUtils;
54
55 import com.google.common.base.Joiner;
56 import com.google.common.collect.ArrayListMultimap;
57 import com.google.common.collect.Lists;
58 import com.google.common.collect.Sets;
59
60
61
62
63
64
65
66
67 public abstract class BaseLoadBalancer implements LoadBalancer {
/** Minimum number of servers below which {@code needsBalance} declines to balance. */
private static final int MIN_SERVER_BALANCE = 2;
// NOTE(review): not read anywhere in the visible part of the file; presumably a stop flag.
private volatile boolean stopped = false;

/**
 * Shared empty region list; used as the default for "no unassigned regions" and as the
 * region list of servers that currently host nothing (see {@code createCluster}).
 */
private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);

/** Locates block locations for regions; configured in setConf/setClusterStatus/setMasterServices. */
protected final RegionLocationFinder regionFinder = new RegionLocationFinder();
74
75 private static class DefaultRackManager extends RackManager {
76 @Override
77 public String getRack(ServerName server) {
78 return UNKNOWN_RACK;
79 }
80 }
81
82
83
84
85
86
87
88
89
90
91
92 protected static class Cluster {
// Server, host and rack names, each addressed by its dense index.
ServerName[] servers;
String[] hosts; // ServerName uniquely identifies a region server; several can share a host
String[] racks;
// true when at least one host runs more than one region server
boolean multiServersPerHost = false;

ArrayList<String> tables;      // table names, addressed by table index
HRegionInfo[] regions;         // region infos, addressed by region index
Deque<RegionLoad>[] regionLoads; // regionIndex -> load history taken from the 'loads' map (may be null)

// regionIndex -> server indexes returned by the region finder for the region (-1 = unknown server)
int[][] regionLocations;

int[] serverIndexToHostIndex;  // serverIndex -> hostIndex
int[] serverIndexToRackIndex;  // serverIndex -> rackIndex

int[][] regionsPerServer;             // serverIndex -> region indexes hosted there
int[][] regionsPerHost;               // hostIndex -> region indexes (only filled when multiServersPerHost)
int[][] regionsPerRack;               // rackIndex -> region indexes (only filled when numRacks > 1)
int[][] primariesOfRegionsPerServer;  // serverIndex -> sorted primary-region indexes of hosted regions
int[][] primariesOfRegionsPerHost;    // hostIndex -> sorted primary-region indexes
int[][] primariesOfRegionsPerRack;    // rackIndex -> sorted primary-region indexes

int[][] serversPerHost;                  // hostIndex -> server indexes
int[][] serversPerRack;                  // rackIndex -> server indexes
int[] regionIndexToServerIndex;          // regionIndex -> current serverIndex (-1 = unassigned)
int[] initialRegionIndexToServerIndex;   // regionIndex -> serverIndex at construction time
int[] regionIndexToTableIndex;           // regionIndex -> tableIndex
int[][] numRegionsPerServerPerTable;     // [serverIndex][tableIndex] -> number of regions
int[] numMaxRegionsPerTable;             // tableIndex -> max regions of that table on any one server
int[] regionIndexToPrimaryIndex;         // regionIndex -> index of its default replica (-1 if absent)
boolean hasRegionReplicas = false;       // set when any non-default replica is present

// server indexes; ordered by hosted region count after sortServersByRegionCount()
Integer[] serverIndicesSortedByRegionCount;

// Name -> dense index lookups. Servers are keyed by host:port, so start codes are ignored.
Map<String, Integer> serversToIndex;
Map<String, Integer> hostsToIndex;
Map<String, Integer> racksToIndex;
Map<String, Integer> tablesToIndex;
Map<HRegionInfo, Integer> regionsToIndex;

int numServers;
int numHosts;
int numRacks;
int numTables;
int numRegions;

// number of regions currently away from their initial server; maintained by regionMoved()
int numMovedRegions = 0;
// the assignment map this model was built from
Map<ServerName, List<HRegionInfo>> clusterState;

protected final RackManager rackManager;
142
/**
 * Builds a cluster model in which every region is already assigned; simply delegates to the
 * five-argument constructor with no unassigned regions.
 *
 * @param clusterState current region assignment, keyed by server
 * @param loads        region load history keyed by region name, may be null
 * @param regionFinder block-location lookup, may be null
 * @param rackManager  rack resolver, may be null
 */
protected Cluster(
    Map<ServerName, List<HRegionInfo>> clusterState,
    Map<String, Deque<RegionLoad>> loads,
    RegionLocationFinder regionFinder,
    RackManager rackManager) {
  this((Collection<HRegionInfo>) null, clusterState, loads, regionFinder, rackManager);
}
151
/**
 * Builds the index-based cluster model: assigns dense indexes to servers, hosts, racks, tables
 * and regions, and fills every lookup array of this class from the given assignment.
 *
 * @param unassignedRegions regions that are registered with server index -1; null means none
 * @param clusterState      current region assignment, keyed by server
 * @param loads             region load history keyed by region name, may be null
 * @param regionFinder      block-location lookup, may be null
 * @param rackManager       rack resolver; a DefaultRackManager is used when null
 */
@SuppressWarnings("unchecked")
protected Cluster(
    Collection<HRegionInfo> unassignedRegions,
    Map<ServerName, List<HRegionInfo>> clusterState,
    Map<String, Deque<RegionLoad>> loads,
    RegionLocationFinder regionFinder,
    RackManager rackManager) {

  if (unassignedRegions == null) {
    unassignedRegions = EMPTY_REGION_LIST;
  }

  serversToIndex = new HashMap<String, Integer>();
  hostsToIndex = new HashMap<String, Integer>();
  racksToIndex = new HashMap<String, Integer>();
  tablesToIndex = new HashMap<String, Integer>();

  // table names are discovered lazily, as regions get registered
  tables = new ArrayList<String>();
  this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();

  numRegions = 0;

  List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
  List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
  this.clusterState = clusterState;

  // Pass 1: hand out server / host / rack indexes. Servers are keyed by host:port, so two
  // ServerNames that differ only in start code share one server index.
  for (ServerName sn : clusterState.keySet()) {
    if (serversToIndex.get(sn.getHostAndPort()) == null) {
      serversToIndex.put(sn.getHostAndPort(), numServers++);
    }
    if (!hostsToIndex.containsKey(sn.getHostname())) {
      hostsToIndex.put(sn.getHostname(), numHosts++);
      serversPerHostList.add(new ArrayList<Integer>(1));
    }

    int serverIndex = serversToIndex.get(sn.getHostAndPort());
    int hostIndex = hostsToIndex.get(sn.getHostname());
    serversPerHostList.get(hostIndex).add(serverIndex);

    String rack = this.rackManager.getRack(sn);
    if (!racksToIndex.containsKey(rack)) {
      racksToIndex.put(rack, numRacks++);
      serversPerRackList.add(new ArrayList<Integer>());
    }
    int rackIndex = racksToIndex.get(rack);
    serversPerRackList.get(rackIndex).add(serverIndex);
  }

  // Count regions first so that all region-indexed arrays can be allocated up front.
  for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
    numRegions += entry.getValue().size();
  }
  numRegions += unassignedRegions.size();

  regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
  servers = new ServerName[numServers];
  serversPerHost = new int[numHosts][];
  serversPerRack = new int[numRacks][];
  regions = new HRegionInfo[numRegions];
  regionIndexToServerIndex = new int[numRegions];
  initialRegionIndexToServerIndex = new int[numRegions];
  regionIndexToTableIndex = new int[numRegions];
  regionIndexToPrimaryIndex = new int[numRegions];
  regionLoads = new Deque[numRegions];
  regionLocations = new int[numRegions][];
  serverIndicesSortedByRegionCount = new Integer[numServers];

  serverIndexToHostIndex = new int[numServers];
  serverIndexToRackIndex = new int[numServers];
  regionsPerServer = new int[numServers][];
  regionsPerHost = new int[numHosts][];
  regionsPerRack = new int[numRacks][];
  primariesOfRegionsPerServer = new int[numServers][];
  primariesOfRegionsPerHost = new int[numHosts][];
  primariesOfRegionsPerRack = new int[numRacks][];

  int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;

  // Pass 2: pick the ServerName kept for each slot and size the per-server region arrays.
  for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
    int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());

    // keep the ServerName with the greatest start code when several share a host:port
    if (servers[serverIndex] == null ||
        servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
      servers[serverIndex] = entry.getKey();
    }

    if (regionsPerServer[serverIndex] != null) {
      // there is another server with the same host:port;
      // the array is re-allocated large enough for the regions of both entries
      regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
    } else {
      regionsPerServer[serverIndex] = new int[entry.getValue().size()];
    }
    primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
    serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
  }

  hosts = new String[numHosts];
  for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
    hosts[entry.getValue()] = entry.getKey();
  }
  racks = new String[numRacks];
  for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
    racks[entry.getValue()] = entry.getKey();
  }

  // Pass 3: register every assigned region and record it under its server.
  for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
    int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
    // NOTE(review): the write position restarts at 0 for every entry; when two entries map to
    // the same serverIndex the second one appears to overwrite the first one's slots — confirm.
    regionPerServerIndex = 0;

    int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
    serverIndexToHostIndex[serverIndex] = hostIndex;

    int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
    serverIndexToRackIndex[serverIndex] = rackIndex;

    for (HRegionInfo region : entry.getValue()) {
      registerRegion(region, regionIndex, serverIndex, loads, regionFinder);

      regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
      regionIndex++;
    }
  }
  // unassigned regions get the server index -1
  for (HRegionInfo region : unassignedRegions) {
    registerRegion(region, regionIndex, -1, loads, regionFinder);
    regionIndex++;
  }

  // Flatten the temporary per-host / per-rack server lists into int arrays.
  for (int i = 0; i < serversPerHostList.size(); i++) {
    serversPerHost[i] = new int[serversPerHostList.get(i).size()];
    for (int j = 0; j < serversPerHost[i].length; j++) {
      serversPerHost[i][j] = serversPerHostList.get(i).get(j);
    }
    if (serversPerHost[i].length > 1) {
      multiServersPerHost = true;
    }
  }

  for (int i = 0; i < serversPerRackList.size(); i++) {
    serversPerRack[i] = new int[serversPerRackList.get(i).size()];
    for (int j = 0; j < serversPerRack[i].length; j++) {
      serversPerRack[i][j] = serversPerRackList.get(i).get(j);
    }
  }

  numTables = tables.size();
  numRegionsPerServerPerTable = new int[numServers][numTables];

  for (int i = 0; i < numServers; i++) {
    for (int j = 0; j < numTables; j++) {
      numRegionsPerServerPerTable[i][j] = 0;
    }
  }

  // count regions per (server, table); unassigned regions (-1) are skipped
  for (int i=0; i < regionIndexToServerIndex.length; i++) {
    if (regionIndexToServerIndex[i] >= 0) {
      numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
    }
  }

  // per table, the largest region count found on any single server
  numMaxRegionsPerTable = new int[numTables];
  for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
    for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
      if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
        numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
      }
    }
  }

  // map every region to the index of its default replica (itself for a default replica,
  // -1 when the default replica is not part of this cluster model)
  for (int i = 0; i < regions.length; i ++) {
    HRegionInfo info = regions[i];
    if (RegionReplicaUtil.isDefaultReplica(info)) {
      regionIndexToPrimaryIndex[i] = i;
    } else {
      hasRegionReplicas = true;
      HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
      regionIndexToPrimaryIndex[i] =
          regionsToIndex.containsKey(primaryInfo) ?
          regionsToIndex.get(primaryInfo):
          -1;
    }
  }

  // per server: primary index of every hosted region, sorted so contains() can binary-search
  for (int i = 0; i < regionsPerServer.length; i++) {
    primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
    for (int j = 0; j < regionsPerServer[i].length; j++) {
      int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
      primariesOfRegionsPerServer[i][j] = primaryIndex;
    }
    // sort the regions by primaries.
    Arrays.sort(primariesOfRegionsPerServer[i]);
  }

  // compute regionsPerHost, only needed when some host runs several servers
  if (multiServersPerHost) {
    for (int i = 0 ; i < serversPerHost.length; i++) {
      int numRegionsPerHost = 0;
      for (int j = 0; j < serversPerHost[i].length; j++) {
        numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
      }
      regionsPerHost[i] = new int[numRegionsPerHost];
      primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
    }
    for (int i = 0 ; i < serversPerHost.length; i++) {
      int numRegionPerHostIndex = 0;
      for (int j = 0; j < serversPerHost[i].length; j++) {
        for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
          int region = regionsPerServer[serversPerHost[i][j]][k];
          regionsPerHost[i][numRegionPerHostIndex] = region;
          int primaryIndex = regionIndexToPrimaryIndex[region];
          primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
          numRegionPerHostIndex++;
        }
      }
      // sort the regions by primaries.
      Arrays.sort(primariesOfRegionsPerHost[i]);
    }
  }

  // compute regionsPerRack, only needed when there is more than one rack
  if (numRacks > 1) {
    for (int i = 0 ; i < serversPerRack.length; i++) {
      int numRegionsPerRack = 0;
      for (int j = 0; j < serversPerRack[i].length; j++) {
        numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
      }
      regionsPerRack[i] = new int[numRegionsPerRack];
      primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
    }

    for (int i = 0 ; i < serversPerRack.length; i++) {
      int numRegionPerRackIndex = 0;
      for (int j = 0; j < serversPerRack[i].length; j++) {
        for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
          int region = regionsPerServer[serversPerRack[i][j]][k];
          regionsPerRack[i][numRegionPerRackIndex] = region;
          int primaryIndex = regionIndexToPrimaryIndex[region];
          primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
          numRegionPerRackIndex++;
        }
      }
      // sort the regions by primaries.
      Arrays.sort(primariesOfRegionsPerRack[i]);
    }
  }
}
403
404
405 private void registerRegion(HRegionInfo region, int regionIndex, int serverIndex,
406 Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder) {
407 String tableName = region.getTable().getNameAsString();
408 if (!tablesToIndex.containsKey(tableName)) {
409 tables.add(tableName);
410 tablesToIndex.put(tableName, tablesToIndex.size());
411 }
412 int tableIndex = tablesToIndex.get(tableName);
413
414 regionsToIndex.put(region, regionIndex);
415 regions[regionIndex] = region;
416 regionIndexToServerIndex[regionIndex] = serverIndex;
417 initialRegionIndexToServerIndex[regionIndex] = serverIndex;
418 regionIndexToTableIndex[regionIndex] = tableIndex;
419
420
421 if (loads != null) {
422 Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
423
424 if (rl == null) {
425
426 rl = loads.get(region.getEncodedName());
427 }
428 regionLoads[regionIndex] = rl;
429 }
430
431 if (regionFinder != null) {
432
433 List<ServerName> loc = regionFinder.getTopBlockLocations(region);
434 regionLocations[regionIndex] = new int[loc.size()];
435 for (int i=0; i < loc.size(); i++) {
436 regionLocations[regionIndex][i] =
437 loc.get(i) == null ? -1 :
438 (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
439 : serversToIndex.get(loc.get(i).getHostAndPort()));
440 }
441 }
442 }
443
444
445 public static class Action {
446 public static enum Type {
447 ASSIGN_REGION,
448 MOVE_REGION,
449 SWAP_REGIONS,
450 NULL,
451 }
452
453 public Type type;
454 public Action (Type type) {this.type = type;}
455
456 public Action undoAction() { return this; }
457 @Override
458 public String toString() { return type + ":";}
459 }
460
461 public static class AssignRegionAction extends Action {
462 public int region;
463 public int server;
464 public AssignRegionAction(int region, int server) {
465 super(Type.ASSIGN_REGION);
466 this.region = region;
467 this.server = server;
468 }
469 @Override
470 public Action undoAction() {
471
472
473 throw new NotImplementedException();
474 }
475 @Override
476 public String toString() {
477 return type + ": " + region + ":" + server;
478 }
479 }
480
481 public static class MoveRegionAction extends Action {
482 public int region;
483 public int fromServer;
484 public int toServer;
485
486 public MoveRegionAction(int region, int fromServer, int toServer) {
487 super(Type.MOVE_REGION);
488 this.fromServer = fromServer;
489 this.region = region;
490 this.toServer = toServer;
491 }
492 @Override
493 public Action undoAction() {
494 return new MoveRegionAction (region, toServer, fromServer);
495 }
496 @Override
497 public String toString() {
498 return type + ": " + region + ":" + fromServer + " -> " + toServer;
499 }
500 }
501
502 public static class SwapRegionsAction extends Action {
503 public int fromServer;
504 public int fromRegion;
505 public int toServer;
506 public int toRegion;
507 public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
508 super(Type.SWAP_REGIONS);
509 this.fromServer = fromServer;
510 this.fromRegion = fromRegion;
511 this.toServer = toServer;
512 this.toRegion = toRegion;
513 }
514 @Override
515 public Action undoAction() {
516 return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
517 }
518 @Override
519 public String toString() {
520 return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
521 }
522 }
523
/** Shared action of type {@code Type.NULL}; {@code doAction} treats it as a no-op. */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NM_FIELD_NAMING_CONVENTION",
    justification="Mistake. Too disruptive to change now")
public static final Action NullAction = new Action(Type.NULL);
527
528 public void doAction(Action action) {
529 switch (action.type) {
530 case NULL: break;
531 case ASSIGN_REGION:
532 AssignRegionAction ar = (AssignRegionAction) action;
533 regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
534 regionMoved(ar.region, -1, ar.server);
535 break;
536 case MOVE_REGION:
537 MoveRegionAction mra = (MoveRegionAction) action;
538 regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
539 regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
540 regionMoved(mra.region, mra.fromServer, mra.toServer);
541 break;
542 case SWAP_REGIONS:
543 SwapRegionsAction a = (SwapRegionsAction) action;
544 regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
545 regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
546 regionMoved(a.fromRegion, a.fromServer, a.toServer);
547 regionMoved(a.toRegion, a.toServer, a.fromServer);
548 break;
549 default:
550 throw new RuntimeException("Uknown action:" + action.type);
551 }
552 }
553
554
555
556
557
558
559
560
561 boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
562 if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
563 return false;
564 }
565 int server = serversToIndex.get(serverName.getHostAndPort());
566 int region = regionsToIndex.get(regionInfo);
567
568 int primary = regionIndexToPrimaryIndex[region];
569
570
571
572
573 if (contains(primariesOfRegionsPerServer[server], primary)) {
574
575 for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
576 if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
577 return true;
578 }
579 }
580 return false;
581 }
582
583
584 if (multiServersPerHost) {
585 int host = serverIndexToHostIndex[server];
586 if (contains(primariesOfRegionsPerHost[host], primary)) {
587
588 for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
589 if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
590 return true;
591 }
592 }
593 return false;
594 }
595 }
596
597
598 if (numRacks > 1) {
599 int rack = serverIndexToRackIndex[server];
600 if (contains(primariesOfRegionsPerRack[rack], primary)) {
601
602 for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
603 if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
604 return true;
605 }
606 }
607 return false;
608 }
609 }
610 return false;
611 }
612
613 void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
614 if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
615 return;
616 }
617 int server = serversToIndex.get(serverName.getHostAndPort());
618 int region = regionsToIndex.get(regionInfo);
619 doAction(new AssignRegionAction(region, server));
620 }
621
622 void regionMoved(int region, int oldServer, int newServer) {
623 regionIndexToServerIndex[region] = newServer;
624 if (initialRegionIndexToServerIndex[region] == newServer) {
625 numMovedRegions--;
626 } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
627 numMovedRegions++;
628 }
629 int tableIndex = regionIndexToTableIndex[region];
630 if (oldServer >= 0) {
631 numRegionsPerServerPerTable[oldServer][tableIndex]--;
632 }
633 numRegionsPerServerPerTable[newServer][tableIndex]++;
634
635
636 if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
637 numRegionsPerServerPerTable[newServer][tableIndex] = numMaxRegionsPerTable[tableIndex];
638 } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
639 == numMaxRegionsPerTable[tableIndex]) {
640
641 for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
642 if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
643 numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
644 }
645 }
646 }
647
648
649 int primary = regionIndexToPrimaryIndex[region];
650 if (oldServer >= 0) {
651 primariesOfRegionsPerServer[oldServer] = removeRegion(
652 primariesOfRegionsPerServer[oldServer], primary);
653 }
654 primariesOfRegionsPerServer[newServer] = addRegionSorted(
655 primariesOfRegionsPerServer[newServer], primary);
656
657
658 if (multiServersPerHost) {
659 int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
660 int newHost = serverIndexToHostIndex[newServer];
661 if (newHost != oldHost) {
662 regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
663 primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
664 if (oldHost >= 0) {
665 regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
666 primariesOfRegionsPerHost[oldHost] = removeRegion(
667 primariesOfRegionsPerHost[oldHost], primary);
668 }
669 }
670 }
671
672
673 if (numRacks > 1) {
674 int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
675 int newRack = serverIndexToRackIndex[newServer];
676 if (newRack != oldRack) {
677 regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
678 primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
679 if (oldRack >= 0) {
680 regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
681 primariesOfRegionsPerRack[oldRack] = removeRegion(
682 primariesOfRegionsPerRack[oldRack], primary);
683 }
684 }
685 }
686 }
687
688 int[] removeRegion(int[] regions, int regionIndex) {
689
690 int[] newRegions = new int[regions.length - 1];
691 int i = 0;
692 for (i = 0; i < regions.length; i++) {
693 if (regions[i] == regionIndex) {
694 break;
695 }
696 newRegions[i] = regions[i];
697 }
698 System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
699 return newRegions;
700 }
701
702 int[] addRegion(int[] regions, int regionIndex) {
703 int[] newRegions = new int[regions.length + 1];
704 System.arraycopy(regions, 0, newRegions, 0, regions.length);
705 newRegions[newRegions.length - 1] = regionIndex;
706 return newRegions;
707 }
708
709 int[] addRegionSorted(int[] regions, int regionIndex) {
710 int[] newRegions = new int[regions.length + 1];
711 int i = 0;
712 for (i = 0; i < regions.length; i++) {
713 if (regions[i] > regionIndex) {
714 break;
715 }
716 }
717 System.arraycopy(regions, 0, newRegions, 0, i);
718 System.arraycopy(regions, i, newRegions, i+1, regions.length - i);
719 newRegions[i] = regionIndex;
720
721 return newRegions;
722 }
723
724 int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
725 int i = 0;
726 for (i = 0; i < regions.length; i++) {
727 if (regions[i] == regionIndex) {
728 regions[i] = newRegionIndex;
729 break;
730 }
731 }
732 return regions;
733 }
734
735 void sortServersByRegionCount() {
736 Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
737 }
738
739 int getNumRegions(int server) {
740 return regionsPerServer[server].length;
741 }
742
743 boolean contains(int[] arr, int val) {
744 return Arrays.binarySearch(arr, val) >= 0;
745 }
746
747 private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
748 @Override
749 public int compare(Integer integer, Integer integer2) {
750 return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
751 }
752 };
753
754 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SBSC_USE_STRINGBUFFER_CONCATENATION",
755 justification="Not important but should be fixed")
756 @Override
757 public String toString() {
758 String desc = "Cluster{" +
759 "servers=[";
760 for(ServerName sn:servers) {
761 desc += sn.getHostAndPort() + ", ";
762 }
763 desc +=
764 ", serverIndicesSortedByRegionCount="+
765 Arrays.toString(serverIndicesSortedByRegionCount) +
766 ", regionsPerServer=[";
767
768 for (int[]r:regionsPerServer) {
769 desc += Arrays.toString(r);
770 }
771 desc += "]" +
772 ", numMaxRegionsPerTable=" +
773 Arrays.toString(numMaxRegionsPerTable) +
774 ", numRegions=" +
775 numRegions +
776 ", numServers=" +
777 numServers +
778 ", numTables=" +
779 numTables +
780 ", numMovedRegions=" +
781 numMovedRegions +
782 '}';
783 return desc;
784 }
785 }
786
787
// slop for regions; read from "hbase.regions.slop" and clamped to [0, 1] in setConf()
protected float slop;
protected Configuration config;
protected RackManager rackManager;
private static final Random RANDOM = new Random(System.currentTimeMillis());
private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);

/**
 * Configuration key listing the tables whose regions belong on the master; the value "none"
 * (any case) or a missing value means no tables (see {@code getTablesOnMaster}).
 */
public static final String TABLES_ON_MASTER =
    "hbase.balancer.tablesOnMaster";

// names of the tables configured under TABLES_ON_MASTER; filled in setConf()
protected final Set<String> tablesOnMaster = new HashSet<String>();
protected final MetricsBalancer metricsBalancer = new MetricsBalancer();
protected ClusterStatus clusterStatus = null;
// server name of the master; set in setMasterServices()
protected ServerName masterServerName;
protected MasterServices services;
802
803 protected static String[] getTablesOnMaster(Configuration conf) {
804 String valueString = conf.get(TABLES_ON_MASTER);
805 if (valueString != null) {
806 valueString = valueString.trim();
807 }
808 if (valueString == null || valueString.equalsIgnoreCase("none")) {
809 return null;
810 }
811 return StringUtils.getStrings(valueString);
812 }
813
814
815
816
817 public static boolean tablesOnMaster(Configuration conf) {
818 String[] tables = getTablesOnMaster(conf);
819 return tables != null && tables.length > 0;
820 }
821
822 @Override
823 public void setConf(Configuration conf) {
824 setSlop(conf);
825 if (slop < 0) slop = 0;
826 else if (slop > 1) slop = 1;
827
828 this.config = conf;
829 String[] tables = getTablesOnMaster(conf);
830 if (tables != null && tables.length > 0) {
831 Collections.addAll(tablesOnMaster, tables);
832 }
833 this.rackManager = new RackManager(getConf());
834 regionFinder.setConf(conf);
835 }
836
837 protected void setSlop(Configuration conf) {
838 this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
839 }
840
841
842
843
844
845 public boolean shouldBeOnMaster(HRegionInfo region) {
846 return tablesOnMaster.contains(region.getTable().getNameAsString());
847 }
848
849
850
851
852 protected List<RegionPlan> balanceMasterRegions(
853 Map<ServerName, List<HRegionInfo>> clusterMap) {
854 if (masterServerName == null
855 || clusterMap == null || clusterMap.size() <= 1) return null;
856 List<RegionPlan> plans = null;
857 List<HRegionInfo> regions = clusterMap.get(masterServerName);
858 if (regions != null) {
859 Iterator<ServerName> keyIt = null;
860 for (HRegionInfo region: regions) {
861 if (shouldBeOnMaster(region)) continue;
862
863
864 if (keyIt == null || !keyIt.hasNext()) {
865 keyIt = clusterMap.keySet().iterator();
866 }
867 ServerName dest = keyIt.next();
868 if (masterServerName.equals(dest)) {
869 if (!keyIt.hasNext()) {
870 keyIt = clusterMap.keySet().iterator();
871 }
872 dest = keyIt.next();
873 }
874
875
876 RegionPlan plan = new RegionPlan(region, masterServerName, dest);
877 if (plans == null) {
878 plans = new ArrayList<RegionPlan>();
879 }
880 plans.add(plan);
881 }
882 }
883 for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
884 if (masterServerName.equals(server.getKey())) continue;
885 for (HRegionInfo region: server.getValue()) {
886 if (!shouldBeOnMaster(region)) continue;
887
888
889 RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
890 if (plans == null) {
891 plans = new ArrayList<RegionPlan>();
892 }
893 plans.add(plan);
894 }
895 }
896 return plans;
897 }
898
899
900
901
902 protected Map<ServerName, List<HRegionInfo>> assignMasterRegions(
903 Collection<HRegionInfo> regions, List<ServerName> servers) {
904 if (servers == null || regions == null || regions.isEmpty()) {
905 return null;
906 }
907 Map<ServerName, List<HRegionInfo>> assignments
908 = new TreeMap<ServerName, List<HRegionInfo>>();
909 if (masterServerName != null && servers.contains(masterServerName)) {
910 assignments.put(masterServerName, new ArrayList<HRegionInfo>());
911 for (HRegionInfo region: regions) {
912 if (shouldBeOnMaster(region)) {
913 assignments.get(masterServerName).add(region);
914 }
915 }
916 }
917 return assignments;
918 }
919
920 @Override
921 public Configuration getConf() {
922 return this.config;
923 }
924
925 @Override
926 public void setClusterStatus(ClusterStatus st) {
927 this.clusterStatus = st;
928 regionFinder.setClusterStatus(st);
929 }
930
931 @Override
932 public void setMasterServices(MasterServices masterServices) {
933 masterServerName = masterServices.getServerName();
934 this.services = masterServices;
935 this.regionFinder.setServices(masterServices);
936 }
937
938 public void setRackManager(RackManager rackManager) {
939 this.rackManager = rackManager;
940 }
941
942 protected boolean needsBalance(Cluster c) {
943 ClusterLoadState cs = new ClusterLoadState(c.clusterState);
944 if (cs.getNumServers() < MIN_SERVER_BALANCE) {
945 if (LOG.isDebugEnabled()) {
946 LOG.debug("Not running balancer because only " + cs.getNumServers()
947 + " active regionserver(s)");
948 }
949 return false;
950 }
951 if(areSomeRegionReplicasColocated(c)) return true;
952
953
954 float average = cs.getLoadAverage();
955 int floor = (int) Math.floor(average * (1 - slop));
956 int ceiling = (int) Math.ceil(average * (1 + slop));
957 if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
958 NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
959 if (LOG.isTraceEnabled()) {
960
961 LOG.trace("Skipping load balancing because balanced cluster; " +
962 "servers=" + cs.getNumServers() +
963 " regions=" + cs.getNumRegions() + " average=" + average +
964 " mostloaded=" + serversByLoad.lastKey().getLoad() +
965 " leastloaded=" + serversByLoad.firstKey().getLoad());
966 }
967 return false;
968 }
969 return true;
970 }
971
972
973
974
975
976
977
978
979 protected boolean areSomeRegionReplicasColocated(Cluster c) {
980 return false;
981 }
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000 @Override
1001 public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1002 List<ServerName> servers) {
1003 metricsBalancer.incrMiscInvocations();
1004 Map<ServerName, List<HRegionInfo>> assignments = assignMasterRegions(regions, servers);
1005 if (assignments != null && !assignments.isEmpty()) {
1006 servers = new ArrayList<ServerName>(servers);
1007
1008 servers.remove(masterServerName);
1009 List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1010 if (!masterRegions.isEmpty()) {
1011 regions = new ArrayList<HRegionInfo>(regions);
1012 for (HRegionInfo region: masterRegions) {
1013 regions.remove(region);
1014 }
1015 }
1016 }
1017 if (regions == null || regions.isEmpty()) {
1018 return assignments;
1019 }
1020
1021 int numServers = servers == null ? 0 : servers.size();
1022 if (numServers == 0) {
1023 LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1024 return null;
1025 }
1026
1027
1028
1029
1030
1031
1032 if (numServers == 1) {
1033 ServerName server = servers.get(0);
1034 assignments.put(server, new ArrayList<HRegionInfo>(regions));
1035 return assignments;
1036 }
1037
1038 Cluster cluster = createCluster(servers, regions);
1039 List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1040
1041 roundRobinAssignment(cluster, regions, unassignedRegions,
1042 servers, assignments);
1043
1044 List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1045
1046 int serverIdx = RANDOM.nextInt(numServers);
1047 for (HRegionInfo region : unassignedRegions) {
1048 boolean assigned = false;
1049 for (int j = 0; j < numServers; j++) {
1050 ServerName serverName = servers.get((j + serverIdx) % numServers);
1051 if (!cluster.wouldLowerAvailability(region, serverName)) {
1052 List<HRegionInfo> serverRegions = assignments.get(serverName);
1053 if (serverRegions == null) {
1054 serverRegions = new ArrayList<HRegionInfo>();
1055 assignments.put(serverName, serverRegions);
1056 }
1057 serverRegions.add(region);
1058 cluster.doAssignRegion(region, serverName);
1059 serverIdx = (j + serverIdx + 1) % numServers;
1060 assigned = true;
1061 break;
1062 }
1063 }
1064 if (!assigned) {
1065 lastFewRegions.add(region);
1066 }
1067 }
1068
1069
1070 for (HRegionInfo region : lastFewRegions) {
1071 int i = RANDOM.nextInt(numServers);
1072 ServerName server = servers.get(i);
1073 List<HRegionInfo> serverRegions = assignments.get(server);
1074 if (serverRegions == null) {
1075 serverRegions = new ArrayList<HRegionInfo>();
1076 assignments.put(server, serverRegions);
1077 }
1078 serverRegions.add(region);
1079 cluster.doAssignRegion(region, server);
1080 }
1081 return assignments;
1082 }
1083
1084 protected Cluster createCluster(List<ServerName> servers,
1085 Collection<HRegionInfo> regions) {
1086
1087
1088
1089
1090 Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);
1091
1092 for (ServerName server : servers) {
1093 if (!clusterState.containsKey(server)) {
1094 clusterState.put(server, EMPTY_REGION_LIST);
1095 }
1096 }
1097 return new Cluster(regions, clusterState, null, this.regionFinder,
1098 rackManager);
1099 }
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118 @Override
1119 public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
1120 List<ServerName> servers) {
1121 metricsBalancer.incrMiscInvocations();
1122 if (servers == null || servers.isEmpty()) {
1123 LOG.warn("Wanted to do random assignment but no servers to assign to");
1124 return null;
1125 }
1126
1127 Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
1128 for (HRegionInfo region : regions) {
1129 assignments.put(region, randomAssignment(region, servers));
1130 }
1131 return assignments;
1132 }
1133
1134
1135
1136
1137 @Override
1138 public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1139 metricsBalancer.incrMiscInvocations();
1140 if (servers != null && servers.contains(masterServerName)) {
1141 if (shouldBeOnMaster(regionInfo)) {
1142 return masterServerName;
1143 }
1144 servers = new ArrayList<ServerName>(servers);
1145
1146 servers.remove(masterServerName);
1147 }
1148
1149 int numServers = servers == null ? 0 : servers.size();
1150 if (numServers == 0) {
1151 LOG.warn("Wanted to do retain assignment but no servers to assign to");
1152 return null;
1153 }
1154 if (numServers == 1) {
1155 return servers.get(0);
1156 }
1157
1158 List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1159 Cluster cluster = createCluster(servers, regions);
1160 return randomAssignment(cluster, regionInfo, servers);
1161 }
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180 @Override
1181 public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1182 List<ServerName> servers) {
1183
1184 metricsBalancer.incrMiscInvocations();
1185 Map<ServerName, List<HRegionInfo>> assignments
1186 = assignMasterRegions(regions.keySet(), servers);
1187 if (assignments != null && !assignments.isEmpty()) {
1188 servers = new ArrayList<ServerName>(servers);
1189
1190 servers.remove(masterServerName);
1191 List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1192 if (!masterRegions.isEmpty()) {
1193 regions = new HashMap<HRegionInfo, ServerName>(regions);
1194 for (HRegionInfo region: masterRegions) {
1195 regions.remove(region);
1196 }
1197 }
1198 }
1199 if (regions == null || regions.isEmpty()) {
1200 return assignments;
1201 }
1202
1203 int numServers = servers == null ? 0 : servers.size();
1204 if (numServers == 0) {
1205 LOG.warn("Wanted to do retain assignment but no servers to assign to");
1206 return null;
1207 }
1208 if (numServers == 1) {
1209 ServerName server = servers.get(0);
1210 assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1211 return assignments;
1212 }
1213
1214
1215
1216
1217
1218
1219
1220 ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1221 for (ServerName server : servers) {
1222 assignments.put(server, new ArrayList<HRegionInfo>());
1223 serversByHostname.put(server.getHostname(), server);
1224 }
1225
1226
1227
1228
1229 Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1230
1231 int numRandomAssignments = 0;
1232 int numRetainedAssigments = 0;
1233
1234 Cluster cluster = createCluster(servers, regions.keySet());
1235
1236 for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1237 HRegionInfo region = entry.getKey();
1238 ServerName oldServerName = entry.getValue();
1239 List<ServerName> localServers = new ArrayList<ServerName>();
1240 if (oldServerName != null) {
1241 localServers = serversByHostname.get(oldServerName.getHostname());
1242 }
1243 if (localServers.isEmpty()) {
1244
1245
1246 ServerName randomServer = randomAssignment(cluster, region, servers);
1247 assignments.get(randomServer).add(region);
1248 numRandomAssignments++;
1249 if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
1250 } else if (localServers.size() == 1) {
1251
1252 ServerName target = localServers.get(0);
1253 assignments.get(target).add(region);
1254 cluster.doAssignRegion(region, target);
1255 numRetainedAssigments++;
1256 } else {
1257
1258 if (localServers.contains(oldServerName)) {
1259 assignments.get(oldServerName).add(region);
1260 cluster.doAssignRegion(region, oldServerName);
1261 } else {
1262 ServerName target = null;
1263 for (ServerName tmp: localServers) {
1264 if (tmp.getPort() == oldServerName.getPort()) {
1265 target = tmp;
1266 break;
1267 }
1268 }
1269 if (target == null) {
1270 target = randomAssignment(cluster, region, localServers);
1271 }
1272 assignments.get(target).add(region);
1273 }
1274 numRetainedAssigments++;
1275 }
1276 }
1277
1278 String randomAssignMsg = "";
1279 if (numRandomAssignments > 0) {
1280 randomAssignMsg =
1281 numRandomAssignments + " regions were assigned "
1282 + "to random hosts, since the old hosts for these regions are no "
1283 + "longer present in the cluster. These hosts were:\n "
1284 + Joiner.on("\n ").join(oldHostsNoLongerPresent);
1285 }
1286
1287 LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1288 + " retained the pre-restart assignment. " + randomAssignMsg);
1289 return assignments;
1290 }
1291
1292 @Override
1293 public void initialize() throws HBaseIOException{
1294 }
1295
1296 @Override
1297 public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1298 }
1299
1300 @Override
1301 public void regionOffline(HRegionInfo regionInfo) {
1302 }
1303
1304 @Override
1305 public boolean isStopped() {
1306 return stopped;
1307 }
1308
1309 @Override
1310 public void stop(String why) {
1311 LOG.info("Load Balancer stop requested: "+why);
1312 stopped = true;
1313 }
1314
1315
1316
1317
1318 private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1319 List<ServerName> servers) {
1320 int numServers = servers.size();
1321 ServerName sn = null;
1322 final int maxIterations = numServers * 4;
1323 int iterations = 0;
1324
1325 do {
1326 int i = RANDOM.nextInt(numServers);
1327 sn = servers.get(i);
1328 } while (cluster.wouldLowerAvailability(regionInfo, sn)
1329 && iterations++ < maxIterations);
1330 cluster.doAssignRegion(regionInfo, sn);
1331 return sn;
1332 }
1333
1334
1335
1336
1337 private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1338 List<HRegionInfo> unassignedRegions, List<ServerName> servers,
1339 Map<ServerName, List<HRegionInfo>> assignments) {
1340
1341 int numServers = servers.size();
1342 int numRegions = regions.size();
1343 int max = (int) Math.ceil((float) numRegions / numServers);
1344 int serverIdx = 0;
1345 if (numServers > 1) {
1346 serverIdx = RANDOM.nextInt(numServers);
1347 }
1348 int regionIdx = 0;
1349
1350 for (int j = 0; j < numServers; j++) {
1351 ServerName server = servers.get((j + serverIdx) % numServers);
1352 List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
1353 for (int i = regionIdx; i < numRegions; i += numServers) {
1354 HRegionInfo region = regions.get(i % numRegions);
1355 if (cluster.wouldLowerAvailability(region, server)) {
1356 unassignedRegions.add(region);
1357 } else {
1358 serverRegions.add(region);
1359 cluster.doAssignRegion(region, server);
1360 }
1361 }
1362 assignments.put(server, serverRegions);
1363 regionIdx++;
1364 }
1365 }
1366
1367 protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1368 Collection<HRegionInfo> regions) {
1369 if (this.services != null && this.services.getAssignmentManager() != null) {
1370 return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1371 } else {
1372 return new HashMap<ServerName, List<HRegionInfo>>();
1373 }
1374 }
1375
1376 @Override
1377 public void onConfigurationChange(Configuration conf) {
1378 }
1379 }