View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayList;
21  import java.util.HashMap;
22  import java.util.LinkedList;
23  import java.util.List;
24  import java.util.Map;
25  import java.util.Map.Entry;
26  import java.util.Random;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
31  import org.apache.hadoop.classification.InterfaceAudience;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.hbase.ClusterStatus;
34  import org.apache.hadoop.hbase.HRegionInfo;
35  import org.apache.hadoop.hbase.RegionLoad;
36  import org.apache.hadoop.hbase.ServerLoad;
37  import org.apache.hadoop.hbase.ServerName;
38  import org.apache.hadoop.hbase.master.MasterServices;
39  import org.apache.hadoop.hbase.master.RegionPlan;
40  import org.apache.hadoop.hbase.util.Bytes;
41  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
42  import org.apache.hadoop.hbase.util.Pair;
43  
44  /**
45   * <p>This is a best effort load balancer. Given a Cost function F(C) => x It will
46   * randomly try and mutate the cluster to Cprime. If F(Cprime) < F(C) then the
47   * new cluster state becomes the plan. It includes costs functions to compute the cost of:</p>
48   * <ul>
49   * <li>Region Load</li>
50   * <li>Table Load</li>
51   * <li>Data Locality</li>
52   * <li>Memstore Sizes</li>
53   * <li>Storefile Sizes</li>
54   * </ul>
55   *
56   *
57   * <p>Every cost function returns a number between 0 and 1 inclusive; where 0 is the lowest cost
58   * best solution, and 1 is the highest possible cost and the worst solution.  The computed costs are
59   * scaled by their respective multipliers:</p>
60   *
61   * <ul>
62   *   <li>hbase.master.balancer.stochastic.regionCountCost</li>
63   *   <li>hbase.master.balancer.stochastic.moveCost</li>
64   *   <li>hbase.master.balancer.stochastic.tableSkewCost</li>
65   *   <li>hbase.master.balancer.stochastic.localityCost</li>
66   *   <li>hbase.master.balancer.stochastic.memstoreSizeCost</li>
67   *   <li>hbase.master.balancer.stochastic.storefileSizeCost</li>
68   * </ul>
69   *
70   * <p>In addition to the above configurations, the balancer can be tuned by the following
71   * configuration values:</p>
72   * <ul>
73   *   <li>hbase.master.balancer.stochastic.maxMovePercent which
74   *   controls the maximum fraction of regions that can be moved in a single invocation of this
75   *   balancer.</li>
76   *   <li>hbase.master.balancer.stochastic.stepsPerRegion is the coefficient by which the number of
77   *   regions is multiplied to try and get the number of times the balancer will
78   *   mutate all servers.</li>
79   *   <li>hbase.master.balancer.stochastic.maxSteps which controls the maximum number of times that
80   *   the balancer will try and mutate all the servers. The balancer will use the minimum of this
81   *   value and the above computation.</li>
82   * </ul>
83   *
84   * <p>This balancer is best used with hbase.master.loadbalance.bytable set to false
85   * so that the balancer gets the full picture of all loads on the cluster.</p>
86   */
87  @InterfaceAudience.Private
88  public class StochasticLoadBalancer extends BaseLoadBalancer {
89  
90    private static final String STEPS_PER_REGION_KEY =
91        "hbase.master.balancer.stochastic.stepsPerRegion";
92    private static final String MAX_STEPS_KEY =
93        "hbase.master.balancer.stochastic.maxSteps";
94    private static final String MAX_RUNNING_TIME_KEY =
95        "hbase.master.balancer.stochastic.maxRunningTime";
96    private static final String KEEP_REGION_LOADS =
97        "hbase.master.balancer.stochastic.numRegionLoadsToRemember";
98  
99    private static final Random RANDOM = new Random(System.currentTimeMillis());
100   private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class);
101 
102   private final RegionLocationFinder regionFinder = new RegionLocationFinder();
103   private ClusterStatus clusterStatus = null;
104   private Map<String, List<RegionLoad>> loads = new HashMap<String, List<RegionLoad>>();
105 
106   // values are defaults
107   private int maxSteps = 1000000;
108   private int stepsPerRegion = 800;
109   private long maxRunningTime = 60 * 1000 * 1; // 1 min
110   private int numRegionLoadsToRemember = 15;
111 
112   private RegionPicker[] pickers;
113   private CostFromRegionLoadFunction[] regionLoadFunctions;
114   private CostFunction[] costFunctions;
115   // Keep locality based picker and cost function to alert them
116   // when new services are offered
117   private LocalityBasedPicker localityPicker;
118   private LocalityCostFunction localityCost;
119 
120   @Override
121   public void setConf(Configuration conf) {
122     super.setConf(conf);
123     regionFinder.setConf(conf);
124 
125     maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);
126 
127     stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
128     maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
129 
130     numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);
131 
132     localityPicker = new LocalityBasedPicker(services);
133     localityCost = new LocalityCostFunction(conf, services);
134 
135     pickers = new RegionPicker[] {
136       new RandomRegionPicker(),
137       new LoadPicker(),
138       //localityPicker
139     };
140 
141     regionLoadFunctions = new CostFromRegionLoadFunction[] {
142       new ReadRequestCostFunction(conf),
143       new WriteRequestCostFunction(conf),
144       new MemstoreSizeCostFunction(conf),
145       new StoreFileCostFunction(conf)
146     };
147 
148     costFunctions = new CostFunction[]{
149       new RegionCountSkewCostFunction(conf),
150       new MoveCostFunction(conf),
151       localityCost,
152       new TableSkewCostFunction(conf),
153       regionLoadFunctions[0],
154       regionLoadFunctions[1],
155       regionLoadFunctions[2],
156       regionLoadFunctions[3],
157     };
158   }
159 
160   @Override
161   public void setClusterStatus(ClusterStatus st) {
162     super.setClusterStatus(st);
163     regionFinder.setClusterStatus(st);
164     this.clusterStatus = st;
165     updateRegionLoad();
166     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
167       cost.setClusterStatus(st);
168     }
169   }
170 
171   @Override
172   public void setMasterServices(MasterServices masterServices) {
173     super.setMasterServices(masterServices);
174     this.regionFinder.setServices(masterServices);
175     this.localityCost.setServices(masterServices);
176     this.localityPicker.setServices(masterServices);
177 
178   }
179 
180   /**
181    * Given the cluster state this will try and approach an optimal balance. This
182    * should always approach the optimal state given enough steps.
183    */
184   @Override
185   public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
186     //if (!needsBalance(new ClusterLoadState(clusterState))) {
187     //  return null;
188     //}
189 
190     long startTime = EnvironmentEdgeManager.currentTimeMillis();
191 
192     // Keep track of servers to iterate through them.
193     Cluster cluster = new Cluster(clusterState, loads, regionFinder);
194     double currentCost = computeCost(cluster, Double.MAX_VALUE);
195 
196     double initCost = currentCost;
197     double newCost = currentCost;
198 
199     long computedMaxSteps = Math.min(this.maxSteps,
200         ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers));
201     // Perform a stochastic walk to see if we can get a good fit.
202     long step;
203     for (step = 0; step < computedMaxSteps; step++) {
204       int pickerIdx = RANDOM.nextInt(pickers.length);
205       RegionPicker p = pickers[pickerIdx];
206       Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> picks = p.pick(cluster);
207 
208       int leftServer = picks.getFirst().getFirst();
209       int leftRegion = picks.getFirst().getSecond();
210       int rightServer = picks.getSecond().getFirst();
211       int rightRegion = picks.getSecond().getSecond();
212 
213       // We couldn't find a server
214       if (rightServer < 0 || leftServer < 0) {
215         continue;
216       }
217 
218       // We randomly picked to do nothing.
219       if (leftRegion < 0 && rightRegion < 0) {
220         continue;
221       }
222 
223       cluster.moveOrSwapRegion(leftServer,
224           rightServer,
225           leftRegion,
226           rightRegion);
227 
228       newCost = computeCost(cluster, currentCost);
229       // Should this be kept?
230       if (newCost < currentCost) {
231         currentCost = newCost;
232       } else {
233         // Put things back the way they were before.
234         // TODO: undo by remembering old values, using an UndoAction class
235         cluster.moveOrSwapRegion(leftServer,
236             rightServer,
237             rightRegion,
238             leftRegion);
239       }
240 
241       if (EnvironmentEdgeManager.currentTimeMillis() - startTime >
242           maxRunningTime) {
243         break;
244       }
245     }
246 
247     long endTime = EnvironmentEdgeManager.currentTimeMillis();
248 
249 
250     if (initCost > currentCost) {
251       List<RegionPlan> plans = createRegionPlans(cluster);
252       if (LOG.isDebugEnabled()) {
253         LOG.debug("Finished computing new load balance plan.  Computation took "
254             + (endTime - startTime) + "ms to try " + step
255             + " different iterations.  Found a solution that moves "
256             + plans.size() + " regions; Going from a computed cost of "
257             + initCost + " to a new cost of " + currentCost);
258       }
259       return plans;
260     }
261     if (LOG.isDebugEnabled()) {
262       LOG.debug("Could not find a better load balance plan.  Tried "
263           + step + " different configurations in " + (endTime - startTime)
264           + "ms, and did not find anything with a computed cost less than " + initCost);
265     }
266     return null;
267   }
268 
269   /**
270    * Create all of the RegionPlan's needed to move from the initial cluster state to the desired
271    * state.
272    *
273    * @param cluster The state of the cluster
274    * @return List of RegionPlan's that represent the moves needed to get to desired final state.
275    */
276   private List<RegionPlan> createRegionPlans(Cluster cluster) {
277     List<RegionPlan> plans = new LinkedList<RegionPlan>();
278     for (int regionIndex = 0;
279          regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
280       int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
281       int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
282 
283       if (initialServerIndex != newServerIndex) {
284         HRegionInfo region = cluster.regions[regionIndex];
285         ServerName initialServer = cluster.servers[initialServerIndex];
286         ServerName newServer = cluster.servers[newServerIndex];
287 
288         if (LOG.isTraceEnabled()) {
289           LOG.trace("Moving Region " + region.getEncodedName() + " from server "
290               + initialServer.getHostname() + " to " + newServer.getHostname());
291         }
292         RegionPlan rp = new RegionPlan(region, initialServer, newServer);
293         plans.add(rp);
294       }
295     }
296     return plans;
297   }
298 
299   /**
300    * Store the current region loads.
301    */
302   private synchronized void updateRegionLoad() {
303     // We create a new hashmap so that regions that are no longer there are removed.
304     // However we temporarily need the old loads so we can use them to keep the rolling average.
305     Map<String, List<RegionLoad>> oldLoads = loads;
306     loads = new HashMap<String, List<RegionLoad>>();
307 
308     for (ServerName sn : clusterStatus.getServers()) {
309       ServerLoad sl = clusterStatus.getLoad(sn);
310       if (sl == null) {
311         continue;
312       }
313       for (Entry<byte[], RegionLoad> entry : sl.getRegionsLoad().entrySet()) {
314         List<RegionLoad> rLoads = oldLoads.get(Bytes.toString(entry.getKey()));
315         if (rLoads != null) {
316           // We're only going to keep 15.  So if there are that many already take the last 14
317           if (rLoads.size() >= numRegionLoadsToRemember) {
318             int numToRemove = 1 + (rLoads.size() - numRegionLoadsToRemember);
319             rLoads = rLoads.subList(numToRemove, rLoads.size());
320           }
321 
322         } else {
323           // There was nothing there
324           rLoads = new ArrayList<RegionLoad>();
325         }
326         rLoads.add(entry.getValue());
327         loads.put(Bytes.toString(entry.getKey()), rLoads);
328 
329       }
330     }
331 
332     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
333       cost.setLoads(loads);
334     }
335   }
336 
337 
338   /**
339    * This is the main cost function.  It will compute a cost associated with a proposed cluster
340    * state.  All different costs will be combined with their multipliers to produce a double cost.
341    *
342    * @param cluster The state of the cluster
343    * @param previousCost the previous cost. This is used as an early out.
344    * @return a double of a cost associated with the proposed cluster state.  This cost is an
345    *         aggregate of all individual cost functions.
346    */
347   protected double computeCost(Cluster cluster, double previousCost) {
348     double total = 0;
349 
350     for (CostFunction c:costFunctions) {
351       if (c.getMultiplier() <= 0) {
352         continue;
353       }
354 
355       total += c.getMultiplier() * c.cost(cluster);
356 
357       if (total > previousCost) {
358         return total;
359       }
360     }
361     return total;
362   }
363 
364   abstract static class RegionPicker {
365     abstract Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> pick(Cluster cluster);
366 
367     /**
368      * From a list of regions pick a random one. Null can be returned which
369      * {@link StochasticLoadBalancer#balanceCluster(Map)} recognize as signal to try a region move
370      * rather than swap.
371      *
372      * @param cluster        The state of the cluster
373      * @param server         index of the server
374      * @param chanceOfNoSwap Chance that this will decide to try a move rather
375      *                       than a swap.
376      * @return a random {@link HRegionInfo} or null if an asymmetrical move is
377      *         suggested.
378      */
379     protected int pickRandomRegion(Cluster cluster, int server, double chanceOfNoSwap) {
380       // Check to see if this is just a move.
381       if (cluster.regionsPerServer[server].length == 0 || RANDOM.nextFloat() < chanceOfNoSwap) {
382         // signal a move only.
383         return -1;
384       }
385       int rand = RANDOM.nextInt(cluster.regionsPerServer[server].length);
386       return cluster.regionsPerServer[server][rand];
387 
388     }
389     protected int pickRandomServer(Cluster cluster) {
390       if (cluster.numServers < 1) {
391         return -1;
392       }
393 
394       return RANDOM.nextInt(cluster.numServers);
395     }
396     protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
397       if (cluster.numServers < 2) {
398         return -1;
399       }
400       while (true) {
401         int otherServerIndex = pickRandomServer(cluster);
402         if (otherServerIndex != serverIndex) {
403           return otherServerIndex;
404         }
405       }
406     }
407 
408     protected Pair<Integer, Integer> pickRandomRegions(Cluster cluster,
409                                                        int thisServer,
410                                                        int otherServer) {
411       if (thisServer < 0 || otherServer < 0) {
412         return new Pair<Integer, Integer>(-1, -1);
413       }
414 
415       // Decide who is most likely to need another region
416       int thisRegionCount = cluster.getNumRegions(thisServer);
417       int otherRegionCount = cluster.getNumRegions(otherServer);
418 
419       // Assign the chance based upon the above
420       double thisChance = (thisRegionCount > otherRegionCount) ? 0 : 0.5;
421       double otherChance = (thisRegionCount <= otherRegionCount) ? 0 : 0.5;
422 
423       int thisRegion = pickRandomRegion(cluster, thisServer, thisChance);
424       int otherRegion = pickRandomRegion(cluster, otherServer, otherChance);
425 
426       return new Pair<Integer, Integer>(thisRegion, otherRegion);
427     }
428   }
429 
430   static class RandomRegionPicker extends RegionPicker {
431 
432     @Override
433     Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> pick(Cluster cluster) {
434 
435       int thisServer = pickRandomServer(cluster);
436 
437       // Pick the other server
438       int otherServer = pickOtherRandomServer(cluster, thisServer);
439 
440       Pair<Integer, Integer> regions = pickRandomRegions(cluster, thisServer, otherServer);
441 
442       return new Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>(
443           new Pair<Integer, Integer>(thisServer, regions.getFirst()),
444           new Pair<Integer, Integer>(otherServer, regions.getSecond())
445 
446       );
447     }
448 
449   }
450 
451   public static class LoadPicker extends RegionPicker {
452 
453     @Override
454     Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> pick(Cluster cluster) {
455       cluster.sortServersByRegionCount();
456       int thisServer = pickMostLoadedServer(cluster, -1);
457       int otherServer = pickLeastLoadedServer(cluster, thisServer);
458 
459       Pair<Integer, Integer> regions = pickRandomRegions(cluster, thisServer, otherServer);
460       return new Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>(
461           new Pair<Integer, Integer>(thisServer, regions.getFirst()),
462           new Pair<Integer, Integer>(otherServer, regions.getSecond())
463 
464       );
465     }
466 
467     private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
468       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
469 
470       int index = 0;
471       while (servers[index] == null || servers[index] == thisServer) {
472         index++;
473         if (index == servers.length) {
474           return -1;
475         }
476       }
477       return servers[index];
478     }
479 
480     private int pickMostLoadedServer(final Cluster cluster, int thisServer) {
481       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
482 
483       int index = servers.length - 1;
484       while (servers[index] == null || servers[index] == thisServer) {
485         index--;
486         if (index < 0) {
487           return -1;
488         }
489       }
490       return servers[index];
491     }
492   }
493 
494   static class LocalityBasedPicker extends RegionPicker {
495 
496     private MasterServices masterServices;
497 
498     LocalityBasedPicker(MasterServices masterServices) {
499       this.masterServices = masterServices;
500     }
501 
502     @Override
503     Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> pick(Cluster cluster) {
504       if (this.masterServices == null) {
505         return new Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>(
506             new Pair<Integer, Integer>(-1,-1),
507             new Pair<Integer, Integer>(-1,-1)
508         );
509       }
510       // Pick a random region server
511       int thisServer = pickRandomServer(cluster);
512 
513       // Pick a random region on this server
514       int thisRegion = pickRandomRegion(cluster, thisServer, 0.0f);
515 
516       if (thisRegion == -1) {
517         return new Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>(
518             new Pair<Integer, Integer>(-1,-1),
519             new Pair<Integer, Integer>(-1,-1)
520         );
521       }
522 
523       // Pick the server with the highest locality
524       int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion);
525 
526       // pick an region on the other server to potentially swap
527       int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f);
528 
529       return new Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>(
530           new Pair<Integer, Integer>(thisServer,thisRegion),
531           new Pair<Integer, Integer>(otherServer,otherRegion)
532       );
533     }
534 
535     private int pickHighestLocalityServer(Cluster cluster, int thisServer, int thisRegion) {
536       int[] regionLocations = cluster.regionLocations[thisRegion];
537 
538       if (regionLocations == null || regionLocations.length <= 1) {
539         return pickOtherRandomServer(cluster, thisServer);
540       }
541 
542       int idx = 0;
543 
544       while (idx < regionLocations.length && regionLocations[idx] == thisServer) {
545         idx++;
546       }
547 
548       return idx;
549     }
550 
551     void setServices(MasterServices services) {
552       this.masterServices = services;
553     }
554   }
555 
556   /**
557    * Base class of StochasticLoadBalancer's Cost Functions.
558    */
559   public abstract static class CostFunction {
560 
561     private float multiplier = 0;
562     private Configuration conf;
563 
564     CostFunction(Configuration c) {
565       this.conf = c;
566     }
567 
568     float getMultiplier() {
569       return multiplier;
570     }
571 
572     void setMultiplier(float m) {
573       this.multiplier = m;
574     }
575 
576     abstract double cost(Cluster cluster);
577 
578     /**
579      * Function to compute a scaled cost using {@link DescriptiveStatistics}. It
580      * assumes that this is a zero sum set of costs.  It assumes that the worst case
581      * possible is all of the elements in one region server and the rest having 0.
582      *
583      * @param stats the costs
584      * @return a scaled set of costs.
585      */
586     protected double costFromArray(double[] stats) {
587       double totalCost = 0;
588       double total = getSum(stats);
589       double mean = total/((double)stats.length);
590       double count = stats.length;
591 
592       // Compute max as if all region servers had 0 and one had the sum of all costs.  This must be
593       // a zero sum cost for this to make sense.
594       // TODO: Should we make this sum of square errors?
595       double max = ((count - 1) * mean) + (total - mean);
596       for (double n : stats) {
597         double diff = Math.abs(mean - n);
598         totalCost += diff;
599       }
600 
601       double scaled =  scale(0, max, totalCost);
602       return scaled;
603     }
604 
605 
606 
607     private double getSum(double[] stats) {
608       double total = 0;
609       for(double s:stats) {
610         total += s;
611       }
612       return total;
613     }
614 
615     /**
616      * Scale the value between 0 and 1.
617      *
618      * @param min   Min value
619      * @param max   The Max value
620      * @param value The value to be scaled.
621      * @return The scaled value.
622      */
623     protected double scale(double min, double max, double value) {
624       if (max == 0 || value == 0) {
625         return 0;
626       }
627 
628       return Math.max(0d, Math.min(1d, (value - min) / max));
629     }
630   }
631 
632   /**
633    * Given the starting state of the regions and a potential ending state
634    * compute cost based upon the number of regions that have moved.
635    */
636   public static class MoveCostFunction extends CostFunction {
637     private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost";
638     private static final String MAX_MOVES_PERCENT_KEY =
639         "hbase.master.balancer.stochastic.maxMovePercent";
640     private static final float DEFAULT_MOVE_COST = 100;
641     private static final int DEFAULT_MAX_MOVES = 600;
642     private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
643     private static final int META_MOVE_COST_MULT = 10;
644 
645     private final float maxMovesPercent;
646 
647     MoveCostFunction(Configuration conf) {
648       super(conf);
649 
650       // Move cost multiplier should be the same cost or higher than the rest of the costs to ensure
651       // that large benefits are need to overcome the cost of a move.
652       this.setMultiplier(conf.getFloat(MOVE_COST_KEY, DEFAULT_MOVE_COST));
653       // What percent of the number of regions a single run of the balancer can move.
654       maxMovesPercent = conf.getFloat(MAX_MOVES_PERCENT_KEY, DEFAULT_MAX_MOVE_PERCENT);
655     }
656 
657     @Override
658     double cost(Cluster cluster) {
659       // Try and size the max number of Moves, but always be prepared to move some.
660       int maxMoves = Math.max((int) (cluster.numRegions * maxMovesPercent),
661           DEFAULT_MAX_MOVES);
662 
663       double moveCost = cluster.numMovedRegions;
664 
665       // Don't let this single balance move more than the max moves.
666       // This allows better scaling to accurately represent the actual cost of a move.
667       if (moveCost > maxMoves) {
668         return 1000000;   // return a number much greater than any of the other cost
669       }
670 
671       // META region is special
672       if (cluster.numMovedMetaRegions > 0) {
673         // assume each META region move costs 10 times
674         moveCost += META_MOVE_COST_MULT * cluster.numMovedMetaRegions;
675       }
676 
677       return scale(0, cluster.numRegions + META_MOVE_COST_MULT, moveCost);
678     }
679   }
680 
681   /**
682    * Compute the cost of a potential cluster state from skew in number of
683    * regions on a cluster.
684    */
685   public static class RegionCountSkewCostFunction extends CostFunction {
686     private static final String REGION_COUNT_SKEW_COST_KEY =
687         "hbase.master.balancer.stochastic.regionCountCost";
688     private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
689 
690     private double[] stats = null;
691 
692     RegionCountSkewCostFunction(Configuration conf) {
693       super(conf);
694       // Load multiplier should be the greatest as it is the most general way to balance data.
695       this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
696     }
697 
698     @Override
699     double cost(Cluster cluster) {
700       if (stats == null || stats.length != cluster.numServers) {
701         stats = new double[cluster.numServers];
702       }
703 
704       for (int i =0; i < cluster.numServers; i++) {
705         stats[i] = cluster.regionsPerServer[i].length;
706       }
707       return costFromArray(stats);
708     }
709   }
710 
711   /**
712    * Compute the cost of a potential cluster configuration based upon how evenly
713    * distributed tables are.
714    */
715   public static class TableSkewCostFunction extends CostFunction {
716 
717     private static final String TABLE_SKEW_COST_KEY =
718         "hbase.master.balancer.stochastic.tableSkewCost";
719     private static final float DEFAULT_TABLE_SKEW_COST = 35;
720 
721     TableSkewCostFunction(Configuration conf) {
722       super(conf);
723       this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
724     }
725 
726     @Override
727     double cost(Cluster cluster) {
728       double max = cluster.numRegions;
729       double min = cluster.numRegions / cluster.numServers;
730       double value = 0;
731 
732       for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) {
733         value += cluster.numMaxRegionsPerTable[i];
734       }
735 
736       return scale(min, max, value);
737     }
738   }
739 
740 
741   /**
742    * Compute a cost of a potential cluster configuration based upon where
743    * {@link org.apache.hadoop.hbase.regionserver.StoreFile}s are located.
744    */
745   public static class LocalityCostFunction extends CostFunction {
746 
747     private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost";
748     private static final float DEFAULT_LOCALITY_COST = 25;
749 
750     private MasterServices services;
751 
752     LocalityCostFunction(Configuration conf, MasterServices srv) {
753       super(conf);
754       this.setMultiplier(conf.getFloat(LOCALITY_COST_KEY, DEFAULT_LOCALITY_COST));
755       this.services = srv;
756     }
757 
758     void setServices(MasterServices srvc) {
759       this.services = srvc;
760     }
761 
762     @Override
763     double cost(Cluster cluster) {
764       double max = 0;
765       double cost = 0;
766 
767       // If there's no master so there's no way anything else works.
768       if (this.services == null) {
769         return cost;
770       }
771 
772       for (int i = 0; i < cluster.regionLocations.length; i++) {
773         max += 1;
774         int serverIndex = cluster.regionIndexToServerIndex[i];
775         int[] regionLocations = cluster.regionLocations[i];
776 
777         // If we can't find where the data is getTopBlock returns null.
778         // so count that as being the best possible.
779         if (regionLocations == null) {
780           continue;
781         }
782 
783         int index = -1;
784         for (int j = 0; j < regionLocations.length; j++) {
785           if (regionLocations[j] >= 0 && regionLocations[j] == serverIndex) {
786             index = j;
787             break;
788           }
789         }
790 
791         if (index < 0) {
792           cost += 1;
793         } else {
794           cost += (double) index / (double) regionLocations.length;
795         }
796       }
797       return scale(0, max, cost);
798     }
799   }
800 
801   /**
802    * Base class the allows writing costs functions from rolling average of some
803    * number from RegionLoad.
804    */
805   public abstract static class CostFromRegionLoadFunction extends CostFunction {
806 
807     private ClusterStatus clusterStatus = null;
808     private Map<String, List<RegionLoad>> loads = null;
809     private double[] stats = null;
810     CostFromRegionLoadFunction(Configuration conf) {
811       super(conf);
812     }
813 
814     void setClusterStatus(ClusterStatus status) {
815       this.clusterStatus = status;
816     }
817 
818     void setLoads(Map<String, List<RegionLoad>> l) {
819       this.loads = l;
820     }
821 
822 
823     double cost(Cluster cluster) {
824       if (clusterStatus == null || loads == null) {
825         return 0;
826       }
827 
828       if (stats == null || stats.length != cluster.numServers) {
829         stats = new double[cluster.numServers];
830       }
831 
832       for (int i =0; i < stats.length; i++) {
833         //Cost this server has from RegionLoad
834         long cost = 0;
835 
836         // for every region on this server get the rl
837         for(int regionIndex:cluster.regionsPerServer[i]) {
838           List<RegionLoad> regionLoadList =  cluster.regionLoads[regionIndex];
839 
840           // Now if we found a region load get the type of cost that was requested.
841           if (regionLoadList != null) {
842             cost += getRegionLoadCost(regionLoadList);
843           }
844         }
845 
846         // Add the total cost to the stats.
847         stats[i] = cost;
848       }
849 
850       // Now return the scaled cost from data held in the stats object.
851       return costFromArray(stats);
852     }
853 
854     protected double getRegionLoadCost(List<RegionLoad> regionLoadList) {
855       double cost = 0;
856 
857       for (RegionLoad rl : regionLoadList) {
858         double toAdd = getCostFromRl(rl);
859 
860         if (cost == 0) {
861           cost = toAdd;
862         } else {
863           cost = (.5 * cost) + (.5 * toAdd);
864         }
865       }
866 
867       return cost;
868     }
869 
870     protected abstract double getCostFromRl(RegionLoad rl);
871   }
872 
873   /**
874    * Compute the cost of total number of read requests  The more unbalanced the higher the
875    * computed cost will be.  This uses a rolling average of regionload.
876    */
877 
878   public static class ReadRequestCostFunction extends CostFromRegionLoadFunction {
879 
880     private static final String READ_REQUEST_COST_KEY =
881         "hbase.master.balancer.stochastic.readRequestCost";
882     private static final float DEFAULT_READ_REQUEST_COST = 5;
883 
884     ReadRequestCostFunction(Configuration conf) {
885       super(conf);
886       this.setMultiplier(conf.getFloat(READ_REQUEST_COST_KEY, DEFAULT_READ_REQUEST_COST));
887     }
888 
889 
890     protected double getCostFromRl(RegionLoad rl) {
891       return rl.getReadRequestsCount();
892     }
893   }
894 
895   /**
896    * Compute the cost of total number of write requests.  The more unbalanced the higher the
897    * computed cost will be.  This uses a rolling average of regionload.
898    */
899   public static class WriteRequestCostFunction extends CostFromRegionLoadFunction {
900 
901     private static final String WRITE_REQUEST_COST_KEY =
902         "hbase.master.balancer.stochastic.writeRequestCost";
903     private static final float DEFAULT_WRITE_REQUEST_COST = 5;
904 
905     WriteRequestCostFunction(Configuration conf) {
906       super(conf);
907       this.setMultiplier(conf.getFloat(WRITE_REQUEST_COST_KEY, DEFAULT_WRITE_REQUEST_COST));
908     }
909 
910     protected double getCostFromRl(RegionLoad rl) {
911       return rl.getWriteRequestsCount();
912     }
913   }
914 
915   /**
916    * Compute the cost of total memstore size.  The more unbalanced the higher the
917    * computed cost will be.  This uses a rolling average of regionload.
918    */
919   public static class MemstoreSizeCostFunction extends CostFromRegionLoadFunction {
920 
921     private static final String MEMSTORE_SIZE_COST_KEY =
922         "hbase.master.balancer.stochastic.memstoreSizeCost";
923     private static final float DEFAULT_MEMSTORE_SIZE_COST = 5;
924 
925     MemstoreSizeCostFunction(Configuration conf) {
926       super(conf);
927       this.setMultiplier(conf.getFloat(MEMSTORE_SIZE_COST_KEY, DEFAULT_MEMSTORE_SIZE_COST));
928     }
929 
930     @Override
931     protected double getCostFromRl(RegionLoad rl) {
932       return rl.getMemStoreSizeMB();
933     }
934   }
935   /**
936    * Compute the cost of total open storefiles size.  The more unbalanced the higher the
937    * computed cost will be.  This uses a rolling average of regionload.
938    */
939   public static class StoreFileCostFunction extends CostFromRegionLoadFunction {
940 
941     private static final String STOREFILE_SIZE_COST_KEY =
942         "hbase.master.balancer.stochastic.storefileSizeCost";
943     private static final float DEFAULT_STOREFILE_SIZE_COST = 5;
944 
945     StoreFileCostFunction(Configuration conf) {
946       super(conf);
947       this.setMultiplier(conf.getFloat(STOREFILE_SIZE_COST_KEY, DEFAULT_STOREFILE_SIZE_COST));
948     }
949 
950     @Override
951     protected double getCostFromRl(RegionLoad rl) {
952       return rl.getStorefileSizeMB();
953     }
954   }
955 }