View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayDeque;
21  import java.util.Collection;
22  import java.util.Deque;
23  import java.util.HashMap;
24  import java.util.LinkedList;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.Map.Entry;
28  import java.util.Random;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
33  import org.apache.hadoop.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.hbase.ClusterStatus;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.RegionLoad;
38  import org.apache.hadoop.hbase.ServerLoad;
39  import org.apache.hadoop.hbase.ServerName;
40  import org.apache.hadoop.hbase.master.MasterServices;
41  import org.apache.hadoop.hbase.master.RegionPlan;
42  import org.apache.hadoop.hbase.util.Bytes;
43  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
44  import org.apache.hadoop.hbase.util.Pair;
45  
46  /**
 * <p>This is a best effort load balancer. Given a cost function F(C) => x, it will
 * randomly try to mutate the cluster to Cprime. If F(Cprime) < F(C) then the
 * new cluster state becomes the plan. It includes cost functions to compute the cost of:</p>
50   * <ul>
51   * <li>Region Load</li>
52   * <li>Table Load</li>
53   * <li>Data Locality</li>
54   * <li>Memstore Sizes</li>
55   * <li>Storefile Sizes</li>
56   * </ul>
57   *
58   *
59   * <p>Every cost function returns a number between 0 and 1 inclusive; where 0 is the lowest cost
60   * best solution, and 1 is the highest possible cost and the worst solution.  The computed costs are
61   * scaled by their respective multipliers:</p>
62   *
63   * <ul>
64   *   <li>hbase.master.balancer.stochastic.regionLoadCost</li>
65   *   <li>hbase.master.balancer.stochastic.moveCost</li>
66   *   <li>hbase.master.balancer.stochastic.tableLoadCost</li>
67   *   <li>hbase.master.balancer.stochastic.localityCost</li>
68   *   <li>hbase.master.balancer.stochastic.memstoreSizeCost</li>
69   *   <li>hbase.master.balancer.stochastic.storefileSizeCost</li>
70   * </ul>
71   *
72   * <p>In addition to the above configurations, the balancer can be tuned by the following
73   * configuration values:</p>
74   * <ul>
75   *   <li>hbase.master.balancer.stochastic.maxMoveRegions which
76   *   controls what the max number of regions that can be moved in a single invocation of this
77   *   balancer.</li>
78   *   <li>hbase.master.balancer.stochastic.stepsPerRegion is the coefficient by which the number of
79   *   regions is multiplied to try and get the number of times the balancer will
80   *   mutate all servers.</li>
81   *   <li>hbase.master.balancer.stochastic.maxSteps which controls the maximum number of times that
82   *   the balancer will try and mutate all the servers. The balancer will use the minimum of this
83   *   value and the above computation.</li>
84   * </ul>
85   *
86   * <p>This balancer is best used with hbase.master.loadbalance.bytable set to false
87   * so that the balancer gets the full picture of all loads on the cluster.</p>
88   */
89  @InterfaceAudience.Private
90  public class StochasticLoadBalancer extends BaseLoadBalancer {
91  
  // Configuration keys read in setConf(); each one overrides the matching default field below.
  private static final String STEPS_PER_REGION_KEY =
      "hbase.master.balancer.stochastic.stepsPerRegion";
  private static final String MAX_STEPS_KEY =
      "hbase.master.balancer.stochastic.maxSteps";
  private static final String MAX_RUNNING_TIME_KEY =
      "hbase.master.balancer.stochastic.maxRunningTime";
  private static final String KEEP_REGION_LOADS =
      "hbase.master.balancer.stochastic.numRegionLoadsToRemember";

  // Shared random source used by balanceCluster() and by the region pickers.
  private static final Random RANDOM = new Random(System.currentTimeMillis());
  private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class);

  // Receives the conf, cluster status and master services, and is handed to each new Cluster.
  private final RegionLocationFinder regionFinder = new RegionLocationFinder();
  // Last status passed to setClusterStatus(); read by updateRegionLoad().
  private ClusterStatus clusterStatus = null;
  // Recent RegionLoad samples, keyed by Bytes.toString() of the keys of ServerLoad.getRegionsLoad().
  Map<String, Deque<RegionLoad>> loads = new HashMap<String, Deque<RegionLoad>>();

  // values are defaults
  private int maxSteps = 1000000;
  private int stepsPerRegion = 800;
  // Compared against a difference of currentTimeMillis() values, so the unit is milliseconds.
  private long maxRunningTime = 30 * 1000 * 1; // 30 seconds.
  private int numRegionLoadsToRemember = 15;

  // The three arrays below are (re)built by setConf().
  private RegionPicker[] pickers;
  private CostFromRegionLoadFunction[] regionLoadFunctions;
  private CostFunction[] costFunctions;
  // Keep locality based picker and cost function to alert them
  // when new services are offered
  private LocalityBasedPicker localityPicker;
  private LocalityCostFunction localityCost;
121 
122   @Override
123   public void setConf(Configuration conf) {
124     super.setConf(conf);
125 
126     regionFinder.setConf(conf);
127 
128     maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);
129 
130     stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
131     maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
132 
133     numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);
134 
135     localityPicker = new LocalityBasedPicker(services);
136     localityCost = new LocalityCostFunction(conf, services);
137 
138     pickers = new RegionPicker[] {
139       new RandomRegionPicker(),
140       new LoadPicker(),
141       localityPicker
142     };
143 
144     regionLoadFunctions = new CostFromRegionLoadFunction[] {
145       new ReadRequestCostFunction(conf),
146       new WriteRequestCostFunction(conf),
147       new MemstoreSizeCostFunction(conf),
148       new StoreFileCostFunction(conf)
149     };
150 
151     costFunctions = new CostFunction[]{
152       new RegionCountSkewCostFunction(conf),
153       new MoveCostFunction(conf),
154       localityCost,
155       new TableSkewCostFunction(conf),
156       regionLoadFunctions[0],
157       regionLoadFunctions[1],
158       regionLoadFunctions[2],
159       regionLoadFunctions[3],
160     };
161   }
162 
163   @Override
164   protected void setSlop(Configuration conf) {
165     this.slop = conf.getFloat("hbase.regions.slop", 0.001F);
166   }
167 
168   @Override
169   public void setClusterStatus(ClusterStatus st) {
170     super.setClusterStatus(st);
171     regionFinder.setClusterStatus(st);
172     this.clusterStatus = st;
173     updateRegionLoad();
174     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
175       cost.setClusterStatus(st);
176     }
177   }
178 
179   @Override
180   public void setMasterServices(MasterServices masterServices) {
181     super.setMasterServices(masterServices);
182     this.regionFinder.setServices(masterServices);
183     this.localityCost.setServices(masterServices);
184     this.localityPicker.setServices(masterServices);
185 
186   }
187 
188   /**
189    * Given the cluster state this will try and approach an optimal balance. This
190    * should always approach the optimal state given enough steps.
191    */
192   @Override
193   public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
194     if (!needsBalance(new ClusterLoadState(clusterState))) {
195       return null;
196     }
197 
198     long startTime = EnvironmentEdgeManager.currentTimeMillis();
199 
200     // Keep track of servers to iterate through them.
201     Cluster cluster = new Cluster(clusterState, loads, regionFinder);
202     double currentCost = computeCost(cluster, Double.MAX_VALUE);
203 
204     double initCost = currentCost;
205     double newCost = currentCost;
206 
207     long computedMaxSteps = Math.min(this.maxSteps,
208         ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers));
209     // Perform a stochastic walk to see if we can get a good fit.
210     long step;
211     for (step = 0; step < computedMaxSteps; step++) {
212       int pickerIdx = RANDOM.nextInt(pickers.length);
213       RegionPicker p = pickers[pickerIdx];
214       Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> picks = p.pick(cluster);
215 
216       int leftServer = picks.getFirst().getFirst();
217       int leftRegion = picks.getFirst().getSecond();
218       int rightServer = picks.getSecond().getFirst();
219       int rightRegion = picks.getSecond().getSecond();
220 
221       // We couldn't find a server
222       if (rightServer < 0 || leftServer < 0) {
223         continue;
224       }
225 
226       // We randomly picked to do nothing.
227       if (leftRegion < 0 && rightRegion < 0) {
228         continue;
229       }
230 
231       cluster.moveOrSwapRegion(leftServer,
232           rightServer,
233           leftRegion,
234           rightRegion);
235 
236       newCost = computeCost(cluster, currentCost);
237       // Should this be kept?
238       if (newCost < currentCost) {
239         currentCost = newCost;
240       } else {
241         // Put things back the way they were before.
242         // TODO: undo by remembering old values, using an UndoAction class
243         cluster.moveOrSwapRegion(leftServer,
244             rightServer,
245             rightRegion,
246             leftRegion);
247       }
248 
249       if (EnvironmentEdgeManager.currentTimeMillis() - startTime >
250           maxRunningTime) {
251         break;
252       }
253     }
254 
255     long endTime = EnvironmentEdgeManager.currentTimeMillis();
256 
257     metricsBalancer.balanceCluster(endTime - startTime);
258 
259     if (initCost > currentCost) {
260       List<RegionPlan> plans = createRegionPlans(cluster);
261       if (LOG.isDebugEnabled()) {
262         LOG.debug("Finished computing new load balance plan.  Computation took "
263             + (endTime - startTime) + "ms to try " + step
264             + " different iterations.  Found a solution that moves "
265             + plans.size() + " regions; Going from a computed cost of "
266             + initCost + " to a new cost of " + currentCost);
267       }
268       return plans;
269     }
270     if (LOG.isDebugEnabled()) {
271       LOG.debug("Could not find a better load balance plan.  Tried "
272           + step + " different configurations in " + (endTime - startTime)
273           + "ms, and did not find anything with a computed cost less than " + initCost);
274     }
275     return null;
276   }
277 
278   /**
279    * Create all of the RegionPlan's needed to move from the initial cluster state to the desired
280    * state.
281    *
282    * @param cluster The state of the cluster
283    * @return List of RegionPlan's that represent the moves needed to get to desired final state.
284    */
285   private List<RegionPlan> createRegionPlans(Cluster cluster) {
286     List<RegionPlan> plans = new LinkedList<RegionPlan>();
287     for (int regionIndex = 0;
288          regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
289       int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
290       int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
291 
292       if (initialServerIndex != newServerIndex) {
293         HRegionInfo region = cluster.regions[regionIndex];
294         ServerName initialServer = cluster.servers[initialServerIndex];
295         ServerName newServer = cluster.servers[newServerIndex];
296 
297         if (LOG.isTraceEnabled()) {
298           LOG.trace("Moving Region " + region.getEncodedName() + " from server "
299               + initialServer.getHostname() + " to " + newServer.getHostname());
300         }
301         RegionPlan rp = new RegionPlan(region, initialServer, newServer);
302         plans.add(rp);
303       }
304     }
305     return plans;
306   }
307 
308   /**
309    * Store the current region loads.
310    */
311   private synchronized void updateRegionLoad() {
312     // We create a new hashmap so that regions that are no longer there are removed.
313     // However we temporarily need the old loads so we can use them to keep the rolling average.
314     Map<String, Deque<RegionLoad>> oldLoads = loads;
315     loads = new HashMap<String, Deque<RegionLoad>>();
316 
317     for (ServerName sn : clusterStatus.getServers()) {
318       ServerLoad sl = clusterStatus.getLoad(sn);
319       if (sl == null) {
320         continue;
321       }
322       for (Entry<byte[], RegionLoad> entry : sl.getRegionsLoad().entrySet()) {
323         Deque<RegionLoad> rLoads = oldLoads.get(Bytes.toString(entry.getKey()));
324         if (rLoads == null) {
325           // There was nothing there
326           rLoads = new ArrayDeque<RegionLoad>();
327         } else if (rLoads.size() >= 15) {
328           rLoads.remove();
329         }
330         rLoads.add(entry.getValue());
331         loads.put(Bytes.toString(entry.getKey()), rLoads);
332 
333       }
334     }
335 
336     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
337       cost.setLoads(loads);
338     }
339   }
340 
341 
342   /**
343    * This is the main cost function.  It will compute a cost associated with a proposed cluster
344    * state.  All different costs will be combined with their multipliers to produce a double cost.
345    *
346    * @param cluster The state of the cluster
347    * @param previousCost the previous cost. This is used as an early out.
348    * @return a double of a cost associated with the proposed cluster state.  This cost is an
349    *         aggregate of all individual cost functions.
350    */
351   protected double computeCost(Cluster cluster, double previousCost) {
352     double total = 0;
353 
354     for (CostFunction c:costFunctions) {
355       if (c.getMultiplier() <= 0) {
356         continue;
357       }
358 
359       total += c.getMultiplier() * c.cost(cluster);
360 
361       if (total > previousCost) {
362         return total;
363       }
364     }
365     return total;
366   }
367 
368   abstract static class RegionPicker {
369     abstract Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> pick(Cluster cluster);
370 
371     /**
372      * From a list of regions pick a random one. Null can be returned which
373      * {@link StochasticLoadBalancer#balanceCluster(Map)} recognize as signal to try a region move
374      * rather than swap.
375      *
376      * @param cluster        The state of the cluster
377      * @param server         index of the server
378      * @param chanceOfNoSwap Chance that this will decide to try a move rather
379      *                       than a swap.
380      * @return a random {@link HRegionInfo} or null if an asymmetrical move is
381      *         suggested.
382      */
383     protected int pickRandomRegion(Cluster cluster, int server, double chanceOfNoSwap) {
384       // Check to see if this is just a move.
385       if (cluster.regionsPerServer[server].length == 0 || RANDOM.nextFloat() < chanceOfNoSwap) {
386         // signal a move only.
387         return -1;
388       }
389       int rand = RANDOM.nextInt(cluster.regionsPerServer[server].length);
390       return cluster.regionsPerServer[server][rand];
391 
392     }
393     protected int pickRandomServer(Cluster cluster) {
394       if (cluster.numServers < 1) {
395         return -1;
396       }
397 
398       return RANDOM.nextInt(cluster.numServers);
399     }
400     protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
401       if (cluster.numServers < 2) {
402         return -1;
403       }
404       while (true) {
405         int otherServerIndex = pickRandomServer(cluster);
406         if (otherServerIndex != serverIndex) {
407           return otherServerIndex;
408         }
409       }
410     }
411 
412     protected Pair<Integer, Integer> pickRandomRegions(Cluster cluster,
413                                                        int thisServer,
414                                                        int otherServer) {
415       if (thisServer < 0 || otherServer < 0) {
416         return new Pair<Integer, Integer>(-1, -1);
417       }
418 
419       // Decide who is most likely to need another region
420       int thisRegionCount = cluster.getNumRegions(thisServer);
421       int otherRegionCount = cluster.getNumRegions(otherServer);
422 
423       // Assign the chance based upon the above
424       double thisChance = (thisRegionCount > otherRegionCount) ? 0 : 0.5;
425       double otherChance = (thisRegionCount <= otherRegionCount) ? 0 : 0.5;
426 
427       int thisRegion = pickRandomRegion(cluster, thisServer, thisChance);
428       int otherRegion = pickRandomRegion(cluster, otherServer, otherChance);
429 
430       return new Pair<Integer, Integer>(thisRegion, otherRegion);
431     }
432   }
433 
434   static class RandomRegionPicker extends RegionPicker {
435 
436     @Override
437     Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> pick(Cluster cluster) {
438 
439       int thisServer = pickRandomServer(cluster);
440 
441       // Pick the other server
442       int otherServer = pickOtherRandomServer(cluster, thisServer);
443 
444       Pair<Integer, Integer> regions = pickRandomRegions(cluster, thisServer, otherServer);
445 
446       return new Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>(
447           new Pair<Integer, Integer>(thisServer, regions.getFirst()),
448           new Pair<Integer, Integer>(otherServer, regions.getSecond())
449 
450       );
451     }
452 
453   }
454 
455   public static class LoadPicker extends RegionPicker {
456 
457     @Override
458     Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> pick(Cluster cluster) {
459       cluster.sortServersByRegionCount();
460       int thisServer = pickMostLoadedServer(cluster, -1);
461       int otherServer = pickLeastLoadedServer(cluster, thisServer);
462 
463       Pair<Integer, Integer> regions = pickRandomRegions(cluster, thisServer, otherServer);
464       return new Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>(
465           new Pair<Integer, Integer>(thisServer, regions.getFirst()),
466           new Pair<Integer, Integer>(otherServer, regions.getSecond())
467 
468       );
469     }
470 
471     private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
472       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
473 
474       int index = 0;
475       while (servers[index] == null || servers[index] == thisServer) {
476         index++;
477         if (index == servers.length) {
478           return -1;
479         }
480       }
481       return servers[index];
482     }
483 
484     private int pickMostLoadedServer(final Cluster cluster, int thisServer) {
485       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
486 
487       int index = servers.length - 1;
488       while (servers[index] == null || servers[index] == thisServer) {
489         index--;
490         if (index < 0) {
491           return -1;
492         }
493       }
494       return servers[index];
495     }
496   }
497 
498   static class LocalityBasedPicker extends RegionPicker {
499 
500     private MasterServices masterServices;
501 
502     LocalityBasedPicker(MasterServices masterServices) {
503       this.masterServices = masterServices;
504     }
505 
506     @Override
507     Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> pick(Cluster cluster) {
508       if (this.masterServices == null) {
509         return new Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>(
510             new Pair<Integer, Integer>(-1,-1),
511             new Pair<Integer, Integer>(-1,-1)
512         );
513       }
514       // Pick a random region server
515       int thisServer = pickRandomServer(cluster);
516 
517       // Pick a random region on this server
518       int thisRegion = pickRandomRegion(cluster, thisServer, 0.0f);
519 
520       if (thisRegion == -1) {
521         return new Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>(
522             new Pair<Integer, Integer>(-1,-1),
523             new Pair<Integer, Integer>(-1,-1)
524         );
525       }
526 
527       // Pick the server with the highest locality
528       int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion);
529 
530       // pick an region on the other server to potentially swap
531       int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f);
532 
533       return new Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>(
534           new Pair<Integer, Integer>(thisServer,thisRegion),
535           new Pair<Integer, Integer>(otherServer,otherRegion)
536       );
537     }
538 
539     private int pickHighestLocalityServer(Cluster cluster, int thisServer, int thisRegion) {
540       int[] regionLocations = cluster.regionLocations[thisRegion];
541 
542       if (regionLocations == null || regionLocations.length <= 1) {
543         return pickOtherRandomServer(cluster, thisServer);
544       }
545 
546       int idx = 0;
547 
548       while (idx < regionLocations.length && regionLocations[idx] == thisServer) {
549         idx++;
550       }
551 
552       return idx;
553     }
554 
555     void setServices(MasterServices services) {
556       this.masterServices = services;
557     }
558   }
559 
560   /**
561    * Base class of StochasticLoadBalancer's Cost Functions.
562    */
563   public abstract static class CostFunction {
564 
565     private float multiplier = 0;
566     private Configuration conf;
567 
568     CostFunction(Configuration c) {
569       this.conf = c;
570     }
571 
572     float getMultiplier() {
573       return multiplier;
574     }
575 
576     void setMultiplier(float m) {
577       this.multiplier = m;
578     }
579 
580     abstract double cost(Cluster cluster);
581 
582     /**
583      * Function to compute a scaled cost using {@link DescriptiveStatistics}. It
584      * assumes that this is a zero sum set of costs.  It assumes that the worst case
585      * possible is all of the elements in one region server and the rest having 0.
586      *
587      * @param stats the costs
588      * @return a scaled set of costs.
589      */
590     protected double costFromArray(double[] stats) {
591       double totalCost = 0;
592       double total = getSum(stats);
593       double mean = total/((double)stats.length);
594       double count = stats.length;
595 
596       // Compute max as if all region servers had 0 and one had the sum of all costs.  This must be
597       // a zero sum cost for this to make sense.
598       // TODO: Should we make this sum of square errors?
599       double max = ((count - 1) * mean) + (total - mean);
600       for (double n : stats) {
601         double diff = Math.abs(mean - n);
602         totalCost += diff;
603       }
604 
605       double scaled =  scale(0, max, totalCost);
606       return scaled;
607     }
608 
609 
610 
611     private double getSum(double[] stats) {
612       double total = 0;
613       for(double s:stats) {
614         total += s;
615       }
616       return total;
617     }
618 
619     /**
620      * Scale the value between 0 and 1.
621      *
622      * @param min   Min value
623      * @param max   The Max value
624      * @param value The value to be scaled.
625      * @return The scaled value.
626      */
627     protected double scale(double min, double max, double value) {
628       if (max == 0 || value == 0) {
629         return 0;
630       }
631 
632       return Math.max(0d, Math.min(1d, (value - min) / max));
633     }
634   }
635 
636   /**
637    * Given the starting state of the regions and a potential ending state
638    * compute cost based upon the number of regions that have moved.
639    */
640   public static class MoveCostFunction extends CostFunction {
641     private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost";
642     private static final String MAX_MOVES_PERCENT_KEY =
643         "hbase.master.balancer.stochastic.maxMovePercent";
644     private static final float DEFAULT_MOVE_COST = 100;
645     private static final int DEFAULT_MAX_MOVES = 600;
646     private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
647     private static final int META_MOVE_COST_MULT = 10;
648 
649     private final float maxMovesPercent;
650 
651     MoveCostFunction(Configuration conf) {
652       super(conf);
653 
654       // Move cost multiplier should be the same cost or higher than the rest of the costs to ensure
655       // that large benefits are need to overcome the cost of a move.
656       this.setMultiplier(conf.getFloat(MOVE_COST_KEY, DEFAULT_MOVE_COST));
657       // What percent of the number of regions a single run of the balancer can move.
658       maxMovesPercent = conf.getFloat(MAX_MOVES_PERCENT_KEY, DEFAULT_MAX_MOVE_PERCENT);
659     }
660 
661     @Override
662     double cost(Cluster cluster) {
663       // Try and size the max number of Moves, but always be prepared to move some.
664       int maxMoves = Math.max((int) (cluster.numRegions * maxMovesPercent),
665           DEFAULT_MAX_MOVES);
666 
667       double moveCost = cluster.numMovedRegions;
668 
669       // Don't let this single balance move more than the max moves.
670       // This allows better scaling to accurately represent the actual cost of a move.
671       if (moveCost > maxMoves) {
672         return 1000000;   // return a number much greater than any of the other cost
673       }
674 
675       // hbase:meta region is special
676       if (cluster.numMovedMetaRegions > 0) {
677         // assume each hbase:meta region move costs 10 times
678         moveCost += META_MOVE_COST_MULT * cluster.numMovedMetaRegions;
679       }
680 
681       return scale(0, cluster.numRegions + META_MOVE_COST_MULT, moveCost);
682     }
683   }
684 
685   /**
686    * Compute the cost of a potential cluster state from skew in number of
687    * regions on a cluster.
688    */
689   public static class RegionCountSkewCostFunction extends CostFunction {
690     private static final String REGION_COUNT_SKEW_COST_KEY =
691         "hbase.master.balancer.stochastic.regionCountCost";
692     private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
693 
694     private double[] stats = null;
695 
696     RegionCountSkewCostFunction(Configuration conf) {
697       super(conf);
698       // Load multiplier should be the greatest as it is the most general way to balance data.
699       this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
700     }
701 
702     @Override
703     double cost(Cluster cluster) {
704       if (stats == null || stats.length != cluster.numServers) {
705         stats = new double[cluster.numServers];
706       }
707 
708       for (int i =0; i < cluster.numServers; i++) {
709         stats[i] = cluster.regionsPerServer[i].length;
710       }
711       return costFromArray(stats);
712     }
713   }
714 
715   /**
716    * Compute the cost of a potential cluster configuration based upon how evenly
717    * distributed tables are.
718    */
719   public static class TableSkewCostFunction extends CostFunction {
720 
721     private static final String TABLE_SKEW_COST_KEY =
722         "hbase.master.balancer.stochastic.tableSkewCost";
723     private static final float DEFAULT_TABLE_SKEW_COST = 35;
724 
725     TableSkewCostFunction(Configuration conf) {
726       super(conf);
727       this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
728     }
729 
730     @Override
731     double cost(Cluster cluster) {
732       double max = cluster.numRegions;
733       double min = cluster.numRegions / cluster.numServers;
734       double value = 0;
735 
736       for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) {
737         value += cluster.numMaxRegionsPerTable[i];
738       }
739 
740       return scale(min, max, value);
741     }
742   }
743 
744 
745   /**
746    * Compute a cost of a potential cluster configuration based upon where
747    * {@link org.apache.hadoop.hbase.regionserver.StoreFile}s are located.
748    */
749   public static class LocalityCostFunction extends CostFunction {
750 
751     private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost";
752     private static final float DEFAULT_LOCALITY_COST = 25;
753 
754     private MasterServices services;
755 
756     LocalityCostFunction(Configuration conf, MasterServices srv) {
757       super(conf);
758       this.setMultiplier(conf.getFloat(LOCALITY_COST_KEY, DEFAULT_LOCALITY_COST));
759       this.services = srv;
760     }
761 
762     void setServices(MasterServices srvc) {
763       this.services = srvc;
764     }
765 
766     @Override
767     double cost(Cluster cluster) {
768       double max = 0;
769       double cost = 0;
770 
771       // If there's no master so there's no way anything else works.
772       if (this.services == null) {
773         return cost;
774       }
775 
776       for (int i = 0; i < cluster.regionLocations.length; i++) {
777         max += 1;
778         int serverIndex = cluster.regionIndexToServerIndex[i];
779         int[] regionLocations = cluster.regionLocations[i];
780 
781         // If we can't find where the data is getTopBlock returns null.
782         // so count that as being the best possible.
783         if (regionLocations == null) {
784           continue;
785         }
786 
787         int index = -1;
788         for (int j = 0; j < regionLocations.length; j++) {
789           if (regionLocations[j] >= 0 && regionLocations[j] == serverIndex) {
790             index = j;
791             break;
792           }
793         }
794 
795         if (index < 0) {
796           cost += 1;
797         } else {
798           cost += (double) index / (double) regionLocations.length;
799         }
800       }
801       return scale(0, max, cost);
802     }
803   }
804 
805   /**
806    * Base class the allows writing costs functions from rolling average of some
807    * number from RegionLoad.
808    */
809   public abstract static class CostFromRegionLoadFunction extends CostFunction {
810 
811     private ClusterStatus clusterStatus = null;
812     private Map<String, Deque<RegionLoad>> loads = null;
813     private double[] stats = null;
814     CostFromRegionLoadFunction(Configuration conf) {
815       super(conf);
816     }
817 
818     void setClusterStatus(ClusterStatus status) {
819       this.clusterStatus = status;
820     }
821 
822     void setLoads(Map<String, Deque<RegionLoad>> l) {
823       this.loads = l;
824     }
825 
826 
827     double cost(Cluster cluster) {
828       if (clusterStatus == null || loads == null) {
829         return 0;
830       }
831 
832       if (stats == null || stats.length != cluster.numServers) {
833         stats = new double[cluster.numServers];
834       }
835 
836       for (int i =0; i < stats.length; i++) {
837         //Cost this server has from RegionLoad
838         long cost = 0;
839 
840         // for every region on this server get the rl
841         for(int regionIndex:cluster.regionsPerServer[i]) {
842           Collection<RegionLoad> regionLoadList =  cluster.regionLoads[regionIndex];
843 
844           // Now if we found a region load get the type of cost that was requested.
845           if (regionLoadList != null) {
846             cost += getRegionLoadCost(regionLoadList);
847           }
848         }
849 
850         // Add the total cost to the stats.
851         stats[i] = cost;
852       }
853 
854       // Now return the scaled cost from data held in the stats object.
855       return costFromArray(stats);
856     }
857 
858     protected double getRegionLoadCost(Collection<RegionLoad> regionLoadList) {
859       double cost = 0;
860 
861       for (RegionLoad rl : regionLoadList) {
862         double toAdd = getCostFromRl(rl);
863 
864         if (cost == 0) {
865           cost = toAdd;
866         } else {
867           cost = (.5 * cost) + (.5 * toAdd);
868         }
869       }
870 
871       return cost;
872     }
873 
874     protected abstract double getCostFromRl(RegionLoad rl);
875   }
876 
877   /**
878    * Compute the cost of total number of read requests  The more unbalanced the higher the
879    * computed cost will be.  This uses a rolling average of regionload.
880    */
881 
882   public static class ReadRequestCostFunction extends CostFromRegionLoadFunction {
883 
884     private static final String READ_REQUEST_COST_KEY =
885         "hbase.master.balancer.stochastic.readRequestCost";
886     private static final float DEFAULT_READ_REQUEST_COST = 5;
887 
888     ReadRequestCostFunction(Configuration conf) {
889       super(conf);
890       this.setMultiplier(conf.getFloat(READ_REQUEST_COST_KEY, DEFAULT_READ_REQUEST_COST));
891     }
892 
893 
894     protected double getCostFromRl(RegionLoad rl) {
895       return rl.getReadRequestsCount();
896     }
897   }
898 
899   /**
900    * Compute the cost of total number of write requests.  The more unbalanced the higher the
901    * computed cost will be.  This uses a rolling average of regionload.
902    */
903   public static class WriteRequestCostFunction extends CostFromRegionLoadFunction {
904 
905     private static final String WRITE_REQUEST_COST_KEY =
906         "hbase.master.balancer.stochastic.writeRequestCost";
907     private static final float DEFAULT_WRITE_REQUEST_COST = 5;
908 
909     WriteRequestCostFunction(Configuration conf) {
910       super(conf);
911       this.setMultiplier(conf.getFloat(WRITE_REQUEST_COST_KEY, DEFAULT_WRITE_REQUEST_COST));
912     }
913 
914     protected double getCostFromRl(RegionLoad rl) {
915       return rl.getWriteRequestsCount();
916     }
917   }
918 
919   /**
920    * Compute the cost of total memstore size.  The more unbalanced the higher the
921    * computed cost will be.  This uses a rolling average of regionload.
922    */
923   public static class MemstoreSizeCostFunction extends CostFromRegionLoadFunction {
924 
925     private static final String MEMSTORE_SIZE_COST_KEY =
926         "hbase.master.balancer.stochastic.memstoreSizeCost";
927     private static final float DEFAULT_MEMSTORE_SIZE_COST = 5;
928 
929     MemstoreSizeCostFunction(Configuration conf) {
930       super(conf);
931       this.setMultiplier(conf.getFloat(MEMSTORE_SIZE_COST_KEY, DEFAULT_MEMSTORE_SIZE_COST));
932     }
933 
934     @Override
935     protected double getCostFromRl(RegionLoad rl) {
936       return rl.getMemStoreSizeMB();
937     }
938   }
939   /**
940    * Compute the cost of total open storefiles size.  The more unbalanced the higher the
941    * computed cost will be.  This uses a rolling average of regionload.
942    */
943   public static class StoreFileCostFunction extends CostFromRegionLoadFunction {
944 
945     private static final String STOREFILE_SIZE_COST_KEY =
946         "hbase.master.balancer.stochastic.storefileSizeCost";
947     private static final float DEFAULT_STOREFILE_SIZE_COST = 5;
948 
949     StoreFileCostFunction(Configuration conf) {
950       super(conf);
951       this.setMultiplier(conf.getFloat(STOREFILE_SIZE_COST_KEY, DEFAULT_STOREFILE_SIZE_COST));
952     }
953 
954     @Override
955     protected double getCostFromRl(RegionLoad rl) {
956       return rl.getStorefileSizeMB();
957     }
958   }
959 }