View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import org.apache.commons.lang.mutable.MutableInt;
21  import org.apache.commons.logging.Log;
22  import org.apache.commons.logging.LogFactory;
23  import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.hbase.ClusterStatus;
27  import org.apache.hadoop.hbase.HRegionInfo;
28  import org.apache.hadoop.hbase.ServerLoad;
29  import org.apache.hadoop.hbase.RegionLoad;
30  import org.apache.hadoop.hbase.ServerName;
31  import org.apache.hadoop.hbase.master.MasterServices;
32  import org.apache.hadoop.hbase.master.RegionPlan;
33  import org.apache.hadoop.hbase.util.Bytes;
34  
35  import java.util.ArrayList;
36  import java.util.HashMap;
37  import java.util.LinkedList;
38  import java.util.List;
39  import java.util.Map;
40  import java.util.Map.Entry;
41  import java.util.Random;
42  
43  /**
44   * <p>This is a best effort load balancer. Given a Cost function F(C) => x It will
45   * randomly try and mutate the cluster to Cprime. If F(Cprime) < F(C) then the
46   * new cluster state becomes the plan. It includes costs functions to compute the cost of:</p>
47   * <ul>
48   * <li>Region Load</li>
49   * <li>Table Load</li>
50   * <li>Data Locality</li>
51   * <li>Memstore Sizes</li>
52   * <li>Storefile Sizes</li>
53   * </ul>
54   *
55   *
56   * <p>Every cost function returns a number between 0 and 1 inclusive; where 0 is the lowest cost
57   * best solution, and 1 is the highest possible cost and the worst solution.  The computed costs are
58   * scaled by their respective multipliers:</p>
59   *
60   * <ul>
61   *   <li>hbase.master.balancer.stochastic.regionLoadCost</li>
62   *   <li>hbase.master.balancer.stochastic.moveCost</li>
63   *   <li>hbase.master.balancer.stochastic.tableLoadCost</li>
64   *   <li>hbase.master.balancer.stochastic.localityCost</li>
65   *   <li>hbase.master.balancer.stochastic.memstoreSizeCost</li>
66   *   <li>hbase.master.balancer.stochastic.storefileSizeCost</li>
67   * </ul>
68   *
69   * <p>In addition to the above configurations, the balancer can be tuned by the following
70   * configuration values:</p>
71   * <ul>
72   *   <li>hbase.master.balancer.stochastic.maxMoveRegions which
73   *   controls what the max number of regions that can be moved in a single invocation of this
74   *   balancer.</li>
75   *   <li>hbase.master.balancer.stochastic.stepsPerRegion is the coefficient by which the number of
76   *   regions is multiplied to try and get the number of times the balancer will
77   *   mutate all servers.</li>
78   *   <li>hbase.master.balancer.stochastic.maxSteps which controls the maximum number of times that
79   *   the balancer will try and mutate all the servers. The balancer will use the minimum of this
80   *   value and the above computation.</li>
81   * </ul>
82   *
83   * <p>This balancer is best used with hbase.master.loadbalance.bytable set to false
84   * so that the balancer gets the full picture of all loads on the cluster.</p>
85   */
86  @InterfaceAudience.Private
87  public class StochasticLoadBalancer extends BaseLoadBalancer {
88  
89    private static final String STOREFILE_SIZE_COST_KEY =
90        "hbase.master.balancer.stochastic.storefileSizeCost";
91    private static final String MEMSTORE_SIZE_COST_KEY =
92        "hbase.master.balancer.stochastic.memstoreSizeCost";
93    private static final String WRITE_REQUEST_COST_KEY =
94        "hbase.master.balancer.stochastic.writeRequestCost";
95    private static final String READ_REQUEST_COST_KEY =
96        "hbase.master.balancer.stochastic.readRequestCost";
97    private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost";
98    private static final String TABLE_LOAD_COST_KEY =
99        "hbase.master.balancer.stochastic.tableLoadCost";
100   private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost";
101   private static final String REGION_LOAD_COST_KEY =
102       "hbase.master.balancer.stochastic.regionLoadCost";
103   private static final String STEPS_PER_REGION_KEY =
104       "hbase.master.balancer.stochastic.stepsPerRegion";
105   private static final String MAX_STEPS_KEY = "hbase.master.balancer.stochastic.maxSteps";
106   private static final String MAX_MOVES_KEY = "hbase.master.balancer.stochastic.maxMoveRegions";
107   private static final String KEEP_REGION_LOADS = "hbase.master.balancer.stochastic.numRegionLoadsToRemember";
108 
109   private static final Random RANDOM = new Random(System.currentTimeMillis());
110   private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class);
111   private final RegionLocationFinder regionFinder = new RegionLocationFinder();
112   private ClusterStatus clusterStatus = null;
113   private Map<String, List<RegionLoad>> loads = new HashMap<String, List<RegionLoad>>();
114 
115   // values are defaults
116   private int maxSteps = 15000;
117   private int stepsPerRegion = 110;
118   private int maxMoves = 600;
119   private int numRegionLoadsToRemember = 15;
120   private float loadMultiplier = 55;
121   private float moveCostMultiplier = 5;
122   private float tableMultiplier = 5;
123   private float localityMultiplier = 5;
124   private float readRequestMultiplier = 0;
125   private float writeRequestMultiplier = 0;
126   private float memStoreSizeMultiplier = 5;
127   private float storeFileSizeMultiplier = 5;
128 
129 
130   @Override
131   public void setConf(Configuration conf) {
132     super.setConf(conf);
133     regionFinder.setConf(conf);
134 
135     maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);
136     maxMoves = conf.getInt(MAX_MOVES_KEY, maxMoves);
137     stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
138 
139     numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);
140 
141     // Load multiplier should be the greatest as it is the most general way to balance data.
142     loadMultiplier = conf.getFloat(REGION_LOAD_COST_KEY, loadMultiplier);
143 
144     // Move cost multiplier should be the same cost or higer than the rest of the costs to ensure
145     // that two costs must get better to justify a move cost.
146     moveCostMultiplier = conf.getFloat(MOVE_COST_KEY, moveCostMultiplier);
147 
148     // These are the added costs so that the stochastic load balancer can get a little bit smarter
149     // about where to move regions.
150     tableMultiplier = conf.getFloat(TABLE_LOAD_COST_KEY, tableMultiplier);
151     localityMultiplier = conf.getFloat(LOCALITY_COST_KEY, localityMultiplier);
152     memStoreSizeMultiplier = conf.getFloat(MEMSTORE_SIZE_COST_KEY, memStoreSizeMultiplier);
153     storeFileSizeMultiplier = conf.getFloat(STOREFILE_SIZE_COST_KEY, storeFileSizeMultiplier);
154     readRequestMultiplier = conf.getFloat(READ_REQUEST_COST_KEY, readRequestMultiplier);
155     writeRequestMultiplier = conf.getFloat(WRITE_REQUEST_COST_KEY, writeRequestMultiplier);
156   }
157 
158   @Override
159   public void setClusterStatus(ClusterStatus st) {
160     super.setClusterStatus(st);
161     regionFinder.setClusterStatus(st);
162     this.clusterStatus = st;
163     updateRegionLoad();
164   }
165 
166   @Override
167   public void setMasterServices(MasterServices masterServices) {
168     super.setMasterServices(masterServices);
169     this.services = masterServices;
170     this.regionFinder.setServices(masterServices);
171   }
172 
173   /**
174    * Given the cluster state this will try and approach an optimal balance. This
175    * should always approach the optimal state given enough steps.
176    */
177   @Override
178   public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
179 
180     // No need to balance a one node cluster.
181     if (clusterState.size() <= 1) {
182       LOG.debug("Skipping load balance as cluster has only one node.");
183       return null;
184     }
185 
186     long startTime = System.currentTimeMillis();
187 
188     // Keep track of servers to iterate through them.
189     List<ServerName> servers = new ArrayList<ServerName>(clusterState.keySet());
190     Map<HRegionInfo, ServerName> initialRegionMapping = createRegionMapping(clusterState);
191     double currentCost, newCost, initCost;
192     currentCost = newCost = initCost = computeCost(initialRegionMapping, clusterState);
193 
194     int computedMaxSteps =
195         Math.min(this.maxSteps, (initialRegionMapping.size() * this.stepsPerRegion));
196     // Perform a stochastic walk to see if we can get a good fit.
197     for (int step = 0; step < computedMaxSteps; step++) {
198 
199       // try and perform a mutation
200       for (ServerName leftServer : servers) {
201 
202         // What server are we going to be swapping regions with ?
203         ServerName rightServer = pickOtherServer(leftServer, servers);
204         if (rightServer == null) {
205           continue;
206         }
207 
208         // Get the regions.
209         List<HRegionInfo> leftRegionList = clusterState.get(leftServer);
210         List<HRegionInfo> rightRegionList = clusterState.get(rightServer);
211 
212         // Pick what regions to swap around.
213         // If we get a null for one then this isn't a swap just a move
214         HRegionInfo lRegion = pickRandomRegion(leftRegionList, 0);
215         HRegionInfo rRegion = pickRandomRegion(rightRegionList, 0.5);
216 
217         // We randomly picked to do nothing.
218         if (lRegion == null && rRegion == null) {
219           continue;
220         }
221 
222         if (rRegion != null) {
223           leftRegionList.add(rRegion);
224         }
225 
226         if (lRegion != null) {
227           rightRegionList.add(lRegion);
228         }
229 
230         newCost = computeCost(initialRegionMapping, clusterState);
231 
232         // Should this be kept?
233         if (newCost < currentCost) {
234           currentCost = newCost;
235         } else {
236           // Put things back the way they were before.
237           if (rRegion != null) {
238             leftRegionList.remove(rRegion);
239             rightRegionList.add(rRegion);
240           }
241 
242           if (lRegion != null) {
243             rightRegionList.remove(lRegion);
244             leftRegionList.add(lRegion);
245           }
246         }
247       }
248 
249     }
250 
251     long endTime = System.currentTimeMillis();
252 
253     if (initCost > currentCost) {
254       List<RegionPlan> plans = createRegionPlans(initialRegionMapping, clusterState);
255 
256       LOG.debug("Finished computing new laod balance plan.  Computation took "
257           + (endTime - startTime) + "ms to try " + computedMaxSteps
258           + " different iterations.  Found a solution that moves " + plans.size()
259           + " regions; Going from a computed cost of " + initCost + " to a new cost of "
260           + currentCost);
261       return plans;
262     }
263     LOG.debug("Could not find a better load balance plan.  Tried " + computedMaxSteps
264         + " different configurations in " + (endTime - startTime)
265         + "ms, and did not find anything with a computed cost less than " + initCost);
266     return null;
267   }
268 
269   /**
270    * Create all of the RegionPlan's needed to move from the initial cluster state to the desired
271    * state.
272    *
273    * @param initialRegionMapping Initial mapping of Region to Server
274    * @param clusterState The desired mapping of ServerName to Regions
275    * @return List of RegionPlan's that represent the moves needed to get to desired final state.
276    */
277   private List<RegionPlan> createRegionPlans(Map<HRegionInfo, ServerName> initialRegionMapping,
278                                              Map<ServerName, List<HRegionInfo>> clusterState) {
279     List<RegionPlan> plans = new LinkedList<RegionPlan>();
280 
281     for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
282       ServerName newServer = entry.getKey();
283 
284       for (HRegionInfo region : entry.getValue()) {
285         ServerName initialServer = initialRegionMapping.get(region);
286         if (!newServer.equals(initialServer)) {
287           LOG.trace("Moving Region " + region.getEncodedName() + " from server "
288               + initialServer.getHostname() + " to " + newServer.getHostname());
289           RegionPlan rp = new RegionPlan(region, initialServer, newServer);
290           plans.add(rp);
291         }
292       }
293     }
294     return plans;
295   }
296 
297   /**
298    * Create a map that will represent the initial location of regions on a
299    * {@link ServerName}
300    *
301    * @param clusterState starting state of the cluster and regions.
302    * @return A map of {@link HRegionInfo} to the {@link ServerName} that is
303    *         currently hosting that region
304    */
305   private Map<HRegionInfo, ServerName> createRegionMapping(
306       Map<ServerName, List<HRegionInfo>> clusterState) {
307     Map<HRegionInfo, ServerName> mapping = new HashMap<HRegionInfo, ServerName>();
308 
309     for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
310       for (HRegionInfo region : entry.getValue()) {
311         mapping.put(region, entry.getKey());
312       }
313     }
314     return mapping;
315   }
316 
317   /** Store the current region loads. */
318   private synchronized void updateRegionLoad() {
319 
320     //We create a new hashmap so that regions that are no longer there are removed.
321     //However we temporarily need the old loads so we can use them to keep the rolling average.
322     Map<String, List<RegionLoad>> oldLoads = loads;
323     loads = new HashMap<String, List<RegionLoad>>();
324 
325     for (ServerName sn : clusterStatus.getServers()) {
326       ServerLoad sl = clusterStatus.getLoad(sn);
327       if (sl == null) continue;
328       for (Entry<byte[], RegionLoad> entry : sl.getRegionsLoad().entrySet()) {
329         List<RegionLoad> rLoads = oldLoads.get(Bytes.toString(entry.getKey()));
330         if (rLoads != null) {
331 
332           //We're only going to keep 15.  So if there are that many already take the last 14
333           if (rLoads.size() >= numRegionLoadsToRemember) {
334             int numToRemove = 1 +  (rLoads.size() - numRegionLoadsToRemember);
335 
336             rLoads = rLoads.subList(numToRemove, rLoads.size());
337           }
338 
339         } else {
340           //There was nothing there
341           rLoads = new ArrayList<RegionLoad>();
342         }
343         rLoads.add(entry.getValue());
344         loads.put(Bytes.toString(entry.getKey()), rLoads);
345 
346       }
347     }
348   }
349 
350   /**
351    * From a list of regions pick a random one. Null can be returned which
352    * {@link StochasticLoadBalancer#balanceCluster(Map)} recognize as signal to try a region move
353    * rather than swap.
354    *
355    * @param regions        list of regions.
356    * @param chanceOfNoSwap Chance that this will decide to try a move rather
357    *                       than a swap.
358    * @return a random {@link HRegionInfo} or null if an asymmetrical move is
359    *         suggested.
360    */
361   private HRegionInfo pickRandomRegion(List<HRegionInfo> regions, double chanceOfNoSwap) {
362 
363     //Check to see if this is just a move.
364     if (regions.isEmpty() || RANDOM.nextFloat() < chanceOfNoSwap) {
365       //signal a move only.
366       return null;
367     }
368 
369     int count = 0;
370     HRegionInfo r = null;
371 
372     //We will try and find a region up to 10 times.  If we always
373     while (count < 10 && r == null ) {
374       count++;
375       r = regions.get(RANDOM.nextInt(regions.size()));
376 
377       // If this is a special region we always try not to move it.
378       // so clear out r.  try again
379       if (r.isMetaRegion()) {
380         r = null;
381       }
382     }
383     if (r != null) {
384       regions.remove(r);
385     }
386     return r;
387   }
388 
389   /**
390    * Given a server we will want to switch regions with another server. This
391    * function picks a random server from the list.
392    *
393    * @param server     Current Server. This server will never be the return value.
394    * @param allServers list of all server from which to pick
395    * @return random server. Null if no other servers were found.
396    */
397   private ServerName pickOtherServer(ServerName server, List<ServerName> allServers) {
398     ServerName s = null;
399     int count = 0;
400     while (count < 100 && (s == null || s.equals(server))) {
401       count++;
402       s = allServers.get(RANDOM.nextInt(allServers.size()));
403     }
404 
405     // If nothing but the current server was found return null.
406     return (s == null || s.equals(server)) ? null : s;
407   }
408 
409   /**
410    * This is the main cost function.  It will compute a cost associated with a proposed cluster
411    * state.  All different costs will be combined with their multipliers to produce a double cost.
412    *
413    * @param initialRegionMapping Map of where the regions started.
414    * @param clusterState Map of ServerName to list of regions.
415    * @return a double of a cost associated with the proposed
416    */
417   protected double computeCost(Map<HRegionInfo, ServerName> initialRegionMapping,
418                                Map<ServerName, List<HRegionInfo>> clusterState) {
419 
420     double moveCost = moveCostMultiplier * computeMoveCost(initialRegionMapping, clusterState);
421 
422     double regionCountSkewCost = loadMultiplier * computeSkewLoadCost(clusterState);
423     double tableSkewCost = tableMultiplier * computeTableSkewLoadCost(clusterState);
424     double localityCost =
425         localityMultiplier * computeDataLocalityCost(initialRegionMapping, clusterState);
426 
427     double memstoreSizeCost =
428         memStoreSizeMultiplier
429             * computeRegionLoadCost(clusterState, RegionLoadCostType.MEMSTORE_SIZE);
430     double storefileSizeCost =
431         storeFileSizeMultiplier
432             * computeRegionLoadCost(clusterState, RegionLoadCostType.STOREFILE_SIZE);
433 
434 
435     double readRequestCost =
436         readRequestMultiplier
437             * computeRegionLoadCost(clusterState, RegionLoadCostType.READ_REQUEST);
438     double writeRequestCost =
439         writeRequestMultiplier
440             * computeRegionLoadCost(clusterState, RegionLoadCostType.WRITE_REQUEST);
441 
442      double total =
443         moveCost + regionCountSkewCost + tableSkewCost + localityCost + memstoreSizeCost
444             + storefileSizeCost + readRequestCost + writeRequestCost;
445     LOG.trace("Computed weights for a potential balancing total = " + total + " moveCost = "
446         + moveCost + " regionCountSkewCost = " + regionCountSkewCost + " tableSkewCost = "
447         + tableSkewCost + " localityCost = " + localityCost + " memstoreSizeCost = "
448         + memstoreSizeCost + " storefileSizeCost = " + storefileSizeCost);
449     return total;
450   }
451 
452   /**
453    * Given the starting state of the regions and a potential ending state
454    * compute cost based upon the number of regions that have moved.
455    *
456    * @param initialRegionMapping The starting location of regions.
457    * @param clusterState         The potential new cluster state.
458    * @return The cost. Between 0 and 1.
459    */
460   double computeMoveCost(Map<HRegionInfo, ServerName> initialRegionMapping,
461                          Map<ServerName, List<HRegionInfo>> clusterState) {
462     float moveCost = 0;
463     for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
464       for (HRegionInfo region : entry.getValue()) {
465         if (initialRegionMapping.get(region) != entry.getKey()) {
466           moveCost += 1;
467         }
468       }
469     }
470 
471     //Don't let this single balance move more than the max moves.
472     //This allows better scaling to accurately represent the actual cost of a move.
473     if (moveCost > maxMoves) {
474       return 10000;   //return a number much greater than any of the other cost functions
475     }
476 
477     return scale(0, Math.min(maxMoves, initialRegionMapping.size()), moveCost);
478   }
479 
480   /**
481    * Compute the cost of a potential cluster state from skew in number of
482    * regions on a cluster
483    *
484    * @param clusterState The proposed cluster state
485    * @return The cost of region load imbalance.
486    */
487   double computeSkewLoadCost(Map<ServerName, List<HRegionInfo>> clusterState) {
488     DescriptiveStatistics stats = new DescriptiveStatistics();
489     for (List<HRegionInfo> regions : clusterState.values()) {
490       int size = regions.size();
491       stats.addValue(size);
492     }
493     return costFromStats(stats);
494   }
495 
496   /**
497    * Compute the cost of a potential cluster configuration based upon how evenly
498    * distributed tables are.
499    *
500    * @param clusterState Proposed cluster state.
501    * @return Cost of imbalance in table.
502    */
503   double computeTableSkewLoadCost(Map<ServerName, List<HRegionInfo>> clusterState) {
504 
505     Map<String, MutableInt> tableRegionsTotal = new HashMap<String, MutableInt>();
506     Map<String, MutableInt> tableRegionsOnCurrentServer = new HashMap<String, MutableInt>();
507     Map<String, Integer> tableCostSeenSoFar = new HashMap<String, Integer>();
508     // Go through everything per server
509     for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
510       tableRegionsOnCurrentServer.clear();
511 
512       // For all of the regions count how many are from each table
513       for (HRegionInfo region : entry.getValue()) {
514         String tableName = region.getTableNameAsString();
515 
516         // See if this table already has a count on this server
517         MutableInt regionsOnServerCount = tableRegionsOnCurrentServer.get(tableName);
518 
519         // If this is the first time we've seen this table on this server
520         // create a new mutable int.
521         if (regionsOnServerCount == null) {
522           regionsOnServerCount = new MutableInt(0);
523           tableRegionsOnCurrentServer.put(tableName, regionsOnServerCount);
524         }
525 
526         // Increment the count of how many regions from this table are host on
527         // this server
528         regionsOnServerCount.increment();
529 
530         // Now count the number of regions in this table.
531         MutableInt totalCount = tableRegionsTotal.get(tableName);
532 
533         // If this is the first region from this table create a new counter for
534         // this table.
535         if (totalCount == null) {
536           totalCount = new MutableInt(0);
537           tableRegionsTotal.put(tableName, totalCount);
538         }
539         totalCount.increment();
540       }
541 
542       // Now go through all of the tables we have seen and keep the max number
543       // of regions of this table a single region server is hosting.
544       for (Entry<String, MutableInt> currentServerEntry: tableRegionsOnCurrentServer.entrySet()) {
545         String tableName = currentServerEntry.getKey();
546         Integer thisCount = currentServerEntry.getValue().toInteger();
547         Integer maxCountSoFar = tableCostSeenSoFar.get(tableName);
548 
549         if (maxCountSoFar == null || thisCount.compareTo(maxCountSoFar) > 0) {
550           tableCostSeenSoFar.put(tableName, thisCount);
551         }
552       }
553     }
554 
555     double max = 0;
556     double min = 0;
557     double value = 0;
558 
559     // Compute the min, value, and max.
560     for (Entry<String, MutableInt> currentEntry : tableRegionsTotal.entrySet()) {
561       max += tableRegionsTotal.get(currentEntry.getKey()).doubleValue();
562       min += tableRegionsTotal.get(currentEntry.getKey()).doubleValue() / clusterState.size();
563       value += tableCostSeenSoFar.get(currentEntry.getKey()).doubleValue();
564     }
565     return scale(min, max, value);
566   }
567 
568   /**
569    * Compute a cost of a potential cluster configuration based upon where
570    * {@link org.apache.hadoop.hbase.regionserver.StoreFile}s are located.
571    *
572    * @param initialRegionMapping - not used
573    * @param clusterState The state of the cluster
574    * @return A cost between 0 and 1. 0 Means all regions are on the sever with
575    *         the most local store files.
576    */
577   double computeDataLocalityCost(Map<HRegionInfo, ServerName> initialRegionMapping,
578                                  Map<ServerName, List<HRegionInfo>> clusterState) {
579 
580     double max = 0;
581     double cost = 0;
582 
583     // If there's no master so there's no way anything else works.
584     if (this.services == null) return cost;
585 
586     for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
587       ServerName sn = entry.getKey();
588       for (HRegionInfo region : entry.getValue()) {
589 
590         max += 1;
591 
592         List<ServerName> dataOnServers = regionFinder.getTopBlockLocations(region);
593 
594         // If we can't find where the data is getTopBlock returns null.
595         // so count that as being the best possible.
596         if (dataOnServers == null) {
597           continue;
598         }
599 
600         int index = dataOnServers.indexOf(sn);
601         if (index < 0) {
602           cost += 1;
603         } else {
604           cost += (double) index / (double) dataOnServers.size();
605         }
606 
607       }
608     }
609     return scale(0, max, cost);
610   }
611 
612   /** The cost's that can be derived from RegionLoad */
613   private enum RegionLoadCostType {
614     READ_REQUEST, WRITE_REQUEST, MEMSTORE_SIZE, STOREFILE_SIZE
615   }
616 
617   /**
618    * Compute the cost of the current cluster state due to some RegionLoadCost type
619    *
620    * @param clusterState the cluster
621    * @param costType     what type of cost to consider
622    * @return the scaled cost.
623    */
624   private double computeRegionLoadCost(Map<ServerName, List<HRegionInfo>> clusterState,
625                                        RegionLoadCostType costType) {
626 
627     if (this.clusterStatus == null || this.loads == null || this.loads.size() == 0) return 0;
628 
629     DescriptiveStatistics stats = new DescriptiveStatistics();
630 
631     // For every server look at the cost of each region
632     for (List<HRegionInfo> regions : clusterState.values()) {
633       long cost = 0; //Cost this server has from RegionLoad
634 
635       // For each region
636       for (HRegionInfo region : regions) {
637         // Try and get the region using the regionNameAsString
638         List<RegionLoad> rl = loads.get(region.getRegionNameAsString());
639 
640         // That could have failed if the RegionLoad is using the other regionName
641         if (rl == null) {
642           // Try getting the region load using encoded name.
643           rl = loads.get(region.getEncodedName());
644         }
645         // Now if we found a region load get the type of cost that was requested.
646         if (rl != null) {
647           cost += getRegionLoadCost(rl, costType);
648         }
649       }
650 
651       // Add the total cost to the stats.
652       stats.addValue(cost);
653     }
654 
655     // No return the scaled cost from data held in the stats object.
656     return costFromStats(stats);
657   }
658 
659   /**
660    * Get the un-scaled cost from a RegionLoad
661    *
662    * @param regionLoadList   the Region load List
663    * @param type The type of cost to extract
664    * @return the double representing the cost
665    */
666   private double getRegionLoadCost(List<RegionLoad> regionLoadList, RegionLoadCostType type) {
667     double cost = 0;
668 
669     int size = regionLoadList.size();
670     for(int i =0; i< size; i++) {
671       RegionLoad rl = regionLoadList.get(i);
672       double toAdd = 0;
673       switch (type) {
674         case READ_REQUEST:
675           toAdd =  rl.getReadRequestsCount();
676           break;
677         case WRITE_REQUEST:
678           toAdd =  rl.getWriteRequestsCount();
679           break;
680         case MEMSTORE_SIZE:
681           toAdd =  rl.getMemStoreSizeMB();
682           break;
683         case STOREFILE_SIZE:
684           toAdd =  rl.getStorefileSizeMB();
685           break;
686         default:
687           assert false : "RegionLoad cost type not supported.";
688           return 0;
689       }
690 
691       if (cost == 0) {
692         cost = toAdd;
693       } else {
694         cost = (.5 * cost) + (.5 * toAdd);
695       }
696     }
697 
698     return cost;
699 
700   }
701 
702   /**
703    * Function to compute a scaled cost using {@link DescriptiveStatistics}. It
704    * assumes that this is a zero sum set of costs.  It assumes that the worst case
705    * possible is all of the elements in one region server and the rest having 0.
706    *
707    * @param stats the costs
708    * @return a scaled set of costs.
709    */
710   double costFromStats(DescriptiveStatistics stats) {
711     double totalCost = 0;
712     double mean = stats.getMean();
713 
714     //Compute max as if all region servers had 0 and one had the sum of all costs.  This must be
715     // a zero sum cost for this to make sense.
716     double max = ((stats.getN() - 1) * stats.getMean()) + (stats.getSum() - stats.getMean());
717     for (double n : stats.getValues()) {
718       totalCost += Math.abs(mean - n);
719 
720     }
721 
722     return scale(0, max, totalCost);
723   }
724 
725   /**
726    * Scale the value between 0 and 1.
727    *
728    * @param min   Min value
729    * @param max   The Max value
730    * @param value The value to be scaled.
731    * @return The scaled value.
732    */
733   private double scale(double min, double max, double value) {
734     if (max == 0 || value == 0) {
735       return 0;
736     }
737 
738     return Math.max(0d, Math.min(1d, (value - min) / max));
739   }
740 }