View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.balancer;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Arrays;
24  import java.util.HashMap;
25  import java.util.List;
26  import java.util.Map;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.hadoop.classification.InterfaceAudience;
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.ServerLoad;
34  import org.apache.hadoop.hbase.ServerName;
35  import org.apache.hadoop.hbase.master.LoadBalancer;
36  import org.apache.hadoop.hbase.master.RackManager;
37  import org.apache.hadoop.hbase.master.RegionPlan;
38  import org.apache.hadoop.hbase.master.balancer.FavoredNodes.Position;
39  import org.apache.hadoop.hbase.util.Pair;
40  
41  /**
42   * An implementation of the {@link LoadBalancer} that assigns favored nodes for
43   * each region. There is a Primary RegionServer that hosts the region, and then
44   * there is Secondary and Tertiary RegionServers. Currently, the favored nodes
45   * information is used in creating HDFS files - the Primary RegionServer passes
46   * the primary, secondary, tertiary node addresses as hints to the DistributedFileSystem
47   * API for creating files on the filesystem. These nodes are treated as hints by
48   * the HDFS to place the blocks of the file. This alleviates the problem to do with
49   * reading from remote nodes (since we can make the Secondary RegionServer as the new
50   * Primary RegionServer) after a region is recovered. This should help provide consistent
51   * read latencies for the regions even when their primary region servers die.
52   *
53   */
54  @InterfaceAudience.Private
55  public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
56    private static final Log LOG = LogFactory.getLog(FavoredNodeLoadBalancer.class);
57  
58    private FavoredNodes globalFavoredNodesAssignmentPlan;
59    private RackManager rackManager;
60  
61    @Override
62    public void setConf(Configuration conf) {
63      globalFavoredNodesAssignmentPlan = new FavoredNodes();
64      this.rackManager = new RackManager(conf);
65    }
66  
67    @Override
68    public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
69      //TODO. At a high level, this should look at the block locality per region, and
70      //then reassign regions based on which nodes have the most blocks of the region
71      //file(s). There could be different ways like minimize region movement, or, maximum
72      //locality, etc. The other dimension to look at is whether Stochastic loadbalancer
73      //can be integrated with this
74      throw new UnsupportedOperationException("Not implemented yet");
75    }
76  
77    @Override
78    public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
79        List<ServerName> servers) {
80      Map<ServerName, List<HRegionInfo>> assignmentMap;
81      try {
82        FavoredNodeAssignmentHelper assignmentHelper =
83            new FavoredNodeAssignmentHelper(servers, rackManager);
84        assignmentHelper.initialize();
85        if (!assignmentHelper.canPlaceFavoredNodes()) {
86          return super.roundRobinAssignment(regions, servers);
87        }
88        // Segregate the regions into two types:
89        // 1. The regions that have favored node assignment, and where at least
90        //    one of the favored node is still alive. In this case, try to adhere
91        //    to the current favored nodes assignment as much as possible - i.e.,
92        //    if the current primary is gone, then make the secondary or tertiary
93        //    as the new host for the region (based on their current load). 
94        //    Note that we don't change the favored
95        //    node assignments here (even though one or more favored node is currently
96        //    down). It is up to the balanceCluster to do this hard work. The HDFS
97        //    can handle the fact that some nodes in the favored nodes hint is down
98        //    It'd allocate some other DNs. In combination with stale settings for HDFS,
99        //    we should be just fine.
100       // 2. The regions that currently don't have favored node assignment. We will
101       //    need to come up with favored nodes assignments for them. The corner case
102       //    in (1) above is that all the nodes are unavailable and in that case, we
103       //    will note that this region doesn't have favored nodes.
104       Pair<Map<ServerName,List<HRegionInfo>>, List<HRegionInfo>> segregatedRegions =
105           segregateRegionsAndAssignRegionsWithFavoredNodes(regions, servers);
106       Map<ServerName,List<HRegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
107       List<HRegionInfo> regionsWithNoFavoredNodes = segregatedRegions.getSecond();
108       assignmentMap = new HashMap<ServerName, List<HRegionInfo>>();
109       roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regionsWithNoFavoredNodes,
110           servers);
111       // merge the assignment maps
112       assignmentMap.putAll(regionsWithFavoredNodesMap);
113     } catch (Exception ex) {
114       LOG.warn("Encountered exception while doing favored-nodes assignment " + ex +
115           " Falling back to regular assignment");
116       assignmentMap = super.roundRobinAssignment(regions, servers);
117     }
118     return assignmentMap;
119   }
120 
121   @Override
122   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
123     try {
124       FavoredNodeAssignmentHelper assignmentHelper =
125           new FavoredNodeAssignmentHelper(servers, rackManager);
126       assignmentHelper.initialize();
127       ServerName primary = super.randomAssignment(regionInfo, servers);
128       if (!assignmentHelper.canPlaceFavoredNodes()) {
129         return primary;
130       }
131       List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
132       // check if we have a favored nodes mapping for this region and if so, return
133       // a server from the favored nodes list if the passed 'servers' contains this
134       // server as well (available servers, that is)
135       if (favoredNodes != null) {
136         for (ServerName s : favoredNodes) {
137           ServerName serverWithLegitStartCode = availableServersContains(servers, s);
138           if (serverWithLegitStartCode != null) {
139             return serverWithLegitStartCode;
140           }
141         }
142       }
143       List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
144       regions.add(regionInfo);
145       Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>(1);
146       primaryRSMap.put(regionInfo, primary);
147       assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
148       return primary;
149     } catch (Exception ex) {
150       LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + ex +
151           " Falling back to regular assignment");
152       return super.randomAssignment(regionInfo, servers);
153     }
154   }
155 
156   private Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>> 
157   segregateRegionsAndAssignRegionsWithFavoredNodes(List<HRegionInfo> regions,
158       List<ServerName> availableServers) {
159     Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes =
160         new HashMap<ServerName, List<HRegionInfo>>(regions.size() / 2);
161     List<HRegionInfo> regionsWithNoFavoredNodes = new ArrayList<HRegionInfo>(regions.size()/2);
162     for (HRegionInfo region : regions) {
163       List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region);
164       ServerName primaryHost = null;
165       ServerName secondaryHost = null;
166       ServerName tertiaryHost = null;
167       if (favoredNodes != null) {
168         for (ServerName s : favoredNodes) {
169           ServerName serverWithLegitStartCode = availableServersContains(availableServers, s);
170           if (serverWithLegitStartCode != null) {
171             FavoredNodes.Position position =
172                 FavoredNodes.getFavoredServerPosition(favoredNodes, s);
173             if (Position.PRIMARY.equals(position)) {
174               primaryHost = serverWithLegitStartCode;
175             } else if (Position.SECONDARY.equals(position)) {
176               secondaryHost = serverWithLegitStartCode;
177             } else if (Position.TERTIARY.equals(position)) {
178               tertiaryHost = serverWithLegitStartCode;
179             }
180           }
181         }
182         assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region,
183               primaryHost, secondaryHost, tertiaryHost);
184       }
185       if (primaryHost == null && secondaryHost == null && tertiaryHost == null) {
186         //all favored nodes unavailable
187         regionsWithNoFavoredNodes.add(region);
188       }
189     }
190     return new Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>>(
191         assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
192   }
193 
194   // Do a check of the hostname and port and return the servername from the servers list
195   // that matched (the favoredNode will have a startcode of -1 but we want the real
196   // server with the legit startcode
197   private ServerName availableServersContains(List<ServerName> servers, ServerName favoredNode) {
198     for (ServerName server : servers) {
199       if (ServerName.isSameHostnameAndPort(favoredNode, server)) {
200         return server;
201       }
202     }
203     return null;
204   }
205 
206   private void assignRegionToAvailableFavoredNode(Map<ServerName,
207       List<HRegionInfo>> assignmentMapForFavoredNodes, HRegionInfo region, ServerName primaryHost,
208       ServerName secondaryHost, ServerName tertiaryHost) {
209     if (primaryHost != null) {
210       addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);
211     } else if (secondaryHost != null && tertiaryHost != null) {
212       // assign the region to the one with a lower load
213       // (both have the desired hdfs blocks)
214       ServerName s;
215       ServerLoad tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost);
216       ServerLoad secondaryLoad = super.services.getServerManager().getLoad(secondaryHost);
217       if (secondaryLoad.getLoad() < tertiaryLoad.getLoad()) {
218         s = secondaryHost;
219       } else {
220         s = tertiaryHost;
221       }
222       addRegionToMap(assignmentMapForFavoredNodes, region, s);
223     } else if (secondaryHost != null) {
224       addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost);
225     } else if (tertiaryHost != null) {
226       addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost);
227     }
228   }
229 
230   private void addRegionToMap(Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes,
231       HRegionInfo region, ServerName host) {
232     List<HRegionInfo> regionsOnServer = null;
233     if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) {
234       regionsOnServer = new ArrayList<HRegionInfo>();
235       assignmentMapForFavoredNodes.put(host, regionsOnServer);
236     }
237     regionsOnServer.add(region);
238   }
239 
240   public List<ServerName> getFavoredNodes(HRegionInfo regionInfo) {
241     return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
242   }
243 
244   private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper,
245       Map<ServerName, List<HRegionInfo>> assignmentMap,
246       List<HRegionInfo> regions, List<ServerName> servers) throws IOException {
247     Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>();
248     // figure the primary RSs
249     assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
250     assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
251   }
252 
253   private void assignSecondaryAndTertiaryNodesForRegion(
254       FavoredNodeAssignmentHelper assignmentHelper,
255       List<HRegionInfo> regions, Map<HRegionInfo, ServerName> primaryRSMap) {
256     // figure the secondary and tertiary RSs
257     Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap =
258         assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap);
259     // now record all the assignments so that we can serve queries later
260     for (HRegionInfo region : regions) {
261       // Store the favored nodes without startCode for the ServerName objects
262       // We don't care about the startcode; but only the hostname really
263       List<ServerName> favoredNodesForRegion = new ArrayList<ServerName>(3);
264       ServerName sn = primaryRSMap.get(region);
265       favoredNodesForRegion.add(new ServerName(sn.getHostname(), sn.getPort(),
266           ServerName.NON_STARTCODE));
267       ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(region);
268       if (secondaryAndTertiaryNodes != null) {
269         favoredNodesForRegion.add(new ServerName(secondaryAndTertiaryNodes[0].getHostname(),
270             secondaryAndTertiaryNodes[0].getPort(), ServerName.NON_STARTCODE));
271         favoredNodesForRegion.add(new ServerName(secondaryAndTertiaryNodes[1].getHostname(),
272             secondaryAndTertiaryNodes[1].getPort(), ServerName.NON_STARTCODE));
273       }
274       globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(region, favoredNodesForRegion);
275     }
276   }
277 
278   void noteFavoredNodes(final Map<HRegionInfo, ServerName[]> favoredNodesMap) {
279     for (Map.Entry<HRegionInfo, ServerName[]> entry : favoredNodesMap.entrySet()) {
280       // the META should already have favorednode ServerName objects without startcode
281       globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(entry.getKey(),
282           Arrays.asList(entry.getValue()));
283     }
284   }
285 }