1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master.balancer;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Random;
29 import java.util.Set;
30 import java.util.TreeMap;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.hadoop.classification.InterfaceAudience;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.hbase.TableName;
37 import org.apache.hadoop.hbase.HConstants;
38 import org.apache.hadoop.hbase.HRegionInfo;
39 import org.apache.hadoop.hbase.ServerName;
40 import org.apache.hadoop.hbase.catalog.CatalogTracker;
41 import org.apache.hadoop.hbase.catalog.MetaEditor;
42 import org.apache.hadoop.hbase.catalog.MetaReader;
43 import org.apache.hadoop.hbase.catalog.MetaReader.Visitor;
44 import org.apache.hadoop.hbase.client.Put;
45 import org.apache.hadoop.hbase.client.Result;
46 import org.apache.hadoop.hbase.master.RackManager;
47 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
48 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
49 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.FavoredNodes;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
52 import org.apache.hadoop.hbase.util.Pair;
53
54 import com.google.protobuf.InvalidProtocolBufferException;
55
56
57
58
59
60
61
62
63 @InterfaceAudience.Private
64 public class FavoredNodeAssignmentHelper {
65 private static final Log LOG = LogFactory.getLog(FavoredNodeAssignmentHelper.class);
66 private RackManager rackManager;
67 private Map<String, List<ServerName>> rackToRegionServerMap;
68 private List<String> uniqueRackList;
69 private Map<ServerName, String> regionServerToRackMap;
70 private Random random;
71 private List<ServerName> servers;
72 public static final byte [] FAVOREDNODES_QUALIFIER = Bytes.toBytes("fn");
73 public final static short FAVORED_NODES_NUM = 3;
74
75 public FavoredNodeAssignmentHelper(final List<ServerName> servers, Configuration conf) {
76 this(servers, new RackManager(conf));
77 }
78
79 public FavoredNodeAssignmentHelper(final List<ServerName> servers,
80 final RackManager rackManager) {
81 this.servers = servers;
82 this.rackManager = rackManager;
83 this.rackToRegionServerMap = new HashMap<String, List<ServerName>>();
84 this.regionServerToRackMap = new HashMap<ServerName, String>();
85 this.uniqueRackList = new ArrayList<String>();
86 this.random = new Random();
87 }
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103 public static Map<HRegionInfo, ServerName> fullScan(
104 CatalogTracker catalogTracker, final Set<TableName> disabledTables,
105 final boolean excludeOfflinedSplitParents,
106 FavoredNodeLoadBalancer balancer) throws IOException {
107 final Map<HRegionInfo, ServerName> regions =
108 new TreeMap<HRegionInfo, ServerName>();
109 final Map<HRegionInfo, ServerName[]> favoredNodesMap =
110 new HashMap<HRegionInfo, ServerName[]>();
111 Visitor v = new Visitor() {
112 @Override
113 public boolean visit(Result r) throws IOException {
114 if (r == null || r.isEmpty()) return true;
115 Pair<HRegionInfo, ServerName> region = HRegionInfo.getHRegionInfoAndServerName(r);
116 HRegionInfo hri = region.getFirst();
117 if (hri == null) return true;
118 if (hri.getTableName() == null) return true;
119 if (disabledTables.contains(
120 hri.getTableName())) return true;
121
122 if (excludeOfflinedSplitParents && hri.isSplitParent()) return true;
123 regions.put(hri, region.getSecond());
124 byte[] favoredNodes = r.getValue(HConstants.CATALOG_FAMILY,
125 FavoredNodeAssignmentHelper.FAVOREDNODES_QUALIFIER);
126 if (favoredNodes != null) {
127 ServerName[] favoredServerList =
128 FavoredNodeAssignmentHelper.getFavoredNodesList(favoredNodes);
129 favoredNodesMap.put(hri, favoredServerList);
130 }
131 return true;
132 }
133 };
134 MetaReader.fullScan(catalogTracker, v);
135 balancer.noteFavoredNodes(favoredNodesMap);
136 return regions;
137 }
138
139 public static void updateMetaWithFavoredNodesInfo(
140 Map<HRegionInfo, List<ServerName>> regionToFavoredNodes,
141 CatalogTracker catalogTracker) throws IOException {
142 List<Put> puts = new ArrayList<Put>();
143 for (Map.Entry<HRegionInfo, List<ServerName>> entry : regionToFavoredNodes.entrySet()) {
144 Put put = makePutFromRegionInfo(entry.getKey(), entry.getValue());
145 if (put != null) {
146 puts.add(put);
147 }
148 }
149 MetaEditor.putsToMetaTable(catalogTracker, puts);
150 LOG.info("Added " + puts.size() + " regions in META");
151 }
152
153
154
155
156
157
158
159
160 static Put makePutFromRegionInfo(HRegionInfo regionInfo, List<ServerName>favoredNodeList)
161 throws IOException {
162 Put put = null;
163 if (favoredNodeList != null) {
164 put = MetaEditor.makePutFromRegionInfo(regionInfo);
165 byte[] favoredNodes = getFavoredNodes(favoredNodeList);
166 put.add(HConstants.CATALOG_FAMILY, FAVOREDNODES_QUALIFIER,
167 EnvironmentEdgeManager.currentTimeMillis(), favoredNodes);
168 LOG.info("Create the region " + regionInfo.getRegionNameAsString() +
169 " with favored nodes " + favoredNodes);
170 }
171 return put;
172 }
173
174
175
176
177
178
179 public static ServerName[] getFavoredNodesList(byte[] favoredNodes)
180 throws InvalidProtocolBufferException {
181 FavoredNodes f = FavoredNodes.parseFrom(favoredNodes);
182 List<HBaseProtos.ServerName> protoNodes = f.getFavoredNodeList();
183 ServerName[] servers = new ServerName[protoNodes.size()];
184 int i = 0;
185 for (HBaseProtos.ServerName node : protoNodes) {
186 servers[i++] = ProtobufUtil.toServerName(node);
187 }
188 return servers;
189 }
190
191
192
193
194
195 static byte[] getFavoredNodes(List<ServerName> serverAddrList) {
196 FavoredNodes.Builder f = FavoredNodes.newBuilder();
197 for (ServerName s : serverAddrList) {
198 HBaseProtos.ServerName.Builder b = HBaseProtos.ServerName.newBuilder();
199 b.setHostName(s.getHostname());
200 b.setPort(s.getPort());
201 b.setStartCode(s.getStartcode());
202 f.addFavoredNode(b.build());
203 }
204 return f.build().toByteArray();
205 }
206
207
208
209
210
211
212
213
214
215
216 void placePrimaryRSAsRoundRobin(Map<ServerName, List<HRegionInfo>> assignmentMap,
217 Map<HRegionInfo, ServerName> primaryRSMap, List<HRegionInfo> regions) {
218 List<String> rackList = new ArrayList<String>(rackToRegionServerMap.size());
219 rackList.addAll(rackToRegionServerMap.keySet());
220 int rackIndex = random.nextInt(rackList.size());
221 int maxRackSize = 0;
222 for (Map.Entry<String,List<ServerName>> r : rackToRegionServerMap.entrySet()) {
223 if (r.getValue().size() > maxRackSize) {
224 maxRackSize = r.getValue().size();
225 }
226 }
227 int numIterations = 0;
228 int firstServerIndex = random.nextInt(maxRackSize);
229
230 int serverIndex = firstServerIndex;
231 for (HRegionInfo regionInfo : regions) {
232 List<ServerName> currentServerList;
233 String rackName;
234 while (true) {
235 rackName = rackList.get(rackIndex);
236 numIterations++;
237
238 currentServerList = rackToRegionServerMap.get(rackName);
239
240 if (serverIndex >= currentServerList.size()) {
241 if (numIterations % rackList.size() == 0) {
242 if (++serverIndex >= maxRackSize) serverIndex = 0;
243 }
244 if ((++rackIndex) >= rackList.size()) {
245 rackIndex = 0;
246 }
247 } else break;
248 }
249
250
251 ServerName currentServer = currentServerList.get(serverIndex);
252
253
254 primaryRSMap.put(regionInfo, currentServer);
255 List<HRegionInfo> regionsForServer = assignmentMap.get(currentServer);
256 if (regionsForServer == null) {
257 regionsForServer = new ArrayList<HRegionInfo>();
258 assignmentMap.put(currentServer, regionsForServer);
259 }
260 regionsForServer.add(regionInfo);
261
262
263 if (numIterations % rackList.size() == 0) {
264 ++serverIndex;
265 }
266 if ((++rackIndex) >= rackList.size()) {
267 rackIndex = 0;
268 }
269 }
270 }
271
272 Map<HRegionInfo, ServerName[]> placeSecondaryAndTertiaryRS(
273 Map<HRegionInfo, ServerName> primaryRSMap) {
274 Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap =
275 new HashMap<HRegionInfo, ServerName[]>();
276 for (Map.Entry<HRegionInfo, ServerName> entry : primaryRSMap.entrySet()) {
277
278 HRegionInfo regionInfo = entry.getKey();
279 ServerName primaryRS = entry.getValue();
280 try {
281
282 ServerName[] favoredNodes;
283
284 String primaryRack = rackManager.getRack(primaryRS);
285
286 if (getTotalNumberOfRacks() == 1) {
287 favoredNodes = singleRackCase(regionInfo, primaryRS, primaryRack);
288 } else {
289 favoredNodes = multiRackCase(regionInfo, primaryRS, primaryRack);
290 }
291 if (favoredNodes != null) {
292 secondaryAndTertiaryMap.put(regionInfo, favoredNodes);
293 LOG.debug("Place the secondary and tertiary region server for region "
294 + regionInfo.getRegionNameAsString());
295 }
296 } catch (Exception e) {
297 LOG.warn("Cannot place the favored nodes for region " +
298 regionInfo.getRegionNameAsString() + " because " + e);
299 continue;
300 }
301 }
302 return secondaryAndTertiaryMap;
303 }
304
305 private ServerName[] singleRackCase(HRegionInfo regionInfo,
306 ServerName primaryRS,
307 String primaryRack) throws IOException {
308
309
310 List<ServerName> serverList = getServersFromRack(primaryRack);
311 if (serverList.size() <= 2) {
312
313
314 return null;
315 } else {
316
317
318 Set<ServerName> serverSkipSet = new HashSet<ServerName>();
319 serverSkipSet.add(primaryRS);
320
321
322 ServerName secondaryRS = getOneRandomServer(primaryRack, serverSkipSet);
323
324 serverSkipSet.add(secondaryRS);
325
326
327 ServerName tertiaryRS =
328 getOneRandomServer(primaryRack, serverSkipSet);
329
330 if (secondaryRS == null || tertiaryRS == null) {
331 LOG.error("Cannot place the secondary and terinary" +
332 "region server for region " +
333 regionInfo.getRegionNameAsString());
334 }
335
336 ServerName[] favoredNodes = new ServerName[2];
337 favoredNodes[0] = secondaryRS;
338 favoredNodes[1] = tertiaryRS;
339 return favoredNodes;
340 }
341 }
342
343 private ServerName[] multiRackCase(HRegionInfo regionInfo,
344 ServerName primaryRS,
345 String primaryRack) throws IOException {
346
347
348
349
350
351 Set<String> rackSkipSet = new HashSet<String>();
352 rackSkipSet.add(primaryRack);
353 ServerName[] favoredNodes = new ServerName[2];
354 String secondaryRack = getOneRandomRack(rackSkipSet);
355 List<ServerName> serverList = getServersFromRack(secondaryRack);
356 if (serverList.size() >= 2) {
357
358
359
360 ServerName secondaryRS = getOneRandomServer(secondaryRack);
361
362
363 Set<ServerName> skipServerSet = new HashSet<ServerName>();
364 skipServerSet.add(secondaryRS);
365
366 ServerName tertiaryRS = getOneRandomServer(secondaryRack, skipServerSet);
367
368 if (secondaryRS == null || tertiaryRS == null) {
369 LOG.error("Cannot place the secondary and terinary" +
370 "region server for region " +
371 regionInfo.getRegionNameAsString());
372 }
373
374 favoredNodes[0] = secondaryRS;
375 favoredNodes[1] = tertiaryRS;
376 } else {
377
378
379 favoredNodes[0] = getOneRandomServer(secondaryRack);
380
381
382 if (getTotalNumberOfRacks() == 2) {
383
384 Set<ServerName> serverSkipSet = new HashSet<ServerName>();
385 serverSkipSet.add(primaryRS);
386 favoredNodes[1] = getOneRandomServer(primaryRack, serverSkipSet);
387 } else {
388
389 rackSkipSet.add(secondaryRack);
390 String tertiaryRandomRack = getOneRandomRack(rackSkipSet);
391 favoredNodes[1] = getOneRandomServer(tertiaryRandomRack);
392 }
393 }
394 return favoredNodes;
395 }
396
397 boolean canPlaceFavoredNodes() {
398 int serverSize = this.regionServerToRackMap.size();
399 return (serverSize >= FAVORED_NODES_NUM);
400 }
401
402 void initialize() {
403 for (ServerName sn : this.servers) {
404 String rackName = this.rackManager.getRack(sn);
405 List<ServerName> serverList = this.rackToRegionServerMap.get(rackName);
406 if (serverList == null) {
407 serverList = new ArrayList<ServerName>();
408
409 this.uniqueRackList.add(rackName);
410 }
411 if (!serverList.contains(sn)) {
412 serverList.add(sn);
413 this.rackToRegionServerMap.put(rackName, serverList);
414 this.regionServerToRackMap.put(sn, rackName);
415 }
416 }
417 }
418
419 private int getTotalNumberOfRacks() {
420 return this.uniqueRackList.size();
421 }
422
423 private List<ServerName> getServersFromRack(String rack) {
424 return this.rackToRegionServerMap.get(rack);
425 }
426
427 private ServerName getOneRandomServer(String rack,
428 Set<ServerName> skipServerSet) throws IOException {
429 if(rack == null) return null;
430 List<ServerName> serverList = this.rackToRegionServerMap.get(rack);
431 if (serverList == null) return null;
432
433
434 if (skipServerSet != null && serverList.size() <= skipServerSet.size()) {
435 throw new IOException("Cannot randomly pick another random server");
436 }
437
438 ServerName randomServer;
439 do {
440 int randomIndex = random.nextInt(serverList.size());
441 randomServer = serverList.get(randomIndex);
442 } while (skipServerSet != null && skipServerSet.contains(randomServer));
443
444 return randomServer;
445 }
446
447 private ServerName getOneRandomServer(String rack) throws IOException {
448 return this.getOneRandomServer(rack, null);
449 }
450
451 private String getOneRandomRack(Set<String> skipRackSet) throws IOException {
452 if (skipRackSet == null || uniqueRackList.size() <= skipRackSet.size()) {
453 throw new IOException("Cannot randomly pick another random server");
454 }
455
456 String randomRack;
457 do {
458 int randomIndex = random.nextInt(this.uniqueRackList.size());
459 randomRack = this.uniqueRackList.get(randomIndex);
460 } while (skipRackSet.contains(randomRack));
461
462 return randomRack;
463 }
464 }