1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master.handler;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.NavigableMap;
27 import java.util.Set;
28
29 import org.apache.commons.logging.Log;
30 import org.apache.commons.logging.LogFactory;
31 import org.apache.hadoop.hbase.HConstants;
32 import org.apache.hadoop.hbase.HRegionInfo;
33 import org.apache.hadoop.hbase.Server;
34 import org.apache.hadoop.hbase.ServerName;
35 import org.apache.hadoop.hbase.catalog.CatalogTracker;
36 import org.apache.hadoop.hbase.catalog.MetaEditor;
37 import org.apache.hadoop.hbase.catalog.MetaReader;
38 import org.apache.hadoop.hbase.client.Result;
39 import org.apache.hadoop.hbase.executor.EventHandler;
40 import org.apache.hadoop.hbase.master.AssignmentManager;
41 import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
42 import org.apache.hadoop.hbase.master.DeadServer;
43 import org.apache.hadoop.hbase.master.MasterServices;
44 import org.apache.hadoop.hbase.master.ServerManager;
45 import org.apache.hadoop.hbase.util.Bytes;
46 import org.apache.hadoop.hbase.util.Pair;
47 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
48 import org.apache.zookeeper.KeeperException;
49
50
51
52
53
54
55 public class ServerShutdownHandler extends EventHandler {
56 private static final Log LOG = LogFactory.getLog(ServerShutdownHandler.class);
57 private final ServerName serverName;
58 private final MasterServices services;
59 private final DeadServer deadServers;
60 private final boolean shouldSplitHlog;
61
/**
 * Creates a handler registered under the {@code M_SERVER_SHUTDOWN} event type.
 *
 * @param server the server hosting this handler
 * @param services master services used while processing
 * @param deadServers tracker of dead servers
 * @param serverName name of the server that went down
 * @param shouldSplitHlog whether the server's logs should be split during processing
 */
public ServerShutdownHandler(
    final Server server,
    final MasterServices services,
    final DeadServer deadServers,
    final ServerName serverName,
    final boolean shouldSplitHlog) {
  this(server, services, deadServers, serverName,
      EventType.M_SERVER_SHUTDOWN, shouldSplitHlog);
}
68
69 ServerShutdownHandler(final Server server, final MasterServices services,
70 final DeadServer deadServers, final ServerName serverName, EventType type,
71 final boolean shouldSplitHlog) {
72 super(server, type);
73 this.serverName = serverName;
74 this.server = server;
75 this.services = services;
76 this.deadServers = deadServers;
77 if (!this.deadServers.contains(this.serverName)) {
78 LOG.warn(this.serverName + " is NOT in deadservers; it should be!");
79 }
80 this.shouldSplitHlog = shouldSplitHlog;
81 }
82
83 @Override
84 public String getInformativeName() {
85 if (serverName != null) {
86 return this.getClass().getSimpleName() + " for " + serverName;
87 } else {
88 return super.getInformativeName();
89 }
90 }
91
92
93
94
95
96
97
98
99
100
101
102
103 private void verifyAndAssignRoot()
104 throws InterruptedException, IOException, KeeperException {
105 long timeout = this.server.getConfiguration().
106 getLong("hbase.catalog.verification.timeout", 1000);
107 if (!this.server.getCatalogTracker().verifyRootRegionLocation(timeout)) {
108 this.services.getAssignmentManager().assignRoot();
109 } else if (serverName.equals(server.getCatalogTracker().getRootLocation())) {
110 throw new IOException("-ROOT- is onlined on the dead server "
111 + serverName);
112 } else {
113 LOG.info("Skip assigning -ROOT-, because it is online on the "
114 + server.getCatalogTracker().getRootLocation());
115 }
116 }
117
118
119
120
121
122 private void verifyAndAssignRootWithRetries() throws IOException {
123 int iTimes = this.server.getConfiguration().getInt(
124 "hbase.catalog.verification.retries", 10);
125
126 long waitTime = this.server.getConfiguration().getLong(
127 "hbase.catalog.verification.timeout", 1000);
128
129 int iFlag = 0;
130 while (true) {
131 try {
132 verifyAndAssignRoot();
133 break;
134 } catch (KeeperException e) {
135 this.server.abort("In server shutdown processing, assigning root", e);
136 throw new IOException("Aborting", e);
137 } catch (Exception e) {
138 if (iFlag >= iTimes) {
139 this.server.abort("verifyAndAssignRoot failed after" + iTimes
140 + " times retries, aborting", e);
141 throw new IOException("Aborting", e);
142 }
143 try {
144 Thread.sleep(waitTime);
145 } catch (InterruptedException e1) {
146 LOG.warn("Interrupted when is the thread sleep", e1);
147 Thread.currentThread().interrupt();
148 throw new IOException("Interrupted", e1);
149 }
150 iFlag++;
151 }
152 }
153 }
154
155
156
157
158 boolean isCarryingRoot() {
159 return false;
160 }
161
162
163
164
165 boolean isCarryingMeta() {
166 return false;
167 }
168
169 @Override
170 public String toString() {
171 String name = "UnknownServerName";
172 if(server != null && server.getServerName() != null) {
173 name = server.getServerName().toString();
174 }
175 return getClass().getSimpleName() + "-" + name + "-" + getSeqid();
176 }
177
178 @Override
179 public void process() throws IOException {
180 final ServerName serverName = this.serverName;
181 try {
182 try {
183 if (this.shouldSplitHlog) {
184 LOG.info("Splitting logs for " + serverName);
185 this.services.getMasterFileSystem().splitLog(serverName);
186 } else {
187 LOG.info("Skipping log splitting for " + serverName);
188 }
189 } catch (IOException ioe) {
190 this.services.getExecutorService().submit(this);
191 this.deadServers.add(serverName);
192 throw new IOException("failed log splitting for " +
193 serverName + ", will retry", ioe);
194 }
195
196
197 if (isCarryingRoot()) {
198
199
200 if (this.services.getAssignmentManager().isCarryingRoot(serverName)) {
201 LOG.info("Server " + serverName
202 + " was carrying ROOT. Trying to assign.");
203 this.services.getAssignmentManager().regionOffline(
204 HRegionInfo.ROOT_REGIONINFO);
205 verifyAndAssignRootWithRetries();
206 } else {
207 LOG.info("ROOT has been assigned to otherwhere, skip assigning.");
208 }
209 }
210
211
212 if (isCarryingMeta()) {
213
214
215 if (this.services.getAssignmentManager().isCarryingMeta(serverName)) {
216 LOG.info("Server " + serverName
217 + " was carrying META. Trying to assign.");
218 this.services.getAssignmentManager().regionOffline(
219 HRegionInfo.FIRST_META_REGIONINFO);
220 this.services.getAssignmentManager().assignMeta();
221 } else {
222 LOG.info("META has been assigned to otherwhere, skip assigning.");
223 }
224 }
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243 if (isCarryingRoot() || isCarryingMeta()) {
244 this.services.getExecutorService().submit(new ServerShutdownHandler(
245 this.server, this.services, this.deadServers, serverName, false));
246 this.deadServers.add(serverName);
247 return;
248 }
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266 NavigableMap<HRegionInfo, Result> hris = null;
267 while (!this.server.isStopped()) {
268 try {
269 this.server.getCatalogTracker().waitForMeta();
270 hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
271 this.serverName);
272 break;
273 } catch (InterruptedException e) {
274 Thread.currentThread().interrupt();
275 throw new IOException("Interrupted", e);
276 } catch (IOException ioe) {
277 LOG.info("Received exception accessing META during server shutdown of " +
278 serverName + ", retrying META read", ioe);
279 }
280 }
281
282
283
284 Pair<Set<HRegionInfo>, List<RegionState>> p = this.services.getAssignmentManager()
285 .processServerShutdown(this.serverName);
286 Set<HRegionInfo> ritsGoingToServer = p.getFirst();
287 List<RegionState> ritsOnServer = p.getSecond();
288
289 List<HRegionInfo> regionsToAssign = getRegionsToAssign(hris, ritsOnServer, ritsGoingToServer);
290 for (HRegionInfo hri : ritsGoingToServer) {
291 if (!this.services.getAssignmentManager().isRegionAssigned(hri)) {
292 if (!regionsToAssign.contains(hri)) {
293 regionsToAssign.add(hri);
294 }
295 }
296 }
297 for (HRegionInfo hri : regionsToAssign) {
298 this.services.getAssignmentManager().assign(hri, true);
299 }
300 LOG.info(regionsToAssign.size() + " regions which were planned to open on " + this.serverName
301 + " have been re-assigned.");
302 } finally {
303 this.deadServers.finish(serverName);
304 }
305 LOG.info("Finished processing of shutdown of " + serverName);
306 }
307
308
309
310
311
312
313
314
315
316 private List<HRegionInfo> getRegionsToAssign(final NavigableMap<HRegionInfo, Result> metaHRIs,
317 final List<RegionState> ritsOnServer, Set<HRegionInfo> ritsGoingToServer) throws IOException {
318 List<HRegionInfo> toAssign = new ArrayList<HRegionInfo>();
319
320
321 if (metaHRIs == null || metaHRIs.isEmpty()) return toAssign;
322
323
324
325
326
327
328
329
330 for (RegionState rs : ritsOnServer) {
331 if (!rs.isClosing() && !rs.isPendingClose() && !rs.isSplitting()) {
332 LOG.debug("Removed " + rs.getRegion().getRegionNameAsString()
333 + " from list of regions to assign because region state: " + rs.getState());
334 metaHRIs.remove(rs.getRegion());
335 }
336 }
337
338 for (Map.Entry<HRegionInfo, Result> e : metaHRIs.entrySet()) {
339 RegionState rit = services.getAssignmentManager().getRegionsInTransition().get(
340 e.getKey().getEncodedName());
341 AssignmentManager assignmentManager = this.services.getAssignmentManager();
342 if (processDeadRegion(e.getKey(), e.getValue(), assignmentManager,
343 this.server.getCatalogTracker())) {
344 ServerName addressFromAM = assignmentManager.getRegionServerOfRegion(e.getKey());
345 if (rit != null && !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()
346 && !ritsGoingToServer.contains(e.getKey())) {
347
348
349 LOG.info("Skip assigning region " + rit.toString());
350 } else if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
351 LOG.debug("Skip assigning region " + e.getKey().getRegionNameAsString()
352 + " because it has been opened in " + addressFromAM.getServerName());
353 ritsGoingToServer.remove(e.getKey());
354 } else {
355 if (rit != null) {
356
357 try {
358 LOG.info("Reassigning region with rs =" + rit + " and deleting zk node if exists");
359 ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), e.getKey());
360 } catch (KeeperException ke) {
361 this.server.abort("Unexpected ZK exception deleting unassigned node " + e.getKey(),
362 ke);
363 return null;
364 }
365 }
366 toAssign.add(e.getKey());
367 }
368 } else if (rit != null && (rit.isSplitting() || rit.isSplit())) {
369
370
371
372
373
374 HRegionInfo region = rit.getRegion();
375 AssignmentManager am = assignmentManager;
376 am.regionOffline(region);
377 ritsGoingToServer.remove(region);
378 }
379
380
381
382
383 toAssign = checkForDisablingOrDisabledTables(ritsGoingToServer, toAssign, rit, assignmentManager);
384 }
385 return toAssign;
386 }
387
388 private List<HRegionInfo> checkForDisablingOrDisabledTables(Set<HRegionInfo> regionsFromRIT,
389 List<HRegionInfo> toAssign, RegionState rit, AssignmentManager assignmentManager) {
390 if (rit == null) {
391 return toAssign;
392 }
393 if (!rit.isClosing() && !rit.isPendingClose()) {
394 return toAssign;
395 }
396 if (!assignmentManager.getZKTable().isDisablingOrDisabledTable(
397 rit.getRegion().getTableNameAsString())) {
398 return toAssign;
399 }
400 HRegionInfo hri = rit.getRegion();
401 AssignmentManager am = assignmentManager;
402 am.deleteClosingOrClosedNode(hri);
403 am.regionOffline(hri);
404
405 toAssign.remove(hri);
406 regionsFromRIT.remove(hri);
407 return toAssign;
408 }
409
410
411
412
413
414
415
416
417
418
419
420 public static boolean processDeadRegion(HRegionInfo hri, Result result,
421 AssignmentManager assignmentManager, CatalogTracker catalogTracker)
422 throws IOException {
423 boolean tablePresent = assignmentManager.getZKTable().isTablePresent(
424 hri.getTableNameAsString());
425 if (!tablePresent) {
426 LOG.info("The table " + hri.getTableNameAsString()
427 + " was deleted. Hence not proceeding.");
428 return false;
429 }
430
431 boolean disabled = assignmentManager.getZKTable().isDisabledTable(
432 hri.getTableNameAsString());
433 if (disabled){
434 LOG.info("The table " + hri.getTableNameAsString()
435 + " was disabled. Hence not proceeding.");
436 return false;
437 }
438 if (hri.isOffline() && hri.isSplit()) {
439 LOG.debug("Offlined and split region " + hri.getRegionNameAsString() +
440 "; checking daughter presence");
441 if (MetaReader.getRegion(catalogTracker, hri.getRegionName()) == null) {
442 return false;
443 }
444 fixupDaughters(result, assignmentManager, catalogTracker);
445 return false;
446 }
447 boolean disabling = assignmentManager.getZKTable().isDisablingTable(
448 hri.getTableNameAsString());
449 if (disabling) {
450 LOG.info("The table " + hri.getTableNameAsString()
451 + " is disabled. Hence not assigning region" + hri.getEncodedName());
452 return false;
453 }
454 return true;
455 }
456
457
458
459
460
461
462
463
464 public static int fixupDaughters(final Result result,
465 final AssignmentManager assignmentManager,
466 final CatalogTracker catalogTracker)
467 throws IOException {
468 int fixedA = fixupDaughter(result, HConstants.SPLITA_QUALIFIER,
469 assignmentManager, catalogTracker);
470 int fixedB = fixupDaughter(result, HConstants.SPLITB_QUALIFIER,
471 assignmentManager, catalogTracker);
472 return fixedA + fixedB;
473 }
474
475
476
477
478
479
480
481
482 static int fixupDaughter(final Result result, final byte [] qualifier,
483 final AssignmentManager assignmentManager,
484 final CatalogTracker catalogTracker)
485 throws IOException {
486 HRegionInfo daughter =
487 MetaReader.parseHRegionInfoFromCatalogResult(result, qualifier);
488 if (daughter == null) return 0;
489 if (isDaughterMissing(catalogTracker, daughter)) {
490 LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
491 MetaEditor.addDaughter(catalogTracker, daughter, null);
492
493
494
495
496
497
498 assignmentManager.assign(daughter, true);
499 return 1;
500 } else {
501 LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present");
502 }
503 return 0;
504 }
505
506
507
508
509
510
511
512
513 private static boolean isDaughterMissing(final CatalogTracker catalogTracker,
514 final HRegionInfo daughter) throws IOException {
515 FindDaughterVisitor visitor = new FindDaughterVisitor(daughter);
516
517
518
519
520
521
522 byte [] startrow = daughter.getRegionName();
523 MetaReader.fullScan(catalogTracker, visitor, startrow);
524 return !visitor.foundDaughter();
525 }
526
527
528
529
530
531 static class FindDaughterVisitor implements MetaReader.Visitor {
532 private final HRegionInfo daughter;
533 private boolean found = false;
534
535 FindDaughterVisitor(final HRegionInfo daughter) {
536 this.daughter = daughter;
537 }
538
539
540
541
542 boolean foundDaughter() {
543 return this.found;
544 }
545
546 @Override
547 public boolean visit(Result r) throws IOException {
548 HRegionInfo hri =
549 MetaReader.parseHRegionInfoFromCatalogResult(r, HConstants.REGIONINFO_QUALIFIER);
550 if (hri == null) {
551 LOG.warn("No serialized HRegionInfo in " + r);
552 return true;
553 }
554 byte [] value = r.getValue(HConstants.CATALOG_FAMILY,
555 HConstants.SERVER_QUALIFIER);
556
557 if (value == null) return false;
558
559
560 if (!Bytes.equals(daughter.getTableName(),
561 hri.getTableName())) {
562
563 return false;
564 }
565
566 if (!Bytes.equals(daughter.getStartKey(), hri.getStartKey())) {
567 return false;
568 }
569
570
571
572 this.found = true;
573 return false;
574 }
575 }
576 }