1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver.handler;
20
21 import java.io.IOException;
22 import java.util.concurrent.atomic.AtomicBoolean;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.hbase.classification.InterfaceAudience;
27 import org.apache.hadoop.hbase.HRegionInfo;
28 import org.apache.hadoop.hbase.HTableDescriptor;
29 import org.apache.hadoop.hbase.Server;
30 import org.apache.hadoop.hbase.executor.EventHandler;
31 import org.apache.hadoop.hbase.executor.EventType;
32 import org.apache.hadoop.hbase.master.AssignmentManager;
33 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
34 import org.apache.hadoop.hbase.regionserver.HRegion;
35 import org.apache.hadoop.hbase.regionserver.RegionServerAccounting;
36 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
37 import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
38 import org.apache.hadoop.hbase.util.CancelableProgressable;
39 import org.apache.hadoop.hbase.util.ConfigUtil;
40 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
41 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
42 import org.apache.zookeeper.KeeperException;
43
44
45
46
47
48 @InterfaceAudience.Private
49 public class OpenRegionHandler extends EventHandler {
50 private static final Log LOG = LogFactory.getLog(OpenRegionHandler.class);
51
52 protected final RegionServerServices rsServices;
53
54 private final HRegionInfo regionInfo;
55 private final HTableDescriptor htd;
56 private final long masterSystemTime;
57
58 private boolean tomActivated;
59 private int assignmentTimeout;
60
61
62
63
64 private volatile int version = -1;
65
66 private volatile int versionOfOfflineNode = -1;
67
68 private final boolean useZKForAssignment;
69
/**
 * Creates a handler for a plain {@code M_RS_OPEN_REGION} event with neither a master
 * timestamp nor an expected OFFLINE znode version (both passed as -1).
 *
 * @param server the hosting server
 * @param rsServices region server services
 * @param regionInfo region to open
 * @param htd descriptor of the region's table
 */
public OpenRegionHandler(final Server server, final RegionServerServices rsServices,
    HRegionInfo regionInfo, HTableDescriptor htd) {
  this(server, rsServices, regionInfo, htd, -1, EventType.M_RS_OPEN_REGION, -1);
}

/**
 * Creates a handler for an {@code M_RS_OPEN_REGION} event.
 *
 * @param server the hosting server
 * @param rsServices region server services
 * @param regionInfo region to open
 * @param htd descriptor of the region's table
 * @param versionOfOfflineNode expected version of the OFFLINE znode
 * @param masterSystemTime timestamp supplied by the master
 */
public OpenRegionHandler(final Server server, final RegionServerServices rsServices,
    HRegionInfo regionInfo, HTableDescriptor htd, int versionOfOfflineNode,
    long masterSystemTime) {
  this(server, rsServices, regionInfo, htd, masterSystemTime,
      EventType.M_RS_OPEN_REGION, versionOfOfflineNode);
}

/**
 * Full constructor; also lets subclasses choose the event type.
 * Reads the assignment-related settings from the server configuration.
 *
 * @param server the hosting server
 * @param rsServices region server services
 * @param regionInfo region to open
 * @param htd descriptor of the region's table
 * @param masterSystemTime timestamp supplied by the master
 * @param eventType event type handed to the {@code EventHandler} superclass
 * @param versionOfOfflineNode expected version of the OFFLINE znode
 */
protected OpenRegionHandler(final Server server, final RegionServerServices rsServices,
    final HRegionInfo regionInfo, final HTableDescriptor htd, long masterSystemTime,
    EventType eventType, final int versionOfOfflineNode) {
  super(server, eventType);

  // Collaborators and request parameters.
  this.rsServices = rsServices;
  this.regionInfo = regionInfo;
  this.htd = htd;
  this.masterSystemTime = masterSystemTime;
  this.versionOfOfflineNode = versionOfOfflineNode;

  // Configuration-derived settings.
  tomActivated = this.server.getConfiguration().getBoolean(
      AssignmentManager.ASSIGNMENT_TIMEOUT_MANAGEMENT,
      AssignmentManager.DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT);
  assignmentTimeout = this.server.getConfiguration().getInt(
      AssignmentManager.ASSIGNMENT_TIMEOUT,
      AssignmentManager.DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT);
  useZKForAssignment = ConfigUtil.useZKForAssignment(server.getConfiguration());
}
101
102 public HRegionInfo getRegionInfo() {
103 return regionInfo;
104 }
105
106 @Override
107 public void process() throws IOException {
108 boolean openSuccessful = false;
109 boolean transitionedToOpening = false;
110 final String regionName = regionInfo.getRegionNameAsString();
111 HRegion region = null;
112
113 try {
114 if (this.server.isStopped() || this.rsServices.isStopping()) {
115 return;
116 }
117 final String encodedName = regionInfo.getEncodedName();
118
119
120
121
122
123
124
125 if (this.rsServices.getFromOnlineRegions(encodedName) != null) {
126 LOG.error("Region " + encodedName +
127 " was already online when we started processing the opening. " +
128 "Marking this new attempt as failed");
129 return;
130 }
131
132
133
134
135 if (!isRegionStillOpening()){
136 LOG.error("Region " + encodedName + " opening cancelled");
137 return;
138 }
139
140 if (useZKForAssignment
141 && !transitionZookeeperOfflineToOpening(encodedName, versionOfOfflineNode)) {
142 LOG.warn("Region was hijacked? Opening cancelled for encodedName=" + encodedName);
143
144 return;
145 }
146 transitionedToOpening = true;
147
148
149 region = openRegion();
150 if (region == null) {
151 return;
152 }
153
154 boolean failed = true;
155 if (isRegionStillOpening() && (!useZKForAssignment || tickleOpening("post_region_open"))) {
156 if (updateMeta(region, masterSystemTime)) {
157 failed = false;
158 }
159 }
160 if (failed || this.server.isStopped() ||
161 this.rsServices.isStopping()) {
162 return;
163 }
164
165
166 if (!isRegionStillOpening() || (useZKForAssignment && !transitionToOpened(region))) {
167
168
169
170
171
172 return;
173 }
174
175
176
177
178
179
180
181
182
183
184
185 this.rsServices.addToOnlineRegions(region);
186 openSuccessful = true;
187
188
189 LOG.debug("Opened " + regionName + " on " +
190 this.server.getServerName());
191
192
193 } finally {
194
195 if (!openSuccessful) {
196 doCleanUpOnFailedOpen(region, transitionedToOpening);
197 }
198 final Boolean current = this.rsServices.getRegionsInTransitionInRS().
199 remove(this.regionInfo.getEncodedNameAsBytes());
200
201
202
203
204
205
206
207
208 if (openSuccessful) {
209 if (current == null) {
210 LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
211 + regionName);
212 } else if (Boolean.FALSE.equals(current)) {
213
214 LOG.error("Race condition: we've finished to open a region, while a close was requested "
215 + " on region=" + regionName + ". It can be a critical error, as a region that"
216 + " should be closed is now opened. Closing it now");
217 cleanupFailedOpen(region);
218 }
219 }
220 }
221 }
222
223 private void doCleanUpOnFailedOpen(HRegion region, boolean transitionedToOpening)
224 throws IOException {
225 if (transitionedToOpening) {
226 try {
227 if (region != null) {
228 cleanupFailedOpen(region);
229 }
230 } finally {
231 if (!useZKForAssignment) {
232 rsServices.reportRegionStateTransition(TransitionCode.FAILED_OPEN, regionInfo);
233 } else {
234
235
236 tryTransitionFromOpeningToFailedOpen(regionInfo);
237 }
238 }
239 } else if (!useZKForAssignment) {
240 rsServices.reportRegionStateTransition(TransitionCode.FAILED_OPEN, regionInfo);
241 } else {
242
243
244 tryTransitionFromOfflineToFailedOpen(this.rsServices, regionInfo, versionOfOfflineNode);
245 }
246 }
247
248
249
250
251
252
253
254
255 boolean updateMeta(final HRegion r, final long masterSystemTime) {
256 if (this.server.isStopped() || this.rsServices.isStopping()) {
257 return false;
258 }
259
260
261 final AtomicBoolean signaller = new AtomicBoolean(false);
262 PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r,
263 this.server, this.rsServices, signaller, masterSystemTime);
264 t.start();
265
266
267 long timeout = assignmentTimeout * 10;
268 long now = System.currentTimeMillis();
269 long endTime = now + timeout;
270
271
272 long period = Math.max(1, assignmentTimeout/ 3);
273 long lastUpdate = now;
274 boolean tickleOpening = true;
275 while (!signaller.get() && t.isAlive() && !this.server.isStopped() &&
276 !this.rsServices.isStopping() && (endTime > now)) {
277 long elapsed = now - lastUpdate;
278 if (elapsed > period) {
279
280 lastUpdate = now;
281 if (useZKForAssignment) {
282 tickleOpening = tickleOpening("post_open_deploy");
283 }
284 }
285 synchronized (signaller) {
286 try {
287 if (!signaller.get()) signaller.wait(period);
288 } catch (InterruptedException e) {
289
290 }
291 }
292 now = System.currentTimeMillis();
293 }
294
295
296 if (t.isAlive()) {
297 if (!signaller.get()) {
298
299 LOG.debug("Interrupting thread " + t);
300 t.interrupt();
301 }
302 try {
303 t.join();
304 } catch (InterruptedException ie) {
305 LOG.warn("Interrupted joining " +
306 r.getRegionInfo().getRegionNameAsString(), ie);
307 Thread.currentThread().interrupt();
308 }
309 }
310
311
312
313
314 return ((!Thread.interrupted() && t.getException() == null) && tickleOpening);
315 }
316
317
318
319
320
321
322
323
324 static class PostOpenDeployTasksThread extends Thread {
325 private Throwable exception = null;
326 private final Server server;
327 private final RegionServerServices services;
328 private final HRegion region;
329 private final AtomicBoolean signaller;
330 private final long masterSystemTime;
331
332 PostOpenDeployTasksThread(final HRegion region, final Server server,
333 final RegionServerServices services, final AtomicBoolean signaller,
334 final long masterSystemTime) {
335 super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
336 this.setDaemon(true);
337 this.server = server;
338 this.services = services;
339 this.region = region;
340 this.signaller = signaller;
341 this.masterSystemTime = masterSystemTime;
342 }
343
344 public void run() {
345 try {
346 this.services.postOpenDeployTasks(new PostOpenDeployContext(region, masterSystemTime),
347 this.server.getCatalogTracker());
348 } catch (Throwable e) {
349 String msg =
350 "Exception running postOpenDeployTasks; region="
351 + this.region.getRegionInfo().getEncodedName();
352 this.exception = e;
353 if (e instanceof IOException && isRegionStillOpening(region.getRegionInfo(), services)) {
354 server.abort(msg, e);
355 } else {
356 LOG.warn(msg, e);
357 }
358 }
359
360 this.signaller.set(true);
361 synchronized (this.signaller) {
362 this.signaller.notify();
363 }
364 }
365
366
367
368
369 Throwable getException() {
370 return this.exception;
371 }
372 }
373
374
375
376
377
378
379
380 boolean transitionToOpened(final HRegion r) throws IOException {
381 boolean result = false;
382 HRegionInfo hri = r.getRegionInfo();
383 final String name = hri.getRegionNameAsString();
384
385 try {
386 if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri,
387 this.server.getServerName(), this.version) == -1) {
388 String warnMsg = "Completed the OPEN of region " + name +
389 " but when transitioning from " + " OPENING to OPENED ";
390 try {
391 String node = ZKAssign.getNodeName(this.server.getZooKeeper(), hri.getEncodedName());
392 if (ZKUtil.checkExists(this.server.getZooKeeper(), node) < 0) {
393
394 rsServices.abort(warnMsg + "the znode disappeared", null);
395 } else {
396 LOG.warn(warnMsg + "got a version mismatch, someone else clashed; " +
397 "so now unassigning -- closing region on server: " + this.server.getServerName());
398 }
399 } catch (KeeperException ke) {
400 rsServices.abort(warnMsg, ke);
401 }
402 } else {
403 LOG.debug("Transitioned " + r.getRegionInfo().getEncodedName() +
404 " to OPENED in zk on " + this.server.getServerName());
405 result = true;
406 }
407 } catch (KeeperException e) {
408 LOG.error("Failed transitioning node " + name +
409 " from OPENING to OPENED -- closing region", e);
410 }
411 return result;
412 }
413
414
415
416
417
418
419 private boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri) {
420 boolean result = false;
421 final String name = hri.getRegionNameAsString();
422 try {
423 LOG.info("Opening of region " + hri + " failed, transitioning" +
424 " from OPENING to FAILED_OPEN in ZK, expecting version " + this.version);
425 if (ZKAssign.transitionNode(
426 this.server.getZooKeeper(), hri,
427 this.server.getServerName(),
428 EventType.RS_ZK_REGION_OPENING,
429 EventType.RS_ZK_REGION_FAILED_OPEN,
430 this.version) == -1) {
431 LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
432 "It's likely that the master already timed out this open " +
433 "attempt, and thus another RS already has the region.");
434 } else {
435 result = true;
436 }
437 } catch (KeeperException e) {
438 LOG.error("Failed transitioning node " + name +
439 " from OPENING to FAILED_OPEN", e);
440 }
441 return result;
442 }
443
444
445
446
447
448
449
450
451
452
453
454
455 public static boolean tryTransitionFromOfflineToFailedOpen(RegionServerServices rsServices,
456 final HRegionInfo hri, final int versionOfOfflineNode) {
457 boolean result = false;
458 final String name = hri.getRegionNameAsString();
459 try {
460 LOG.info("Opening of region " + hri + " failed, transitioning" +
461 " from OFFLINE to FAILED_OPEN in ZK, expecting version " + versionOfOfflineNode);
462 if (ZKAssign.transitionNode(
463 rsServices.getZooKeeper(), hri,
464 rsServices.getServerName(),
465 EventType.M_ZK_REGION_OFFLINE,
466 EventType.RS_ZK_REGION_FAILED_OPEN,
467 versionOfOfflineNode) == -1) {
468 LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
469 "It's likely that the master already timed out this open " +
470 "attempt, and thus another RS already has the region.");
471 } else {
472 result = true;
473 }
474 } catch (KeeperException e) {
475 LOG.error("Failed transitioning node " + name + " from OFFLINE to FAILED_OPEN", e);
476 }
477 return result;
478 }
479
480
481
482
483
484 HRegion openRegion() {
485 HRegion region = null;
486 try {
487
488
489 region = HRegion.openHRegion(this.regionInfo, this.htd,
490 this.rsServices.getWAL(this.regionInfo),
491 this.server.getConfiguration(),
492 this.rsServices,
493 new CancelableProgressable() {
494 public boolean progress() {
495 if (useZKForAssignment) {
496
497
498
499
500 return tickleOpening("open_region_progress");
501 }
502 if (!isRegionStillOpening()) {
503 LOG.warn("Open region aborted since it isn't opening any more");
504 return false;
505 }
506 return true;
507 }
508 });
509 } catch (Throwable t) {
510
511
512
513 LOG.error(
514 "Failed open of region=" + this.regionInfo.getRegionNameAsString()
515 + ", starting to roll back the global memstore size.", t);
516
517 if (this.rsServices != null) {
518 RegionServerAccounting rsAccounting =
519 this.rsServices.getRegionServerAccounting();
520 if (rsAccounting != null) {
521 rsAccounting.rollbackRegionReplayEditsSize(this.regionInfo.getRegionName());
522 }
523 }
524 }
525 return region;
526 }
527
528 void cleanupFailedOpen(final HRegion region) throws IOException {
529 if (region != null) {
530 byte[] encodedName = regionInfo.getEncodedNameAsBytes();
531 try {
532 rsServices.getRegionsInTransitionInRS().put(encodedName,Boolean.FALSE);
533 this.rsServices.removeFromOnlineRegions(region, null);
534 region.close();
535 } finally {
536 rsServices.getRegionsInTransitionInRS().remove(encodedName);
537 }
538 }
539 }
540
541 private static boolean isRegionStillOpening(HRegionInfo regionInfo,
542 RegionServerServices rsServices) {
543 byte[] encodedName = regionInfo.getEncodedNameAsBytes();
544 Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
545 return Boolean.TRUE.equals(action);
546 }
547
548 private boolean isRegionStillOpening() {
549 return isRegionStillOpening(regionInfo, rsServices);
550 }
551
552
553
554
555
556
557
558
559
560 boolean transitionZookeeperOfflineToOpening(final String encodedName,
561 int versionOfOfflineNode) {
562
563 try {
564
565 this.version = ZKAssign.transitionNode(server.getZooKeeper(), regionInfo,
566 server.getServerName(), EventType.M_ZK_REGION_OFFLINE,
567 EventType.RS_ZK_REGION_OPENING, versionOfOfflineNode);
568 } catch (KeeperException e) {
569 LOG.error("Error transition from OFFLINE to OPENING for region=" +
570 encodedName, e);
571 this.version = -1;
572 return false;
573 }
574 boolean b = isGoodVersion();
575 if (!b) {
576 LOG.warn("Failed transition from OFFLINE to OPENING for region=" +
577 encodedName);
578 }
579 return b;
580 }
581
582
583
584
585
586
587
588 boolean tickleOpening(final String context) {
589 if (!isRegionStillOpening()) {
590 LOG.warn("Open region aborted since it isn't opening any more");
591 return false;
592 }
593
594 if (!isGoodVersion()) return false;
595 String encodedName = this.regionInfo.getEncodedName();
596 try {
597 this.version =
598 ZKAssign.retransitionNodeOpening(server.getZooKeeper(),
599 this.regionInfo, this.server.getServerName(), this.version, tomActivated);
600 } catch (KeeperException e) {
601 server.abort("Exception refreshing OPENING; region=" + encodedName +
602 ", context=" + context, e);
603 this.version = -1;
604 return false;
605 }
606 boolean b = isGoodVersion();
607 if (!b) {
608 LOG.warn("Failed refreshing OPENING; region=" + encodedName +
609 ", context=" + context);
610 }
611 return b;
612 }
613
614 private boolean isGoodVersion() {
615 return this.version != -1;
616 }
617 }