1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.regionserver.handler;
21
22 import java.io.IOException;
23 import java.util.concurrent.atomic.AtomicBoolean;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.hadoop.hbase.HRegionInfo;
28 import org.apache.hadoop.hbase.HTableDescriptor;
29 import org.apache.hadoop.hbase.Server;
30 import org.apache.hadoop.hbase.executor.EventHandler;
31 import org.apache.hadoop.hbase.regionserver.HRegion;
32 import org.apache.hadoop.hbase.regionserver.RegionServerAccounting;
33 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
34 import org.apache.hadoop.hbase.regionserver.wal.HLog;
35 import org.apache.hadoop.hbase.util.CancelableProgressable;
36 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
37 import org.apache.zookeeper.KeeperException;
38
39
40
41
42
43
44 public class OpenRegionHandler extends EventHandler {
45 private static final Log LOG = LogFactory.getLog(OpenRegionHandler.class);
46
47 protected final RegionServerServices rsServices;
48
49 private final HRegionInfo regionInfo;
50 private final HTableDescriptor htd;
51
52
53
54
55 private volatile int version = -1;
56
57
58 public OpenRegionHandler(final Server server,
59 final RegionServerServices rsServices, HRegionInfo regionInfo,
60 HTableDescriptor htd) {
61 this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION, -1);
62 }
63 public OpenRegionHandler(final Server server,
64 final RegionServerServices rsServices, HRegionInfo regionInfo,
65 HTableDescriptor htd, int version) {
66 this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION,
67 version);
68 }
69
70 protected OpenRegionHandler(final Server server,
71 final RegionServerServices rsServices, final HRegionInfo regionInfo,
72 final HTableDescriptor htd, EventType eventType,
73 final int version) {
74 super(server, eventType);
75 this.rsServices = rsServices;
76 this.regionInfo = regionInfo;
77 this.htd = htd;
78 this.version = version;
79 }
80
81 public HRegionInfo getRegionInfo() {
82 return regionInfo;
83 }
84
85 @Override
86 public void process() throws IOException {
87 boolean transitionToFailedOpen = false;
88 boolean openSuccessful = false;
89 try {
90 final String name = regionInfo.getRegionNameAsString();
91 if (this.server.isStopped() || this.rsServices.isStopping()) {
92 return;
93 }
94 final String encodedName = regionInfo.getEncodedName();
95
96
97 HRegion region = this.rsServices.getFromOnlineRegions(encodedName);
98
99
100
101 region = openRegion();
102 if (region == null) {
103 this.rsServices.removeFromRegionsInTransition(this.regionInfo);
104 tryTransitionToFailedOpen(regionInfo);
105 transitionToFailedOpen = true;
106 return;
107 }
108 boolean failed = true;
109 if (tickleOpening("post_region_open")) {
110 if (updateMeta(region)) {
111 failed = false;
112 }
113 }
114 if (failed || this.server.isStopped() ||
115 this.rsServices.isStopping()) {
116 this.rsServices.removeFromRegionsInTransition(this.regionInfo);
117 cleanupFailedOpen(region);
118 tryTransitionToFailedOpen(regionInfo);
119 transitionToFailedOpen = true;
120 return;
121 }
122
123 if (!transitionToOpened(region)) {
124
125
126
127
128
129
130 cleanupFailedOpen(region);
131 transitionToFailedOpen = true;
132 return;
133 }
134
135 this.rsServices.addToOnlineRegions(region);
136 openSuccessful = true;
137
138 LOG.debug("Opened " + name + " on server:" +
139 this.server.getServerName());
140 } finally {
141 this.rsServices.removeFromRegionsInTransition(this.regionInfo);
142 if (!openSuccessful && !transitionToFailedOpen) {
143 tryTransitionToFailedOpen(regionInfo);
144 }
145 }
146 }
147
148
149
150
151
152
153
154
155 boolean updateMeta(final HRegion r) {
156 if (this.server.isStopped() || this.rsServices.isStopping()) {
157 return false;
158 }
159
160
161 final AtomicBoolean signaller = new AtomicBoolean(false);
162 PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r,
163 this.server, this.rsServices, signaller);
164 t.start();
165 int assignmentTimeout = this.server.getConfiguration().
166 getInt("hbase.master.assignment.timeoutmonitor.period", 10000);
167
168
169 long timeout = assignmentTimeout * 10;
170 long now = System.currentTimeMillis();
171 long endTime = now + timeout;
172
173
174 long period = Math.max(1, assignmentTimeout/ 3);
175 long lastUpdate = now;
176 boolean tickleOpening = true;
177 while (!signaller.get() && t.isAlive() && !this.server.isStopped() &&
178 !this.rsServices.isStopping() && (endTime > now)) {
179 long elapsed = now - lastUpdate;
180 if (elapsed > period) {
181
182 lastUpdate = now;
183 tickleOpening = tickleOpening("post_open_deploy");
184 }
185 synchronized (signaller) {
186 try {
187 signaller.wait(period);
188 } catch (InterruptedException e) {
189
190 }
191 }
192 now = System.currentTimeMillis();
193 }
194
195
196 if (t.isAlive()) {
197 if (!signaller.get()) {
198
199 LOG.debug("Interrupting thread " + t);
200 t.interrupt();
201 }
202 try {
203 t.join();
204 } catch (InterruptedException ie) {
205 LOG.warn("Interrupted joining " +
206 r.getRegionInfo().getRegionNameAsString(), ie);
207 Thread.currentThread().interrupt();
208 }
209 }
210
211
212
213
214 return ((!Thread.interrupted() && t.getException() == null) && tickleOpening);
215 }
216
217
218
219
220
221
222
223 static class PostOpenDeployTasksThread extends Thread {
224 private Exception exception = null;
225 private final Server server;
226 private final RegionServerServices services;
227 private final HRegion region;
228 private final AtomicBoolean signaller;
229
230 PostOpenDeployTasksThread(final HRegion region, final Server server,
231 final RegionServerServices services, final AtomicBoolean signaller) {
232 super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
233 this.setDaemon(true);
234 this.server = server;
235 this.services = services;
236 this.region = region;
237 this.signaller = signaller;
238 }
239
240 public void run() {
241 try {
242 this.services.postOpenDeployTasks(this.region,
243 this.server.getCatalogTracker(), false);
244 } catch (KeeperException e) {
245 server.abort("Exception running postOpenDeployTasks; region=" +
246 this.region.getRegionInfo().getEncodedName(), e);
247 } catch (Exception e) {
248 LOG.warn("Exception running postOpenDeployTasks; region=" +
249 this.region.getRegionInfo().getEncodedName(), e);
250 this.exception = e;
251 }
252
253 this.signaller.set(true);
254 synchronized (this.signaller) {
255 this.signaller.notify();
256 }
257 }
258
259
260
261
262 Exception getException() {
263 return this.exception;
264 }
265 }
266
267
268
269
270
271
272
273 private boolean transitionToOpened(final HRegion r) throws IOException {
274 boolean result = false;
275 HRegionInfo hri = r.getRegionInfo();
276 final String name = hri.getRegionNameAsString();
277
278 try {
279 if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri,
280 this.server.getServerName(), this.version) == -1) {
281 LOG.warn("Completed the OPEN of region " + name +
282 " but when transitioning from " +
283 " OPENING to OPENED got a version mismatch, someone else clashed " +
284 "so now unassigning -- closing region on server: " +
285 this.server.getServerName());
286 } else {
287 LOG.debug("region transitioned to opened in zookeeper: " +
288 r.getRegionInfo() + ", server: " + this.server.getServerName());
289 result = true;
290 }
291 } catch (KeeperException e) {
292 LOG.error("Failed transitioning node " + name +
293 " from OPENING to OPENED -- closing region", e);
294 }
295 return result;
296 }
297
298
299
300
301
302
303 private boolean tryTransitionToFailedOpen(final HRegionInfo hri) {
304 boolean result = false;
305 final String name = hri.getRegionNameAsString();
306 try {
307 LOG.info("Opening of region " + hri + " failed, marking as FAILED_OPEN in ZK");
308 if (ZKAssign.transitionNode(
309 this.server.getZooKeeper(), hri,
310 this.server.getServerName(),
311 EventType.RS_ZK_REGION_OPENING,
312 EventType.RS_ZK_REGION_FAILED_OPEN,
313 this.version) == -1) {
314 LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
315 "It's likely that the master already timed out this open " +
316 "attempt, and thus another RS already has the region.");
317 } else {
318 result = true;
319 }
320 } catch (KeeperException e) {
321 LOG.error("Failed transitioning node " + name +
322 " from OPENING to FAILED_OPEN", e);
323 }
324 return result;
325 }
326
327
328
329
330 HRegion openRegion() {
331 HRegion region = null;
332 try {
333
334
335 region = HRegion.openHRegion(this.regionInfo, this.htd,
336 this.rsServices.getWAL(this.regionInfo),
337 this.server.getConfiguration(),
338 this.rsServices,
339 new CancelableProgressable() {
340 public boolean progress() {
341
342
343
344 return tickleOpening("open_region_progress");
345 }
346 });
347 } catch (Throwable t) {
348
349
350
351 LOG.error(
352 "Failed open of region=" + this.regionInfo.getRegionNameAsString()
353 + ", starting to roll back the global memstore size.", t);
354
355 if (this.rsServices != null) {
356 RegionServerAccounting rsAccounting =
357 this.rsServices.getRegionServerAccounting();
358 if (rsAccounting != null) {
359 rsAccounting.rollbackRegionReplayEditsSize(this.regionInfo.getRegionName());
360 }
361 }
362 }
363 return region;
364 }
365
366 void cleanupFailedOpen(final HRegion region) throws IOException {
367 if (region != null) region.close();
368 }
369
370
371
372
373
374
375
376
377 boolean tickleOpening(final String context) {
378
379 if (!isGoodVersion()) return false;
380 String encodedName = this.regionInfo.getEncodedName();
381 try {
382 this.version =
383 ZKAssign.retransitionNodeOpening(server.getZooKeeper(),
384 this.regionInfo, this.server.getServerName(), this.version);
385 } catch (KeeperException e) {
386 server.abort("Exception refreshing OPENING; region=" + encodedName +
387 ", context=" + context, e);
388 this.version = -1;
389 }
390 boolean b = isGoodVersion();
391 if (!b) {
392 LOG.warn("Failed refreshing OPENING; region=" + encodedName +
393 ", context=" + context);
394 }
395 return b;
396 }
397
398 private boolean isGoodVersion() {
399 return this.version != -1;
400 }
401 }