View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.handler;
20  
21  import java.io.IOException;
22  import java.util.Map;
23  import java.util.concurrent.atomic.AtomicBoolean;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.HRegionInfo;
29  import org.apache.hadoop.hbase.HTableDescriptor;
30  import org.apache.hadoop.hbase.Server;
31  import org.apache.hadoop.hbase.executor.EventHandler;
32  import org.apache.hadoop.hbase.executor.EventType;
33  import org.apache.hadoop.hbase.regionserver.HRegion;
34  import org.apache.hadoop.hbase.regionserver.RegionServerAccounting;
35  import org.apache.hadoop.hbase.regionserver.RegionServerServices;
36  import org.apache.hadoop.hbase.util.CancelableProgressable;
37  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
38  import org.apache.zookeeper.KeeperException;
39  
40  /**
41   * Handles opening of a region on a region server.
42   * <p>
43   * This is executed after receiving an OPEN RPC from the master or client.
44   */
45  @InterfaceAudience.Private
46  public class OpenRegionHandler extends EventHandler {
47    private static final Log LOG = LogFactory.getLog(OpenRegionHandler.class);
48  
49    protected final RegionServerServices rsServices;
50  
51    private final HRegionInfo regionInfo;
52    private final HTableDescriptor htd;
53  
54    private boolean tomActivated;
55    private int assignmentTimeout;
56  
57    // We get version of our znode at start of open process and monitor it across
58    // the total open. We'll fail the open if someone hijacks our znode; we can
59    // tell this has happened if version is not as expected.
60    private volatile int version = -1;
61    //version of the offline node that was set by the master
62    private volatile int versionOfOfflineNode = -1;
63  
64    public OpenRegionHandler(final Server server,
65        final RegionServerServices rsServices, HRegionInfo regionInfo,
66        HTableDescriptor htd) {
67      this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION, -1);
68    }
69    public OpenRegionHandler(final Server server,
70        final RegionServerServices rsServices, HRegionInfo regionInfo,
71        HTableDescriptor htd, int versionOfOfflineNode) {
72      this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION,
73          versionOfOfflineNode);
74    }
75  
76    protected OpenRegionHandler(final Server server,
77        final RegionServerServices rsServices, final HRegionInfo regionInfo,
78        final HTableDescriptor htd, EventType eventType,
79        final int versionOfOfflineNode) {
80      super(server, eventType);
81      this.rsServices = rsServices;
82      this.regionInfo = regionInfo;
83      this.htd = htd;
84      this.versionOfOfflineNode = versionOfOfflineNode;
85      tomActivated = this.server.getConfiguration().
86          getBoolean("hbase.assignment.timeout.management", false);
87      assignmentTimeout = this.server.getConfiguration().
88          getInt("hbase.master.assignment.timeoutmonitor.period", 10000);
89    }
90  
91    public HRegionInfo getRegionInfo() {
92      return regionInfo;
93    }
94  
95    @Override
96    public void process() throws IOException {
97      boolean openSuccessful = false;
98      boolean transitionedToOpening = false;
99      final String regionName = regionInfo.getRegionNameAsString();
100     HRegion region = null;
101 
102     try {
103       if (this.server.isStopped() || this.rsServices.isStopping()) {
104         return;
105       }
106       final String encodedName = regionInfo.getEncodedName();
107 
108       // 3 different difficult situations can occur
109       // 1) The opening was cancelled. This is an expected situation
110       // 2) The region was hijacked, we no longer have the znode
111       // 3) The region is now marked as online while we're suppose to open. This would be a bug.
112 
113       // Check that this region is not already online
114       if (this.rsServices.getFromOnlineRegions(encodedName) != null) {
115         LOG.error("Region " + encodedName +
116             " was already online when we started processing the opening. " +
117             "Marking this new attempt as failed");
118         return;
119       }
120 
121       // Check that we're still supposed to open the region and transition.
122       // If fails, just return.  Someone stole the region from under us.
123       // Calling transitionZookeeperOfflineToOpening initializes this.version.
124       if (!isRegionStillOpening()){
125         LOG.error("Region " + encodedName + " opening cancelled");
126         return;
127       }
128 
129       if (!transitionZookeeperOfflineToOpening(encodedName, versionOfOfflineNode)) {
130         LOG.warn("Region was hijacked? Opening cancelled for encodedName=" + encodedName);
131         // This is a desperate attempt: the znode is unlikely to be ours. But we can't do more.
132         return;
133       }
134       transitionedToOpening = true;
135       // Open region.  After a successful open, failures in subsequent
136       // processing needs to do a close as part of cleanup.
137       region = openRegion();
138       if (region == null) {
139         return;
140       }
141 
142       // check if we need set current region in recovering state
143       region.setRecovering(false);
144       Map<String, HRegion> recoveringRegions = this.rsServices.getRecoveringRegions();
145       if (recoveringRegions != null && !recoveringRegions.isEmpty()
146           && recoveringRegions.containsKey(region.getRegionInfo().getEncodedName())) {
147         region.setRecovering(true);
148         recoveringRegions.put(region.getRegionInfo().getEncodedName(), region);
149       }
150 
151       boolean failed = true;
152       if (tickleOpening("post_region_open")) {
153         if (updateMeta(region)) {
154           failed = false;
155         }
156       }
157       if (failed || this.server.isStopped() ||
158           this.rsServices.isStopping()) {
159         return;
160       }
161 
162 
163       if (!isRegionStillOpening() || !transitionToOpened(region)) {
164         // If we fail to transition to opened, it's because of one of two cases:
165         //    (a) we lost our ZK lease
166         // OR (b) someone else opened the region before us
167         // OR (c) someone cancelled the open
168         // In all cases, we try to transition to failed_open to be safe.
169         return;
170       }
171 
172       // We have a znode in the opened state now. We can't really delete it as the master job.
173       // Transitioning to failed open would create a race condition if the master has already
174       // acted the transition to opened.
175       // Cancelling the open is dangerous, because we would have a state where the master thinks
176       // the region is opened while the region is actually closed. It is a dangerous state
177       // to be in. For this reason, from now on, we're not going back. There is a message in the
178       // finally close to let the admin knows where we stand.
179 
180 
181       // Successful region open, and add it to OnlineRegions
182       this.rsServices.addToOnlineRegions(region);
183       openSuccessful = true;
184 
185       // Done!  Successful region open
186       LOG.debug("Opened " + regionName + " on server:" +
187         this.server.getServerName());
188 
189 
190     } finally {
191       // Do all clean up here
192       if (!openSuccessful) {
193         doCleanUpOnFailedOpen(region, transitionedToOpening);
194       }
195       final Boolean current = this.rsServices.getRegionsInTransitionInRS().
196           remove(this.regionInfo.getEncodedNameAsBytes());
197 
198       // Let's check if we have met a race condition on open cancellation....
199       // A better solution would be to not have any race condition.
200       // this.rsServices.getRegionsInTransitionInRS().remove(
201       //  this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
202       // would help, but we would still have a consistency issue to manage with
203       // 1) this.rsServices.addToOnlineRegions(region);
204       // 2) the ZK state.
205       if (openSuccessful) {
206         if (current == null) { // Should NEVER happen, but let's be paranoid.
207           LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
208               + regionName);
209         } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're
210                                                     // really unlucky.
211           LOG.error("Race condition: we've finished to open a region, while a close was requested "
212               + " on region=" + regionName + ". It can be a critical error, as a region that"
213               + " should be closed is now opened.");
214         }
215       } 
216     }
217   }
218 
219   private void doCleanUpOnFailedOpen(HRegion region, boolean transitionedToOpening)
220       throws IOException {
221     if (transitionedToOpening) {
222       try {
223         if (region != null) {
224           cleanupFailedOpen(region);
225         }
226       } finally {
227         // Even if cleanupFailed open fails we need to do this transition
228         // See HBASE-7698
229         tryTransitionFromOpeningToFailedOpen(regionInfo);
230       }
231     } else {
232       // If still transition to OPENING is not done, we need to transition znode
233       // to FAILED_OPEN
234       tryTransitionFromOfflineToFailedOpen(this.rsServices, regionInfo, versionOfOfflineNode);
235     }
236   }
237 
238   /**
239    * Update ZK or META.  This can take a while if for example the
240    * .META. is not available -- if server hosting .META. crashed and we are
241    * waiting on it to come back -- so run in a thread and keep updating znode
242    * state meantime so master doesn't timeout our region-in-transition.
243    * Caller must cleanup region if this fails.
244    */
245   boolean updateMeta(final HRegion r) {
246     if (this.server.isStopped() || this.rsServices.isStopping()) {
247       return false;
248     }
249     // Object we do wait/notify on.  Make it boolean.  If set, we're done.
250     // Else, wait.
251     final AtomicBoolean signaller = new AtomicBoolean(false);
252     PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r,
253       this.server, this.rsServices, signaller);
254     t.start();
255     // Total timeout for meta edit.  If we fail adding the edit then close out
256     // the region and let it be assigned elsewhere.
257     long timeout = assignmentTimeout * 10;
258     long now = System.currentTimeMillis();
259     long endTime = now + timeout;
260     // Let our period at which we update OPENING state to be be 1/3rd of the
261     // regions-in-transition timeout period.
262     long period = Math.max(1, assignmentTimeout/ 3);
263     long lastUpdate = now;
264     boolean tickleOpening = true;
265     while (!signaller.get() && t.isAlive() && !this.server.isStopped() &&
266         !this.rsServices.isStopping() && (endTime > now)) {
267       long elapsed = now - lastUpdate;
268       if (elapsed > period) {
269         // Only tickle OPENING if postOpenDeployTasks is taking some time.
270         lastUpdate = now;
271         tickleOpening = tickleOpening("post_open_deploy");
272       }
273       synchronized (signaller) {
274         try {
275           signaller.wait(period);
276         } catch (InterruptedException e) {
277           // Go to the loop check.
278         }
279       }
280       now = System.currentTimeMillis();
281     }
282     // Is thread still alive?  We may have left above loop because server is
283     // stopping or we timed out the edit.  Is so, interrupt it.
284     if (t.isAlive()) {
285       if (!signaller.get()) {
286         // Thread still running; interrupt
287         LOG.debug("Interrupting thread " + t);
288         t.interrupt();
289       }
290       try {
291         t.join();
292       } catch (InterruptedException ie) {
293         LOG.warn("Interrupted joining " +
294           r.getRegionInfo().getRegionNameAsString(), ie);
295         Thread.currentThread().interrupt();
296       }
297     }
298 
299     // Was there an exception opening the region?  This should trigger on
300     // InterruptedException too.  If so, we failed.  Even if tickle opening fails
301     // then it is a failure.
302     return ((!Thread.interrupted() && t.getException() == null) && tickleOpening);
303   }
304 
305   /**
306    * Thread to run region post open tasks. Call {@link #getException()} after
307    * the thread finishes to check for exceptions running
308    * {@link RegionServerServices#postOpenDeployTasks(
309    * HRegion, org.apache.hadoop.hbase.catalog.CatalogTracker)}
310    * .
311    */
312   static class PostOpenDeployTasksThread extends Thread {
313     private Exception exception = null;
314     private final Server server;
315     private final RegionServerServices services;
316     private final HRegion region;
317     private final AtomicBoolean signaller;
318 
319     PostOpenDeployTasksThread(final HRegion region, final Server server,
320         final RegionServerServices services, final AtomicBoolean signaller) {
321       super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
322       this.setDaemon(true);
323       this.server = server;
324       this.services = services;
325       this.region = region;
326       this.signaller = signaller;
327     }
328 
329     public void run() {
330       try {
331         this.services.postOpenDeployTasks(this.region,
332           this.server.getCatalogTracker());
333       } catch (Exception e) {
334         LOG.warn("Exception running postOpenDeployTasks; region=" +
335           this.region.getRegionInfo().getEncodedName(), e);
336         this.exception = e;
337       }
338       // We're done.  Set flag then wake up anyone waiting on thread to complete.
339       this.signaller.set(true);
340       synchronized (this.signaller) {
341         this.signaller.notify();
342       }
343     }
344 
345     /**
346      * @return Null or the run exception; call this method after thread is done.
347      */
348     Exception getException() {
349       return this.exception;
350     }
351   }
352 
353 
354   /**
355    * @param r Region we're working on.
356    * @return whether znode is successfully transitioned to OPENED state.
357    * @throws IOException
358    */
359   private boolean transitionToOpened(final HRegion r) throws IOException {
360     boolean result = false;
361     HRegionInfo hri = r.getRegionInfo();
362     final String name = hri.getRegionNameAsString();
363     // Finally, Transition ZK node to OPENED
364     try {
365       if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri,
366           this.server.getServerName(), this.version) == -1) {
367         LOG.warn("Completed the OPEN of region " + name +
368           " but when transitioning from " +
369           " OPENING to OPENED got a version mismatch, someone else clashed " +
370           "so now unassigning -- closing region on server: " +
371           this.server.getServerName());
372       } else {
373         LOG.debug("region transitioned to opened in zookeeper: " +
374           r.getRegionInfo() + ", server: " + this.server.getServerName());
375         result = true;
376       }
377     } catch (KeeperException e) {
378       LOG.error("Failed transitioning node " + name +
379         " from OPENING to OPENED -- closing region", e);
380     }
381     return result;
382   }
383 
384   /**
385    * This is not guaranteed to succeed, we just do our best.
386    * @param hri Region we're working on.
387    * @return whether znode is successfully transitioned to FAILED_OPEN state.
388    */
389   private boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri) {
390     boolean result = false;
391     final String name = hri.getRegionNameAsString();
392     try {
393       LOG.info("Opening of region " + hri + " failed, transitioning" +
394           " from OPENING to FAILED_OPEN in ZK, expecting version " + this.version);
395       if (ZKAssign.transitionNode(
396           this.server.getZooKeeper(), hri,
397           this.server.getServerName(),
398           EventType.RS_ZK_REGION_OPENING,
399           EventType.RS_ZK_REGION_FAILED_OPEN,
400           this.version) == -1) {
401         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
402             "It's likely that the master already timed out this open " +
403             "attempt, and thus another RS already has the region.");
404       } else {
405         result = true;
406       }
407     } catch (KeeperException e) {
408       LOG.error("Failed transitioning node " + name +
409         " from OPENING to FAILED_OPEN", e);
410     }
411     return result;
412   }
413 
414   /**
415    * Try to transition to open. This function is static to make it usable before creating the
416    *  handler.
417    *
418    * This is not guaranteed to succeed, we just do our best.
419    *
420    * @param rsServices
421    * @param hri Region we're working on.
422    * @param versionOfOfflineNode version to checked.
423    * @return whether znode is successfully transitioned to FAILED_OPEN state.
424    */
425   public static boolean tryTransitionFromOfflineToFailedOpen(RegionServerServices rsServices,
426        final HRegionInfo hri, final int versionOfOfflineNode) {
427     boolean result = false;
428     final String name = hri.getRegionNameAsString();
429     try {
430       LOG.info("Opening of region " + hri + " failed, transitioning" +
431           " from OFFLINE to FAILED_OPEN in ZK, expecting version " + versionOfOfflineNode);
432       if (ZKAssign.transitionNode(
433           rsServices.getZooKeeper(), hri,
434           rsServices.getServerName(),
435           EventType.M_ZK_REGION_OFFLINE,
436           EventType.RS_ZK_REGION_FAILED_OPEN,
437           versionOfOfflineNode) == -1) {
438         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
439             "It's likely that the master already timed out this open " +
440             "attempt, and thus another RS already has the region.");
441       } else {
442         result = true;
443       }
444     } catch (KeeperException e) {
445       LOG.error("Failed transitioning node " + name + " from OFFLINE to FAILED_OPEN", e);
446     }
447     return result;
448   }
449 
450 
451   /**
452    * @return Instance of HRegion if successful open else null.
453    */
454   HRegion openRegion() {
455     HRegion region = null;
456     try {
457       // Instantiate the region.  This also periodically tickles our zk OPENING
458       // state so master doesn't timeout this region in transition.
459       region = HRegion.openHRegion(this.regionInfo, this.htd,
460           this.rsServices.getWAL(this.regionInfo), 
461           this.server.getConfiguration(),
462           this.rsServices,
463         new CancelableProgressable() {
464           public boolean progress() {
465             // We may lose the znode ownership during the open.  Currently its
466             // too hard interrupting ongoing region open.  Just let it complete
467             // and check we still have the znode after region open.
468             return tickleOpening("open_region_progress");
469           }
470         });
471     } catch (Throwable t) {
472       // We failed open. Our caller will see the 'null' return value
473       // and transition the node back to FAILED_OPEN. If that fails,
474       // we rely on the Timeout Monitor in the master to reassign.
475       LOG.error(
476           "Failed open of region=" + this.regionInfo.getRegionNameAsString()
477               + ", starting to roll back the global memstore size.", t);
478       // Decrease the global memstore size.
479       if (this.rsServices != null) {
480         RegionServerAccounting rsAccounting =
481           this.rsServices.getRegionServerAccounting();
482         if (rsAccounting != null) {
483           rsAccounting.rollbackRegionReplayEditsSize(this.regionInfo.getRegionName());
484         }
485       }
486     }
487     return region;
488   }
489 
490   void cleanupFailedOpen(final HRegion region) throws IOException {
491     if (region != null) region.close();
492   }
493 
494   private boolean isRegionStillOpening() {
495     byte[] encodedName = regionInfo.getEncodedNameAsBytes();
496     Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
497     return Boolean.TRUE.equals(action); // true means opening for RIT
498   }
499 
500   /**
501    * Transition ZK node from OFFLINE to OPENING.
502    * @param encodedName Name of the znode file (Region encodedName is the znode
503    * name).
504    * @param versionOfOfflineNode - version Of OfflineNode that needs to be compared
505    * before changing the node's state from OFFLINE 
506    * @return True if successful transition.
507    */
508   boolean transitionZookeeperOfflineToOpening(final String encodedName,
509       int versionOfOfflineNode) {
510     // TODO: should also handle transition from CLOSED?
511     try {
512       // Initialize the znode version.
513       this.version = ZKAssign.transitionNode(server.getZooKeeper(), regionInfo,
514           server.getServerName(), EventType.M_ZK_REGION_OFFLINE,
515           EventType.RS_ZK_REGION_OPENING, versionOfOfflineNode);
516     } catch (KeeperException e) {
517       LOG.error("Error transition from OFFLINE to OPENING for region=" +
518         encodedName, e);
519       this.version = -1;
520       return false;
521     }
522     boolean b = isGoodVersion();
523     if (!b) {
524       LOG.warn("Failed transition from OFFLINE to OPENING for region=" +
525         encodedName);
526     }
527     return b;
528   }
529 
530   /**
531    * Update our OPENING state in zookeeper.
532    * Do this so master doesn't timeout this region-in-transition.
533    * @param context Some context to add to logs if failure
534    * @return True if successful transition.
535    */
536   boolean tickleOpening(final String context) {
537     if (!isRegionStillOpening()) {
538       LOG.warn("Open region aborted since it isn't opening any more");
539       return false;
540     }
541     // If previous checks failed... do not try again.
542     if (!isGoodVersion()) return false;
543     String encodedName = this.regionInfo.getEncodedName();
544     try {
545       this.version =
546         ZKAssign.retransitionNodeOpening(server.getZooKeeper(),
547           this.regionInfo, this.server.getServerName(), this.version, tomActivated);
548     } catch (KeeperException e) {
549       server.abort("Exception refreshing OPENING; region=" + encodedName +
550         ", context=" + context, e);
551       this.version = -1;
552       return false;
553     }
554     boolean b = isGoodVersion();
555     if (!b) {
556       LOG.warn("Failed refreshing OPENING; region=" + encodedName +
557         ", context=" + context);
558     }
559     return b;
560   }
561 
562   private boolean isGoodVersion() {
563     return this.version != -1;
564   }
565 }