View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.handler;
20  
21  import java.io.IOException;
22  import java.util.concurrent.atomic.AtomicBoolean;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.HRegionInfo;
28  import org.apache.hadoop.hbase.HTableDescriptor;
29  import org.apache.hadoop.hbase.Server;
30  import org.apache.hadoop.hbase.executor.EventHandler;
31  import org.apache.hadoop.hbase.executor.EventType;
32  import org.apache.hadoop.hbase.master.AssignmentManager;
33  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
34  import org.apache.hadoop.hbase.regionserver.HRegion;
35  import org.apache.hadoop.hbase.regionserver.RegionServerAccounting;
36  import org.apache.hadoop.hbase.regionserver.RegionServerServices;
37  import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
38  import org.apache.hadoop.hbase.util.CancelableProgressable;
39  import org.apache.hadoop.hbase.util.ConfigUtil;
40  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
41  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
42  import org.apache.zookeeper.KeeperException;
43  /**
44   * Handles opening of a region on a region server.
45   * <p>
46   * This is executed after receiving an OPEN RPC from the master or client.
47   */
48  @InterfaceAudience.Private
49  public class OpenRegionHandler extends EventHandler {
50    private static final Log LOG = LogFactory.getLog(OpenRegionHandler.class);
51  
52    protected final RegionServerServices rsServices;
53  
54    private final HRegionInfo regionInfo;
55    private final HTableDescriptor htd;
56    private final long masterSystemTime;
57  
58    private boolean tomActivated;
59    private int assignmentTimeout;
60  
61    // We get version of our znode at start of open process and monitor it across
62    // the total open. We'll fail the open if someone hijacks our znode; we can
63    // tell this has happened if version is not as expected.
64    private volatile int version = -1;
65    //version of the offline node that was set by the master
66    private volatile int versionOfOfflineNode = -1;
67  
68    private final boolean useZKForAssignment;
69  
/**
   * Creates a handler with no known offline-node version and no master system
   * time; both are passed on as -1.
   *
   * @param server the hosting server
   * @param rsServices region server services
   * @param regionInfo region to open
   * @param htd table descriptor of the region's table
   */
  public OpenRegionHandler(final Server server,
      final RegionServerServices rsServices, HRegionInfo regionInfo,
      HTableDescriptor htd) {
    // Go through the six-argument constructor: it supplies the same
    // M_RS_OPEN_REGION event type that was previously spelled out here.
    this(server, rsServices, regionInfo, htd, -1, -1L);
  }
75  
/**
   * Creates a handler for an {@code M_RS_OPEN_REGION} event.
   *
   * @param server the hosting server
   * @param rsServices region server services
   * @param regionInfo region to open
   * @param htd table descriptor of the region's table
   * @param versionOfOfflineNode version of the offline znode, as set by the master
   * @param masterSystemTime master timestamp forwarded to the post-open deploy tasks
   */
  public OpenRegionHandler(final Server server,
      final RegionServerServices rsServices, HRegionInfo regionInfo,
      HTableDescriptor htd, int versionOfOfflineNode, long masterSystemTime) {
    this(
        server,
        rsServices,
        regionInfo,
        htd,
        masterSystemTime,
        EventType.M_RS_OPEN_REGION,
        versionOfOfflineNode);
  }
82  
/**
   * Full constructor; also lets subclasses choose the event type.
   * Reads the assignment-timeout settings and the ZK-assignment switch from
   * the server configuration.
   *
   * @param server the hosting server
   * @param rsServices region server services
   * @param regionInfo region to open
   * @param htd table descriptor of the region's table
   * @param masterSystemTime master timestamp forwarded to the post-open deploy tasks
   * @param eventType event type handed to the {@link EventHandler} super constructor
   * @param versionOfOfflineNode version of the offline znode, as set by the master
   */
  protected OpenRegionHandler(final Server server,
      final RegionServerServices rsServices, final HRegionInfo regionInfo,
      final HTableDescriptor htd, long masterSystemTime, EventType eventType,
      final int versionOfOfflineNode) {
    super(server, eventType);

    // Plain state handed in by the caller.
    this.rsServices = rsServices;
    this.regionInfo = regionInfo;
    this.htd = htd;
    this.masterSystemTime = masterSystemTime;
    this.versionOfOfflineNode = versionOfOfflineNode;

    // Configuration-derived state.
    this.tomActivated = this.server.getConfiguration().getBoolean(
        AssignmentManager.ASSIGNMENT_TIMEOUT_MANAGEMENT,
        AssignmentManager.DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT);
    this.assignmentTimeout = this.server.getConfiguration().getInt(
        AssignmentManager.ASSIGNMENT_TIMEOUT,
        AssignmentManager.DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT);
    this.useZKForAssignment = ConfigUtil.useZKForAssignment(server.getConfiguration());
  }
101 
102   public HRegionInfo getRegionInfo() {
103     return regionInfo;
104   }
105 
106   @Override
107   public void process() throws IOException {
108     boolean openSuccessful = false;
109     boolean transitionedToOpening = false;
110     final String regionName = regionInfo.getRegionNameAsString();
111     HRegion region = null;
112 
113     try {
114       if (this.server.isStopped() || this.rsServices.isStopping()) {
115         return;
116       }
117       final String encodedName = regionInfo.getEncodedName();
118 
119       // 3 different difficult situations can occur
120       // 1) The opening was cancelled. This is an expected situation
121       // 2) The region was hijacked, we no longer have the znode
122       // 3) The region is now marked as online while we're suppose to open. This would be a bug.
123 
124       // Check that this region is not already online
125       if (this.rsServices.getFromOnlineRegions(encodedName) != null) {
126         LOG.error("Region " + encodedName +
127             " was already online when we started processing the opening. " +
128             "Marking this new attempt as failed");
129         return;
130       }
131 
132       // Check that we're still supposed to open the region and transition.
133       // If fails, just return.  Someone stole the region from under us.
134       // Calling transitionZookeeperOfflineToOpening initializes this.version.
135       if (!isRegionStillOpening()){
136         LOG.error("Region " + encodedName + " opening cancelled");
137         return;
138       }
139 
140       if (useZKForAssignment
141           && !transitionZookeeperOfflineToOpening(encodedName, versionOfOfflineNode)) {
142         LOG.warn("Region was hijacked? Opening cancelled for encodedName=" + encodedName);
143         // This is a desperate attempt: the znode is unlikely to be ours. But we can't do more.
144         return;
145       }
146       transitionedToOpening = true;
147       // Open region.  After a successful open, failures in subsequent
148       // processing needs to do a close as part of cleanup.
149       region = openRegion();
150       if (region == null) {
151         return;
152       }
153 
154       boolean failed = true;
155       if (isRegionStillOpening() && (!useZKForAssignment || tickleOpening("post_region_open"))) {
156         if (updateMeta(region, masterSystemTime)) {
157           failed = false;
158         }
159       }
160       if (failed || this.server.isStopped() ||
161           this.rsServices.isStopping()) {
162         return;
163       }
164 
165 
166       if (!isRegionStillOpening() || (useZKForAssignment && !transitionToOpened(region))) {
167         // If we fail to transition to opened, it's because of one of two cases:
168         //    (a) we lost our ZK lease
169         // OR (b) someone else opened the region before us
170         // OR (c) someone cancelled the open
171         // In all cases, we try to transition to failed_open to be safe.
172         return;
173       }
174 
175       // We have a znode in the opened state now. We can't really delete it as the master job.
176       // Transitioning to failed open would create a race condition if the master has already
177       // acted the transition to opened.
178       // Cancelling the open is dangerous, because we would have a state where the master thinks
179       // the region is opened while the region is actually closed. It is a dangerous state
180       // to be in. For this reason, from now on, we're not going back. There is a message in the
181       // finally close to let the admin knows where we stand.
182 
183 
184       // Successful region open, and add it to OnlineRegions
185       this.rsServices.addToOnlineRegions(region);
186       openSuccessful = true;
187 
188       // Done!  Successful region open
189       LOG.debug("Opened " + regionName + " on " +
190         this.server.getServerName());
191 
192 
193     } finally {
194       // Do all clean up here
195       if (!openSuccessful) {
196         doCleanUpOnFailedOpen(region, transitionedToOpening);
197       }
198       final Boolean current = this.rsServices.getRegionsInTransitionInRS().
199           remove(this.regionInfo.getEncodedNameAsBytes());
200 
201       // Let's check if we have met a race condition on open cancellation....
202       // A better solution would be to not have any race condition.
203       // this.rsServices.getRegionsInTransitionInRS().remove(
204       //  this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
205       // would help, but we would still have a consistency issue to manage with
206       // 1) this.rsServices.addToOnlineRegions(region);
207       // 2) the ZK state.
208       if (openSuccessful) {
209         if (current == null) { // Should NEVER happen, but let's be paranoid.
210           LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
211               + regionName);
212         } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're
213                                                     // really unlucky.
214           LOG.error("Race condition: we've finished to open a region, while a close was requested "
215               + " on region=" + regionName + ". It can be a critical error, as a region that"
216               + " should be closed is now opened. Closing it now");
217           cleanupFailedOpen(region);
218         }
219       }
220     }
221   }
222 
223   private void doCleanUpOnFailedOpen(HRegion region, boolean transitionedToOpening)
224       throws IOException {
225     if (transitionedToOpening) {
226       try {
227         if (region != null) {
228           cleanupFailedOpen(region);
229         }
230       } finally {
231         if (!useZKForAssignment) {
232           rsServices.reportRegionStateTransition(TransitionCode.FAILED_OPEN, regionInfo);
233         } else {
234         // Even if cleanupFailed open fails we need to do this transition
235         // See HBASE-7698
236         tryTransitionFromOpeningToFailedOpen(regionInfo);
237         }
238       }
239     } else if (!useZKForAssignment) {
240       rsServices.reportRegionStateTransition(TransitionCode.FAILED_OPEN, regionInfo);
241     } else {
242       // If still transition to OPENING is not done, we need to transition znode
243       // to FAILED_OPEN
244       tryTransitionFromOfflineToFailedOpen(this.rsServices, regionInfo, versionOfOfflineNode);
245     }
246   }
247 
248   /**
249    * Update ZK or META.  This can take a while if for example the
250    * hbase:meta is not available -- if server hosting hbase:meta crashed and we are
251    * waiting on it to come back -- so run in a thread and keep updating znode
252    * state meantime so master doesn't timeout our region-in-transition.
253    * Caller must cleanup region if this fails.
254    */
255   boolean updateMeta(final HRegion r, final long masterSystemTime) {
256     if (this.server.isStopped() || this.rsServices.isStopping()) {
257       return false;
258     }
259     // Object we do wait/notify on.  Make it boolean.  If set, we're done.
260     // Else, wait.
261     final AtomicBoolean signaller = new AtomicBoolean(false);
262     PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r,
263       this.server, this.rsServices, signaller, masterSystemTime);
264     t.start();
265     // Total timeout for meta edit.  If we fail adding the edit then close out
266     // the region and let it be assigned elsewhere.
267     long timeout = assignmentTimeout * 10;
268     long now = System.currentTimeMillis();
269     long endTime = now + timeout;
270     // Let our period at which we update OPENING state to be be 1/3rd of the
271     // regions-in-transition timeout period.
272     long period = Math.max(1, assignmentTimeout/ 3);
273     long lastUpdate = now;
274     boolean tickleOpening = true;
275     while (!signaller.get() && t.isAlive() && !this.server.isStopped() &&
276         !this.rsServices.isStopping() && (endTime > now)) {
277       long elapsed = now - lastUpdate;
278       if (elapsed > period) {
279         // Only tickle OPENING if postOpenDeployTasks is taking some time.
280         lastUpdate = now;
281         if (useZKForAssignment) {
282           tickleOpening = tickleOpening("post_open_deploy");
283         }
284       }
285       synchronized (signaller) {
286         try {
287           if (!signaller.get()) signaller.wait(period);
288         } catch (InterruptedException e) {
289           // Go to the loop check.
290         }
291       }
292       now = System.currentTimeMillis();
293     }
294     // Is thread still alive?  We may have left above loop because server is
295     // stopping or we timed out the edit.  Is so, interrupt it.
296     if (t.isAlive()) {
297       if (!signaller.get()) {
298         // Thread still running; interrupt
299         LOG.debug("Interrupting thread " + t);
300         t.interrupt();
301       }
302       try {
303         t.join();
304       } catch (InterruptedException ie) {
305         LOG.warn("Interrupted joining " +
306           r.getRegionInfo().getRegionNameAsString(), ie);
307         Thread.currentThread().interrupt();
308       }
309     }
310 
311     // Was there an exception opening the region?  This should trigger on
312     // InterruptedException too.  If so, we failed.  Even if tickle opening fails
313     // then it is a failure.
314     return ((!Thread.interrupted() && t.getException() == null) && tickleOpening);
315   }
316 
317   /**
318    * Thread to run region post open tasks. Call {@link #getException()} after
319    * the thread finishes to check for exceptions running
320    * {@link RegionServerServices#postOpenDeployTasks(
321    * HRegion, org.apache.hadoop.hbase.catalog.CatalogTracker)}
322    * .
323    */
324   static class PostOpenDeployTasksThread extends Thread {
325     private Throwable exception = null;
326     private final Server server;
327     private final RegionServerServices services;
328     private final HRegion region;
329     private final AtomicBoolean signaller;
330     private final long masterSystemTime;
331 
332     PostOpenDeployTasksThread(final HRegion region, final Server server,
333         final RegionServerServices services, final AtomicBoolean signaller,
334         final long masterSystemTime) {
335       super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
336       this.setDaemon(true);
337       this.server = server;
338       this.services = services;
339       this.region = region;
340       this.signaller = signaller;      
341       this.masterSystemTime = masterSystemTime;
342     }
343 
344     public void run() {
345       try {
346         this.services.postOpenDeployTasks(new PostOpenDeployContext(region, masterSystemTime),
347           this.server.getCatalogTracker());
348       } catch (Throwable e) {
349         String msg =
350             "Exception running postOpenDeployTasks; region="
351                 + this.region.getRegionInfo().getEncodedName();
352         this.exception = e;
353         if (e instanceof IOException && isRegionStillOpening(region.getRegionInfo(), services)) {
354           server.abort(msg, e);
355         } else {
356           LOG.warn(msg, e);
357         }
358       }
359       // We're done.  Set flag then wake up anyone waiting on thread to complete.
360       this.signaller.set(true);
361       synchronized (this.signaller) {
362         this.signaller.notify();
363       }
364     }
365 
366     /**
367      * @return Null or the run exception; call this method after thread is done.
368      */
369     Throwable getException() {
370       return this.exception;
371     }
372   }
373 
374 
375   /**
376    * @param r Region we're working on.
377    * @return whether znode is successfully transitioned to OPENED state.
378    * @throws IOException
379    */
380   boolean transitionToOpened(final HRegion r) throws IOException {
381     boolean result = false;
382     HRegionInfo hri = r.getRegionInfo();
383     final String name = hri.getRegionNameAsString();
384     // Finally, Transition ZK node to OPENED
385     try {
386       if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri,
387           this.server.getServerName(), this.version) == -1) {
388         String warnMsg = "Completed the OPEN of region " + name +
389           " but when transitioning from " + " OPENING to OPENED ";
390         try {
391           String node = ZKAssign.getNodeName(this.server.getZooKeeper(), hri.getEncodedName());
392           if (ZKUtil.checkExists(this.server.getZooKeeper(), node) < 0) {
393             // if the znode 
394             rsServices.abort(warnMsg + "the znode disappeared", null);
395           } else {
396             LOG.warn(warnMsg + "got a version mismatch, someone else clashed; " +
397           "so now unassigning -- closing region on server: " + this.server.getServerName());
398           }
399         } catch (KeeperException ke) {
400           rsServices.abort(warnMsg, ke);
401         }
402       } else {
403         LOG.debug("Transitioned " + r.getRegionInfo().getEncodedName() +
404           " to OPENED in zk on " + this.server.getServerName());
405         result = true;
406       }
407     } catch (KeeperException e) {
408       LOG.error("Failed transitioning node " + name +
409         " from OPENING to OPENED -- closing region", e);
410     }
411     return result;
412   }
413 
414   /**
415    * This is not guaranteed to succeed, we just do our best.
416    * @param hri Region we're working on.
417    * @return whether znode is successfully transitioned to FAILED_OPEN state.
418    */
419   private boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri) {
420     boolean result = false;
421     final String name = hri.getRegionNameAsString();
422     try {
423       LOG.info("Opening of region " + hri + " failed, transitioning" +
424           " from OPENING to FAILED_OPEN in ZK, expecting version " + this.version);
425       if (ZKAssign.transitionNode(
426           this.server.getZooKeeper(), hri,
427           this.server.getServerName(),
428           EventType.RS_ZK_REGION_OPENING,
429           EventType.RS_ZK_REGION_FAILED_OPEN,
430           this.version) == -1) {
431         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
432             "It's likely that the master already timed out this open " +
433             "attempt, and thus another RS already has the region.");
434       } else {
435         result = true;
436       }
437     } catch (KeeperException e) {
438       LOG.error("Failed transitioning node " + name +
439         " from OPENING to FAILED_OPEN", e);
440     }
441     return result;
442   }
443 
444   /**
445    * Try to transition to open. This function is static to make it usable before creating the
446    *  handler.
447    *
448    * This is not guaranteed to succeed, we just do our best.
449    *
450    * @param rsServices
451    * @param hri Region we're working on.
452    * @param versionOfOfflineNode version to checked.
453    * @return whether znode is successfully transitioned to FAILED_OPEN state.
454    */
455   public static boolean tryTransitionFromOfflineToFailedOpen(RegionServerServices rsServices,
456        final HRegionInfo hri, final int versionOfOfflineNode) {
457     boolean result = false;
458     final String name = hri.getRegionNameAsString();
459     try {
460       LOG.info("Opening of region " + hri + " failed, transitioning" +
461           " from OFFLINE to FAILED_OPEN in ZK, expecting version " + versionOfOfflineNode);
462       if (ZKAssign.transitionNode(
463           rsServices.getZooKeeper(), hri,
464           rsServices.getServerName(),
465           EventType.M_ZK_REGION_OFFLINE,
466           EventType.RS_ZK_REGION_FAILED_OPEN,
467           versionOfOfflineNode) == -1) {
468         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
469             "It's likely that the master already timed out this open " +
470             "attempt, and thus another RS already has the region.");
471       } else {
472         result = true;
473       }
474     } catch (KeeperException e) {
475       LOG.error("Failed transitioning node " + name + " from OFFLINE to FAILED_OPEN", e);
476     }
477     return result;
478   }
479 
480 
481   /**
482    * @return Instance of HRegion if successful open else null.
483    */
484   HRegion openRegion() {
485     HRegion region = null;
486     try {
487       // Instantiate the region.  This also periodically tickles our zk OPENING
488       // state so master doesn't timeout this region in transition.
489       region = HRegion.openHRegion(this.regionInfo, this.htd,
490           this.rsServices.getWAL(this.regionInfo),
491           this.server.getConfiguration(),
492           this.rsServices,
493         new CancelableProgressable() {
494               public boolean progress() {
495                 if (useZKForAssignment) {
496                   // We may lose the znode ownership during the open. Currently its
497                   // too hard interrupting ongoing region open. Just let it complete
498                   // and check we still have the znode after region open.
499                   // if tickle failed, we need to cancel opening region.
500                   return tickleOpening("open_region_progress");
501                 }
502                 if (!isRegionStillOpening()) {
503                   LOG.warn("Open region aborted since it isn't opening any more");
504                   return false;
505                 }
506                 return true;
507               }
508         });
509     } catch (Throwable t) {
510       // We failed open. Our caller will see the 'null' return value
511       // and transition the node back to FAILED_OPEN. If that fails,
512       // we rely on the Timeout Monitor in the master to reassign.
513       LOG.error(
514           "Failed open of region=" + this.regionInfo.getRegionNameAsString()
515               + ", starting to roll back the global memstore size.", t);
516       // Decrease the global memstore size.
517       if (this.rsServices != null) {
518         RegionServerAccounting rsAccounting =
519           this.rsServices.getRegionServerAccounting();
520         if (rsAccounting != null) {
521           rsAccounting.rollbackRegionReplayEditsSize(this.regionInfo.getRegionName());
522         }
523       }
524     }
525     return region;
526   }
527 
528   void cleanupFailedOpen(final HRegion region) throws IOException {
529     if (region != null) {
530       byte[] encodedName = regionInfo.getEncodedNameAsBytes();
531       try {
532         rsServices.getRegionsInTransitionInRS().put(encodedName,Boolean.FALSE);
533         this.rsServices.removeFromOnlineRegions(region, null);
534         region.close();
535       } finally {
536         rsServices.getRegionsInTransitionInRS().remove(encodedName);
537       }
538     }
539   }
540 
541   private static boolean isRegionStillOpening(HRegionInfo regionInfo,
542       RegionServerServices rsServices) {
543     byte[] encodedName = regionInfo.getEncodedNameAsBytes();
544     Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
545     return Boolean.TRUE.equals(action); // true means opening for RIT
546   }
547 
548   private boolean isRegionStillOpening() {
549     return isRegionStillOpening(regionInfo, rsServices);
550   }
551 
552   /**
553    * Transition ZK node from OFFLINE to OPENING.
554    * @param encodedName Name of the znode file (Region encodedName is the znode
555    * name).
556    * @param versionOfOfflineNode - version Of OfflineNode that needs to be compared
557    * before changing the node's state from OFFLINE
558    * @return True if successful transition.
559    */
560   boolean transitionZookeeperOfflineToOpening(final String encodedName,
561       int versionOfOfflineNode) {
562     // TODO: should also handle transition from CLOSED?
563     try {
564       // Initialize the znode version.
565       this.version = ZKAssign.transitionNode(server.getZooKeeper(), regionInfo,
566           server.getServerName(), EventType.M_ZK_REGION_OFFLINE,
567           EventType.RS_ZK_REGION_OPENING, versionOfOfflineNode);
568     } catch (KeeperException e) {
569       LOG.error("Error transition from OFFLINE to OPENING for region=" +
570         encodedName, e);
571       this.version = -1;
572       return false;
573     }
574     boolean b = isGoodVersion();
575     if (!b) {
576       LOG.warn("Failed transition from OFFLINE to OPENING for region=" +
577         encodedName);
578     }
579     return b;
580   }
581 
582   /**
583    * Update our OPENING state in zookeeper.
584    * Do this so master doesn't timeout this region-in-transition.
585    * @param context Some context to add to logs if failure
586    * @return True if successful transition.
587    */
588   boolean tickleOpening(final String context) {
589     if (!isRegionStillOpening()) {
590       LOG.warn("Open region aborted since it isn't opening any more");
591       return false;
592     }
593     // If previous checks failed... do not try again.
594     if (!isGoodVersion()) return false;
595     String encodedName = this.regionInfo.getEncodedName();
596     try {
597       this.version =
598         ZKAssign.retransitionNodeOpening(server.getZooKeeper(),
599           this.regionInfo, this.server.getServerName(), this.version, tomActivated);
600     } catch (KeeperException e) {
601       server.abort("Exception refreshing OPENING; region=" + encodedName +
602         ", context=" + context, e);
603       this.version = -1;
604       return false;
605     }
606     boolean b = isGoodVersion();
607     if (!b) {
608       LOG.warn("Failed refreshing OPENING; region=" + encodedName +
609         ", context=" + context);
610     }
611     return b;
612   }
613 
614   private boolean isGoodVersion() {
615     return this.version != -1;
616   }
617 }