1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.handler;
20  
21  import java.io.IOException;
22  import java.util.Map;
23  import java.util.concurrent.atomic.AtomicBoolean;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.HRegionInfo;
29  import org.apache.hadoop.hbase.HTableDescriptor;
30  import org.apache.hadoop.hbase.Server;
31  import org.apache.hadoop.hbase.executor.EventHandler;
32  import org.apache.hadoop.hbase.executor.EventType;
33  import org.apache.hadoop.hbase.master.AssignmentManager;
34  import org.apache.hadoop.hbase.regionserver.HRegion;
35  import org.apache.hadoop.hbase.regionserver.RegionServerAccounting;
36  import org.apache.hadoop.hbase.regionserver.RegionServerServices;
37  import org.apache.hadoop.hbase.util.CancelableProgressable;
38  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
39  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
40  import org.apache.zookeeper.KeeperException;
41  /**
42   * Handles opening of a region on a region server.
43   * <p>
44   * This is executed after receiving an OPEN RPC from the master or client.
45   */
46  @InterfaceAudience.Private
47  public class OpenRegionHandler extends EventHandler {
48    private static final Log LOG = LogFactory.getLog(OpenRegionHandler.class);
49  
50    protected final RegionServerServices rsServices;
51  
52    private final HRegionInfo regionInfo;
53    private final HTableDescriptor htd;
54  
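      // Whether the master's assignment timeout monitor is enabled, and the configured
      // region-in-transition timeout; both are read from the server configuration in the
      // constructor and are used when refreshing ("tickling") our OPENING znode.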
55    private boolean tomActivated;
56    private int assignmentTimeout;
57  
58    // We get the version of our znode at the start of the open process and monitor it
59    // across the whole open. We'll fail the open if someone hijacks our znode; we can
60    // tell this has happened if the version is not as expected.
61    private volatile int version = -1;
62    //version of the offline node that was set by the master
63    private volatile int versionOfOfflineNode = -1;
64  
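      /**
       * Construct a handler with no known version for the master-created OFFLINE znode
       * (a versionOfOfflineNode of -1 is passed through).
       */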
65    public OpenRegionHandler(final Server server,
66        final RegionServerServices rsServices, HRegionInfo regionInfo,
67        HTableDescriptor htd) {
68      this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION, -1);
69    }
70    public OpenRegionHandler(final Server server,
71        final RegionServerServices rsServices, HRegionInfo regionInfo,
72        HTableDescriptor htd, int versionOfOfflineNode) {
73      this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION,
74          versionOfOfflineNode);
75    }
76  
77    protected OpenRegionHandler(final Server server,
78        final RegionServerServices rsServices, final HRegionInfo regionInfo,
79        final HTableDescriptor htd, EventType eventType,
80        final int versionOfOfflineNode) {
81      super(server, eventType);
82      this.rsServices = rsServices;
83      this.regionInfo = regionInfo;
84      this.htd = htd;
85      this.versionOfOfflineNode = versionOfOfflineNode;
86      tomActivated = this.server.getConfiguration().
87        getBoolean(AssignmentManager.ASSIGNMENT_TIMEOUT_MANAGEMENT,
88          AssignmentManager.DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT);
89      assignmentTimeout = this.server.getConfiguration().
90        getInt(AssignmentManager.ASSIGNMENT_TIMEOUT,
91          AssignmentManager.DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT);
92    }
93  
94    public HRegionInfo getRegionInfo() {
95      return regionInfo;
96    }
97  
98    @Override
99    public void process() throws IOException {
100     boolean openSuccessful = false;
101     boolean transitionedToOpening = false;
102     final String regionName = regionInfo.getRegionNameAsString();
103     HRegion region = null;
104 
105     try {
106       if (this.server.isStopped() || this.rsServices.isStopping()) {
107         return;
108       }
109       final String encodedName = regionInfo.getEncodedName();
110 
111       // 3 different difficult situations can occur
112       // 1) The opening was cancelled. This is an expected situation
113       // 2) The region was hijacked, we no longer have the znode
114     // 3) The region is now marked as online while we're supposed to open. This would be a bug.
115 
116       // Check that this region is not already online
117       if (this.rsServices.getFromOnlineRegions(encodedName) != null) {
118         LOG.error("Region " + encodedName +
119             " was already online when we started processing the opening. " +
120             "Marking this new attempt as failed");
121         return;
122       }
123 
124       // Check that we're still supposed to open the region and transition.
125       // If fails, just return.  Someone stole the region from under us.
126       // Calling transitionZookeeperOfflineToOpening initializes this.version.
127       if (!isRegionStillOpening()) {
128         LOG.error("Region " + encodedName + " opening cancelled");
129         return;
130       }
131 
132       if (!transitionZookeeperOfflineToOpening(encodedName, versionOfOfflineNode)) {
133         LOG.warn("Region was hijacked? Opening cancelled for encodedName=" + encodedName);
134         // This is a desperate attempt: the znode is unlikely to be ours. But we can't do more.
135         return;
136       }
137       transitionedToOpening = true;
138       // Open region.  After a successful open, any failure in subsequent
139       // processing needs to do a close as part of cleanup.
140       region = openRegion();
141       if (region == null) {
142         return;
143       }
144 
145       // check if we need to set the current region in recovering state
146       region.setRecovering(false);
147       Map<String, HRegion> recoveringRegions = this.rsServices.getRecoveringRegions();
148       if (recoveringRegions != null && !recoveringRegions.isEmpty()
149           && recoveringRegions.containsKey(region.getRegionInfo().getEncodedName())) {
150         region.setRecovering(true);
151         recoveringRegions.put(region.getRegionInfo().getEncodedName(), region);
152       }
153 
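          // Refresh our OPENING znode once more so the master does not time out this
          // region-in-transition, then run the post-open deploy tasks (the hbase:meta
          // update) via updateMeta().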
154       boolean failed = true;
155       if (tickleOpening("post_region_open")) {
156         if (updateMeta(region)) {
157           failed = false;
158         }
159       }
160       if (failed || this.server.isStopped() ||
161           this.rsServices.isStopping()) {
162         return;
163       }
164 
165 
166       if (!isRegionStillOpening() || !transitionToOpened(region)) {
167         // If we fail to transition to opened, it's because of one of the following cases:
168         //    (a) we lost our ZK lease
169         // OR (b) someone else opened the region before us
170         // OR (c) someone cancelled the open
171         // In all cases, we try to transition to failed_open to be safe.
172         return;
173       }
174 
175       // We have a znode in the opened state now. We can't really delete it, as that is the
176       // master's job. Transitioning to failed open would create a race condition if the master
177       // has already acted on the transition to opened.
178       // Cancelling the open is dangerous, because we would end up in a state where the master
179       // thinks the region is opened while the region is actually closed. It is a dangerous state
180       // to be in. For this reason, from now on, we're not going back. There is a message in the
181       // finally clause to let the admin know where we stand.
182 
183 
184       // Successful region open; add it to OnlineRegions
185       this.rsServices.addToOnlineRegions(region);
186       openSuccessful = true;
187 
188       // Done!  Successful region open
189       LOG.debug("Opened " + regionName + " on " +
190         this.server.getServerName());
191 
192 
193     } finally {
194       // Do all clean up here
195       if (!openSuccessful) {
196         doCleanUpOnFailedOpen(region, transitionedToOpening);
197       }
198       final Boolean current = this.rsServices.getRegionsInTransitionInRS().
199           remove(this.regionInfo.getEncodedNameAsBytes());
200 
201       // Let's check if we have met a race condition on open cancellation....
202       // A better solution would be to not have any race condition.
203       // this.rsServices.getRegionsInTransitionInRS().remove(
204       //  this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
205       // would help, but we would still have a consistency issue to manage with
206       // 1) this.rsServices.addToOnlineRegions(region);
207       // 2) the ZK state.
208       if (openSuccessful) {
209         if (current == null) { // Should NEVER happen, but let's be paranoid.
210           LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
211               + regionName);
212         } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're
213                                                     // really unlucky.
214           LOG.error("Race condition: we've finished opening a region, while a close was requested"
215               + " on region=" + regionName + ". It can be a critical error, as a region that"
216               + " should be closed is now opened. Closing it now");
217           cleanupFailedOpen(region);
218         }
219       }
220     }
221   }
222 
223   private void doCleanUpOnFailedOpen(HRegion region, boolean transitionedToOpening)
224       throws IOException {
225     if (transitionedToOpening) {
226       try {
227         if (region != null) {
228           cleanupFailedOpen(region);
229         }
230       } finally {
231         // Even if cleanupFailedOpen fails we need to do this transition
232         // See HBASE-7698
233         tryTransitionFromOpeningToFailedOpen(regionInfo);
234       }
235     } else {
236       // If the transition to OPENING was never done, we need to transition the znode
237       // directly to FAILED_OPEN
238       tryTransitionFromOfflineToFailedOpen(this.rsServices, regionInfo, versionOfOfflineNode);
239     }
240   }
241 
242   /**
243    * Update ZK or META.  This can take a while if, for example, hbase:meta is not
244    * available -- if the server hosting hbase:meta crashed and we are
245    * waiting on it to come back -- so run in a thread and keep updating znode
246    * state in the meantime so the master doesn't time out our region-in-transition.
247    * Caller must clean up the region if this fails.
248    */
249   boolean updateMeta(final HRegion r) {
250     if (this.server.isStopped() || this.rsServices.isStopping()) {
251       return false;
252     }
253     // Object we do wait/notify on.  Make it boolean.  If set, we're done.
254     // Else, wait.
255     final AtomicBoolean signaller = new AtomicBoolean(false);
256     PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r,
257       this.server, this.rsServices, signaller);
258     t.start();
259     // Total timeout for meta edit.  If we fail adding the edit then close out
260     // the region and let it be assigned elsewhere.
261     long timeout = assignmentTimeout * 10;
262     long now = System.currentTimeMillis();
263     long endTime = now + timeout;
264     // Let our period at which we update OPENING state be 1/3rd of the
265     // regions-in-transition timeout period.
266     long period = Math.max(1, assignmentTimeout/ 3);
267     long lastUpdate = now;
268     boolean tickleOpening = true;
269     while (!signaller.get() && t.isAlive() && !this.server.isStopped() &&
270         !this.rsServices.isStopping() && (endTime > now)) {
271       long elapsed = now - lastUpdate;
272       if (elapsed > period) {
273         // Only tickle OPENING if postOpenDeployTasks is taking some time.
274         lastUpdate = now;
275         tickleOpening = tickleOpening("post_open_deploy");
276       }
277       synchronized (signaller) {
278         try {
279           signaller.wait(period);
280         } catch (InterruptedException e) {
281           // Go to the loop check.
282         }
283       }
284       now = System.currentTimeMillis();
285     }
286     // Is the thread still alive?  We may have left the above loop because the server is
287     // stopping or we timed out the edit.  If so, interrupt it.
288     if (t.isAlive()) {
289       if (!signaller.get()) {
290         // Thread still running; interrupt
291         LOG.debug("Interrupting thread " + t);
292         t.interrupt();
293       }
294       try {
295         t.join();
296       } catch (InterruptedException ie) {
297         LOG.warn("Interrupted joining " +
298           r.getRegionInfo().getRegionNameAsString(), ie);
299         Thread.currentThread().interrupt();
300       }
301     }
302 
303     // Was there an exception opening the region?  This should trigger on
304     // InterruptedException too.  If so, we failed.  A failed tickle of the
305     // OPENING state also counts as a failure.
306     return ((!Thread.interrupted() && t.getException() == null) && tickleOpening);
307   }
308 
309   /**
310    * Thread to run region post open tasks. Call {@link #getException()} after
311    * the thread finishes to check for exceptions running
312    * {@link RegionServerServices#postOpenDeployTasks(
313    * HRegion, org.apache.hadoop.hbase.catalog.CatalogTracker)}
314    * .
315    */
316   static class PostOpenDeployTasksThread extends Thread {
317     private Exception exception = null;
318     private final Server server;
319     private final RegionServerServices services;
320     private final HRegion region;
321     private final AtomicBoolean signaller;
322 
323     PostOpenDeployTasksThread(final HRegion region, final Server server,
324         final RegionServerServices services, final AtomicBoolean signaller) {
325       super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
326       this.setDaemon(true);
327       this.server = server;
328       this.services = services;
329       this.region = region;
330       this.signaller = signaller;
331     }
332 
333     public void run() {
334       try {
335         this.services.postOpenDeployTasks(this.region,
336           this.server.getCatalogTracker());
337       } catch (KeeperException e) {
338         server.abort("Exception running postOpenDeployTasks; region=" +
339             this.region.getRegionInfo().getEncodedName(), e);
340       } catch (Exception e) {
341         LOG.warn("Exception running postOpenDeployTasks; region=" +
342           this.region.getRegionInfo().getEncodedName(), e);
343         this.exception = e;
344       }
345       // We're done.  Set flag then wake up anyone waiting on thread to complete.
346       this.signaller.set(true);
347       synchronized (this.signaller) {
348         this.signaller.notify();
349       }
350     }
351 
352     /**
353      * @return Null or the run exception; call this method after thread is done.
354      */
355     Exception getException() {
356       return this.exception;
357     }
358   }
359 
360 
361   /**
362    * @param r Region we're working on.
363    * @return whether znode is successfully transitioned to OPENED state.
364    * @throws IOException
365    */
366   boolean transitionToOpened(final HRegion r) throws IOException {
367     boolean result = false;
368     HRegionInfo hri = r.getRegionInfo();
369     final String name = hri.getRegionNameAsString();
370     // Finally, Transition ZK node to OPENED
371     try {
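          // transitionNodeOpened returns -1 when the transition failed: either our znode
          // disappeared or someone else changed it (version mismatch); the branches below
          // work out which case we are in.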
372       if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri,
373           this.server.getServerName(), this.version) == -1) {
374         String warnMsg = "Completed the OPEN of region " + name +
375           " but when transitioning from OPENING to OPENED ";
376         try {
377           String node = ZKAssign.getNodeName(this.server.getZooKeeper(), hri.getEncodedName());
378           if (ZKUtil.checkExists(this.server.getZooKeeper(), node) < 0) {
379             // The znode no longer exists; we no longer know the region's state, so abort.
380             rsServices.abort(warnMsg + "the znode disappeared", null);
381           } else {
382             LOG.warn(warnMsg + "got a version mismatch, someone else clashed; " +
383               "so now unassigning -- closing region on server: " + this.server.getServerName());
384           }
385         } catch (KeeperException ke) {
386           rsServices.abort(warnMsg, ke);
387         }
388       } else {
389         LOG.debug("Transitioned " + r.getRegionInfo().getEncodedName() +
390           " to OPENED in zk on " + this.server.getServerName());
391         result = true;
392       }
393     } catch (KeeperException e) {
394       LOG.error("Failed transitioning node " + name +
395         " from OPENING to OPENED -- closing region", e);
396     }
397     return result;
398   }
399 
400   /**
401    * This is not guaranteed to succeed; we just do our best.
402    * @param hri Region we're working on.
403    * @return whether znode is successfully transitioned to FAILED_OPEN state.
404    */
405   private boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri) {
406     boolean result = false;
407     final String name = hri.getRegionNameAsString();
408     try {
409       LOG.info("Opening of region " + hri + " failed, transitioning" +
410           " from OPENING to FAILED_OPEN in ZK, expecting version " + this.version);
411       if (ZKAssign.transitionNode(
412           this.server.getZooKeeper(), hri,
413           this.server.getServerName(),
414           EventType.RS_ZK_REGION_OPENING,
415           EventType.RS_ZK_REGION_FAILED_OPEN,
416           this.version) == -1) {
417         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
418             "It's likely that the master already timed out this open " +
419             "attempt, and thus another RS already has the region.");
420       } else {
421         result = true;
422       }
423     } catch (KeeperException e) {
424       LOG.error("Failed transitioning node " + name +
425         " from OPENING to FAILED_OPEN", e);
426     }
427     return result;
428   }
429 
430   /**
431    * Try to transition the znode to FAILED_OPEN. This function is static to make it usable
432    * before creating the handler.
433    *
434    * This is not guaranteed to succeed; we just do our best.
435    *
436    * @param rsServices the region server services
437    * @param hri Region we're working on.
438    * @param versionOfOfflineNode version of the offline znode to be checked.
439    * @return whether znode is successfully transitioned to FAILED_OPEN state.
440    */
441   public static boolean tryTransitionFromOfflineToFailedOpen(RegionServerServices rsServices,
442        final HRegionInfo hri, final int versionOfOfflineNode) {
443     boolean result = false;
444     final String name = hri.getRegionNameAsString();
445     try {
446       LOG.info("Opening of region " + hri + " failed, transitioning" +
447           " from OFFLINE to FAILED_OPEN in ZK, expecting version " + versionOfOfflineNode);
448       if (ZKAssign.transitionNode(
449           rsServices.getZooKeeper(), hri,
450           rsServices.getServerName(),
451           EventType.M_ZK_REGION_OFFLINE,
452           EventType.RS_ZK_REGION_FAILED_OPEN,
453           versionOfOfflineNode) == -1) {
454         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
455             "It's likely that the master already timed out this open " +
456             "attempt, and thus another RS already has the region.");
457       } else {
458         result = true;
459       }
460     } catch (KeeperException e) {
461       LOG.error("Failed transitioning node " + name + " from OFFLINE to FAILED_OPEN", e);
462     }
463     return result;
464   }
465 
466 
467   /**
468    * @return Instance of HRegion if the open was successful, else null.
469    */
470   HRegion openRegion() {
471     HRegion region = null;
472     try {
473       // Instantiate the region.  This also periodically tickles our zk OPENING
474       // state so master doesn't timeout this region in transition.
475       region = HRegion.openHRegion(this.regionInfo, this.htd,
476           this.rsServices.getWAL(this.regionInfo), 
477           this.server.getConfiguration(),
478           this.rsServices,
479         new CancelableProgressable() {
480           public boolean progress() {
481           // We may lose the znode ownership during the open.  Currently it's
482           // too hard to interrupt an ongoing region open.  Just let it complete
483           // and check we still have the znode after region open.
484             return tickleOpening("open_region_progress");
485           }
486         });
487     } catch (Throwable t) {
488       // We failed open. Our caller will see the 'null' return value
489       // and transition the node back to FAILED_OPEN. If that fails,
490       // we rely on the Timeout Monitor in the master to reassign.
491       LOG.error(
492           "Failed open of region=" + this.regionInfo.getRegionNameAsString()
493               + ", starting to roll back the global memstore size.", t);
494       // Decrease the global memstore size.
495       if (this.rsServices != null) {
496         RegionServerAccounting rsAccounting =
497           this.rsServices.getRegionServerAccounting();
498         if (rsAccounting != null) {
499           rsAccounting.rollbackRegionReplayEditsSize(this.regionInfo.getRegionName());
500         }
501       }
502     }
503     return region;
504   }
505 
506   void cleanupFailedOpen(final HRegion region) throws IOException {
507     if (region != null) {
508       this.rsServices.removeFromOnlineRegions(region, null);
509       region.close();
510     }
511   }
512 
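      /**
       * @return True if this region is still marked as opening (Boolean.TRUE) in this
       * region server's regions-in-transition map; false if the open was cancelled or a
       * close has been requested.
       */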
513   private boolean isRegionStillOpening() {
514     byte[] encodedName = regionInfo.getEncodedNameAsBytes();
515     Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
516     return Boolean.TRUE.equals(action); // true means opening for RIT
517   }
518 
519   /**
520    * Transition ZK node from OFFLINE to OPENING.
521    * @param encodedName Name of the znode file (Region encodedName is the znode
522    * name).
523    * @param versionOfOfflineNode version of the offline znode that needs to be compared
524    * before changing the node's state from OFFLINE
525    * @return True if successful transition.
526    */
527   boolean transitionZookeeperOfflineToOpening(final String encodedName,
528       int versionOfOfflineNode) {
529     // TODO: should also handle transition from CLOSED?
530     try {
531       // Initialize the znode version.
532       this.version = ZKAssign.transitionNode(server.getZooKeeper(), regionInfo,
533           server.getServerName(), EventType.M_ZK_REGION_OFFLINE,
534           EventType.RS_ZK_REGION_OPENING, versionOfOfflineNode);
535     } catch (KeeperException e) {
536       LOG.error("Error transitioning from OFFLINE to OPENING for region=" +
537         encodedName, e);
538       this.version = -1;
539       return false;
540     }
541     boolean b = isGoodVersion();
542     if (!b) {
543       LOG.warn("Failed transition from OFFLINE to OPENING for region=" +
544         encodedName);
545     }
546     return b;
547   }
548 
549   /**
550    * Update our OPENING state in zookeeper.
551    * Do this so master doesn't timeout this region-in-transition.
552    * @param context Some context to add to logs in case of failure
553    * @return True if successful transition.
554    */
555   boolean tickleOpening(final String context) {
556     if (!isRegionStillOpening()) {
557       LOG.warn("Open region aborted since it isn't opening any more");
558       return false;
559     }
560     // If previous checks failed... do not try again.
561     if (!isGoodVersion()) return false;
562     String encodedName = this.regionInfo.getEncodedName();
563     try {
564       this.version =
565         ZKAssign.retransitionNodeOpening(server.getZooKeeper(),
566           this.regionInfo, this.server.getServerName(), this.version, tomActivated);
567     } catch (KeeperException e) {
568       server.abort("Exception refreshing OPENING; region=" + encodedName +
569         ", context=" + context, e);
570       this.version = -1;
571       return false;
572     }
573     boolean b = isGoodVersion();
574     if (!b) {
575       LOG.warn("Failed refreshing OPENING; region=" + encodedName +
576         ", context=" + context);
577     }
578     return b;
579   }
580 
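      // A version of -1 means we never obtained, or have since lost, ownership of the
      // OPENING znode; tickleOpening() will then refuse to refresh the OPENING state.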
581   private boolean isGoodVersion() {
582     return this.version != -1;
583   }
584 }