1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.handler;
20  
21  import java.io.IOException;
22  import java.util.concurrent.atomic.AtomicBoolean;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.HRegionInfo;
28  import org.apache.hadoop.hbase.HTableDescriptor;
29  import org.apache.hadoop.hbase.Server;
30  import org.apache.hadoop.hbase.executor.EventHandler;
31  import org.apache.hadoop.hbase.executor.EventType;
32  import org.apache.hadoop.hbase.regionserver.HRegion;
33  import org.apache.hadoop.hbase.regionserver.RegionServerAccounting;
34  import org.apache.hadoop.hbase.regionserver.RegionServerServices;
35  import org.apache.hadoop.hbase.util.CancelableProgressable;
36  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
37  import org.apache.zookeeper.KeeperException;
38  
39  /**
40   * Handles opening of a region on a region server.
41   * <p>
42   * This is executed after receiving an OPEN RPC from the master or client.
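     * <p>
     * As a rough, illustrative sketch (not lifted from any particular caller), a region server
     * would typically construct the handler with the znode version the master used for the
     * OFFLINE node and hand it to its executor; the {@code executorService} name below is an
     * assumption for illustration only:
     * <pre>
     *   // Illustrative only: submit the open to whatever executor the region server uses.
     *   executorService.submit(
     *       new OpenRegionHandler(server, rsServices, regionInfo, htd, versionOfOfflineNode));
     * </pre>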
43   */
44  @InterfaceAudience.Private
45  public class OpenRegionHandler extends EventHandler {
46    private static final Log LOG = LogFactory.getLog(OpenRegionHandler.class);
47  
48    protected final RegionServerServices rsServices;
49  
50    private final HRegionInfo regionInfo;
51    private final HTableDescriptor htd;
52  
53    // We get the version of our znode at the start of the open process and monitor it across
54    // the whole open. We'll fail the open if someone hijacks our znode; we can
55    // tell this has happened if the version is not as expected.
56    private volatile int version = -1;
57    // Version of the offline node that was set by the master.
58    private volatile int versionOfOfflineNode = -1;
59  
60    public OpenRegionHandler(final Server server,
61        final RegionServerServices rsServices, HRegionInfo regionInfo,
62        HTableDescriptor htd) {
63      this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION, -1);
64    }
65    public OpenRegionHandler(final Server server,
66        final RegionServerServices rsServices, HRegionInfo regionInfo,
67        HTableDescriptor htd, int versionOfOfflineNode) {
68      this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION,
69          versionOfOfflineNode);
70    }
71  
72    protected OpenRegionHandler(final Server server,
73        final RegionServerServices rsServices, final HRegionInfo regionInfo,
74        final HTableDescriptor htd, EventType eventType,
75        final int versionOfOfflineNode) {
76      super(server, eventType);
77      this.rsServices = rsServices;
78      this.regionInfo = regionInfo;
79      this.htd = htd;
80      this.versionOfOfflineNode = versionOfOfflineNode;
81    }
82  
83    public HRegionInfo getRegionInfo() {
84      return regionInfo;
85    }
86  
87    @Override
88    public void process() throws IOException {
89      boolean openSuccessful = false;
90      boolean transitionedToOpening = false;
91      final String regionName = regionInfo.getRegionNameAsString();
92      HRegion region = null;
93  
94      try {
95        if (this.server.isStopped() || this.rsServices.isStopping()) {
96          return;
97        }
98        final String encodedName = regionInfo.getEncodedName();
99  
100       // Three difficult situations can occur:
101       // 1) The opening was cancelled. This is an expected situation.
102       // 2) The region was hijacked; we no longer have the znode.
103       // 3) The region is now marked as online while we're supposed to open it. This would be a bug.
104 
105       // Check that this region is not already online
106       if (this.rsServices.getFromOnlineRegions(encodedName) != null) {
107         LOG.error("Region " + encodedName +
108             " was already online when we started processing the opening. " +
109             "Marking this new attempt as failed");
110         return;
111       }
112 
113       // Check that we're still supposed to open the region and transition.
114       // If the check fails, just return.  Someone stole the region from under us.
115       // Calling transitionZookeeperOfflineToOpening initializes this.version.
116       if (!isRegionStillOpening()){
117         LOG.error("Region " + encodedName + " opening cancelled");
118         return;
119       }
120 
121       if (!transitionZookeeperOfflineToOpening(encodedName, versionOfOfflineNode)) {
122         LOG.warn("Region was hijacked? Opening cancelled for encodedName=" + encodedName);
123         // This is a desperate attempt: the znode is unlikely to be ours. But we can't do more.
124         return;
125       }
126       transitionedToOpening = true;
127       // Open region.  After a successful open, any failure in subsequent
128       // processing needs to do a close as part of cleanup.
129       region = openRegion();
130       if (region == null) {
131         return;
132       }
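          // Refresh our OPENING znode before the potentially slow META update so the master's
          // timeout monitor does not expire this region-in-transition; if either the tickle or
          // the META update fails, bail out and let the finally block clean up.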
133       boolean failed = true;
134       if (tickleOpening("post_region_open")) {
135         if (updateMeta(region)) {
136           failed = false;
137         }
138       }
139       if (failed || this.server.isStopped() ||
140           this.rsServices.isStopping()) {
141         return;
142       }
143 
144 
145       if (!isRegionStillOpening() || !transitionToOpened(region)) {
146         // If we fail to transition to opened, it's because of one of three cases:
147         //    (a) we lost our ZK lease
148         // OR (b) someone else opened the region before us
149         // OR (c) someone cancelled the open
150         // In all cases, we try to transition to FAILED_OPEN to be safe.
151         return;
152       }
153 
154       // We have a znode in the OPENED state now. We can't really delete it; that is the
155       // master's job. Transitioning to FAILED_OPEN would create a race condition if the
156       // master has already acted on the transition to OPENED.
157       // Cancelling the open is dangerous, because we would end up in a state where the master
158       // thinks the region is opened while the region is actually closed. That is a dangerous
159       // state to be in. For this reason, from now on, we're not going back. There is a message
160       // in the finally clause to let the admin know where we stand.
161 
162 
163       // Region open succeeded; add it to OnlineRegions.
164       this.rsServices.addToOnlineRegions(region);
165       openSuccessful = true;
166 
167       // Done!  Successful region open
168       LOG.debug("Opened " + regionName + " on server:" +
169         this.server.getServerName());
170 
171 
172     } finally {
173       // Do all clean up here
174       if (!openSuccessful) {
175         doCleanUpOnFailedOpen(region, transitionedToOpening);
176       }
177       final Boolean current = this.rsServices.getRegionsInTransitionInRS().
178           remove(this.regionInfo.getEncodedNameAsBytes());
179 
180       // Let's check whether we hit a race condition on open cancellation.
181       // A better solution would be to not have any race condition.
182       // this.rsServices.getRegionsInTransitionInRS().remove(
183       //  this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
184       // would help, but we would still have a consistency issue to manage with
185       // 1) this.rsServices.addToOnlineRegions(region);
186       // 2) the ZK state.
187       if (openSuccessful) {
188         if (current == null) { // Should NEVER happen, but let's be paranoid.
189           LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
190               + regionName);
191         } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're
192                                                     // really unlucky.
193           LOG.error("Race condition: we've finished opening a region while a close was requested"
194               + " on region=" + regionName + ". This can be a critical error, as a region that"
195               + " should be closed is now open.");
196         }
197       } 
198     }
199   }
200 
201   private void doCleanUpOnFailedOpen(HRegion region, boolean transitionedToOpening)
202       throws IOException {
203     if (transitionedToOpening) {
204       try {
205         if (region != null) {
206           cleanupFailedOpen(region);
207         }
208       } finally {
209         // Even if cleanupFailedOpen fails, we need to do this transition
210         // See HBASE-7698
211         tryTransitionFromOpeningToFailedOpen(regionInfo);
212       }
213     } else {
214       // If the transition to OPENING was not done yet, we need to transition the znode
215       // from OFFLINE straight to FAILED_OPEN.
216       tryTransitionFromOfflineToFailedOpen(this.rsServices, regionInfo, versionOfOfflineNode);
217     }
218   }
219 
220   /**
221    * Update ZK or META.  This can take a while if, for example, the
222    * .META. table is not available -- say the server hosting .META. crashed and we are
223    * waiting on it to come back -- so we run the update in a thread and keep refreshing the
224    * znode state in the meantime so the master doesn't time out our region-in-transition.
225    * Caller must clean up the region if this fails.
226    */
227   boolean updateMeta(final HRegion r) {
228     if (this.server.isStopped() || this.rsServices.isStopping()) {
229       return false;
230     }
231     // Object we do wait/notify on.  Make it boolean.  If set, we're done.
232     // Else, wait.
233     final AtomicBoolean signaller = new AtomicBoolean(false);
234     PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r,
235       this.server, this.rsServices, signaller);
236     t.start();
237     boolean tomActivated = this.server.getConfiguration().
238         getBoolean("hbase.assignment.timeout.management", false);
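        // When assignment timeout management is enabled, the master expires regions that linger
        // in OPENING, so the loop below periodically refreshes our OPENING znode while the
        // deploy-tasks thread runs.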
239     int assignmentTimeout = this.server.getConfiguration().
240       getInt("hbase.master.assignment.timeoutmonitor.period", 10000);
241     // Total timeout for meta edit.  If we fail adding the edit then close out
242     // the region and let it be assigned elsewhere.
243     long timeout = assignmentTimeout * 10;
244     long now = System.currentTimeMillis();
245     long endTime = now + timeout;
246     // Let the period at which we update the OPENING state be 1/3rd of the
247     // regions-in-transition timeout period.
248     long period = Math.max(1, assignmentTimeout / 3);
249     long lastUpdate = now;
250     boolean tickleOpening = true;
251     while (!signaller.get() && t.isAlive() && !this.server.isStopped() &&
252         !this.rsServices.isStopping() && (endTime > now)) {
253       if (tomActivated) {
254         long elapsed = now - lastUpdate;
255         if (elapsed > period) {
256           // Only tickle OPENING if postOpenDeployTasks is taking some time.
257           lastUpdate = now;
258           tickleOpening = tickleOpening("post_open_deploy");
259         }
260       }
261       synchronized (signaller) {
262         try {
263           signaller.wait(period);
264         } catch (InterruptedException e) {
265           // Go to the loop check.
266         }
267       }
268       now = System.currentTimeMillis();
269     }
270     // Is the thread still alive?  We may have left the above loop because the server is
271     // stopping or we timed out the edit.  If so, interrupt it.
272     if (t.isAlive()) {
273       if (!signaller.get()) {
274         // Thread still running; interrupt
275         LOG.debug("Interrupting thread " + t);
276         t.interrupt();
277       }
278       try {
279         t.join();
280       } catch (InterruptedException ie) {
281         LOG.warn("Interrupted joining " +
282           r.getRegionInfo().getRegionNameAsString(), ie);
283         Thread.currentThread().interrupt();
284       }
285     }
286 
287     // Was there an exception opening the region?  This should trigger on
288     // InterruptedException too.  If so, we failed.  A failed tickleOpening
289     // also counts as a failure.
290     return ((!Thread.interrupted() && t.getException() == null) && tickleOpening);
291   }
292 
293   /**
294    * Thread to run region post open tasks. Call {@link #getException()} after
295    * the thread finishes to check for exceptions running
296    * {@link RegionServerServices#postOpenDeployTasks(
297    * HRegion, org.apache.hadoop.hbase.catalog.CatalogTracker, boolean)}.
298    *
299    */
300   static class PostOpenDeployTasksThread extends Thread {
301     private Exception exception = null;
302     private final Server server;
303     private final RegionServerServices services;
304     private final HRegion region;
305     private final AtomicBoolean signaller;
306 
307     PostOpenDeployTasksThread(final HRegion region, final Server server,
308         final RegionServerServices services, final AtomicBoolean signaller) {
309       super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
310       this.setDaemon(true);
311       this.server = server;
312       this.services = services;
313       this.region = region;
314       this.signaller = signaller;
315     }
316 
317     public void run() {
318       try {
319         this.services.postOpenDeployTasks(this.region,
320           this.server.getCatalogTracker());
321       } catch (Exception e) {
322         LOG.warn("Exception running postOpenDeployTasks; region=" +
323           this.region.getRegionInfo().getEncodedName(), e);
324         this.exception = e;
325       }
326       // We're done.  Set flag then wake up anyone waiting on thread to complete.
327       this.signaller.set(true);
328       synchronized (this.signaller) {
329         this.signaller.notify();
330       }
331     }
332 
333     /**
334      * @return Null or the run exception; call this method after thread is done.
335      */
336     Exception getException() {
337       return this.exception;
338     }
339   }
340 
341 
342   /**
343    * @param r Region we're working on.
344    * @return Whether the znode was successfully transitioned to the OPENED state.
345    * @throws IOException
346    */
347   private boolean transitionToOpened(final HRegion r) throws IOException {
348     boolean result = false;
349     HRegionInfo hri = r.getRegionInfo();
350     final String name = hri.getRegionNameAsString();
351     // Finally, transition the ZK node to OPENED
352     try {
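          // transitionNodeOpened returns the new znode version on success, or -1 if the
          // transition failed (for example, our expected version no longer matched).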
353       if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri,
354           this.server.getServerName(), this.version) == -1) {
355         LOG.warn("Completed the OPEN of region " + name +
356           " but when transitioning from" +
357           " OPENING to OPENED got a version mismatch, someone else clashed " +
358           "so now unassigning -- closing region on server: " +
359           this.server.getServerName());
360       } else {
361         LOG.debug("region transitioned to opened in zookeeper: " +
362           r.getRegionInfo() + ", server: " + this.server.getServerName());
363         result = true;
364       }
365     } catch (KeeperException e) {
366       LOG.error("Failed transitioning node " + name +
367         " from OPENING to OPENED -- closing region", e);
368     }
369     return result;
370   }
371 
372   /**
373    * This is not guaranteed to succeed; we just do our best.
374    * @param hri Region we're working on.
375    * @return Whether the znode was successfully transitioned to the FAILED_OPEN state.
376    */
377   private boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri) {
378     boolean result = false;
379     final String name = hri.getRegionNameAsString();
380     try {
381       LOG.info("Opening of region " + hri + " failed, transitioning" +
382           " from OPENING to FAILED_OPEN in ZK, expecting version " + this.version);
383       if (ZKAssign.transitionNode(
384           this.server.getZooKeeper(), hri,
385           this.server.getServerName(),
386           EventType.RS_ZK_REGION_OPENING,
387           EventType.RS_ZK_REGION_FAILED_OPEN,
388           this.version) == -1) {
389         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
390             "It's likely that the master already timed out this open " +
391             "attempt, and thus another RS already has the region.");
392       } else {
393         result = true;
394       }
395     } catch (KeeperException e) {
396       LOG.error("Failed transitioning node " + name +
397         " from OPENING to FAILED_OPEN", e);
398     }
399     return result;
400   }
401 
402   /**
403    * Try to transition the znode from OFFLINE to FAILED_OPEN. This function is static so it
404    * can be used before the handler is created.
405    *
406    * This is not guaranteed to succeed; we just do our best.
407    *
408    * @param rsServices Region server services.
409    * @param hri Region we're working on.
410    * @param versionOfOfflineNode Version of the offline znode to be checked.
411    * @return Whether the znode was successfully transitioned to the FAILED_OPEN state.
412    */
413   public static boolean tryTransitionFromOfflineToFailedOpen(RegionServerServices rsServices,
414        final HRegionInfo hri, final int versionOfOfflineNode) {
415     boolean result = false;
416     final String name = hri.getRegionNameAsString();
417     try {
418       LOG.info("Opening of region " + hri + " failed, transitioning" +
419           " from OFFLINE to FAILED_OPEN in ZK, expecting version " + versionOfOfflineNode);
420       if (ZKAssign.transitionNode(
421           rsServices.getZooKeeper(), hri,
422           rsServices.getServerName(),
423           EventType.M_ZK_REGION_OFFLINE,
424           EventType.RS_ZK_REGION_FAILED_OPEN,
425           versionOfOfflineNode) == -1) {
426         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
427             "It's likely that the master already timed out this open " +
428             "attempt, and thus another RS already has the region.");
429       } else {
430         result = true;
431       }
432     } catch (KeeperException e) {
433       LOG.error("Failed transitioning node " + name + " from OFFLINE to FAILED_OPEN", e);
434     }
435     return result;
436   }
437 
438 
439   /**
440    * @return Instance of HRegion if the open succeeded, else null.
441    */
442   HRegion openRegion() {
443     HRegion region = null;
444     try {
445       // Instantiate the region.  This also periodically tickles our zk OPENING
446       // state so the master doesn't time out this region in transition.
447       region = HRegion.openHRegion(this.regionInfo, this.htd,
448           this.rsServices.getWAL(this.regionInfo), 
449           this.server.getConfiguration(),
450           this.rsServices,
451         new CancelableProgressable() {
452           public boolean progress() {
453             // We may lose znode ownership during the open.  Currently it's
454             // too hard to interrupt an ongoing region open.  Just let it complete
455             // and check that we still have the znode after the region open.
456             return tickleOpening("open_region_progress");
457           }
458         });
459     } catch (Throwable t) {
460       // We failed the open. Our caller will see the 'null' return value
461       // and transition the node back to FAILED_OPEN. If that fails,
462       // we rely on the Timeout Monitor in the master to reassign.
463       LOG.error(
464           "Failed open of region=" + this.regionInfo.getRegionNameAsString()
465               + ", starting to roll back the global memstore size.", t);
466       // Decrease the global memstore size.
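          // The failed open may have replayed recovered edits and counted them against the
          // global memstore size; roll that accounting back since the region never came online.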
467       if (this.rsServices != null) {
468         RegionServerAccounting rsAccounting =
469           this.rsServices.getRegionServerAccounting();
470         if (rsAccounting != null) {
471           rsAccounting.rollbackRegionReplayEditsSize(this.regionInfo.getRegionName());
472         }
473       }
474     }
475     return region;
476   }
477 
478   void cleanupFailedOpen(final HRegion region) throws IOException {
479     if (region != null) region.close();
480   }
481 
482   private boolean isRegionStillOpening() {
483     byte[] encodedName = regionInfo.getEncodedNameAsBytes();
484     Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
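        // In the regions-in-transition map, TRUE marks an open in progress and FALSE marks a
        // requested close; a missing entry means this open has been cancelled.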
485     return Boolean.TRUE.equals(action); // true means opening for RIT
486   }
487 
488   /**
489    * Transition ZK node from OFFLINE to OPENING.
490    * @param encodedName Name of the znode file (Region encodedName is the znode
491    * name).
492    * @param versionOfOfflineNode Version of the offline node that needs to be compared
493    * before changing the node's state from OFFLINE.
494    * @return True if successful transition.
495    */
496   boolean transitionZookeeperOfflineToOpening(final String encodedName,
497       int versionOfOfflineNode) {
498     // TODO: should also handle transition from CLOSED?
499     try {
500       // Initialize the znode version.
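          // ZKAssign.transitionNode returns the new znode version on success, or -1 if the
          // transition failed (for example, the OFFLINE node's version did not match).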
501       this.version = ZKAssign.transitionNode(server.getZooKeeper(), regionInfo,
502           server.getServerName(), EventType.M_ZK_REGION_OFFLINE,
503           EventType.RS_ZK_REGION_OPENING, versionOfOfflineNode);
504     } catch (KeeperException e) {
505       LOG.error("Error transition from OFFLINE to OPENING for region=" +
506         encodedName, e);
507       this.version = -1;
508       return false;
509     }
510     boolean b = isGoodVersion();
511     if (!b) {
512       LOG.warn("Failed transition from OFFLINE to OPENING for region=" +
513         encodedName);
514     }
515     return b;
516   }
517 
518   /**
519    * Update our OPENING state in zookeeper.
520    * Do this so master doesn't timeout this region-in-transition.
521    * @param context Some context to add to logs in case of failure.
522    * @return True if successful transition.
523    */
524   boolean tickleOpening(final String context) {
525     if (!isRegionStillOpening()) {
526       LOG.warn("Open region aborted since it isn't opening any more");
527       return false;
528     }
529     // If previous checks failed... do not try again.
530     if (!isGoodVersion()) return false;
531     String encodedName = this.regionInfo.getEncodedName();
532     try {
533       this.version =
534         ZKAssign.retransitionNodeOpening(server.getZooKeeper(),
535           this.regionInfo, this.server.getServerName(), this.version);
536     } catch (KeeperException e) {
537       server.abort("Exception refreshing OPENING; region=" + encodedName +
538         ", context=" + context, e);
539       this.version = -1;
540       return false;
541     }
542     boolean b = isGoodVersion();
543     if (!b) {
544       LOG.warn("Failed refreshing OPENING; region=" + encodedName +
545         ", context=" + context);
546     }
547     return b;
548   }
549 
550   private boolean isGoodVersion() {
551     return this.version != -1;
552   }
553 }