View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master.handler;
20  
21  import java.io.IOException;
22  import java.util.HashSet;
23  import java.util.Set;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.HRegionInfo;
29  import org.apache.hadoop.hbase.Server;
30  import org.apache.hadoop.hbase.ServerName;
31  import org.apache.hadoop.hbase.executor.EventType;
32  import org.apache.hadoop.hbase.master.AssignmentManager;
33  import org.apache.hadoop.hbase.master.DeadServer;
34  import org.apache.hadoop.hbase.master.MasterServices;
35  import org.apache.zookeeper.KeeperException;
36  
37  /**
38   * Shutdown handler for the server hosting <code>hbase:meta</code>
39   */
40  @InterfaceAudience.Private
41  public class MetaServerShutdownHandler extends ServerShutdownHandler {
42    private static final Log LOG = LogFactory.getLog(MetaServerShutdownHandler.class);
43    public MetaServerShutdownHandler(final Server server,
44        final MasterServices services,
45        final DeadServer deadServers, final ServerName serverName) {
46      super(server, services, deadServers, serverName,
47        EventType.M_META_SERVER_SHUTDOWN, true);
48    }
49  
50    @Override
51    public void process() throws IOException {
52      boolean gotException = true; 
53      try {
54        AssignmentManager am = this.services.getAssignmentManager();
55        try {
56          if (this.shouldSplitHlog) {
57            LOG.info("Splitting hbase:meta logs for " + serverName);
58            if (this.distributedLogReplay) {
59              Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
60              regions.add(HRegionInfo.FIRST_META_REGIONINFO);
61              this.services.getMasterFileSystem().prepareLogReplay(serverName, regions);
62            } else {
63              this.services.getMasterFileSystem().splitMetaLog(serverName);
64            }
65            am.getRegionStates().logSplit(HRegionInfo.FIRST_META_REGIONINFO);
66          }
67        } catch (IOException ioe) {
68          this.services.getExecutorService().submit(this);
69          this.deadServers.add(serverName);
70          throw new IOException("failed log splitting for " + serverName + ", will retry", ioe);
71        }
72    
73        // Assign meta if we were carrying it.
74        // Check again: region may be assigned to other where because of RIT
75        // timeout
76        if (am.isCarryingMeta(serverName)) {
77          LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
78          am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
79          verifyAndAssignMetaWithRetries();
80        } else if (!this.services.getCatalogTracker().isMetaLocationAvailable()) {
81          // the meta location as per master is null. This could happen in case when meta assignment
82          // in previous run failed, while meta znode has been updated to null. We should try to
83          // assign the meta again.
84          verifyAndAssignMetaWithRetries();
85        } else {
86          LOG.info("META has been assigned to otherwhere, skip assigning.");
87        }
88  
89        try {
90          if (this.shouldSplitHlog && this.distributedLogReplay) {
91            if (!am.waitOnRegionToClearRegionsInTransition(HRegionInfo.FIRST_META_REGIONINFO,
92              regionAssignmentWaitTimeout)) {
93              // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
94              // when replay happens before region assignment completes.
95              LOG.warn("Region " + HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()
96                  + " didn't complete assignment in time");
97            }
98            this.services.getMasterFileSystem().splitMetaLog(serverName);
99          }
100       } catch (Exception ex) {
101         if (ex instanceof IOException) {
102           this.services.getExecutorService().submit(this);
103           this.deadServers.add(serverName);
104           throw new IOException("failed log splitting for " + serverName + ", will retry", ex);
105         } else {
106           throw new IOException(ex);
107         }
108       }
109 
110       gotException = false;
111     } finally {
112       if (gotException){
113         // If we had an exception, this.deadServers.finish will be skipped in super.process()
114         this.deadServers.finish(serverName);
115       }     
116     }
117     
118     super.process();
119   }
120 
121   @Override
122   boolean isCarryingMeta() {
123     return true;
124   }
125 
126   /**
127    * Before assign the hbase:meta region, ensure it haven't
128    *  been assigned by other place
129    * <p>
130    * Under some scenarios, the hbase:meta region can be opened twice, so it seemed online
131    * in two regionserver at the same time.
132    * If the hbase:meta region has been assigned, so the operation can be canceled.
133    * @throws InterruptedException
134    * @throws IOException
135    * @throws KeeperException
136    */
137   private void verifyAndAssignMeta()
138       throws InterruptedException, IOException, KeeperException {
139     long timeout = this.server.getConfiguration().
140         getLong("hbase.catalog.verification.timeout", 1000);
141     if (!this.server.getCatalogTracker().verifyMetaRegionLocation(timeout)) {
142       this.services.getAssignmentManager().assignMeta();
143     } else if (serverName.equals(server.getCatalogTracker().getMetaLocation())) {
144       throw new IOException("hbase:meta is onlined on the dead server "
145           + serverName);
146     } else {
147       LOG.info("Skip assigning hbase:meta, because it is online on the "
148           + server.getCatalogTracker().getMetaLocation());
149     }
150   }
151 
152   /**
153    * Failed many times, shutdown processing
154    * @throws IOException
155    */
156   private void verifyAndAssignMetaWithRetries() throws IOException {
157     int iTimes = this.server.getConfiguration().getInt(
158         "hbase.catalog.verification.retries", 10);
159 
160     long waitTime = this.server.getConfiguration().getLong(
161         "hbase.catalog.verification.timeout", 1000);
162 
163     int iFlag = 0;
164     while (true) {
165       try {
166         verifyAndAssignMeta();
167         break;
168       } catch (KeeperException e) {
169         this.server.abort("In server shutdown processing, assigning meta", e);
170         throw new IOException("Aborting", e);
171       } catch (Exception e) {
172         if (iFlag >= iTimes) {
173           this.server.abort("verifyAndAssignMeta failed after" + iTimes
174               + " times retries, aborting", e);
175           throw new IOException("Aborting", e);
176         }
177         try {
178           Thread.sleep(waitTime);
179         } catch (InterruptedException e1) {
180           LOG.warn("Interrupted when is the thread sleep", e1);
181           Thread.currentThread().interrupt();
182           throw new IOException("Interrupted", e1);
183         }
184         iFlag++;
185       }
186     }
187   }
188 
189   @Override
190   public String toString() {
191     String name = "UnknownServerName";
192     if(server != null && server.getServerName() != null) {
193       name = server.getServerName().toString();
194     }
195     return getClass().getSimpleName() + "-" + name + "-" + getSeqid();
196   }
197 }