View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.master;
21  
22  import org.apache.hadoop.fs.Path;
23  import org.apache.hadoop.hbase.HConstants;
24  import org.apache.hadoop.hbase.HRegionInfo;
25  import org.apache.hadoop.hbase.HServerAddress;
26  import org.apache.hadoop.hbase.HServerInfo;
27  import org.apache.hadoop.hbase.RemoteExceptionHandler;
28  import org.apache.hadoop.hbase.client.Result;
29  import org.apache.hadoop.hbase.client.Scan;
30  import org.apache.hadoop.hbase.ipc.HRegionInterface;
31  import org.apache.hadoop.hbase.regionserver.HRegion;
32  import org.apache.hadoop.hbase.regionserver.wal.HLog;
33  import org.apache.hadoop.hbase.util.Bytes;
34  import org.apache.hadoop.hbase.master.RegionManager.RegionState;
35  
36  import java.io.IOException;
37  import java.util.ArrayList;
38  import java.util.HashSet;
39  import java.util.List;
40  import java.util.Map;
41  import java.util.Set;
42  
43  /**
44   * Instantiated when a server's lease has expired, meaning it has crashed.
45   * The region server's log file needs to be split up for each region it was
46   * serving, and the regions need to get reassigned.
47   */
48  class ProcessServerShutdown extends RegionServerOperation {
49    // Server name made of the concatenation of hostname, port and startcode
50    // formatted as <code>&lt;hostname> ',' &lt;port> ',' &lt;startcode></code>
51    private final String deadServer;
52    private boolean isRootServer;
53    private List<MetaRegion> metaRegions;
54  
55    private Path rsLogDir;
56    private boolean logSplit;
57    private boolean rootRescanned;
58    private HServerAddress deadServerAddress;
59  
60    private static class ToDoEntry {
61      boolean regionOffline;
62      final HRegionInfo info;
63  
64      ToDoEntry(final HRegionInfo info) {
65        this.regionOffline = false;
66        this.info = info;
67      }
68    }
69  
70    /**
71     * @param master
72     * @param serverInfo
73     */
74    public ProcessServerShutdown(HMaster master, HServerInfo serverInfo) {
75      super(master);
76      this.deadServer = serverInfo.getServerName();
77      this.deadServerAddress = serverInfo.getServerAddress();
78      this.logSplit = false;
79      this.rootRescanned = false;
80      this.rsLogDir =
81        new Path(master.getRootDir(), HLog.getHLogDirectoryName(serverInfo));
82  
83      // check to see if I am responsible for either ROOT or any of the META tables.
84  
85      // TODO Why do we do this now instead of at processing time?
86      closeMetaRegions();
87    }
88  
89    private void closeMetaRegions() {
90      this.isRootServer =
91        this.master.getRegionManager().isRootServer(this.deadServerAddress) ||
92        this.master.getRegionManager().isRootInTransitionOnThisServer(deadServer);
93      if (this.isRootServer) {
94        this.master.getRegionManager().unsetRootRegion();
95      }
96      List<byte[]> metaStarts =
97        this.master.getRegionManager().listMetaRegionsForServer(deadServerAddress);
98  
99      this.metaRegions = new ArrayList<MetaRegion>();
100     for (byte [] startKey: metaStarts) {
101       MetaRegion r = master.getRegionManager().offlineMetaRegionWithStartKey(startKey);
102       this.metaRegions.add(r);
103     }
104 
105     //HBASE-1928: Check whether this server has been transitioning the META table
106     HRegionInfo metaServerRegionInfo = master.getRegionManager().getMetaServerRegionInfo (deadServer);
107     if (metaServerRegionInfo != null) {
108       metaRegions.add (new MetaRegion (deadServerAddress, metaServerRegionInfo));
109     }
110   }
111 
112   /**
113    * @return Name of server we are processing.
114    */
115   public HServerAddress getDeadServerAddress() {
116     return this.deadServerAddress;
117   }
118 
119   private void closeRegionsInTransition() {
120     Map<String, RegionState> inTransition =
121       master.getRegionManager().getRegionsInTransitionOnServer(deadServer);
122     for (Map.Entry<String, RegionState> entry : inTransition.entrySet()) {
123       String regionName = entry.getKey();
124       RegionState state = entry.getValue();
125 
126       LOG.info("Region " + regionName + " was in transition " +
127           state + " on dead server " + deadServer + " - marking unassigned");
128       master.getRegionManager().setUnassigned(state.getRegionInfo(), true);
129     }
130   }
131 
132   @Override
133   public String toString() {
134     return "ProcessServerShutdown of " + this.deadServer;
135   }
136 
137   /** Finds regions that the dead region server was serving
138    */
139   protected void scanMetaRegion(HRegionInterface server, long scannerId,
140     byte [] regionName)
141   throws IOException {
142     List<ToDoEntry> toDoList = new ArrayList<ToDoEntry>();
143     Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
144     List<byte []> emptyRows = new ArrayList<byte []>();
145     try {
146       while (true) {
147         Result values = null;
148         try {
149           values = server.next(scannerId);
150         } catch (IOException e) {
151           LOG.error("Shutdown scanning of meta region",
152             RemoteExceptionHandler.checkIOException(e));
153           break;
154         }
155         if (values == null || values.size() == 0) {
156           break;
157         }
158         byte [] row = values.getRow();
159         // Check server name.  If null, skip (We used to consider it was on
160         // shutdown server but that would mean that we'd reassign regions that
161         // were already out being assigned, ones that were product of a split
162         // that happened while the shutdown was being processed).
163         String serverAddress = BaseScanner.getServerAddress(values);
164         long startCode = BaseScanner.getStartCode(values);
165 
166         String serverName = null;
167         if (serverAddress != null && serverAddress.length() > 0) {
168           serverName = HServerInfo.getServerName(serverAddress, startCode);
169         }
170         if (serverName == null || !deadServer.equals(serverName)) {
171           // This isn't the server you're looking for - move along
172           continue;
173         }
174 
175         if (LOG.isDebugEnabled() && row != null) {
176           LOG.debug("Shutdown scanner for " + serverName + " processing " +
177             Bytes.toString(row));
178         }
179 
180         HRegionInfo info = master.getHRegionInfo(row, values);
181         if (info == null) {
182           emptyRows.add(row);
183           continue;
184         }
185 
186         synchronized (master.getRegionManager()) {
187           if (info.isMetaTable()) {
188             if (LOG.isDebugEnabled()) {
189               LOG.debug("removing meta region " +
190                   Bytes.toString(info.getRegionName()) +
191               " from online meta regions");
192             }
193             master.getRegionManager().offlineMetaRegionWithStartKey(info.getStartKey());
194           }
195 
196           ToDoEntry todo = new ToDoEntry(info);
197           toDoList.add(todo);
198 
199           if (master.getRegionManager().isOfflined(info.getRegionNameAsString()) ||
200               info.isOffline()) {
201             master.getRegionManager().removeRegion(info);
202             // Mark region offline
203             if (!info.isOffline()) {
204               todo.regionOffline = true;
205             }
206           } else {
207             if (!info.isOffline() && !info.isSplit()) {
208               // Get region reassigned
209               regions.add(info);
210             }
211           }
212         }
213       }
214     } finally {
215       if (scannerId != -1L) {
216         try {
217           server.close(scannerId);
218         } catch (IOException e) {
219           LOG.error("Closing scanner",
220             RemoteExceptionHandler.checkIOException(e));
221         }
222       }
223     }
224 
225     // Scan complete. Remove any rows which had empty HRegionInfos
226 
227     if (emptyRows.size() > 0) {
228       LOG.warn("Found " + emptyRows.size() +
229         " rows with empty HRegionInfo while scanning meta region " +
230         Bytes.toString(regionName));
231       master.deleteEmptyMetaRows(server, regionName, emptyRows);
232     }
233     // Update server in root/meta entries
234     for (ToDoEntry e: toDoList) {
235       if (e.regionOffline) {
236         HRegion.offlineRegionInMETA(server, regionName, e.info);
237       }
238     }
239 
240     // Get regions reassigned
241     for (HRegionInfo info: regions) {
242       master.getRegionManager().setUnassigned(info, true);
243     }
244   }
245 
246   private class ScanRootRegion extends RetryableMetaOperation<Boolean> {
247     ScanRootRegion(MetaRegion m, HMaster master) {
248       super(m, master);
249     }
250 
251     public Boolean call() throws IOException {
252       if (LOG.isDebugEnabled()) {
253         LOG.debug("Process server shutdown scanning root region on " +
254             master.getRegionManager().getRootRegionLocation().getBindAddress());
255       }
256       Scan scan = new Scan();
257       scan.addFamily(HConstants.CATALOG_FAMILY);
258       long scannerId = server.openScanner(
259           HRegionInfo.ROOT_REGIONINFO.getRegionName(), scan);
260       scanMetaRegion(server, scannerId,
261           HRegionInfo.ROOT_REGIONINFO.getRegionName());
262       return true;
263     }
264   }
265 
266   private class ScanMetaRegions extends RetryableMetaOperation<Boolean> {
267     ScanMetaRegions(MetaRegion m, HMaster master) {
268       super(m, master);
269     }
270 
271     public Boolean call() throws IOException {
272       if (LOG.isDebugEnabled()) {
273         LOG.debug("process server shutdown scanning " +
274           Bytes.toString(m.getRegionName()) + " on " + m.getServer());
275       }
276       Scan scan = new Scan();
277       scan.addFamily(HConstants.CATALOG_FAMILY);
278       long scannerId = server.openScanner(
279           m.getRegionName(), scan);
280       scanMetaRegion(server, scannerId, m.getRegionName());
281       return true;
282     }
283   }
284 
285   @Override
286   protected boolean process() throws IOException {
287     LOG.info("Process shutdown of server " + this.deadServer +
288       ": logSplit: " + logSplit + ", rootRescanned: " + rootRescanned +
289       ", numberOfMetaRegions: " + master.getRegionManager().numMetaRegions() +
290       ", onlineMetaRegions.size(): " +
291       master.getRegionManager().numOnlineMetaRegions());
292     if (!logSplit) {
293       // Process the old log file
294       if (this.master.getFileSystem().exists(rsLogDir)) {
295         if (!master.splitLogLock.tryLock()) {
296           return false;
297         }
298         try {
299           HLog.splitLog(master.getRootDir(), rsLogDir,
300               this.master.getOldLogDir(), this.master.getFileSystem(),
301             this.master.getConfiguration());
302         } finally {
303           master.splitLogLock.unlock();
304         }
305       }
306       logSplit = true;
307     }
308     LOG.info("Log split complete, meta reassignment and scanning:");
309     if (this.isRootServer) {
310       LOG.info("ProcessServerShutdown reassigning ROOT region");
311       master.getRegionManager().reassignRootRegion();
312       isRootServer = false;  // prevent double reassignment... heh.
313     }
314 
315     for (MetaRegion metaRegion : metaRegions) {
316       LOG.info("ProcessServerShutdown setting to unassigned: " + metaRegion.toString());
317       master.getRegionManager().setUnassigned(metaRegion.getRegionInfo(), true);
318     }
319     // one the meta regions are online, "forget" about them.  Since there are explicit
320     // checks below to make sure meta/root are online, this is likely to occur.
321     metaRegions.clear();
322 
323     if (!rootAvailable()) {
324       // Return true so that worker does not put this request back on the
325       // toDoQueue.
326       // rootAvailable() has already put it on the delayedToDoQueue
327       return true;
328     }
329 
330     if (!rootRescanned) {
331       // Scan the ROOT region
332       Boolean result = new ScanRootRegion(
333           new MetaRegion(master.getRegionManager().getRootRegionLocation(),
334               HRegionInfo.ROOT_REGIONINFO), this.master).doWithRetries();
335       if (result == null) {
336         // Master is closing - give up
337         return true;
338       }
339 
340       if (LOG.isDebugEnabled()) {
341         LOG.debug("Process server shutdown scanning root region on " +
342           master.getRegionManager().getRootRegionLocation().getBindAddress() +
343           " finished " + Thread.currentThread().getName());
344       }
345       rootRescanned = true;
346     }
347 
348     if (!metaTableAvailable()) {
349       // We can't proceed because not all meta regions are online.
350       // metaAvailable() has put this request on the delayedToDoQueue
351       // Return true so that worker does not put this on the toDoQueue
352       return true;
353     }
354 
355     List<MetaRegion> regions = master.getRegionManager().getListOfOnlineMetaRegions();
356     for (MetaRegion r: regions) {
357       Boolean result = new ScanMetaRegions(r, this.master).doWithRetries();
358       if (result == null) {
359         break;
360       }
361       if (LOG.isDebugEnabled()) {
362         LOG.debug("process server shutdown finished scanning " +
363           Bytes.toString(r.getRegionName()) + " on " + r.getServer());
364       }
365     }
366 
367     closeRegionsInTransition();
368     this.master.getServerManager().removeDeadServer(deadServer);
369     if (LOG.isDebugEnabled()) {
370       LOG.debug("Removed " + deadServer + " from deadservers Map");
371     }
372     return true;
373   }
374 
375   @Override
376   protected int getPriority() {
377     return 2; // high but not highest priority
378   }
379 }