View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.HashSet;
23  import java.util.List;
24  import java.util.Set;
25  import java.util.concurrent.CancellationException;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.HTableDescriptor;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.catalog.MetaReader;
37  import org.apache.hadoop.hbase.errorhandling.ForeignException;
38  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
39  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
40  import org.apache.hadoop.hbase.executor.EventHandler;
41  import org.apache.hadoop.hbase.master.MasterServices;
42  import org.apache.hadoop.hbase.master.SnapshotSentinel;
43  import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
44  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
45  import org.apache.hadoop.hbase.monitoring.TaskMonitor;
46  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
47  import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
48  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
49  import org.apache.hadoop.hbase.snapshot.TableInfoCopyTask;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.Pair;
52  import org.apache.zookeeper.KeeperException;
53  
54  /**
55   * A handler for taking snapshots from the master.
56   *
57   * This is not a subclass of TableEventHandler because using that would incur an extra META scan.
58   *
59   * The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor.
60   */
61  @InterfaceAudience.Private
62  public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
63      ForeignExceptionSnare {
64    private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
65  
66    private volatile boolean finished;
67  
68    // none of these should ever be null
69    protected final MasterServices master;
70    protected final MasterMetrics metricsMaster;
71    protected final SnapshotDescription snapshot;
72    protected final Configuration conf;
73    protected final FileSystem fs;
74    protected final Path rootDir;
75    private final Path snapshotDir;
76    protected final Path workingDir;
77    private final MasterSnapshotVerifier verifier;
78    protected final ForeignExceptionDispatcher monitor;
79    protected final MonitoredTask status;
80  
81    /**
82     * @param snapshot descriptor of the snapshot to take
83     * @param masterServices master services provider
84     * @throws IOException on unexpected error
85     */
86    public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices,
87        final MasterMetrics metricsMaster) {
88      super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
89      assert snapshot != null : "SnapshotDescription must not be nul1";
90      assert masterServices != null : "MasterServices must not be nul1";
91  
92      this.master = masterServices;
93      this.metricsMaster = metricsMaster;
94      this.snapshot = snapshot;
95      this.conf = this.master.getConfiguration();
96      this.fs = this.master.getMasterFileSystem().getFileSystem();
97      this.rootDir = this.master.getMasterFileSystem().getRootDir();
98      this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
99      this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
100     this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
101 
102     // prepare the verify
103     this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
104     // update the running tasks
105     this.status = TaskMonitor.get().createStatus(
106       "Taking " + snapshot.getType() + " snapshot on table: " + snapshot.getTable());
107   }
108 
109   private HTableDescriptor loadTableDescriptor()
110       throws FileNotFoundException, IOException {
111     final String name = snapshot.getTable();
112     HTableDescriptor htd =
113       this.master.getTableDescriptors().get(name);
114     if (htd == null) {
115       throw new IOException("HTableDescriptor missing for " + name);
116     }
117     return htd;
118   }
119 
120   public TakeSnapshotHandler prepare() throws Exception {
121     loadTableDescriptor(); // check that .tableinfo is present
122     return this;
123   }
124 
125   /**
126    * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
127    * call should get implemented for each snapshot flavor.
128    */
129   @Override
130   public void process() {
131     String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
132         + eventType + " on table " + snapshot.getTable();
133     LOG.info(msg);
134     status.setStatus(msg);
135     try {
136       // If regions move after this meta scan, the region specific snapshot should fail, triggering
137       // an external exception that gets captured here.
138 
139       // write down the snapshot info in the working directory
140       SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, this.fs);
141       new TableInfoCopyTask(monitor, snapshot, fs, rootDir).call();
142       monitor.rethrowException();
143 
144       List<Pair<HRegionInfo, ServerName>> regionsAndLocations =
145           MetaReader.getTableRegionsAndLocations(this.server.getCatalogTracker(),
146             Bytes.toBytes(snapshot.getTable()), true);
147 
148       // run the snapshot
149       snapshotRegions(regionsAndLocations);
150       monitor.rethrowException();
151 
152       // extract each pair to separate lists
153       Set<String> serverNames = new HashSet<String>();
154       for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
155         serverNames.add(p.getSecond().toString());
156       }
157 
158       // verify the snapshot is valid
159       status.setStatus("Verifying snapshot: " + snapshot.getName());
160       verifier.verifySnapshot(this.workingDir, serverNames);
161 
162       // complete the snapshot, atomically moving from tmp to .snapshot dir.
163       completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
164       status.markComplete("Snapshot " + snapshot.getName() + " of table " + snapshot.getTable()
165           + " completed");
166       metricsMaster.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
167     } catch (Exception e) {
168       status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
169           snapshot.getTable() + " because " + e.getMessage());
170       String reason = "Failed taking snapshot " + SnapshotDescriptionUtils.toString(snapshot)
171           + " due to exception:" + e.getMessage();
172       LOG.error(reason, e);
173       ForeignException ee = new ForeignException(reason, e);
174       monitor.receive(ee);
175       // need to mark this completed to close off and allow cleanup to happen.
176       cancel("Failed to take snapshot '" + SnapshotDescriptionUtils.toString(snapshot)
177           + "' due to exception");
178     } finally {
179       LOG.debug("Launching cleanup of working dir:" + workingDir);
180       try {
181         // if the working dir is still present, the snapshot has failed.  it is present we delete
182         // it.
183         if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
184           LOG.error("Couldn't delete snapshot working directory:" + workingDir);
185         }
186       } catch (IOException e) {
187         LOG.error("Couldn't delete snapshot working directory:" + workingDir);
188       }
189     }
190   }
191 
192   /**
193    * Reset the manager to allow another snapshot to proceed
194    *
195    * @param snapshotDir final path of the snapshot
196    * @param workingDir directory where the in progress snapshot was built
197    * @param fs {@link FileSystem} where the snapshot was built
198    * @throws SnapshotCreationException if the snapshot could not be moved
199    * @throws IOException the filesystem could not be reached
200    */
201   public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
202       throws SnapshotCreationException, IOException {
203     LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
204         + snapshotDir);
205     if (!fs.rename(workingDir, snapshotDir)) {
206       throw new SnapshotCreationException("Failed to move working directory(" + workingDir
207           + ") to completed directory(" + snapshotDir + ").");
208     }
209     finished = true;
210   }
211 
212   /**
213    * Snapshot the specified regions
214    */
215   protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
216       throws IOException, KeeperException;
217 
218   @Override
219   public void cancel(String why) {
220     if (finished) return;
221 
222     this.finished = true;
223     LOG.info("Stop taking snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + " because: "
224         + why);
225     CancellationException ce = new CancellationException(why);
226     monitor.receive(new ForeignException(master.getServerName().toString(), ce));
227   }
228 
229   @Override
230   public boolean isFinished() {
231     return finished;
232   }
233 
234   @Override
235   public long getCompletionTimestamp() {
236     return this.status.getCompletionTimestamp();
237   }
238 
239   @Override
240   public SnapshotDescription getSnapshot() {
241     return snapshot;
242   }
243 
244   @Override
245   public ForeignException getExceptionIfFailed() {
246     return monitor.getException();
247   }
248 
249   @Override
250   public void rethrowExceptionIfFailed() throws ForeignException {
251     monitor.rethrowException();
252   }
253 
254   @Override
255   public void rethrowException() throws ForeignException {
256     monitor.rethrowException();
257   }
258 
259   @Override
260   public boolean hasException() {
261     return monitor.hasException();
262   }
263 
264   @Override
265   public ForeignException getException() {
266     return monitor.getException();
267   }
268 
269 }