View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.HashSet;
23  import java.util.List;
24  import java.util.Set;
25  import java.util.concurrent.CancellationException;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.HTableDescriptor;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.catalog.MetaReader;
37  import org.apache.hadoop.hbase.errorhandling.ForeignException;
38  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
39  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
40  import org.apache.hadoop.hbase.executor.EventHandler;
41  import org.apache.hadoop.hbase.master.MasterServices;
42  import org.apache.hadoop.hbase.master.SnapshotSentinel;
43  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
44  import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
45  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
46  import org.apache.hadoop.hbase.snapshot.TableInfoCopyTask;
47  import org.apache.hadoop.hbase.util.Bytes;
48  import org.apache.hadoop.hbase.util.Pair;
49  import org.apache.zookeeper.KeeperException;
50  
51  /**
52   * A handler for taking snapshots from the master.
53   *
54   * This is not a subclass of TableEventHandler because using that would incur an extra META scan.
55   *
56   * The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor.
57   */
58  @InterfaceAudience.Private
59  public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
60      ForeignExceptionSnare {
61    private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
62  
63    private volatile boolean finished;
64  
65    // none of these should ever be null
66    protected final MasterServices master;
67    protected final SnapshotDescription snapshot;
68    protected final Configuration conf;
69    protected final FileSystem fs;
70    protected final Path rootDir;
71    private final Path snapshotDir;
72    protected final Path workingDir;
73    private final MasterSnapshotVerifier verifier;
74    protected final ForeignExceptionDispatcher monitor;
75  
76    /**
77     * @param snapshot descriptor of the snapshot to take
78     * @param masterServices master services provider
79     * @throws IOException on unexpected error
80     */
81    public TakeSnapshotHandler(SnapshotDescription snapshot,
82        final MasterServices masterServices) throws IOException {
83      super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
84      assert snapshot != null : "SnapshotDescription must not be nul1";
85      assert masterServices != null : "MasterServices must not be nul1";
86  
87      this.master = masterServices;
88      this.snapshot = snapshot;
89      this.conf = this.master.getConfiguration();
90      this.fs = this.master.getMasterFileSystem().getFileSystem();
91      this.rootDir = this.master.getMasterFileSystem().getRootDir();
92      this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
93      this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
94      this.monitor =  new ForeignExceptionDispatcher();
95  
96      loadTableDescriptor(); // check that .tableinfo is present
97  
98      // prepare the verify
99      this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
100   }
101 
102   private HTableDescriptor loadTableDescriptor()
103       throws FileNotFoundException, IOException {
104     final String name = snapshot.getTable();
105     HTableDescriptor htd =
106       this.master.getTableDescriptors().get(name);
107     if (htd == null) {
108       throw new IOException("HTableDescriptor missing for " + name);
109     }
110     return htd;
111   }
112 
113   /**
114    * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
115    * call should get implemented for each snapshot flavor.
116    */
117   @Override
118   public void process() {
119     LOG.info("Running table snapshot operation " + eventType + " on table " + snapshot.getTable());
120     try {
121       // If regions move after this meta scan, the region specific snapshot should fail, triggering
122       // an external exception that gets captured here.
123 
124       // write down the snapshot info in the working directory
125       SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, this.fs);
126       new TableInfoCopyTask(monitor, snapshot, fs, rootDir).call();
127       monitor.rethrowException();
128 
129       List<Pair<HRegionInfo, ServerName>> regionsAndLocations =
130           MetaReader.getTableRegionsAndLocations(this.server.getCatalogTracker(),
131             Bytes.toBytes(snapshot.getTable()), true);
132 
133       // run the snapshot
134       snapshotRegions(regionsAndLocations);
135 
136       // extract each pair to separate lists
137       Set<String> serverNames = new HashSet<String>();
138       for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
139         serverNames.add(p.getSecond().toString());
140       }
141 
142       // verify the snapshot is valid
143       verifier.verifySnapshot(this.workingDir, serverNames);
144 
145       // complete the snapshot, atomically moving from tmp to .snapshot dir.
146       completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
147     } catch (Exception e) {
148       String reason = "Failed taking snapshot " + SnapshotDescriptionUtils.toString(snapshot)
149           + " due to exception:" + e.getMessage();
150       LOG.error(reason, e);
151       ForeignException ee = new ForeignException(reason, e);
152       monitor.receive(ee);
153       // need to mark this completed to close off and allow cleanup to happen.
154       cancel("Failed to take snapshot '" + SnapshotDescriptionUtils.toString(snapshot)
155           + "' due to exception");
156     } finally {
157       LOG.debug("Launching cleanup of working dir:" + workingDir);
158       try {
159         // if the working dir is still present, the snapshot has failed.  it is present we delete
160         // it.
161         if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
162           LOG.error("Couldn't delete snapshot working directory:" + workingDir);
163         }
164       } catch (IOException e) {
165         LOG.error("Couldn't delete snapshot working directory:" + workingDir);
166       }
167     }
168   }
169 
170   /**
171    * Reset the manager to allow another snapshot to proceed
172    *
173    * @param snapshotDir final path of the snapshot
174    * @param workingDir directory where the in progress snapshot was built
175    * @param fs {@link FileSystem} where the snapshot was built
176    * @throws SnapshotCreationException if the snapshot could not be moved
177    * @throws IOException the filesystem could not be reached
178    */
179   public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
180       throws SnapshotCreationException, IOException {
181     LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
182         + snapshotDir);
183     if (!fs.rename(workingDir, snapshotDir)) {
184       throw new SnapshotCreationException("Failed to move working directory(" + workingDir
185           + ") to completed directory(" + snapshotDir + ").");
186     }
187     finished = true;
188   }
189 
190   /**
191    * Snapshot the specified regions
192    */
193   protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
194       throws IOException, KeeperException;
195 
196   @Override
197   public void cancel(String why) {
198     if (finished) return;
199 
200     this.finished = true;
201     LOG.info("Stop taking snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + " because: "
202         + why);
203     CancellationException ce = new CancellationException(why);
204     monitor.receive(new ForeignException(master.getServerName().toString(), ce));
205   }
206 
207   @Override
208   public boolean isFinished() {
209     return finished;
210   }
211 
212   @Override
213   public SnapshotDescription getSnapshot() {
214     return snapshot;
215   }
216 
217   @Override
218   public ForeignException getExceptionIfFailed() {
219     return monitor.getException();
220   }
221 
222   @Override
223   public void rethrowException() throws ForeignException {
224     monitor.rethrowException();
225   }
226 
227   @Override
228   public boolean hasException() {
229     return monitor.hasException();
230   }
231 
232   @Override
233   public ForeignException getException() {
234     return monitor.getException();
235   }
236 
237 }