1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.snapshot;
19
20 import java.io.FileNotFoundException;
21 import java.io.IOException;
22 import java.util.HashSet;
23 import java.util.List;
24 import java.util.Set;
25 import java.util.concurrent.CancellationException;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.classification.InterfaceAudience;
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.hbase.TableName;
34 import org.apache.hadoop.hbase.HRegionInfo;
35 import org.apache.hadoop.hbase.HTableDescriptor;
36 import org.apache.hadoop.hbase.ServerName;
37 import org.apache.hadoop.hbase.catalog.MetaReader;
38 import org.apache.hadoop.hbase.errorhandling.ForeignException;
39 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
40 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
41 import org.apache.hadoop.hbase.executor.EventHandler;
42 import org.apache.hadoop.hbase.executor.EventType;
43 import org.apache.hadoop.hbase.master.MasterServices;
44 import org.apache.hadoop.hbase.master.MetricsSnapshot;
45 import org.apache.hadoop.hbase.master.SnapshotSentinel;
46 import org.apache.hadoop.hbase.master.TableLockManager;
47 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
48 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
49 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
50 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
51 import org.apache.hadoop.hbase.regionserver.HRegion;
52 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
53 import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
54 import org.apache.hadoop.hbase.snapshot.CopyRecoveredEditsTask;
55 import org.apache.hadoop.hbase.snapshot.ReferenceRegionHFilesTask;
56 import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
57 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
58 import org.apache.hadoop.hbase.snapshot.TableInfoCopyTask;
59 import org.apache.hadoop.hbase.util.Pair;
60 import org.apache.zookeeper.KeeperException;
61
62
63
64
65
66
67
68
69 @InterfaceAudience.Private
70 public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
71 ForeignExceptionSnare {
72 private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
73
74 private volatile boolean finished;
75
76
77 protected final MasterServices master;
78 protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
79 protected final SnapshotDescription snapshot;
80 protected final Configuration conf;
81 protected final FileSystem fs;
82 protected final Path rootDir;
83 private final Path snapshotDir;
84 protected final Path workingDir;
85 private final MasterSnapshotVerifier verifier;
86 protected final ForeignExceptionDispatcher monitor;
87 protected final TableLockManager tableLockManager;
88 protected final TableLock tableLock;
89 protected final MonitoredTask status;
90 protected final TableName snapshotTable;
91
92
93
94
95
96 public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices) {
97 super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
98 assert snapshot != null : "SnapshotDescription must not be nul1";
99 assert masterServices != null : "MasterServices must not be nul1";
100
101 this.master = masterServices;
102 this.snapshot = snapshot;
103 this.snapshotTable = TableName.valueOf(snapshot.getTable());
104 this.conf = this.master.getConfiguration();
105 this.fs = this.master.getMasterFileSystem().getFileSystem();
106 this.rootDir = this.master.getMasterFileSystem().getRootDir();
107 this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
108 this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
109 this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
110
111 this.tableLockManager = master.getTableLockManager();
112 this.tableLock = this.tableLockManager.writeLock(
113 snapshotTable,
114 EventType.C_M_SNAPSHOT_TABLE.toString());
115
116
117 this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
118
119 this.status = TaskMonitor.get().createStatus(
120 "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable);
121 }
122
123 private HTableDescriptor loadTableDescriptor()
124 throws FileNotFoundException, IOException {
125 HTableDescriptor htd =
126 this.master.getTableDescriptors().get(snapshotTable);
127 if (htd == null) {
128 throw new IOException("HTableDescriptor missing for " + snapshotTable);
129 }
130 return htd;
131 }
132
133 public TakeSnapshotHandler prepare() throws Exception {
134 super.prepare();
135 this.tableLock.acquire();
136
137 boolean success = false;
138 try {
139 loadTableDescriptor();
140 success = true;
141 } finally {
142 if (!success) {
143 releaseTableLock();
144 }
145 }
146
147 return this;
148 }
149
150
151
152
153
154 @Override
155 public void process() {
156 String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
157 + eventType + " on table " + snapshotTable;
158 LOG.info(msg);
159 status.setStatus(msg);
160 try {
161
162
163
164
165 SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, this.fs);
166 new TableInfoCopyTask(monitor, snapshot, fs, rootDir).call();
167 monitor.rethrowException();
168
169 List<Pair<HRegionInfo, ServerName>> regionsAndLocations =
170 MetaReader.getTableRegionsAndLocations(this.server.getCatalogTracker(),
171 snapshotTable, false);
172
173
174 snapshotRegions(regionsAndLocations);
175 monitor.rethrowException();
176
177
178 Set<String> serverNames = new HashSet<String>();
179 for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
180 if (p != null && p.getFirst() != null && p.getSecond() != null) {
181 HRegionInfo hri = p.getFirst();
182 if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
183 serverNames.add(p.getSecond().toString());
184 }
185 }
186
187
188 status.setStatus("Verifying snapshot: " + snapshot.getName());
189 verifier.verifySnapshot(this.workingDir, serverNames);
190
191
192 completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
193 status.markComplete("Snapshot " + snapshot.getName() + " of table " + snapshotTable
194 + " completed");
195 metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
196 } catch (Exception e) {
197 status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
198 snapshotTable + " because " + e.getMessage());
199 String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
200 + " due to exception:" + e.getMessage();
201 LOG.error(reason, e);
202 ForeignException ee = new ForeignException(reason, e);
203 monitor.receive(ee);
204
205 cancel("Failed to take snapshot '" + ClientSnapshotDescriptionUtils.toString(snapshot)
206 + "' due to exception");
207 } finally {
208 LOG.debug("Launching cleanup of working dir:" + workingDir);
209 try {
210
211
212 if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
213 LOG.error("Couldn't delete snapshot working directory:" + workingDir);
214 }
215 } catch (IOException e) {
216 LOG.error("Couldn't delete snapshot working directory:" + workingDir);
217 }
218 releaseTableLock();
219 }
220 }
221
222 protected void releaseTableLock() {
223 if (this.tableLock != null) {
224 try {
225 this.tableLock.release();
226 } catch (IOException ex) {
227 LOG.warn("Could not release the table lock", ex);
228 }
229 }
230 }
231
232
233
234
235
236
237
238
239
240
241 public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
242 throws SnapshotCreationException, IOException {
243 LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
244 + snapshotDir);
245 if (!fs.rename(workingDir, snapshotDir)) {
246 throw new SnapshotCreationException("Failed to move working directory(" + workingDir
247 + ") to completed directory(" + snapshotDir + ").");
248 }
249 finished = true;
250 }
251
252
253
254
255 protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
256 throws IOException, KeeperException;
257
258
259
260
261 protected void snapshotDisabledRegion(final HRegionInfo regionInfo)
262 throws IOException {
263
264 HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
265 workingDir, regionInfo);
266
267
268 monitor.rethrowException();
269
270
271 Path regionDir = HRegion.getRegionDir(rootDir, regionInfo);
272 Path snapshotRegionDir = regionFs.getRegionDir();
273 new CopyRecoveredEditsTask(snapshot, monitor, fs, regionDir, snapshotRegionDir).call();
274 monitor.rethrowException();
275 status.setStatus("Completed copying recovered edits for offline snapshot of table: "
276 + snapshotTable);
277
278
279 new ReferenceRegionHFilesTask(snapshot, monitor, regionDir, fs, snapshotRegionDir).call();
280 monitor.rethrowException();
281 status.setStatus("Completed referencing HFiles for offline snapshot of table: " +
282 snapshotTable);
283 }
284
285 @Override
286 public void cancel(String why) {
287 if (finished) return;
288
289 this.finished = true;
290 LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
291 " because: " + why);
292 CancellationException ce = new CancellationException(why);
293 monitor.receive(new ForeignException(master.getServerName().toString(), ce));
294 }
295
296 @Override
297 public boolean isFinished() {
298 return finished;
299 }
300
301 @Override
302 public long getCompletionTimestamp() {
303 return this.status.getCompletionTimestamp();
304 }
305
306 @Override
307 public SnapshotDescription getSnapshot() {
308 return snapshot;
309 }
310
311 @Override
312 public ForeignException getExceptionIfFailed() {
313 return monitor.getException();
314 }
315
316 @Override
317 public void rethrowExceptionIfFailed() throws ForeignException {
318 monitor.rethrowException();
319 }
320
321 @Override
322 public void rethrowException() throws ForeignException {
323 monitor.rethrowException();
324 }
325
326 @Override
327 public boolean hasException() {
328 return monitor.hasException();
329 }
330
331 @Override
332 public ForeignException getException() {
333 return monitor.getException();
334 }
335
336 }