View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Collections;
24  import java.util.HashMap;
25  import java.util.HashSet;
26  import java.util.Iterator;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Set;
30  import java.util.concurrent.ThreadPoolExecutor;
31  
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.hadoop.classification.InterfaceAudience;
35  import org.apache.hadoop.classification.InterfaceStability;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FSDataInputStream;
38  import org.apache.hadoop.fs.FileStatus;
39  import org.apache.hadoop.fs.FileSystem;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.hbase.HConstants;
42  import org.apache.hadoop.hbase.HTableDescriptor;
43  import org.apache.hadoop.hbase.Stoppable;
44  import org.apache.hadoop.hbase.catalog.MetaReader;
45  import org.apache.hadoop.hbase.errorhandling.ForeignException;
46  import org.apache.hadoop.hbase.executor.ExecutorService;
47  import org.apache.hadoop.hbase.master.AssignmentManager;
48  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
49  import org.apache.hadoop.hbase.master.MasterFileSystem;
50  import org.apache.hadoop.hbase.master.MasterServices;
51  import org.apache.hadoop.hbase.master.SnapshotSentinel;
52  import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
53  import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner;
54  import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
55  import org.apache.hadoop.hbase.procedure.Procedure;
56  import org.apache.hadoop.hbase.procedure.ProcedureCoordinator;
57  import org.apache.hadoop.hbase.procedure.ProcedureCoordinatorRpcs;
58  import org.apache.hadoop.hbase.procedure.ZKProcedureCoordinatorRpcs;
59  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
60  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type;
61  import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException;
62  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
63  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
64  import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
65  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
66  import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException;
67  import org.apache.hadoop.hbase.snapshot.SnapshotExistsException;
68  import org.apache.hadoop.hbase.snapshot.TablePartiallyOpenException;
69  import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
70  import org.apache.hadoop.hbase.util.Bytes;
71  import org.apache.hadoop.hbase.util.FSTableDescriptors;
72  import org.apache.hadoop.hbase.util.FSUtils;
73  import org.apache.zookeeper.KeeperException;
74  
75  /**
76   * This class manages the procedure of taking and restoring snapshots. There is only one
77   * SnapshotManager for the master.
78   * <p>
79   * The class provides methods for monitoring in-progress snapshot actions.
80   * <p>
81   * Note: Currently there can only be one snapshot being taken at a time over the cluster. This is a
82   * simplification in the current implementation.
83   */
84  @InterfaceAudience.Private
85  @InterfaceStability.Unstable
86  public class SnapshotManager implements Stoppable {
87    private static final Log LOG = LogFactory.getLog(SnapshotManager.class);
88  
89    /** By default, check to see if the snapshot is complete every WAKE MILLIS (ms) */
90    private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500;
91  
92    /** Enable or disable snapshot support */
93    public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled";
94  
95    /**
96     * Conf key for # of ms elapsed between checks for snapshot errors while waiting for
97     * completion.
98     */
99    private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis";
100 
101   /** Default value (ms) used for {@link #SNAPSHOT_TIMEOUT_MILLIS_KEY} when it is not configured */
102   private static final int SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 5000;
103 
104   /**
105    * Conf key for # of ms elapsed before injecting a snapshot timeout error when waiting for
106    * completion.
107    */
108   private static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.master.timeoutMillis";
109 
110   /** Name of the operation to use in the controller */
111   public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot";
112 
113   // TODO - enable having multiple snapshots with multiple monitors/threads
114   // this needs to be configuration based when running multiple snapshots is implemented
115   /** Number of operation threads requested for the coordinator's default thread pool */
116   private static final int opThreads = 1;
117 
118   private boolean stopped;
119   private final long wakeFrequency;
120   private final MasterServices master;  // Needed by TableEventHandlers
121   private final MasterMetrics metricsMaster;
122   private final ProcedureCoordinator coordinator;
123 
124   // Is snapshot feature enabled?
125   private boolean isSnapshotSupported = false;
126 
127   // A reference to a handler.  If the handler is non-null, then it is assumed that a snapshot is
128   // in progress currently
129   // TODO: this is a bad smell;  likely replace with a collection in the future.  Also this gets
130   // reset by every operation.
131   private TakeSnapshotHandler handler;
132 
133   private final Path rootDir;
134   private final ExecutorService executorService;
135 
136   // Restore Sentinels map, with table name as key
137   private Map<String, SnapshotSentinel> restoreHandlers = new HashMap<String, SnapshotSentinel>();
138 
139   /**
140    * Construct a snapshot manager.
141    * @param master
142    */
143   public SnapshotManager(final MasterServices master, final MasterMetrics metricsMaster)
144       throws KeeperException, IOException, UnsupportedOperationException {
145     this.master = master;
146     this.metricsMaster = metricsMaster;
147 
148     this.rootDir = master.getMasterFileSystem().getRootDir();
149     checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem());
150 
151     // get the configuration for the coordinator
152     Configuration conf = master.getConfiguration();
153     this.wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
154     long keepAliveTime = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, SNAPSHOT_TIMEOUT_MILLIS_DEFAULT);
155 
156     // setup the default procedure coordinator
157     String name = master.getServerName().toString();
158     ThreadPoolExecutor tpool = ProcedureCoordinator.defaultPool(name, keepAliveTime, opThreads, wakeFrequency);
159     ProcedureCoordinatorRpcs comms = new ZKProcedureCoordinatorRpcs(
160         master.getZooKeeper(), SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, name);
161     this.coordinator = new ProcedureCoordinator(comms, tpool);
162     this.executorService = master.getExecutorService();
163     resetTempDir();
164   }
165 
166   /**
167    * Fully specify all necessary components of a snapshot manager. Exposed for testing.
168    * @param master services for the master where the manager is running
169    * @param coordinator procedure coordinator instance.  exposed for testing.
170    * @param pool HBase ExecutorServcie instance, exposed for testing.
171    */
172   public SnapshotManager(final MasterServices master, final MasterMetrics metricsMaster,
173       ProcedureCoordinator coordinator, ExecutorService pool)
174       throws IOException, UnsupportedOperationException {
175     this.master = master;
176     this.metricsMaster = metricsMaster;
177 
178     this.rootDir = master.getMasterFileSystem().getRootDir();
179     checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem());
180 
181     this.wakeFrequency = master.getConfiguration().getInt(SNAPSHOT_WAKE_MILLIS_KEY,
182       SNAPSHOT_WAKE_MILLIS_DEFAULT);
183     this.coordinator = coordinator;
184     this.executorService = pool;
185     resetTempDir();
186   }
187 
188   /**
189    * Gets the list of all completed snapshots.
190    * @return list of SnapshotDescriptions
191    * @throws IOException File system exception
192    */
193   public List<SnapshotDescription> getCompletedSnapshots() throws IOException {
194     return getCompletedSnapshots(SnapshotDescriptionUtils.getSnapshotsDir(rootDir));
195   }
196   
197   /**
198    * Gets the list of all completed snapshots.
199    * @param snapshotDir snapshot directory
200    * @return list of SnapshotDescriptions
201    * @throws IOException File system exception
202    */
203   private List<SnapshotDescription> getCompletedSnapshots(Path snapshotDir) throws IOException {
204     List<SnapshotDescription> snapshotDescs = new ArrayList<SnapshotDescription>();
205     // first create the snapshot root path and check to see if it exists
206     if (snapshotDir == null) snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
207 
208     FileSystem fs = master.getMasterFileSystem().getFileSystem();
209 
210     // if there are no snapshots, return an empty list
211     if (!fs.exists(snapshotDir)) {
212       return snapshotDescs;
213     }
214 
215     // ignore all the snapshots in progress
216     FileStatus[] snapshots = fs.listStatus(snapshotDir,
217       new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
218     // loop through all the completed snapshots
219     for (FileStatus snapshot : snapshots) {
220       Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
221       // if the snapshot is bad
222       if (!fs.exists(info)) {
223         LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist");
224         continue;
225       }
226       FSDataInputStream in = null;
227       try {
228         in = fs.open(info);
229         SnapshotDescription desc = SnapshotDescription.parseFrom(in);
230         snapshotDescs.add(desc);
231       } catch (IOException e) {
232         LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e);
233       } finally {
234         if (in != null) {
235           in.close();
236         }
237       }
238     }
239     return snapshotDescs;
240   }
241 
242   /**
243    * Cleans up any snapshots in the snapshot/.tmp directory that were left from failed
244    * snapshot attempts.
245    *
246    * @throws IOException if we can't reach the filesystem
247    */
248   void resetTempDir() throws IOException {
249     // cleanup any existing snapshots.
250     Path tmpdir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir);
251     if (!master.getMasterFileSystem().getFileSystem().delete(tmpdir, true)) {
252       LOG.warn("Couldn't delete working snapshot directory: " + tmpdir);
253     }
254   }
255 
256   /**
257    * Delete the specified snapshot
258    * @param snapshot
259    * @throws SnapshotDoesNotExistException If the specified snapshot does not exist.
260    * @throws IOException For filesystem IOExceptions
261    */
262   public void deleteSnapshot(SnapshotDescription snapshot) throws SnapshotDoesNotExistException, IOException {
263 
264     // call coproc pre hook
265     MasterCoprocessorHost cpHost = master.getCoprocessorHost();
266     if (cpHost != null) {
267       cpHost.preDeleteSnapshot(snapshot);
268     }
269 
270     // check to see if it is completed
271     if (!isSnapshotCompleted(snapshot)) {
272       throw new SnapshotDoesNotExistException(snapshot);
273     }
274 
275     String snapshotName = snapshot.getName();
276     LOG.debug("Deleting snapshot: " + snapshotName);
277     // first create the snapshot description and check to see if it exists
278     MasterFileSystem fs = master.getMasterFileSystem();
279     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
280 
281     // delete the existing snapshot
282     if (!fs.getFileSystem().delete(snapshotDir, true)) {
283       throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir);
284     }
285 
286     // call coproc post hook
287     if (cpHost != null) {
288       cpHost.postDeleteSnapshot(snapshot);
289     }
290 
291   }
292 
293   /**
294    * Return the handler if it is currently running and has the same snapshot target name.
295    * @param snapshot
296    * @return null if doesn't match, else a live handler.
297    */
298   private synchronized TakeSnapshotHandler getTakeSnapshotHandler(SnapshotDescription snapshot) {
299     TakeSnapshotHandler h = this.handler;
300     if (h == null) {
301       return null;
302     }
303 
304     if (!h.getSnapshot().getName().equals(snapshot.getName())) {
305       // specified snapshot is to the one currently running
306       return null;
307     }
308 
309     return h;
310   }
311 
312   /**
313    * Check if the specified snapshot is done
314    * @param expected
315    * @return true if snapshot is ready to be restored, false if it is still being taken.
316    * @throws IOException IOException if error from HDFS or RPC
317    * @throws UnknownSnapshotException if snapshot is invalid or does not exist.
318    */
319   public boolean isSnapshotDone(SnapshotDescription expected) throws IOException {
320     // check the request to make sure it has a snapshot
321     if (expected == null) {
322       throw new UnknownSnapshotException(
323          "No snapshot name passed in request, can't figure out which snapshot you want to check.");
324     }
325 
326     String ssString = SnapshotDescriptionUtils.toString(expected);
327 
328     // check to see if the sentinel exists
329     TakeSnapshotHandler handler = getTakeSnapshotHandler(expected);
330     if (handler == null) {
331       // doesn't exist, check if it is already completely done.
332       if (!isSnapshotCompleted(expected)) {
333         throw new UnknownSnapshotException("Snapshot " + ssString
334             + " is not currently running or one of the known completed snapshots.");
335       }
336       // was done, return true;
337       return true;
338     }
339 
340     // pass on any failure we find in the sentinel
341     try {
342       handler.rethrowException();
343     } catch (ForeignException e) {
344       // Give some procedure info on an exception.
345       String status;
346       Procedure p = coordinator.getProcedure(expected.getName());
347       if (p != null) {
348         status = p.getStatus();
349       } else {
350         status = expected.getName() + " not found in proclist " + coordinator.getProcedureNames();
351       }
352       throw new HBaseSnapshotException("Snapshot " + ssString +  " had an error.  " + status, e,
353           expected);
354     }
355 
356     // check to see if we are done
357     if (handler.isFinished()) {
358       LOG.debug("Snapshot '" + ssString + "' has completed, notifying client.");
359       return true;
360     } else if (LOG.isDebugEnabled()) {
361       LOG.debug("Snapshoting '" + ssString + "' is still in progress!");
362     }
363     return false;
364   }
365 
366   /**
367    * Check to see if there are any snapshots in progress currently.  Currently we have a
368    * limitation only allowing a single snapshot attempt at a time.
369    * @return <tt>true</tt> if there any snapshots in progress, <tt>false</tt> otherwise
370    * @throws SnapshotCreationException if the snapshot failed
371    */
372   synchronized boolean isTakingSnapshot() throws SnapshotCreationException {
373     // TODO later when we handle multiple there would be a map with ssname to handler.
374     return handler != null && !handler.isFinished();
375   }
376 
377   /**
378    * Check to see if the specified table has a snapshot in progress.  Currently we have a
379    * limitation only allowing a single snapshot attempt at a time.
380    * @param tableName name of the table being snapshotted.
381    * @return <tt>true</tt> if there is a snapshot in progress on the specified table.
382    */
383   private boolean isTakingSnapshot(final String tableName) {
384     if (handler != null && handler.getSnapshot().getTable().equals(tableName)) {
385       return !handler.isFinished();
386     }
387     return false;
388   }
389 
390   /**
391    * Check to make sure that we are OK to run the passed snapshot. Checks to make sure that we
392    * aren't already running a snapshot.
393    * @param snapshot description of the snapshot we want to start
394    * @throws HBaseSnapshotException if the filesystem could not be prepared to start the snapshot
395    */
396   private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot)
397       throws HBaseSnapshotException {
398     FileSystem fs = master.getMasterFileSystem().getFileSystem();
399     Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
400 
401     // make sure we aren't already running a snapshot
402     if (isTakingSnapshot()) {
403       throw new SnapshotCreationException("Rejected taking "
404           + SnapshotDescriptionUtils.toString(snapshot)
405           + " because we are already running another snapshot "
406           + SnapshotDescriptionUtils.toString(this.handler.getSnapshot()), snapshot);
407     }
408 
409     // make sure we aren't running a restore on the same table
410     if (isRestoringTable(snapshot.getTable())) {
411       throw new SnapshotCreationException("Rejected taking "
412           + SnapshotDescriptionUtils.toString(snapshot)
413           + " because we are already have a restore in progress on the same snapshot "
414           + SnapshotDescriptionUtils.toString(this.handler.getSnapshot()), snapshot);
415     }
416 
417     try {
418       // delete the working directory, since we aren't running the snapshot. Likely leftovers
419       // from a failed attempt.
420       fs.delete(workingDir, true);
421 
422       // recreate the working directory for the snapshot
423       if (!fs.mkdirs(workingDir)) {
424         throw new SnapshotCreationException("Couldn't create working directory (" + workingDir
425             + ") for snapshot" , snapshot);
426       }
427     } catch (HBaseSnapshotException e) {
428       throw e;
429     } catch (IOException e) {
430       throw new SnapshotCreationException(
431           "Exception while checking to see if snapshot could be started.", e, snapshot);
432     }
433   }
434 
435   /**
436    * Take a snapshot of an enabled table.
437    * <p>
438    * The thread limitation on the executorService's thread pool for snapshots ensures the
439    * snapshot won't be started if there is another snapshot already running. Does
440    * <b>not</b> check to see if another snapshot of the same name already exists.
441    * @param snapshot description of the snapshot to take.
442    * @throws HBaseSnapshotException if the snapshot could not be started
443    */
444   private synchronized void snapshotEnabledTable(SnapshotDescription snapshot)
445       throws HBaseSnapshotException {
446     TakeSnapshotHandler handler;
447     try {
448       handler = new EnabledTableSnapshotHandler(snapshot, master, this, metricsMaster);
449       this.executorService.submit(handler);
450       this.handler = handler;
451     } catch (IOException e) {
452       // cleanup the working directory by trying to delete it from the fs.
453       Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
454       try {
455         if (!this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) {
456           LOG.warn("Couldn't delete working directory (" + workingDir + " for snapshot:"
457               + SnapshotDescriptionUtils.toString(snapshot));
458         }
459       } catch (IOException e1) {
460         LOG.warn("Couldn't delete working directory (" + workingDir + " for snapshot:" +
461             SnapshotDescriptionUtils.toString(snapshot));
462       }
463       // fail the snapshot
464       throw new SnapshotCreationException("Could not build snapshot handler", e, snapshot);
465     }
466   }
467 
468   /**
469    * Take a snapshot based on the enabled/disabled state of the table.
470    *
471    * @param snapshot
472    * @throws HBaseSnapshotException when a snapshot specific exception occurs.
473    * @throws IOException when some sort of generic IO exception occurs.
474    */
475   public void takeSnapshot(SnapshotDescription snapshot) throws IOException {
476     // check to see if we already completed the snapshot
477     if (isSnapshotCompleted(snapshot)) {
478       throw new SnapshotExistsException("Snapshot '" + snapshot.getName()
479           + "' already stored on the filesystem.", snapshot);
480     }
481 
482     LOG.debug("No existing snapshot, attempting snapshot...");
483 
484     // check to see if the table exists
485     HTableDescriptor desc = null;
486     try {
487       desc = master.getTableDescriptors().get(snapshot.getTable());
488     } catch (FileNotFoundException e) {
489       String msg = "Table:" + snapshot.getTable() + " info doesn't exist!";
490       LOG.error(msg);
491       throw new SnapshotCreationException(msg, e, snapshot);
492     } catch (IOException e) {
493       throw new SnapshotCreationException("Error while geting table description for table "
494           + snapshot.getTable(), e, snapshot);
495     }
496     if (desc == null) {
497       throw new SnapshotCreationException("Table '" + snapshot.getTable()
498           + "' doesn't exist, can't take snapshot.", snapshot);
499     }
500 
501     // set the snapshot version, now that we are ready to take it
502     snapshot = snapshot.toBuilder().setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION)
503         .build();
504 
505     // call pre coproc hook
506     MasterCoprocessorHost cpHost = master.getCoprocessorHost();
507     if (cpHost != null) {
508       cpHost.preSnapshot(snapshot, desc);
509     }
510 
511     // setup the snapshot
512     prepareToTakeSnapshot(snapshot);
513 
514     // if the table is enabled, then have the RS run actually the snapshot work
515     AssignmentManager assignmentMgr = master.getAssignmentManager();
516     if (assignmentMgr.getZKTable().isEnabledTable(snapshot.getTable())) {
517       LOG.debug("Table enabled, starting distributed snapshot.");
518       snapshotEnabledTable(snapshot);
519       LOG.debug("Started snapshot: " + SnapshotDescriptionUtils.toString(snapshot));
520     }
521     // For disabled table, snapshot is created by the master
522     else if (assignmentMgr.getZKTable().isDisabledTable(snapshot.getTable())) {
523       LOG.debug("Table is disabled, running snapshot entirely on master.");
524       snapshotDisabledTable(snapshot);
525       LOG.debug("Started snapshot: " + SnapshotDescriptionUtils.toString(snapshot));
526     } else {
527       LOG.error("Can't snapshot table '" + snapshot.getTable()
528           + "', isn't open or closed, we don't know what to do!");
529       TablePartiallyOpenException tpoe = new TablePartiallyOpenException(snapshot.getTable()
530           + " isn't fully open.");
531       throw new SnapshotCreationException("Table is not entirely open or closed", tpoe, snapshot);
532     }
533 
534     // call post coproc hook
535     if (cpHost != null) {
536       cpHost.postSnapshot(snapshot, desc);
537     }
538   }
539 
540   /**
541    * Take a snapshot of a disabled table.
542    * <p>
543    * The thread limitation on the executorService's thread pool for snapshots ensures the
544    * snapshot won't be started if there is another snapshot already running. Does
545    * <b>not</b> check to see if another snapshot of the same name already exists.
546    * @param snapshot description of the snapshot to take. Modified to be {@link Type#DISABLED}.
547    * @throws HBaseSnapshotException if the snapshot could not be started
548    */
549   private synchronized void snapshotDisabledTable(SnapshotDescription snapshot)
550       throws HBaseSnapshotException {
551 
552     // set the snapshot to be a disabled snapshot, since the client doesn't know about that
553     snapshot = snapshot.toBuilder().setType(Type.DISABLED).build();
554 
555     DisabledTableSnapshotHandler handler;
556     try {
557       handler = new DisabledTableSnapshotHandler(snapshot, master, metricsMaster);
558       this.executorService.submit(handler);
559       this.handler = handler;
560     } catch (IOException e) {
561       // cleanup the working directory by trying to delete it from the fs.
562       Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
563       try {
564         if (!this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) {
565           LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
566               SnapshotDescriptionUtils.toString(snapshot));
567         }
568       } catch (IOException e1) {
569         LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
570             SnapshotDescriptionUtils.toString(snapshot));
571       }
572       // fail the snapshot
573       throw new SnapshotCreationException("Could not build snapshot handler", e, snapshot);
574     }
575   }
576 
577   /**
578    * Set the handler for the current snapshot
579    * <p>
580    * Exposed for TESTING
581    * @param handler handler the master should use
582    *
583    * TODO get rid of this if possible, repackaging, modify tests.
584    */
585   public synchronized void setSnapshotHandlerForTesting(TakeSnapshotHandler handler) {
586     this.handler = handler;
587   }
588 
589   /**
590    * @return distributed commit coordinator for all running snapshots
591    */
592   ProcedureCoordinator getCoordinator() {
593     return coordinator;
594   }
595 
596   /**
597    * Check to see if the snapshot is one of the currently completed snapshots
598    * @param expected snapshot to check
599    * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is
600    *         not stored
601    * @throws IOException if the filesystem throws an unexpected exception,
602    * @throws IllegalArgumentException if snapshot name is invalid.
603    */
604   private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException {
605     try {
606       final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
607       FileSystem fs = master.getMasterFileSystem().getFileSystem();
608 
609       // check to see if the snapshot already exists
610       return fs.exists(snapshotDir);
611     } catch (IllegalArgumentException iae) {
612       throw new UnknownSnapshotException("Unexpected exception thrown", iae);
613     }
614   }
615 
616   /**
617    * Clone the specified snapshot into a new table.
618    * The operation will fail if the destination table has a snapshot or restore in progress.
619    *
620    * @param snapshot Snapshot Descriptor
621    * @param hTableDescriptor Table Descriptor of the table to create
622    * @param waitTime timeout before considering the clone failed
623    */
624   synchronized void cloneSnapshot(final SnapshotDescription snapshot,
625       final HTableDescriptor hTableDescriptor) throws HBaseSnapshotException {
626     String tableName = hTableDescriptor.getNameAsString();
627 
628     // make sure we aren't running a snapshot on the same table
629     if (isTakingSnapshot(tableName)) {
630       throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
631     }
632 
633     // make sure we aren't running a restore on the same table
634     if (isRestoringTable(tableName)) {
635       throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
636     }
637 
638     try {
639       CloneSnapshotHandler handler =
640         new CloneSnapshotHandler(master, snapshot, hTableDescriptor, metricsMaster);
641       this.executorService.submit(handler);
642       restoreHandlers.put(tableName, handler);
643     } catch (Exception e) {
644       String msg = "Couldn't clone the snapshot=" + SnapshotDescriptionUtils.toString(snapshot) +
645         " on table=" + tableName;
646       LOG.error(msg, e);
647       throw new RestoreSnapshotException(msg, e);
648     }
649   }
650 
651   /**
652    * Restore the specified snapshot
653    * @param reqSnapshot
654    * @throws IOException
655    */
656   public void restoreSnapshot(SnapshotDescription reqSnapshot) throws IOException {
657     FileSystem fs = master.getMasterFileSystem().getFileSystem();
658     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir);
659     MasterCoprocessorHost cpHost = master.getCoprocessorHost();
660 
661     // check if the snapshot exists
662     if (!fs.exists(snapshotDir)) {
663       LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist.");
664       throw new SnapshotDoesNotExistException(reqSnapshot);
665     }
666 
667     // read snapshot information
668     SnapshotDescription fsSnapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
669     HTableDescriptor snapshotTableDesc = FSTableDescriptors.getTableDescriptor(fs, snapshotDir);
670     String tableName = reqSnapshot.getTable();
671 
672     // stop tracking completed restores
673     cleanupRestoreSentinels();
674 
675     // Execute the restore/clone operation
676     if (MetaReader.tableExists(master.getCatalogTracker(), tableName)) {
677       if (master.getAssignmentManager().getZKTable().isEnabledTable(fsSnapshot.getTable())) {
678         throw new UnsupportedOperationException("Table '" +
679           fsSnapshot.getTable() + "' must be disabled in order to perform a restore operation.");
680       }
681 
682       // call coproc pre hook
683       if (cpHost != null) {
684         cpHost.preRestoreSnapshot(reqSnapshot, snapshotTableDesc);
685       }
686       restoreSnapshot(fsSnapshot, snapshotTableDesc);
687       LOG.info("Restore snapshot=" + fsSnapshot.getName() + " as table=" + tableName);
688 
689       if (cpHost != null) {
690         cpHost.postRestoreSnapshot(reqSnapshot, snapshotTableDesc);
691       }
692     } else {
693       HTableDescriptor htd = RestoreSnapshotHelper.cloneTableSchema(snapshotTableDesc,
694                                                          Bytes.toBytes(tableName));
695       if (cpHost != null) {
696         cpHost.preCloneSnapshot(reqSnapshot, htd);
697       }
698       cloneSnapshot(fsSnapshot, htd);
699       LOG.info("Clone snapshot=" + fsSnapshot.getName() + " as table=" + tableName);
700 
701       if (cpHost != null) {
702         cpHost.postCloneSnapshot(reqSnapshot, htd);
703       }
704     }
705   }
706 
  /**
   * Restore the specified snapshot.
   * The restore will fail if the destination table has a snapshot or restore in progress.
   *
   * @param snapshot Snapshot Descriptor
   * @param hTableDescriptor Table Descriptor
   * @throws HBaseSnapshotException if a snapshot or restore is already in progress on the
   *         table, or if the restore handler cannot be submitted
   */
715   private synchronized void restoreSnapshot(final SnapshotDescription snapshot,
716       final HTableDescriptor hTableDescriptor) throws HBaseSnapshotException {
717     String tableName = hTableDescriptor.getNameAsString();
718 
719     // make sure we aren't running a snapshot on the same table
720     if (isTakingSnapshot(tableName)) {
721       throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
722     }
723 
724     // make sure we aren't running a restore on the same table
725     if (isRestoringTable(tableName)) {
726       throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
727     }
728 
729     try {
730       RestoreSnapshotHandler handler =
731         new RestoreSnapshotHandler(master, snapshot, hTableDescriptor, metricsMaster);
732       this.executorService.submit(handler);
733       restoreHandlers.put(hTableDescriptor.getNameAsString(), handler);
734     } catch (Exception e) {
735       String msg = "Couldn't restore the snapshot=" + SnapshotDescriptionUtils.toString(snapshot)  +
736           " on table=" + tableName;
737       LOG.error(msg, e);
738       throw new RestoreSnapshotException(msg, e);
739     }
740   }
741 
742   /**
743    * Verify if the restore of the specified table is in progress.
744    *
745    * @param tableName table under restore
746    * @return <tt>true</tt> if there is a restore in progress of the specified table.
747    */
748   private boolean isRestoringTable(final String tableName) {
749     SnapshotSentinel sentinel = restoreHandlers.get(tableName);
750     return(sentinel != null && !sentinel.isFinished());
751   }
752 
  /**
   * Returns status of a restore request, specifically comparing source snapshot and target table
   * names.  Throws exception if not a known snapshot.
   * @param snapshot description of the source snapshot and of the table it is restored to
   * @return true if in progress, false if the restore is completed or if no restore of this
   *         snapshot is tracked for the table.
   * @throws UnknownSnapshotException if specified source snapshot does not exist.
   * @throws IOException if there was some sort of IO failure
   */
761   public boolean isRestoringTable(final SnapshotDescription snapshot) throws IOException {
762     // check to see if the snapshot is already on the fs
763     if (!isSnapshotCompleted(snapshot)) {
764       throw new UnknownSnapshotException("Snapshot:" + snapshot.getName()
765           + " is not one of the known completed snapshots.");
766     }
767 
768     SnapshotSentinel sentinel = getRestoreSnapshotSentinel(snapshot.getTable());
769     if (sentinel == null) {
770       // there is no sentinel so restore is not in progress.
771       return false;
772     }
773     if (!sentinel.getSnapshot().getName().equals(snapshot.getName())) {
774       // another handler is trying to restore to the table, but it isn't the same snapshot source.
775       return false;
776     }
777 
778     LOG.debug("Verify snapshot=" + snapshot.getName() + " against="
779         + sentinel.getSnapshot().getName() + " table=" + snapshot.getTable());
780     ForeignException e = sentinel.getExceptionIfFailed();
781     if (e != null) throw e;
782 
783     // check to see if we are done
784     if (sentinel.isFinished()) {
785       LOG.debug("Restore snapshot=" + SnapshotDescriptionUtils.toString(snapshot) +
786           " has completed. Notifying the client.");
787       return false;
788     }
789 
790     if (LOG.isDebugEnabled()) {
791       LOG.debug("Sentinel is not yet finished with restoring snapshot=" +
792           SnapshotDescriptionUtils.toString(snapshot));
793     }
794     return true;
795   }
796 
797   /**
798    * Get the restore snapshot sentinel for the specified table
799    * @param tableName table under restore
800    * @return the restore snapshot handler
801    */
802   private synchronized SnapshotSentinel getRestoreSnapshotSentinel(final String tableName) {
803     try {
804       return restoreHandlers.get(tableName);
805     } finally {
806       cleanupRestoreSentinels();
807     }
808   }
809 
810   /**
811    * Scan the restore handlers and remove the finished ones.
812    */
813   private synchronized void cleanupRestoreSentinels() {
814     Iterator<Map.Entry<String, SnapshotSentinel>> it = restoreHandlers.entrySet().iterator();
815     while (it.hasNext()) {
816         Map.Entry<String, SnapshotSentinel> entry = it.next();
817         SnapshotSentinel sentinel = entry.getValue();
818         if (sentinel.isFinished()) {
819           it.remove();
820         }
821     }
822   }
823 
824   //
825   // Implementing Stoppable interface
826   //
827 
828   @Override
829   public void stop(String why) {
830     // short circuit
831     if (this.stopped) return;
832     // make sure we get stop
833     this.stopped = true;
834     // pass the stop onto take snapshot handlers
835     if (this.handler != null) this.handler.cancel(why);
836 
837     // pass the stop onto all the restore handlers
838     for (SnapshotSentinel restoreHandler: this.restoreHandlers.values()) {
839       restoreHandler.cancel(why);
840     }
841   }
842 
843   @Override
844   public boolean isStopped() {
845     return this.stopped;
846   }
847 
  /**
   * Throws an exception if snapshot operations (take a snapshot, restore, clone) are not supported.
   * Called at the beginning of snapshot() and restoreSnapshot() methods.
   * @throws UnsupportedOperationException if snapshots are not supported
   */
853   public void checkSnapshotSupport() throws UnsupportedOperationException {
854     if (!this.isSnapshotSupported) {
855       throw new UnsupportedOperationException(
856         "To use snapshots, You must add to the hbase-site.xml of the HBase Master: '" +
857           HBASE_SNAPSHOT_ENABLED + "' property with value 'true'.");
858     }
859   }
860 
861   /**
862    * Called at startup, to verify if snapshot operation is supported, and to avoid
863    * starting the master if there're snapshots present but the cleaners needed are missing.
864    * Otherwise we can end up with snapshot data loss.
865    * @param conf The {@link Configuration} object to use
866    * @param mfs The MasterFileSystem to use
867    * @throws IOException in case of file-system operation failure
868    * @throws UnsupportedOperationException in case cleaners are missing and
869    *         there're snapshot in the system
870    */
871   private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs)
872       throws IOException, UnsupportedOperationException {
873     // Verify if snapshot is disabled by the user
874     String enabled = conf.get(HBASE_SNAPSHOT_ENABLED);
875     boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false);
876     boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled);
877 
878     // Extract cleaners from conf
879     Set<String> hfileCleaners = new HashSet<String>();
880     String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
881     if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
882 
883     Set<String> logCleaners = new HashSet<String>();
884     cleaners = conf.getStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
885     if (cleaners != null) Collections.addAll(logCleaners, cleaners);
886 
887     // check if an older version of snapshot directory was present
888     Path oldSnapshotDir = new Path(mfs.getRootDir(), HConstants.OLD_SNAPSHOT_DIR_NAME);
889     FileSystem fs = mfs.getFileSystem();
890     List<SnapshotDescription> ss = getCompletedSnapshots(new Path(rootDir, oldSnapshotDir));
891     if (ss != null && !ss.isEmpty()) {
892       LOG.error("Snapshots from an earlier release were found under: " + oldSnapshotDir);
893       LOG.error("Please rename the directory as " + HConstants.SNAPSHOT_DIR_NAME);
894     }
895     
896     // If the user has enabled the snapshot, we force the cleaners to be present
897     // otherwise we still need to check if cleaners are enabled or not and verify
898     // that there're no snapshot in the .snapshot folder.
899     if (snapshotEnabled) {
900       // Inject snapshot cleaners, if snapshot.enable is true
901       hfileCleaners.add(SnapshotHFileCleaner.class.getName());
902       hfileCleaners.add(HFileLinkCleaner.class.getName());
903       logCleaners.add(SnapshotLogCleaner.class.getName());
904 
905       // Set cleaners conf
906       conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
907         hfileCleaners.toArray(new String[hfileCleaners.size()]));
908       conf.setStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS,
909         logCleaners.toArray(new String[logCleaners.size()]));
910     } else {
911       // Verify if cleaners are present
912       snapshotEnabled = logCleaners.contains(SnapshotLogCleaner.class.getName()) &&
913         hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) &&
914         hfileCleaners.contains(HFileLinkCleaner.class.getName());
915 
916       // Warn if the cleaners are enabled but the snapshot.enabled property is false/not set.
917       if (snapshotEnabled) {
918         LOG.warn("Snapshot log and hfile cleaners are present in the configuration, " +
919           "but the '" + HBASE_SNAPSHOT_ENABLED + "' property " +
920           (userDisabled ? "is set to 'false'." : "is not set."));
921       }
922     }
923 
924     // Mark snapshot feature as enabled if cleaners are present and user has not disabled it.
925     this.isSnapshotSupported = snapshotEnabled && !userDisabled;
926 
927     // If cleaners are not enabled, verify that there're no snapshot in the .snapshot folder
928     // otherwise we end up with snapshot data loss.
929     if (!snapshotEnabled) {
930       LOG.info("Snapshot feature is not enabled, missing log and hfile cleaners.");
931       Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(mfs.getRootDir());
932       if (fs.exists(snapshotDir)) {
933         FileStatus[] snapshots = FSUtils.listStatus(fs, snapshotDir,
934           new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
935         if (snapshots != null) {
936           LOG.error("Snapshots are present, but cleaners are not enabled.");
937           checkSnapshotSupport();
938         }
939       }
940     }
941   }
942 }