View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Collections;
24  import java.util.HashMap;
25  import java.util.HashSet;
26  import java.util.Iterator;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Set;
30  import java.util.concurrent.ThreadPoolExecutor;
31  
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.hadoop.classification.InterfaceAudience;
35  import org.apache.hadoop.classification.InterfaceStability;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FSDataInputStream;
38  import org.apache.hadoop.fs.FileStatus;
39  import org.apache.hadoop.fs.FileSystem;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.hbase.HConstants;
42  import org.apache.hadoop.hbase.HTableDescriptor;
43  import org.apache.hadoop.hbase.Stoppable;
44  import org.apache.hadoop.hbase.catalog.MetaReader;
45  import org.apache.hadoop.hbase.errorhandling.ForeignException;
46  import org.apache.hadoop.hbase.exceptions.HBaseSnapshotException;
47  import org.apache.hadoop.hbase.exceptions.RestoreSnapshotException;
48  import org.apache.hadoop.hbase.exceptions.SnapshotCreationException;
49  import org.apache.hadoop.hbase.exceptions.SnapshotDoesNotExistException;
50  import org.apache.hadoop.hbase.exceptions.SnapshotExistsException;
51  import org.apache.hadoop.hbase.exceptions.TablePartiallyOpenException;
52  import org.apache.hadoop.hbase.exceptions.UnknownSnapshotException;
53  import org.apache.hadoop.hbase.executor.ExecutorService;
54  import org.apache.hadoop.hbase.master.AssignmentManager;
55  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
56  import org.apache.hadoop.hbase.master.MasterFileSystem;
57  import org.apache.hadoop.hbase.master.MasterServices;
58  import org.apache.hadoop.hbase.master.SnapshotSentinel;
59  import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
60  import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner;
61  import org.apache.hadoop.hbase.procedure.Procedure;
62  import org.apache.hadoop.hbase.procedure.ProcedureCoordinator;
63  import org.apache.hadoop.hbase.procedure.ProcedureCoordinatorRpcs;
64  import org.apache.hadoop.hbase.procedure.ZKProcedureCoordinatorRpcs;
65  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
66  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type;
67  import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
68  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
69  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
70  import org.apache.hadoop.hbase.util.Bytes;
71  import org.apache.hadoop.hbase.util.FSTableDescriptors;
72  import org.apache.hadoop.hbase.util.FSUtils;
73  import org.apache.zookeeper.KeeperException;
74  
75  /**
76   * This class manages the procedure of taking and restoring snapshots. There is only one
77   * SnapshotManager for the master.
78   * <p>
79   * The class provides methods for monitoring in-progress snapshot actions.
80   * <p>
81   * Note: Currently there can only be one snapshot being taken at a time over the cluster. This is a
82   * simplification in the current implementation.
83   */
84  @InterfaceAudience.Private
85  @InterfaceStability.Unstable
86  public class SnapshotManager implements Stoppable {
87    private static final Log LOG = LogFactory.getLog(SnapshotManager.class);
88  
89    /** By default, check to see if the snapshot is complete every WAKE MILLIS (ms) */
90    private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500;
91  
92    /** Enable or disable snapshot support */
93    public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled";
94  
95    /**
96     * Conf key for # of ms elapsed between checks for snapshot errors while waiting for
97     * completion.
98     */
99    private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis";
100 
101   /** By default, wait this long (ms) for completion before injecting a snapshot timeout error */
102   private static final int SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 5000;
103 
104   /**
105    * Conf key for # of ms elapsed before injecting a snapshot timeout error when waiting for
106    * completion.
107    */
108   private static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.master.timeoutMillis";
109 
110   /** Name of the operation to use in the controller */
111   public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot";
112 
113   // TODO - enable having multiple snapshots with multiple monitors/threads
114   // this needs to be configuration based when running multiple snapshots is implemented
115   /** number of current operations running on the master */
116   private static final int opThreads = 1;
117 
118   private boolean stopped;
119   private final long wakeFrequency;
120   private final MasterServices master;  // Needed by TableEventHandlers
121   private final ProcedureCoordinator coordinator;
122 
123   // Is snapshot feature enabled?
124   private boolean isSnapshotSupported = false;
125 
126   // A reference to a handler.  If the handler is non-null, then it is assumed that a snapshot is
127   // in progress currently
128   // TODO: this is a bad smell;  likely replace with a collection in the future.  Also this gets
129   // reset by every operation.
130   private TakeSnapshotHandler handler;
131 
132   private final Path rootDir;
133   private final ExecutorService executorService;
134 
135   // Restore Sentinels map, with table name as key
136   private Map<String, SnapshotSentinel> restoreHandlers = new HashMap<String, SnapshotSentinel>();
137 
138   /**
139    * Construct a snapshot manager.
140    * @param master
141    */
142   public SnapshotManager(final MasterServices master) throws KeeperException, IOException,
143     UnsupportedOperationException {
144     this.master = master;
145     checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem());
146 
147     // get the configuration for the coordinator
148     Configuration conf = master.getConfiguration();
149     this.wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
150     long keepAliveTime = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, SNAPSHOT_TIMEOUT_MILLIS_DEFAULT);
151 
152     // setup the default procedure coordinator
153     String name = master.getServerName().toString();
154     ThreadPoolExecutor tpool = ProcedureCoordinator.defaultPool(name, keepAliveTime, opThreads, wakeFrequency);
155     ProcedureCoordinatorRpcs comms = new ZKProcedureCoordinatorRpcs(
156         master.getZooKeeper(), SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, name);
157     this.coordinator = new ProcedureCoordinator(comms, tpool);
158     this.rootDir = master.getMasterFileSystem().getRootDir();
159     this.executorService = master.getExecutorService();
160     resetTempDir();
161   }
162 
/**
 * Build a snapshot manager from explicitly supplied collaborators. Exposed for testing.
 * <p>
 * Performs the same snapshot support check and working-directory reset as the single-argument
 * constructor, but uses the given coordinator and executor instead of creating its own.
 *
 * @param master services for the master where the manager is running
 * @param coordinator procedure coordinator instance to use
 * @param pool HBase ExecutorService instance that snapshot handlers are submitted to
 * @throws IOException if the working snapshot directory cannot be reset
 * @throws UnsupportedOperationException declared for the snapshot support check
 */
public SnapshotManager(final MasterServices master, ProcedureCoordinator coordinator, ExecutorService pool)
    throws IOException, UnsupportedOperationException {
  this.master = master;
  checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem());

  // collaborators handed in by the caller
  this.coordinator = coordinator;
  this.executorService = pool;

  // values still derived from the master
  this.wakeFrequency = master.getConfiguration().getInt(
      SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
  this.rootDir = master.getMasterFileSystem().getRootDir();

  // must run last: it needs both the master and the root directory
  resetTempDir();
}
181 
182   /**
183    * Gets the list of all completed snapshots.
184    * @return list of SnapshotDescriptions
185    * @throws IOException File system exception
186    */
187   public List<SnapshotDescription> getCompletedSnapshots() throws IOException {
188     List<SnapshotDescription> snapshotDescs = new ArrayList<SnapshotDescription>();
189     // first create the snapshot root path and check to see if it exists
190     Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
191     FileSystem fs = master.getMasterFileSystem().getFileSystem();
192 
193     // if there are no snapshots, return an empty list
194     if (!fs.exists(snapshotDir)) {
195       return snapshotDescs;
196     }
197 
198     // ignore all the snapshots in progress
199     FileStatus[] snapshots = fs.listStatus(snapshotDir,
200       new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
201     // loop through all the completed snapshots
202     for (FileStatus snapshot : snapshots) {
203       Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
204       // if the snapshot is bad
205       if (!fs.exists(info)) {
206         LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist");
207         continue;
208       }
209       FSDataInputStream in = null;
210       try {
211         in = fs.open(info);
212         SnapshotDescription desc = SnapshotDescription.parseFrom(in);
213         snapshotDescs.add(desc);
214       } catch (IOException e) {
215         LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e);
216       } finally {
217         if (in != null) {
218           in.close();
219         }
220       }
221     }
222     return snapshotDescs;
223   }
224 
225   /**
226    * Cleans up any snapshots in the snapshot/.tmp directory that were left from failed
227    * snapshot attempts.
228    *
229    * @throws IOException if we can't reach the filesystem
230    */
231   void resetTempDir() throws IOException {
232     // cleanup any existing snapshots.
233     Path tmpdir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir);
234     if (!master.getMasterFileSystem().getFileSystem().delete(tmpdir, true)) {
235       LOG.warn("Couldn't delete working snapshot directory: " + tmpdir);
236     }
237   }
238 
239   /**
240    * Delete the specified snapshot
241    * @param snapshot
242    * @throws SnapshotDoesNotExistException If the specified snapshot does not exist.
243    * @throws IOException For filesystem IOExceptions
244    */
245   public void deleteSnapshot(SnapshotDescription snapshot) throws SnapshotDoesNotExistException, IOException {
246 
247     // call coproc pre hook
248     MasterCoprocessorHost cpHost = master.getCoprocessorHost();
249     if (cpHost != null) {
250       cpHost.preDeleteSnapshot(snapshot);
251     }
252 
253     // check to see if it is completed
254     if (!isSnapshotCompleted(snapshot)) {
255       throw new SnapshotDoesNotExistException(snapshot);
256     }
257 
258     String snapshotName = snapshot.getName();
259     LOG.debug("Deleting snapshot: " + snapshotName);
260     // first create the snapshot description and check to see if it exists
261     MasterFileSystem fs = master.getMasterFileSystem();
262     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
263 
264     // delete the existing snapshot
265     if (!fs.getFileSystem().delete(snapshotDir, true)) {
266       throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir);
267     }
268 
269     // call coproc post hook
270     if (cpHost != null) {
271       cpHost.postDeleteSnapshot(snapshot);
272     }
273 
274   }
275 
276   /**
277    * Return the handler if it is currently running and has the same snapshot target name.
278    * @param snapshot
279    * @return null if doesn't match, else a live handler.
280    */
281   private synchronized TakeSnapshotHandler getTakeSnapshotHandler(SnapshotDescription snapshot) {
282     TakeSnapshotHandler h = this.handler;
283     if (h == null) {
284       return null;
285     }
286 
287     if (!h.getSnapshot().getName().equals(snapshot.getName())) {
288       // specified snapshot is to the one currently running
289       return null;
290     }
291 
292     return h;
293   }
294 
295   /**
296    * Check if the specified snapshot is done
297    * @param expected
298    * @return true if snapshot is ready to be restored, false if it is still being taken.
299    * @throws IOException IOException if error from HDFS or RPC
300    * @throws UnknownSnapshotException if snapshot is invalid or does not exist.
301    */
302   public boolean isSnapshotDone(SnapshotDescription expected) throws IOException {
303     // check the request to make sure it has a snapshot
304     if (expected == null) {
305       throw new UnknownSnapshotException(
306          "No snapshot name passed in request, can't figure out which snapshot you want to check.");
307     }
308 
309     String ssString = ClientSnapshotDescriptionUtils.toString(expected);
310 
311     // check to see if the sentinel exists
312     TakeSnapshotHandler handler = getTakeSnapshotHandler(expected);
313     if (handler == null) {
314       // doesn't exist, check if it is already completely done.
315       if (!isSnapshotCompleted(expected)) {
316         throw new UnknownSnapshotException("Snapshot " + ssString
317             + " is not currently running or one of the known completed snapshots.");
318       }
319       // was done, return true;
320       return true;
321     }
322 
323     // pass on any failure we find in the sentinel
324     try {
325       handler.rethrowException();
326     } catch (ForeignException e) {
327       // Give some procedure info on an exception.
328       String status;
329       Procedure p = coordinator.getProcedure(expected.getName());
330       if (p != null) {
331         status = p.getStatus();
332       } else {
333         status = expected.getName() + " not found in proclist " + coordinator.getProcedureNames();
334       }
335       throw new HBaseSnapshotException("Snapshot " + ssString +  " had an error.  " + status, e,
336           expected);
337     }
338 
339     // check to see if we are done
340     if (handler.isFinished()) {
341       LOG.debug("Snapshot '" + ssString + "' has completed, notifying client.");
342       return true;
343     } else if (LOG.isDebugEnabled()) {
344       LOG.debug("Snapshoting '" + ssString + "' is still in progress!");
345     }
346     return false;
347   }
348 
349   /**
350    * Check to see if there are any snapshots in progress currently.  Currently we have a
351    * limitation only allowing a single snapshot attempt at a time.
352    * @return <tt>true</tt> if there any snapshots in progress, <tt>false</tt> otherwise
353    * @throws SnapshotCreationException if the snapshot failed
354    */
355   synchronized boolean isTakingSnapshot() throws SnapshotCreationException {
356     // TODO later when we handle multiple there would be a map with ssname to handler.
357     return handler != null && !handler.isFinished();
358   }
359 
360   /**
361    * Check to see if the specified table has a snapshot in progress.  Currently we have a
362    * limitation only allowing a single snapshot attempt at a time.
363    * @param tableName name of the table being snapshotted.
364    * @return <tt>true</tt> if there is a snapshot in progress on the specified table.
365    */
366   private boolean isTakingSnapshot(final String tableName) {
367     if (handler != null && handler.getSnapshot().getTable().equals(tableName)) {
368       return !handler.isFinished();
369     }
370     return false;
371   }
372 
373   /**
374    * Check to make sure that we are OK to run the passed snapshot. Checks to make sure that we
375    * aren't already running a snapshot.
376    * @param snapshot description of the snapshot we want to start
377    * @throws HBaseSnapshotException if the filesystem could not be prepared to start the snapshot
378    */
379   private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot)
380       throws HBaseSnapshotException {
381     FileSystem fs = master.getMasterFileSystem().getFileSystem();
382     Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
383 
384     // make sure we aren't already running a snapshot
385     if (isTakingSnapshot()) {
386       throw new SnapshotCreationException("Rejected taking "
387           + ClientSnapshotDescriptionUtils.toString(snapshot)
388           + " because we are already running another snapshot "
389           + ClientSnapshotDescriptionUtils.toString(this.handler.getSnapshot()), snapshot);
390     }
391 
392     // make sure we aren't running a restore on the same table
393     if (isRestoringTable(snapshot.getTable())) {
394       throw new SnapshotCreationException("Rejected taking "
395           + ClientSnapshotDescriptionUtils.toString(snapshot)
396           + " because we are already have a restore in progress on the same snapshot "
397           + ClientSnapshotDescriptionUtils.toString(this.handler.getSnapshot()), snapshot);
398     }
399 
400     try {
401       // delete the working directory, since we aren't running the snapshot. Likely leftovers
402       // from a failed attempt.
403       fs.delete(workingDir, true);
404 
405       // recreate the working directory for the snapshot
406       if (!fs.mkdirs(workingDir)) {
407         throw new SnapshotCreationException("Couldn't create working directory (" + workingDir
408             + ") for snapshot" , snapshot);
409       }
410     } catch (HBaseSnapshotException e) {
411       throw e;
412     } catch (IOException e) {
413       throw new SnapshotCreationException(
414           "Exception while checking to see if snapshot could be started.", e, snapshot);
415     }
416   }
417 
418   /**
419    * Take a snapshot of an enabled table.
420    * <p>
421    * The thread limitation on the executorService's thread pool for snapshots ensures the
422    * snapshot won't be started if there is another snapshot already running. Does
423    * <b>not</b> check to see if another snapshot of the same name already exists.
424    * @param snapshot description of the snapshot to take.
425    * @throws HBaseSnapshotException if the snapshot could not be started
426    */
427   private synchronized void snapshotEnabledTable(SnapshotDescription snapshot)
428       throws HBaseSnapshotException {
429     TakeSnapshotHandler handler;
430     try {
431       handler = new EnabledTableSnapshotHandler(snapshot, master, this).prepare();
432       this.executorService.submit(handler);
433       this.handler = handler;
434     } catch (Exception e) {
435       // cleanup the working directory by trying to delete it from the fs.
436       Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
437       try {
438         if (!this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) {
439           LOG.warn("Couldn't delete working directory (" + workingDir + " for snapshot:"
440               + ClientSnapshotDescriptionUtils.toString(snapshot));
441         }
442       } catch (IOException e1) {
443         LOG.warn("Couldn't delete working directory (" + workingDir + " for snapshot:" +
444             ClientSnapshotDescriptionUtils.toString(snapshot));
445       }
446       // fail the snapshot
447       throw new SnapshotCreationException("Could not build snapshot handler", e, snapshot);
448     }
449   }
450 
451   /**
452    * Take a snapshot based on the enabled/disabled state of the table.
453    *
454    * @param snapshot
455    * @throws HBaseSnapshotException when a snapshot specific exception occurs.
456    * @throws IOException when some sort of generic IO exception occurs.
457    */
458   public void takeSnapshot(SnapshotDescription snapshot) throws IOException {
459     // check to see if we already completed the snapshot
460     if (isSnapshotCompleted(snapshot)) {
461       throw new SnapshotExistsException("Snapshot '" + snapshot.getName()
462           + "' already stored on the filesystem.", snapshot);
463     }
464 
465     LOG.debug("No existing snapshot, attempting snapshot...");
466 
467     // check to see if the table exists
468     HTableDescriptor desc = null;
469     try {
470       desc = master.getTableDescriptors().get(snapshot.getTable());
471     } catch (FileNotFoundException e) {
472       String msg = "Table:" + snapshot.getTable() + " info doesn't exist!";
473       LOG.error(msg);
474       throw new SnapshotCreationException(msg, e, snapshot);
475     } catch (IOException e) {
476       throw new SnapshotCreationException("Error while geting table description for table "
477           + snapshot.getTable(), e, snapshot);
478     }
479     if (desc == null) {
480       throw new SnapshotCreationException("Table '" + snapshot.getTable()
481           + "' doesn't exist, can't take snapshot.", snapshot);
482     }
483 
484     // set the snapshot version, now that we are ready to take it
485     snapshot = snapshot.toBuilder().setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION)
486         .build();
487 
488     // call pre coproc hook
489     MasterCoprocessorHost cpHost = master.getCoprocessorHost();
490     if (cpHost != null) {
491       cpHost.preSnapshot(snapshot, desc);
492     }
493 
494     // setup the snapshot
495     prepareToTakeSnapshot(snapshot);
496 
497     // if the table is enabled, then have the RS run actually the snapshot work
498     AssignmentManager assignmentMgr = master.getAssignmentManager();
499     if (assignmentMgr.getZKTable().isEnabledTable(snapshot.getTable())) {
500       LOG.debug("Table enabled, starting distributed snapshot.");
501       snapshotEnabledTable(snapshot);
502       LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
503     }
504     // For disabled table, snapshot is created by the master
505     else if (assignmentMgr.getZKTable().isDisabledTable(snapshot.getTable())) {
506       LOG.debug("Table is disabled, running snapshot entirely on master.");
507       snapshotDisabledTable(snapshot);
508       LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
509     } else {
510       LOG.error("Can't snapshot table '" + snapshot.getTable()
511           + "', isn't open or closed, we don't know what to do!");
512       TablePartiallyOpenException tpoe = new TablePartiallyOpenException(snapshot.getTable()
513           + " isn't fully open.");
514       throw new SnapshotCreationException("Table is not entirely open or closed", tpoe, snapshot);
515     }
516 
517     // call post coproc hook
518     if (cpHost != null) {
519       cpHost.postSnapshot(snapshot, desc);
520     }
521   }
522 
523   /**
524    * Take a snapshot of a disabled table.
525    * <p>
526    * The thread limitation on the executorService's thread pool for snapshots ensures the
527    * snapshot won't be started if there is another snapshot already running. Does
528    * <b>not</b> check to see if another snapshot of the same name already exists.
529    * @param snapshot description of the snapshot to take. Modified to be {@link Type#DISABLED}.
530    * @throws HBaseSnapshotException if the snapshot could not be started
531    */
532   private synchronized void snapshotDisabledTable(SnapshotDescription snapshot)
533       throws HBaseSnapshotException {
534 
535     // set the snapshot to be a disabled snapshot, since the client doesn't know about that
536     snapshot = snapshot.toBuilder().setType(Type.DISABLED).build();
537 
538     DisabledTableSnapshotHandler handler;
539     try {
540       handler = new DisabledTableSnapshotHandler(snapshot, this.master).prepare();
541       this.executorService.submit(handler);
542       this.handler = handler;
543     } catch (Exception e) {
544       // cleanup the working directory by trying to delete it from the fs.
545       Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
546       try {
547         if (!this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) {
548           LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
549               ClientSnapshotDescriptionUtils.toString(snapshot));
550         }
551       } catch (IOException e1) {
552         LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
553             ClientSnapshotDescriptionUtils.toString(snapshot));
554       }
555       // fail the snapshot
556       throw new SnapshotCreationException("Could not build snapshot handler", e, snapshot);
557     }
558   }
559 
560   /**
561    * Set the handler for the current snapshot
562    * <p>
563    * Exposed for TESTING
564    * @param handler handler the master should use
565    *
566    * TODO get rid of this if possible, repackaging, modify tests.
567    */
568   public synchronized void setSnapshotHandlerForTesting(TakeSnapshotHandler handler) {
569     this.handler = handler;
570   }
571 
572   /**
573    * @return distributed commit coordinator for all running snapshots
574    */
575   ProcedureCoordinator getCoordinator() {
576     return coordinator;
577   }
578 
579   /**
580    * Check to see if the snapshot is one of the currently completed snapshots
581    * @param expected snapshot to check
582    * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is
583    *         not stored
584    * @throws IOException if the filesystem throws an unexpected exception,
585    * @throws IllegalArgumentException if snapshot name is invalid.
586    */
587   private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException {
588     try {
589       final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
590       FileSystem fs = master.getMasterFileSystem().getFileSystem();
591 
592       // check to see if the snapshot already exists
593       return fs.exists(snapshotDir);
594     } catch (IllegalArgumentException iae) {
595       throw new UnknownSnapshotException("Unexpected exception thrown", iae);
596     }
597   }
598 
599   /**
600    * Clone the specified snapshot into a new table.
601    * The operation will fail if the destination table has a snapshot or restore in progress.
602    *
603    * @param snapshot Snapshot Descriptor
604    * @param hTableDescriptor Table Descriptor of the table to create
605    * @param waitTime timeout before considering the clone failed
606    */
607   synchronized void cloneSnapshot(final SnapshotDescription snapshot,
608       final HTableDescriptor hTableDescriptor) throws HBaseSnapshotException {
609     String tableName = hTableDescriptor.getNameAsString();
610 
611     // make sure we aren't running a snapshot on the same table
612     if (isTakingSnapshot(tableName)) {
613       throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
614     }
615 
616     // make sure we aren't running a restore on the same table
617     if (isRestoringTable(tableName)) {
618       throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
619     }
620 
621     try {
622       CloneSnapshotHandler handler =
623         new CloneSnapshotHandler(master, snapshot, hTableDescriptor).prepare();
624       this.executorService.submit(handler);
625       restoreHandlers.put(tableName, handler);
626     } catch (Exception e) {
627       String msg = "Couldn't clone the snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
628         " on table=" + tableName;
629       LOG.error(msg, e);
630       throw new RestoreSnapshotException(msg, e);
631     }
632   }
633 
634   /**
635    * Restore the specified snapshot
636    * @param reqSnapshot
637    * @throws IOException
638    */
639   public void restoreSnapshot(SnapshotDescription reqSnapshot) throws IOException {
640     FileSystem fs = master.getMasterFileSystem().getFileSystem();
641     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir);
642     MasterCoprocessorHost cpHost = master.getCoprocessorHost();
643 
644     // check if the snapshot exists
645     if (!fs.exists(snapshotDir)) {
646       LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist.");
647       throw new SnapshotDoesNotExistException(reqSnapshot);
648     }
649 
650     // read snapshot information
651     SnapshotDescription fsSnapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
652     HTableDescriptor snapshotTableDesc = FSTableDescriptors.getTableDescriptor(fs, snapshotDir);
653     String tableName = reqSnapshot.getTable();
654 
655     // stop tracking completed restores
656     cleanupRestoreSentinels();
657 
658     // Execute the restore/clone operation
659     if (MetaReader.tableExists(master.getCatalogTracker(), tableName)) {
660       if (master.getAssignmentManager().getZKTable().isEnabledTable(fsSnapshot.getTable())) {
661         throw new UnsupportedOperationException("Table '" +
662           fsSnapshot.getTable() + "' must be disabled in order to perform a restore operation.");
663       }
664 
665       // call coproc pre hook
666       if (cpHost != null) {
667         cpHost.preRestoreSnapshot(reqSnapshot, snapshotTableDesc);
668       }
669       restoreSnapshot(fsSnapshot, snapshotTableDesc);
670       LOG.info("Restore snapshot=" + fsSnapshot.getName() + " as table=" + tableName);
671 
672       if (cpHost != null) {
673         cpHost.postRestoreSnapshot(reqSnapshot, snapshotTableDesc);
674       }
675     } else {
676       HTableDescriptor htd = RestoreSnapshotHelper.cloneTableSchema(snapshotTableDesc,
677                                                          Bytes.toBytes(tableName));
678       if (cpHost != null) {
679         cpHost.preCloneSnapshot(reqSnapshot, htd);
680       }
681       cloneSnapshot(fsSnapshot, htd);
682       LOG.info("Clone snapshot=" + fsSnapshot.getName() + " as table=" + tableName);
683 
684       if (cpHost != null) {
685         cpHost.postCloneSnapshot(reqSnapshot, htd);
686       }
687     }
688   }
689 
690   /**
691    * Restore the specified snapshot.
692    * The restore will fail if the destination table has a snapshot or restore in progress.
693    *
694    * @param snapshot Snapshot Descriptor
695    * @param hTableDescriptor Table Descriptor
696    * @param waitTime timeout before considering the restore failed
697    */
698   private synchronized void restoreSnapshot(final SnapshotDescription snapshot,
699       final HTableDescriptor hTableDescriptor) throws HBaseSnapshotException {
700     String tableName = hTableDescriptor.getNameAsString();
701 
702     // make sure we aren't running a snapshot on the same table
703     if (isTakingSnapshot(tableName)) {
704       throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
705     }
706 
707     // make sure we aren't running a restore on the same table
708     if (isRestoringTable(tableName)) {
709       throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
710     }
711 
712     try {
713       RestoreSnapshotHandler handler =
714         new RestoreSnapshotHandler(master, snapshot, hTableDescriptor);
715       this.executorService.submit(handler);
716       restoreHandlers.put(hTableDescriptor.getNameAsString(), handler);
717     } catch (Exception e) {
718       String msg = "Couldn't restore the snapshot=" + ClientSnapshotDescriptionUtils.toString(
719           snapshot)  +
720           " on table=" + tableName;
721       LOG.error(msg, e);
722       throw new RestoreSnapshotException(msg, e);
723     }
724   }
725 
726   /**
727    * Verify if the restore of the specified table is in progress.
728    *
729    * @param tableName table under restore
730    * @return <tt>true</tt> if there is a restore in progress of the specified table.
731    */
732   private boolean isRestoringTable(final String tableName) {
733     SnapshotSentinel sentinel = restoreHandlers.get(tableName);
734     return(sentinel != null && !sentinel.isFinished());
735   }
736 
737   /**
738    * Returns status of a restore request, specifically comparing source snapshot and target table
739    * names.  Throws exception if not a known snapshot.
740    * @param snapshot
741    * @return true if in progress, false if snapshot is completed.
742    * @throws UnknownSnapshotException if specified source snapshot does not exit.
743    * @throws IOException if there was some sort of IO failure
744    */
745   public boolean isRestoringTable(final SnapshotDescription snapshot) throws IOException {
746     // check to see if the snapshot is already on the fs
747     if (!isSnapshotCompleted(snapshot)) {
748       throw new UnknownSnapshotException("Snapshot:" + snapshot.getName()
749           + " is not one of the known completed snapshots.");
750     }
751 
752     SnapshotSentinel sentinel = getRestoreSnapshotSentinel(snapshot.getTable());
753     if (sentinel == null) {
754       // there is no sentinel so restore is not in progress.
755       return false;
756     }
757     if (!sentinel.getSnapshot().getName().equals(snapshot.getName())) {
758       // another handler is trying to restore to the table, but it isn't the same snapshot source.
759       return false;
760     }
761 
762     LOG.debug("Verify snapshot=" + snapshot.getName() + " against="
763         + sentinel.getSnapshot().getName() + " table=" + snapshot.getTable());
764     ForeignException e = sentinel.getExceptionIfFailed();
765     if (e != null) throw e;
766 
767     // check to see if we are done
768     if (sentinel.isFinished()) {
769       LOG.debug("Restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
770           " has completed. Notifying the client.");
771       return false;
772     }
773 
774     if (LOG.isDebugEnabled()) {
775       LOG.debug("Sentinel is not yet finished with restoring snapshot=" +
776           ClientSnapshotDescriptionUtils.toString(snapshot));
777     }
778     return true;
779   }
780 
781   /**
782    * Get the restore snapshot sentinel for the specified table
783    * @param tableName table under restore
784    * @return the restore snapshot handler
785    */
786   private synchronized SnapshotSentinel getRestoreSnapshotSentinel(final String tableName) {
787     try {
788       return restoreHandlers.get(tableName);
789     } finally {
790       cleanupRestoreSentinels();
791     }
792   }
793 
794   /**
795    * Scan the restore handlers and remove the finished ones.
796    */
797   private synchronized void cleanupRestoreSentinels() {
798     Iterator<Map.Entry<String, SnapshotSentinel>> it = restoreHandlers.entrySet().iterator();
799     while (it.hasNext()) {
800         Map.Entry<String, SnapshotSentinel> entry = it.next();
801         SnapshotSentinel sentinel = entry.getValue();
802         if (sentinel.isFinished()) {
803           it.remove();
804         }
805     }
806   }
807 
808   //
809   // Implementing Stoppable interface
810   //
811 
812   @Override
813   public void stop(String why) {
814     // short circuit
815     if (this.stopped) return;
816     // make sure we get stop
817     this.stopped = true;
818     // pass the stop onto take snapshot handlers
819     if (this.handler != null) this.handler.cancel(why);
820 
821     // pass the stop onto all the restore handlers
822     for (SnapshotSentinel restoreHandler: this.restoreHandlers.values()) {
823       restoreHandler.cancel(why);
824     }
825   }
826 
827   @Override
828   public boolean isStopped() {
829     return this.stopped;
830   }
831 
832   /**
833    * Throws an exception if snapshot operations (take a snapshot, restore, clone) are not supported.
834    * Called at the beginning of snapshot() and restoreSnapshot() methods.
835    * @throws UnsupportedOperationException if snapshot are not supported
836    */
837   public void checkSnapshotSupport() throws UnsupportedOperationException {
838     if (!this.isSnapshotSupported) {
839       throw new UnsupportedOperationException(
840         "To use snapshots, You must add to the hbase-site.xml of the HBase Master: '" +
841           HBASE_SNAPSHOT_ENABLED + "' property with value 'true'.");
842     }
843   }
844 
845   /**
846    * Called at startup, to verify if snapshot operation is supported, and to avoid
847    * starting the master if there're snapshots present but the cleaners needed are missing.
848    * Otherwise we can end up with snapshot data loss.
849    * @param conf The {@link Configuration} object to use
850    * @param mfs The MasterFileSystem to use
851    * @throws IOException in case of file-system operation failure
852    * @throws UnsupportedOperationException in case cleaners are missing and
853    *         there're snapshot in the system
854    */
855   private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs)
856       throws IOException, UnsupportedOperationException {
857     // Verify if snapshot is disabled by the user
858     String enabled = conf.get(HBASE_SNAPSHOT_ENABLED);
859     boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false);
860     boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled);
861 
862     // Extract cleaners from conf
863     Set<String> hfileCleaners = new HashSet<String>();
864     String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
865     if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
866 
867     Set<String> logCleaners = new HashSet<String>();
868     cleaners = conf.getStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
869     if (cleaners != null) Collections.addAll(logCleaners, cleaners);
870 
871     // If the user has enabled the snapshot, we force the cleaners to be present
872     // otherwise we still need to check if cleaners are enabled or not and verify
873     // that there're no snapshot in the .snapshot folder.
874     if (snapshotEnabled) {
875       // Inject snapshot cleaners, if snapshot.enable is true
876       hfileCleaners.add(SnapshotHFileCleaner.class.getName());
877       hfileCleaners.add(HFileLinkCleaner.class.getName());
878       logCleaners.add(SnapshotLogCleaner.class.getName());
879 
880       // Set cleaners conf
881       conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
882         hfileCleaners.toArray(new String[hfileCleaners.size()]));
883       conf.setStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS,
884         logCleaners.toArray(new String[logCleaners.size()]));
885     } else {
886       // Verify if cleaners are present
887       snapshotEnabled = logCleaners.contains(SnapshotLogCleaner.class.getName()) &&
888         hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) &&
889         hfileCleaners.contains(HFileLinkCleaner.class.getName());
890 
891       // Warn if the cleaners are enabled but the snapshot.enabled property is false/not set.
892       if (snapshotEnabled) {
893         LOG.warn("Snapshot log and hfile cleaners are present in the configuration, " +
894           "but the '" + HBASE_SNAPSHOT_ENABLED + "' property " +
895           (userDisabled ? "is set to 'false'." : "is not set."));
896       }
897     }
898 
899     // Mark snapshot feature as enabled if cleaners are present and user has not disabled it.
900     this.isSnapshotSupported = snapshotEnabled && !userDisabled;
901 
902     // If cleaners are not enabled, verify that there're no snapshot in the .snapshot folder
903     // otherwise we end up with snapshot data loss.
904     if (!snapshotEnabled) {
905       LOG.info("Snapshot feature is not enabled, missing log and hfile cleaners.");
906       Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(mfs.getRootDir());
907       FileSystem fs = mfs.getFileSystem();
908       if (fs.exists(snapshotDir)) {
909         FileStatus[] snapshots = FSUtils.listStatus(fs, snapshotDir,
910           new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
911         if (snapshots != null) {
912           LOG.error("Snapshots are present, but cleaners are not enabled.");
913           checkSnapshotSupport();
914         }
915       }
916     }
917   }
918 }