View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.net.URI;
24  import java.util.ArrayList;
25  import java.util.Collections;
26  import java.util.Comparator;
27  import java.util.LinkedList;
28  import java.util.List;
29  import java.util.Random;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.classification.InterfaceAudience;
34  import org.apache.hadoop.classification.InterfaceStability;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.conf.Configured;
37  import org.apache.hadoop.fs.FSDataInputStream;
38  import org.apache.hadoop.fs.FSDataOutputStream;
39  import org.apache.hadoop.fs.FileChecksum;
40  import org.apache.hadoop.fs.FileStatus;
41  import org.apache.hadoop.fs.FileSystem;
42  import org.apache.hadoop.fs.FileUtil;
43  import org.apache.hadoop.fs.Path;
44  import org.apache.hadoop.fs.permission.FsPermission;
45  import org.apache.hadoop.hbase.TableName;
46  import org.apache.hadoop.hbase.HBaseConfiguration;
47  import org.apache.hadoop.hbase.HConstants;
48  import org.apache.hadoop.hbase.io.HFileLink;
49  import org.apache.hadoop.hbase.io.HLogLink;
50  import org.apache.hadoop.hbase.mapreduce.JobUtil;
51  import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
52  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
53  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
54  import org.apache.hadoop.hbase.security.UserProvider;
55  import org.apache.hadoop.hbase.security.token.FsDelegationToken;
56  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
57  import org.apache.hadoop.hbase.util.FSUtils;
58  import org.apache.hadoop.hbase.util.Pair;
59  import org.apache.hadoop.io.NullWritable;
60  import org.apache.hadoop.io.SequenceFile;
61  import org.apache.hadoop.io.Text;
62  import org.apache.hadoop.mapreduce.Job;
63  import org.apache.hadoop.mapreduce.Mapper;
64  import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
65  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
66  import org.apache.hadoop.util.StringUtils;
67  import org.apache.hadoop.util.Tool;
68  import org.apache.hadoop.util.ToolRunner;
69  
70  /**
71   * Export the specified snapshot to a given FileSystem.
72   *
73   * The .snapshot/name folder is copied to the destination cluster
74   * and then all the hfiles/hlogs are copied using a Map-Reduce Job in the .archive/ location.
75   * When everything is done, the second cluster can restore the snapshot.
76   */
77  @InterfaceAudience.Public
78  @InterfaceStability.Evolving
79  public final class ExportSnapshot extends Configured implements Tool {
80    private static final Log LOG = LogFactory.getLog(ExportSnapshot.class);
81  
82    private static final String CONF_FILES_USER = "snapshot.export.files.attributes.user";
83    private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group";
84    private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode";
85    private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify";
86    private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root";
87    private static final String CONF_INPUT_ROOT = "snapshot.export.input.root";
88    private static final String CONF_BUFFER_SIZE = "snapshot.export.buffer.size";
89    private static final String CONF_MAP_GROUP = "snapshot.export.default.map.group";
90  
91    static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
92    static final String CONF_TEST_RETRY = "test.snapshot.export.failure.retry";
93  
94    private static final String INPUT_FOLDER_PREFIX = "export-files.";
95  
96    // Export Map-Reduce Counters, to keep track of the progress
97    public enum Counter { MISSING_FILES, COPY_FAILED, BYTES_EXPECTED, BYTES_COPIED, FILES_COPIED };
98  
99    private static class ExportMapper extends Mapper<Text, NullWritable, NullWritable, NullWritable> {
100     final static int REPORT_SIZE = 1 * 1024 * 1024;
101     final static int BUFFER_SIZE = 64 * 1024;
102 
103     private boolean testFailures;
104     private Random random;
105 
106     private boolean verifyChecksum;
107     private String filesGroup;
108     private String filesUser;
109     private short filesMode;
110     private int bufferSize;
111 
112     private FileSystem outputFs;
113     private Path outputArchive;
114     private Path outputRoot;
115 
116     private FileSystem inputFs;
117     private Path inputArchive;
118     private Path inputRoot;
119 
120     @Override
121     public void setup(Context context) throws IOException {
122       Configuration conf = context.getConfiguration();
123       verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);
124 
125       filesGroup = conf.get(CONF_FILES_GROUP);
126       filesUser = conf.get(CONF_FILES_USER);
127       filesMode = (short)conf.getInt(CONF_FILES_MODE, 0);
128       outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
129       inputRoot = new Path(conf.get(CONF_INPUT_ROOT));
130 
131       inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
132       outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
133 
134       testFailures = conf.getBoolean(CONF_TEST_FAILURE, false);
135 
136       try {
137         inputFs = FileSystem.get(inputRoot.toUri(), conf);
138       } catch (IOException e) {
139         throw new IOException("Could not get the input FileSystem with root=" + inputRoot, e);
140       }
141 
142       try {
143         outputFs = FileSystem.get(outputRoot.toUri(), conf);
144       } catch (IOException e) {
145         throw new IOException("Could not get the output FileSystem with root="+ outputRoot, e);
146       }
147 
148       // Use the default block size of the outputFs if bigger
149       int defaultBlockSize = Math.max((int) outputFs.getDefaultBlockSize(), BUFFER_SIZE);
150       bufferSize = conf.getInt(CONF_BUFFER_SIZE, defaultBlockSize);
151       LOG.info("Using bufferSize=" + StringUtils.humanReadableInt(bufferSize));
152     }
153 
154     @Override
155     public void map(Text key, NullWritable value, Context context)
156         throws InterruptedException, IOException {
157       Path inputPath = new Path(key.toString());
158       Path outputPath = getOutputPath(inputPath);
159 
160       LOG.info("copy file input=" + inputPath + " output=" + outputPath);
161       copyFile(context, inputPath, outputPath);
162     }
163 
164     /**
165      * Returns the location where the inputPath will be copied.
166      *  - hfiles are encoded as hfile links hfile-region-table
167      *  - logs are encoded as serverName/logName
168      */
169     private Path getOutputPath(final Path inputPath) throws IOException {
170       Path path;
171       if (HFileLink.isHFileLink(inputPath) || StoreFileInfo.isReference(inputPath)) {
172         String family = inputPath.getParent().getName();
173         TableName table =
174             HFileLink.getReferencedTableName(inputPath.getName());
175         String region = HFileLink.getReferencedRegionName(inputPath.getName());
176         String hfile = HFileLink.getReferencedHFileName(inputPath.getName());
177         path = new Path(FSUtils.getTableDir(new Path("./"), table),
178             new Path(region, new Path(family, hfile)));
179       } else if (isHLogLinkPath(inputPath)) {
180         String logName = inputPath.getName();
181         path = new Path(new Path(outputRoot, HConstants.HREGION_OLDLOGDIR_NAME), logName);
182       } else {
183         path = inputPath;
184       }
185       return new Path(outputArchive, path);
186     }
187 
188     /*
189      * Used by TestExportSnapshot to simulate a failure
190      */
191     private void injectTestFailure(final Context context, final Path inputPath)
192         throws IOException {
193       if (testFailures) {
194         if (context.getConfiguration().getBoolean(CONF_TEST_RETRY, false)) {
195           if (random == null) {
196             random = new Random();
197           }
198 
199           // FLAKY-TEST-WARN: lower is better, we can get some runs without the
200           // retry, but at least we reduce the number of test failures due to
201           // this test exception from the same map task.
202           if (random.nextFloat() < 0.03) {
203             throw new IOException("TEST RETRY FAILURE: Unable to copy input=" + inputPath
204                                   + " time=" + System.currentTimeMillis());
205           }
206         } else {
207           context.getCounter(Counter.COPY_FAILED).increment(1);
208           throw new IOException("TEST FAILURE: Unable to copy input=" + inputPath);
209         }
210       }
211     }
212 
213     private void copyFile(final Context context, final Path inputPath, final Path outputPath)
214         throws IOException {
215       injectTestFailure(context, inputPath);
216 
217       // Get the file information
218       FileStatus inputStat = getSourceFileStatus(context, inputPath);
219 
220       // Verify if the output file exists and is the same that we want to copy
221       if (outputFs.exists(outputPath)) {
222         FileStatus outputStat = outputFs.getFileStatus(outputPath);
223         if (outputStat != null && sameFile(inputStat, outputStat)) {
224           LOG.info("Skip copy " + inputPath + " to " + outputPath + ", same file.");
225           return;
226         }
227       }
228 
229       FSDataInputStream in = openSourceFile(context, inputPath);
230       try {
231         context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen());
232 
233         // Ensure that the output folder is there and copy the file
234         outputFs.mkdirs(outputPath.getParent());
235         FSDataOutputStream out = outputFs.create(outputPath, true);
236         try {
237           copyData(context, inputPath, in, outputPath, out, inputStat.getLen());
238         } finally {
239           out.close();
240         }
241 
242         // Try to Preserve attributes
243         if (!preserveAttributes(outputPath, inputStat)) {
244           LOG.warn("You may have to run manually chown on: " + outputPath);
245         }
246       } finally {
247         in.close();
248       }
249     }
250 
251     /**
252      * Try to Preserve the files attribute selected by the user copying them from the source file
253      * This is only required when you are exporting as a different user than "hbase" or on a system
254      * that doesn't have the "hbase" user.
255      *
256      * This is not considered a blocking failure since the user can force a chmod with the user
257      * that knows is available on the system.
258      */
259     private boolean preserveAttributes(final Path path, final FileStatus refStat) {
260       FileStatus stat;
261       try {
262         stat = outputFs.getFileStatus(path);
263       } catch (IOException e) {
264         LOG.warn("Unable to get the status for file=" + path);
265         return false;
266       }
267 
268       try {
269         if (filesMode > 0 && stat.getPermission().toShort() != filesMode) {
270           outputFs.setPermission(path, new FsPermission(filesMode));
271         } else if (!stat.getPermission().equals(refStat.getPermission())) {
272           outputFs.setPermission(path, refStat.getPermission());
273         }
274       } catch (IOException e) {
275         LOG.warn("Unable to set the permission for file="+ stat.getPath() +": "+ e.getMessage());
276         return false;
277       }
278 
279       String user = stringIsNotEmpty(filesUser) ? filesUser : refStat.getOwner();
280       String group = stringIsNotEmpty(filesGroup) ? filesGroup : refStat.getGroup();
281       if (stringIsNotEmpty(user) || stringIsNotEmpty(group)) {
282         try {
283           if (!(user.equals(stat.getOwner()) && group.equals(stat.getGroup()))) {
284             outputFs.setOwner(path, user, group);
285           }
286         } catch (IOException e) {
287           LOG.warn("Unable to set the owner/group for file="+ stat.getPath() +": "+ e.getMessage());
288           LOG.warn("The user/group may not exist on the destination cluster: user=" +
289                    user + " group=" + group);
290           return false;
291         }
292       }
293 
294       return true;
295     }
296 
297     private boolean stringIsNotEmpty(final String str) {
298       return str != null && str.length() > 0;
299     }
300 
301     private void copyData(final Context context,
302         final Path inputPath, final FSDataInputStream in,
303         final Path outputPath, final FSDataOutputStream out,
304         final long inputFileSize)
305         throws IOException {
306       final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) +
307                                    " (%.1f%%)";
308 
309       try {
310         byte[] buffer = new byte[bufferSize];
311         long totalBytesWritten = 0;
312         int reportBytes = 0;
313         int bytesRead;
314 
315         long stime = System.currentTimeMillis();
316         while ((bytesRead = in.read(buffer)) > 0) {
317           out.write(buffer, 0, bytesRead);
318           totalBytesWritten += bytesRead;
319           reportBytes += bytesRead;
320 
321           if (reportBytes >= REPORT_SIZE) {
322             context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
323             context.setStatus(String.format(statusMessage,
324                               StringUtils.humanReadableInt(totalBytesWritten),
325                               (totalBytesWritten/(float)inputFileSize) * 100.0f) +
326                               " from " + inputPath + " to " + outputPath);
327             reportBytes = 0;
328           }
329         }
330         long etime = System.currentTimeMillis();
331 
332         context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
333         context.setStatus(String.format(statusMessage,
334                           StringUtils.humanReadableInt(totalBytesWritten),
335                           (totalBytesWritten/(float)inputFileSize) * 100.0f) +
336                           " from " + inputPath + " to " + outputPath);
337 
338         // Verify that the written size match
339         if (totalBytesWritten != inputFileSize) {
340           String msg = "number of bytes copied not matching copied=" + totalBytesWritten +
341                        " expected=" + inputFileSize + " for file=" + inputPath;
342           throw new IOException(msg);
343         }
344 
345         LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
346         LOG.info("size=" + totalBytesWritten +
347             " (" + StringUtils.humanReadableInt(totalBytesWritten) + ")" +
348             " time=" + StringUtils.formatTimeDiff(etime, stime) +
349             String.format(" %.3fM/sec", (totalBytesWritten / ((etime - stime)/1000.0))/1048576.0));
350         context.getCounter(Counter.FILES_COPIED).increment(1);
351       } catch (IOException e) {
352         LOG.error("Error copying " + inputPath + " to " + outputPath, e);
353         context.getCounter(Counter.COPY_FAILED).increment(1);
354         throw e;
355       }
356     }
357 
358     /**
359      * Try to open the "source" file.
360      * Throws an IOException if the communication with the inputFs fail or
361      * if the file is not found.
362      */
363     private FSDataInputStream openSourceFile(Context context, final Path path) throws IOException {
364       try {
365         if (HFileLink.isHFileLink(path) || StoreFileInfo.isReference(path)) {
366           return new HFileLink(inputRoot, inputArchive, path).open(inputFs);
367         } else if (isHLogLinkPath(path)) {
368           String serverName = path.getParent().getName();
369           String logName = path.getName();
370           return new HLogLink(inputRoot, serverName, logName).open(inputFs);
371         }
372         return inputFs.open(path);
373       } catch (IOException e) {
374         context.getCounter(Counter.MISSING_FILES).increment(1);
375         LOG.error("Unable to open source file=" + path, e);
376         throw e;
377       }
378     }
379 
380     private FileStatus getSourceFileStatus(Context context, final Path path) throws IOException {
381       try {
382         if (HFileLink.isHFileLink(path) || StoreFileInfo.isReference(path)) {
383           HFileLink link = new HFileLink(inputRoot, inputArchive, path);
384           return link.getFileStatus(inputFs);
385         } else if (isHLogLinkPath(path)) {
386           String serverName = path.getParent().getName();
387           String logName = path.getName();
388           return new HLogLink(inputRoot, serverName, logName).getFileStatus(inputFs);
389         }
390         return inputFs.getFileStatus(path);
391       } catch (FileNotFoundException e) {
392         context.getCounter(Counter.MISSING_FILES).increment(1);
393         LOG.error("Unable to get the status for source file=" + path, e);
394         throw e;
395       } catch (IOException e) {
396         LOG.error("Unable to get the status for source file=" + path, e);
397         throw e;
398       }
399     }
400 
401     private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
402       try {
403         return fs.getFileChecksum(path);
404       } catch (IOException e) {
405         LOG.warn("Unable to get checksum for file=" + path, e);
406         return null;
407       }
408     }
409 
410     /**
411      * Check if the two files are equal by looking at the file length,
412      * and at the checksum (if user has specified the verifyChecksum flag).
413      */
414     private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) {
415       // Not matching length
416       if (inputStat.getLen() != outputStat.getLen()) return false;
417 
418       // Mark files as equals, since user asked for no checksum verification
419       if (!verifyChecksum) return true;
420 
421       // If checksums are not available, files are not the same.
422       FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
423       if (inChecksum == null) return false;
424 
425       FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
426       if (outChecksum == null) return false;
427 
428       return inChecksum.equals(outChecksum);
429     }
430 
431     /**
432      * HLog files are encoded as serverName/logName
433      * and since all the other files should be in /hbase/table/..path..
434      * we can rely on the depth, for now.
435      */
436     private static boolean isHLogLinkPath(final Path path) {
437       return path.depth() == 2;
438     }
439   }
440 
441   /**
442    * Extract the list of files (HFiles/HLogs) to copy using Map-Reduce.
443    * @return list of files referenced by the snapshot (pair of path and size)
444    */
445   private List<Pair<Path, Long>> getSnapshotFiles(final FileSystem fs, final Path snapshotDir)
446       throws IOException {
447     SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
448 
449     final List<Pair<Path, Long>> files = new ArrayList<Pair<Path, Long>>();
450     final TableName table =
451         TableName.valueOf(snapshotDesc.getTable());
452     final Configuration conf = getConf();
453 
454     // Get snapshot files
455     SnapshotReferenceUtil.visitReferencedFiles(fs, snapshotDir,
456       new SnapshotReferenceUtil.FileVisitor() {
457         public void storeFile (final String region, final String family, final String hfile)
458             throws IOException {
459           Path path = HFileLink.createPath(table, region, family, hfile);
460           long size = new HFileLink(conf, path).getFileStatus(fs).getLen();
461           files.add(new Pair<Path, Long>(path, size));
462         }
463 
464         public void recoveredEdits (final String region, final String logfile)
465             throws IOException {
466           // copied with the snapshot referenecs
467         }
468 
469         public void logFile (final String server, final String logfile)
470             throws IOException {
471           long size = new HLogLink(conf, server, logfile).getFileStatus(fs).getLen();
472           files.add(new Pair<Path, Long>(new Path(server, logfile), size));
473         }
474     });
475 
476     return files;
477   }
478 
479   /**
480    * Given a list of file paths and sizes, create around ngroups in as balanced a way as possible.
481    * The groups created will have similar amounts of bytes.
482    * <p>
483    * The algorithm used is pretty straightforward; the file list is sorted by size,
484    * and then each group fetch the bigger file available, iterating through groups
485    * alternating the direction.
486    */
487   static List<List<Path>> getBalancedSplits(final List<Pair<Path, Long>> files, int ngroups) {
488     // Sort files by size, from small to big
489     Collections.sort(files, new Comparator<Pair<Path, Long>>() {
490       public int compare(Pair<Path, Long> a, Pair<Path, Long> b) {
491         long r = a.getSecond() - b.getSecond();
492         return (r < 0) ? -1 : ((r > 0) ? 1 : 0);
493       }
494     });
495 
496     // create balanced groups
497     List<List<Path>> fileGroups = new LinkedList<List<Path>>();
498     long[] sizeGroups = new long[ngroups];
499     int hi = files.size() - 1;
500     int lo = 0;
501 
502     List<Path> group;
503     int dir = 1;
504     int g = 0;
505 
506     while (hi >= lo) {
507       if (g == fileGroups.size()) {
508         group = new LinkedList<Path>();
509         fileGroups.add(group);
510       } else {
511         group = fileGroups.get(g);
512       }
513 
514       Pair<Path, Long> fileInfo = files.get(hi--);
515 
516       // add the hi one
517       sizeGroups[g] += fileInfo.getSecond();
518       group.add(fileInfo.getFirst());
519 
520       // change direction when at the end or the beginning
521       g += dir;
522       if (g == ngroups) {
523         dir = -1;
524         g = ngroups - 1;
525       } else if (g < 0) {
526         dir = 1;
527         g = 0;
528       }
529     }
530 
531     if (LOG.isDebugEnabled()) {
532       for (int i = 0; i < sizeGroups.length; ++i) {
533         LOG.debug("export split=" + i + " size=" + StringUtils.humanReadableInt(sizeGroups[i]));
534       }
535     }
536 
537     return fileGroups;
538   }
539 
540   private static Path getInputFolderPath(Configuration conf)
541       throws IOException, InterruptedException {
542     Path stagingDir = JobUtil.getStagingDir(conf);
543     return new Path(stagingDir, INPUT_FOLDER_PREFIX +
544       String.valueOf(EnvironmentEdgeManager.currentTimeMillis()));
545   }
546 
547   /**
548    * Create the input files, with the path to copy, for the MR job.
549    * Each input files contains n files, and each input file has a similar amount data to copy.
550    * The number of input files created are based on the number of mappers provided as argument
551    * and the number of the files to copy.
552    */
553   private static Path[] createInputFiles(final Configuration conf, final Path inputFolderPath,
554       final List<Pair<Path, Long>> snapshotFiles, int mappers)
555       throws IOException, InterruptedException {
556     FileSystem fs = inputFolderPath.getFileSystem(conf);
557     LOG.debug("Input folder location: " + inputFolderPath);
558 
559     List<List<Path>> splits = getBalancedSplits(snapshotFiles, mappers);
560     Path[] inputFiles = new Path[splits.size()];
561 
562     Text key = new Text();
563     for (int i = 0; i < inputFiles.length; i++) {
564       List<Path> files = splits.get(i);
565       inputFiles[i] = new Path(inputFolderPath, String.format("export-%d.seq", i));
566       SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inputFiles[i],
567         Text.class, NullWritable.class);
568       LOG.debug("Input split: " + i);
569       try {
570         for (Path file: files) {
571           LOG.debug(file.toString());
572           key.set(file.toString());
573           writer.append(key, NullWritable.get());
574         }
575       } finally {
576         writer.close();
577       }
578     }
579 
580     return inputFiles;
581   }
582 
583   /**
584    * Run Map-Reduce Job to perform the files copy.
585    */
586   private void runCopyJob(final FileSystem inputFs, final Path inputRoot,
587       final FileSystem outputFs, final Path outputRoot,
588       final List<Pair<Path, Long>> snapshotFiles, final boolean verifyChecksum,
589       final String filesUser, final String filesGroup, final int filesMode,
590       final int mappers) throws IOException, InterruptedException, ClassNotFoundException {
591     Configuration conf = getConf();
592     if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
593     if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
594     conf.setInt(CONF_FILES_MODE, filesMode);
595     conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
596     conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
597     conf.set(CONF_INPUT_ROOT, inputRoot.toString());
598     conf.setInt("mapreduce.job.maps", mappers);
599 
600     Job job = new Job(conf);
601     job.setJobName("ExportSnapshot");
602     job.setJarByClass(ExportSnapshot.class);
603     TableMapReduceUtil.addDependencyJars(job);
604     job.setMapperClass(ExportMapper.class);
605     job.setInputFormatClass(SequenceFileInputFormat.class);
606     job.setOutputFormatClass(NullOutputFormat.class);
607     job.setMapSpeculativeExecution(false);
608     job.setNumReduceTasks(0);
609 
610     // Create MR Input
611     Path inputFolderPath = getInputFolderPath(conf);
612     for (Path path: createInputFiles(conf, inputFolderPath, snapshotFiles, mappers)) {
613       LOG.debug("Add Input Path=" + path);
614       SequenceFileInputFormat.addInputPath(job, path);
615     }
616 
617     UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
618     FsDelegationToken inputFsToken = new FsDelegationToken(userProvider, "irenewer");
619     FsDelegationToken outputFsToken = new FsDelegationToken(userProvider, "orenewer");
620     try {
621       // Acquire the delegation Tokens
622       inputFsToken.acquireDelegationToken(inputFs);
623       outputFsToken.acquireDelegationToken(outputFs);
624 
625       // Run the MR Job
626       if (!job.waitForCompletion(true)) {
627         // TODO: Replace the fixed string with job.getStatus().getFailureInfo()
628         // when it will be available on all the supported versions.
629         throw new ExportSnapshotException("Copy Files Map-Reduce Job failed");
630       }
631     } finally {
632       inputFsToken.releaseDelegationToken();
633       outputFsToken.releaseDelegationToken();
634 
635       // Remove MR Input
636       try {
637         inputFolderPath.getFileSystem(conf).delete(inputFolderPath, true);
638       } catch (IOException e) {
639         LOG.warn("Unable to remove MR input folder: " + inputFolderPath, e);
640       }
641     }
642   }
643 
644   private void verifySnapshot(final Configuration baseConf,
645       final FileSystem fs, final Path rootDir, final Path snapshotDir) throws IOException {
646     // Update the conf with the current root dir, since may be a different cluster
647     Configuration conf = new Configuration(baseConf);
648     FSUtils.setRootDir(conf, rootDir);
649     FSUtils.setFsDefault(conf, snapshotDir);
650     SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
651     SnapshotReferenceUtil.verifySnapshot(conf, fs, snapshotDir, snapshotDesc);
652   }
653 
654   /**
655    * Execute the export snapshot by copying the snapshot metadata, hfiles and hlogs.
656    * @return 0 on success, and != 0 upon failure.
657    */
658   @Override
659   public int run(String[] args) throws IOException {
660     boolean verifyChecksum = true;
661     String snapshotName = null;
662     boolean overwrite = false;
663     String filesGroup = null;
664     String filesUser = null;
665     Path outputRoot = null;
666     int filesMode = 0;
667     int mappers = 0;
668 
669     Configuration conf = getConf();
670 
671     // Process command line args
672     for (int i = 0; i < args.length; i++) {
673       String cmd = args[i];
674       try {
675         if (cmd.equals("-snapshot")) {
676           snapshotName = args[++i];
677         } else if (cmd.equals("-copy-to")) {
678           outputRoot = new Path(args[++i]);
679         } else if (cmd.equals("-copy-from")) {
680           Path sourceDir = new Path(args[++i]);
681           URI defaultFs = sourceDir.getFileSystem(conf).getUri();
682           FSUtils.setFsDefault(conf, new Path(defaultFs));
683           FSUtils.setRootDir(conf, sourceDir);
684         } else if (cmd.equals("-no-checksum-verify")) {
685           verifyChecksum = false;
686         } else if (cmd.equals("-mappers")) {
687           mappers = Integer.parseInt(args[++i]);
688         } else if (cmd.equals("-chuser")) {
689           filesUser = args[++i];
690         } else if (cmd.equals("-chgroup")) {
691           filesGroup = args[++i];
692         } else if (cmd.equals("-chmod")) {
693           filesMode = Integer.parseInt(args[++i], 8);
694         } else if (cmd.equals("-overwrite")) {
695           overwrite = true;
696         } else if (cmd.equals("-h") || cmd.equals("--help")) {
697           printUsageAndExit();
698         } else {
699           System.err.println("UNEXPECTED: " + cmd);
700           printUsageAndExit();
701         }
702       } catch (Exception e) {
703         printUsageAndExit();
704       }
705     }
706 
707     // Check user options
708     if (snapshotName == null) {
709       System.err.println("Snapshot name not provided.");
710       printUsageAndExit();
711     }
712 
713     if (outputRoot == null) {
714       System.err.println("Destination file-system not provided.");
715       printUsageAndExit();
716     }
717 
718     Path inputRoot = FSUtils.getRootDir(conf);
719     FileSystem inputFs = FileSystem.get(inputRoot.toUri(), conf);
720     LOG.debug("inputFs=" + inputFs.getUri().toString() + " inputRoot=" + inputRoot);
721     FileSystem outputFs = FileSystem.get(outputRoot.toUri(), conf);
722     LOG.debug("outputFs=" + outputFs.getUri().toString() + " outputRoot=" + outputRoot.toString());
723 
724     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, inputRoot);
725     Path snapshotTmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshotName, outputRoot);
726     Path outputSnapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, outputRoot);
727 
728     // Check if the snapshot already exists
729     if (outputFs.exists(outputSnapshotDir)) {
730       if (overwrite) {
731         if (!outputFs.delete(outputSnapshotDir, true)) {
732           System.err.println("Unable to remove existing snapshot directory: " + outputSnapshotDir);
733           return 1;
734         }
735       } else {
736         System.err.println("The snapshot '" + snapshotName +
737           "' already exists in the destination: " + outputSnapshotDir);
738         return 1;
739       }
740     }
741 
742     // Check if the snapshot already in-progress
743     if (outputFs.exists(snapshotTmpDir)) {
744       if (overwrite) {
745         if (!outputFs.delete(snapshotTmpDir, true)) {
746           System.err.println("Unable to remove existing snapshot tmp directory: " + snapshotTmpDir);
747           return 1;
748         }
749       } else {
750         System.err.println("A snapshot with the same name '"+ snapshotName +"' may be in-progress");
751         System.err.println("Please check " + snapshotTmpDir + ". If the snapshot has completed, ");
752         System.err.println("consider removing "+ snapshotTmpDir +" by using the -overwrite option");
753         return 1;
754       }
755     }
756 
757     // Step 0 - Extract snapshot files to copy
758     LOG.info("Loading Snapshot hfile list");
759     final List<Pair<Path, Long>> files = getSnapshotFiles(inputFs, snapshotDir);
760     if (mappers == 0 && files.size() > 0) {
761       mappers = 1 + (files.size() / conf.getInt(CONF_MAP_GROUP, 10));
762       mappers = Math.min(mappers, files.size());
763     }
764 
765     // Step 1 - Copy fs1:/.snapshot/<snapshot> to  fs2:/.snapshot/.tmp/<snapshot>
766     // The snapshot references must be copied before the hfiles otherwise the cleaner
767     // will remove them because they are unreferenced.
768     try {
769       LOG.info("Copy Snapshot Manifest");
770       FileUtil.copy(inputFs, snapshotDir, outputFs, snapshotTmpDir, false, false, conf);
771     } catch (IOException e) {
772       throw new ExportSnapshotException("Failed to copy the snapshot directory: from=" +
773         snapshotDir + " to=" + snapshotTmpDir, e);
774     }
775 
776     // Step 2 - Start MR Job to copy files
777     // The snapshot references must be copied before the files otherwise the files gets removed
778     // by the HFileArchiver, since they have no references.
779     try {
780       if (files.size() == 0) {
781         LOG.warn("There are 0 store file to be copied. There may be no data in the table.");
782       } else {
783         runCopyJob(inputFs, inputRoot, outputFs, outputRoot, files, verifyChecksum,
784                    filesUser, filesGroup, filesMode, mappers);
785       }
786 
787       // Step 3 - Rename fs2:/.snapshot/.tmp/<snapshot> fs2:/.snapshot/<snapshot>
788       LOG.info("Finalize the Snapshot Export");
789       if (!outputFs.rename(snapshotTmpDir, outputSnapshotDir)) {
790         throw new ExportSnapshotException("Unable to rename snapshot directory from=" +
791           snapshotTmpDir + " to=" + outputSnapshotDir);
792       }
793 
794       // Step 4 - Verify snapshot validity
795       LOG.info("Verify snapshot validity");
796       verifySnapshot(conf, outputFs, outputRoot, outputSnapshotDir);
797 
798       LOG.info("Export Completed: " + snapshotName);
799       return 0;
800     } catch (Exception e) {
801       LOG.error("Snapshot export failed", e);
802       outputFs.delete(snapshotTmpDir, true);
803       outputFs.delete(outputSnapshotDir, true);
804       return 1;
805     }
806   }
807 
808   // ExportSnapshot
809   private void printUsageAndExit() {
810     System.err.printf("Usage: bin/hbase %s [options]%n", getClass().getName());
811     System.err.println(" where [options] are:");
812     System.err.println("  -h|-help                Show this help and exit.");
813     System.err.println("  -snapshot NAME          Snapshot to restore.");
814     System.err.println("  -copy-to NAME           Remote destination hdfs://");
815     System.err.println("  -copy-from NAME         Input folder hdfs:// (default hbase.rootdir)");
816     System.err.println("  -no-checksum-verify     Do not verify checksum.");
817     System.err.println("  -overwrite              Rewrite the snapshot manifest if already exists");
818     System.err.println("  -chuser USERNAME        Change the owner of the files to the specified one.");
819     System.err.println("  -chgroup GROUP          Change the group of the files to the specified one.");
820     System.err.println("  -chmod MODE             Change the permission of the files to the specified one.");
821     System.err.println("  -mappers                Number of mappers to use during the copy (mapreduce.job.maps).");
822     System.err.println();
823     System.err.println("Examples:");
824     System.err.println("  hbase " + getClass().getName() + " \\");
825     System.err.println("    -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase \\");
826     System.err.println("    -chuser MyUser -chgroup MyGroup -chmod 700 -mappers 16");
827     System.err.println();
828     System.err.println("  hbase " + getClass().getName() + " \\");
829     System.err.println("    -snapshot MySnapshot -copy-from hdfs://srv2:8082/hbase \\");
830     System.err.println("    -copy-to hdfs://srv1:50070/hbase \\");
831     System.exit(1);
832   }
833 
834   /**
835    * The guts of the {@link #main} method.
836    * Call this method to avoid the {@link #main(String[])} System.exit.
837    * @param args
838    * @return errCode
839    * @throws Exception
840    */
841   static int innerMain(final Configuration conf, final String [] args) throws Exception {
842     return ToolRunner.run(conf, new ExportSnapshot(), args);
843   }
844 
845   public static void main(String[] args) throws Exception {
846     System.exit(innerMain(HBaseConfiguration.create(), args));
847   }
848 }