1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import java.io.BufferedInputStream;
22  import java.io.FileNotFoundException;
23  import java.io.DataInput;
24  import java.io.DataOutput;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.net.URI;
28  import java.util.ArrayList;
29  import java.util.Collections;
30  import java.util.Comparator;
31  import java.util.LinkedList;
32  import java.util.List;
33  import java.util.Random;
34  
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.hbase.classification.InterfaceAudience;
38  import org.apache.hadoop.hbase.classification.InterfaceStability;
39  import org.apache.hadoop.conf.Configuration;
40  import org.apache.hadoop.conf.Configured;
41  import org.apache.hadoop.fs.FSDataInputStream;
42  import org.apache.hadoop.fs.FSDataOutputStream;
43  import org.apache.hadoop.fs.FileChecksum;
44  import org.apache.hadoop.fs.FileStatus;
45  import org.apache.hadoop.fs.FileSystem;
46  import org.apache.hadoop.fs.FileUtil;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.fs.permission.FsPermission;
49  import org.apache.hadoop.hbase.TableName;
50  import org.apache.hadoop.hbase.HBaseConfiguration;
51  import org.apache.hadoop.hbase.HConstants;
52  import org.apache.hadoop.hbase.HRegionInfo;
53  import org.apache.hadoop.hbase.io.FileLink;
54  import org.apache.hadoop.hbase.io.HFileLink;
55  import org.apache.hadoop.hbase.io.HLogLink;
56  import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
57  import org.apache.hadoop.hbase.mapreduce.JobUtil;
58  import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
59  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
60  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
61  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
62  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
63  import org.apache.hadoop.hbase.util.FSUtils;
64  import org.apache.hadoop.hbase.util.Pair;
65  import org.apache.hadoop.io.BytesWritable;
66  import org.apache.hadoop.io.IOUtils;
67  import org.apache.hadoop.io.NullWritable;
68  import org.apache.hadoop.io.SequenceFile;
69  import org.apache.hadoop.io.Writable;
70  import org.apache.hadoop.mapreduce.Job;
71  import org.apache.hadoop.mapreduce.JobContext;
72  import org.apache.hadoop.mapreduce.Mapper;
73  import org.apache.hadoop.mapreduce.InputFormat;
74  import org.apache.hadoop.mapreduce.InputSplit;
75  import org.apache.hadoop.mapreduce.RecordReader;
76  import org.apache.hadoop.mapreduce.TaskAttemptContext;
77  import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
78  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
79  import org.apache.hadoop.mapreduce.security.TokenCache;
80  import org.apache.hadoop.util.StringUtils;
81  import org.apache.hadoop.util.Tool;
82  import org.apache.hadoop.util.ToolRunner;
83  
84  /**
85   * Export the specified snapshot to a given FileSystem.
86   *
87   * The .snapshot/name folder is copied to the destination cluster first,
88   * and then all the hfiles/hlogs are copied by a Map-Reduce job into the .archive/ location on the destination.
89   * When everything is done, the second cluster can restore the snapshot.
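     *
     * A typical invocation, mirroring the examples printed by the tool's usage output
     * (host names and paths are illustrative):
     * <pre>
     *   hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot \
     *     -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase \
     *     -chuser MyUser -chgroup MyGroup -chmod 700 -mappers 16
     * </pre>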
90   */
91  @InterfaceAudience.Public
92  @InterfaceStability.Evolving
93  public class ExportSnapshot extends Configured implements Tool {
94    public static final String NAME = "exportsnapshot";
95  
96    private static final Log LOG = LogFactory.getLog(ExportSnapshot.class);
97  
98    private static final String MR_NUM_MAPS = "mapreduce.job.maps";
99    private static final String CONF_NUM_SPLITS = "snapshot.export.format.splits";
100   private static final String CONF_SNAPSHOT_NAME = "snapshot.export.format.snapshot.name";
101   private static final String CONF_SNAPSHOT_DIR = "snapshot.export.format.snapshot.dir";
102   private static final String CONF_FILES_USER = "snapshot.export.files.attributes.user";
103   private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group";
104   private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode";
105   private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify";
106   private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root";
107   private static final String CONF_INPUT_ROOT = "snapshot.export.input.root";
108   private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
109   private static final String CONF_BUFFER_SIZE = "snapshot.export.buffer.size";
110   private static final String CONF_MAP_GROUP = "snapshot.export.default.map.group";
111   protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";
112 
113   static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
114   static final String CONF_TEST_RETRY = "test.snapshot.export.failure.retry";
115 
116   private static final String INPUT_FOLDER_PREFIX = "export-files.";
117 
118   // Export Map-Reduce Counters, to keep track of the progress
119   public enum Counter {
120     MISSING_FILES, FILES_COPIED, FILES_SKIPPED, COPY_FAILED,
121     BYTES_EXPECTED, BYTES_SKIPPED, BYTES_COPIED
122   }
123 
124   private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
125                                                    NullWritable, NullWritable> {
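        // Progress (the BYTES_COPIED counter and the status string) is reported to the
        // framework roughly every REPORT_SIZE bytes copied; see copyData().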
126     static final int REPORT_SIZE = 1 * 1024 * 1024;
127     static final int BUFFER_SIZE = 64 * 1024;
128 
129     private boolean testFailures;
130     private Random random;
131 
132     private boolean verifyChecksum;
133     private String filesGroup;
134     private String filesUser;
135     private short filesMode;
136     private int bufferSize;
137 
138     private FileSystem outputFs;
139     private Path outputArchive;
140     private Path outputRoot;
141 
142     private FileSystem inputFs;
143     private Path inputArchive;
144     private Path inputRoot;
145 
146     @Override
147     public void setup(Context context) throws IOException {
148       Configuration conf = context.getConfiguration();
149       verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);
150 
151       filesGroup = conf.get(CONF_FILES_GROUP);
152       filesUser = conf.get(CONF_FILES_USER);
153       filesMode = (short)conf.getInt(CONF_FILES_MODE, 0);
154       outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
155       inputRoot = new Path(conf.get(CONF_INPUT_ROOT));
156 
157       inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
158       outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
159 
160       testFailures = conf.getBoolean(CONF_TEST_FAILURE, false);
161 
162       try {
163         conf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
164         inputFs = FileSystem.get(inputRoot.toUri(), conf);
165       } catch (IOException e) {
166         throw new IOException("Could not get the input FileSystem with root=" + inputRoot, e);
167       }
168 
169       try {
170         conf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
171         outputFs = FileSystem.get(outputRoot.toUri(), conf);
172       } catch (IOException e) {
173         throw new IOException("Could not get the output FileSystem with root="+ outputRoot, e);
174       }
175 
176       // Use the default block size of the outputFs if bigger
177       int defaultBlockSize = Math.max((int) outputFs.getDefaultBlockSize(), BUFFER_SIZE);
178       bufferSize = conf.getInt(CONF_BUFFER_SIZE, defaultBlockSize);
179       LOG.info("Using bufferSize=" + StringUtils.humanReadableInt(bufferSize));
180 
181       for (Counter c : Counter.values()) {
182         context.getCounter(c).increment(0);
183       }
184     }
185 
186     byte[] copyBytes(BytesWritable bw) {
187       byte[] result = new byte[bw.getLength()];
188       System.arraycopy(bw.getBytes(), 0, result, 0, bw.getLength());
189       return result;
190     }
191 
192     @Override
193     protected void cleanup(Context context) {
194       IOUtils.closeStream(inputFs);
195       IOUtils.closeStream(outputFs);
196     }
197 
198     @Override
199     public void map(BytesWritable key, NullWritable value, Context context)
200         throws InterruptedException, IOException {
201       SnapshotFileInfo inputInfo = SnapshotFileInfo.parseFrom(copyBytes(key));
202       Path outputPath = getOutputPath(inputInfo);
203 
204       copyFile(context, inputInfo, outputPath);
205     }
206 
207     /**
208      * Returns the location where the inputPath will be copied.
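         * HFILE entries are rebuilt under the archive directory from the table, region and
         * hfile encoded in the HFileLink name, plus the family taken from the link's parent
         * directory; WAL entries are placed under the old-logs directory of the output root.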
209      */
210     private Path getOutputPath(final SnapshotFileInfo inputInfo) throws IOException {
211       Path path = null;
212       switch (inputInfo.getType()) {
213         case HFILE:
214           Path inputPath = new Path(inputInfo.getHfile());
215           String family = inputPath.getParent().getName();
216           TableName table = HFileLink.getReferencedTableName(inputPath.getName());
217           String region = HFileLink.getReferencedRegionName(inputPath.getName());
218           String hfile = HFileLink.getReferencedHFileName(inputPath.getName());
219           path = new Path(FSUtils.getTableDir(new Path("./"), table),
220               new Path(region, new Path(family, hfile)));
221           break;
222         case WAL:
223           Path oldLogsDir = new Path(outputRoot, HConstants.HREGION_OLDLOGDIR_NAME);
224           path = new Path(oldLogsDir, inputInfo.getWalName());
225           break;
226         default:
227           throw new IOException("Invalid File Type: " + inputInfo.getType().toString());
228       }
229       return new Path(outputArchive, path);
230     }
231 
232     /*
233      * Used by TestExportSnapshot to simulate a failure
234      */
235     private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
236         throws IOException {
237       if (testFailures) {
238         if (context.getConfiguration().getBoolean(CONF_TEST_RETRY, false)) {
239           if (random == null) {
240             random = new Random();
241           }
242 
243           // FLAKY-TEST-WARN: a lower probability is better; some runs may not
244           // exercise the retry at all, but it limits the number of test failures
245           // caused by this injected exception within the same map task.
246           if (random.nextFloat() < 0.03) {
247             throw new IOException("TEST RETRY FAILURE: Unable to copy input=" + inputInfo
248                                   + " time=" + System.currentTimeMillis());
249           }
250         } else {
251           context.getCounter(Counter.COPY_FAILED).increment(1);
252           throw new IOException("TEST FAILURE: Unable to copy input=" + inputInfo);
253         }
254       }
255     }
256 
257     private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
258         final Path outputPath) throws IOException {
259       injectTestFailure(context, inputInfo);
260 
261       // Get the file information
262       FileStatus inputStat = getSourceFileStatus(context, inputInfo);
263 
264       // Check whether the output file already exists and is identical to the file we want to copy
265       if (outputFs.exists(outputPath)) {
266         FileStatus outputStat = outputFs.getFileStatus(outputPath);
267         if (outputStat != null && sameFile(inputStat, outputStat)) {
268           LOG.info("Skip copy " + inputStat.getPath() + " to " + outputPath + ", same file.");
269           context.getCounter(Counter.FILES_SKIPPED).increment(1);
270           context.getCounter(Counter.BYTES_SKIPPED).increment(inputStat.getLen());
271           return;
272         }
273       }
274 
275       InputStream in = openSourceFile(context, inputInfo);
276       int bandwidthMB = context.getConfiguration().getInt(CONF_BANDWIDTH_MB, 100);
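          // Cap the copy rate of this mapper by wrapping the source stream in a
          // ThrottledInputStream limited to bandwidthMB MB/sec; Integer.MAX_VALUE
          // (the value passed by the tool when -bandwidth is not given) disables throttling.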
277       if (Integer.MAX_VALUE != bandwidthMB) {
278         in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024 * 1024);
279       }
280 
281       try {
282         context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen());
283 
284         // Ensure that the output folder is there and copy the file
285         createOutputPath(outputPath.getParent());
286         FSDataOutputStream out = outputFs.create(outputPath, true);
287         try {
288           copyData(context, inputStat.getPath(), in, outputPath, out, inputStat.getLen());
289         } finally {
290           out.close();
291         }
292 
293         // Try to preserve the file attributes
294         if (!preserveAttributes(outputPath, inputStat)) {
295           LOG.warn("You may have to run manually chown on: " + outputPath);
296         }
297       } finally {
298         in.close();
299       }
300     }
301 
302     /**
303      * Create the output folder and optionally set ownership.
304      */
305     private void createOutputPath(final Path path) throws IOException {
306       if (filesUser == null && filesGroup == null) {
307         outputFs.mkdirs(path);
308       } else {
309         Path parent = path.getParent();
310         if (!outputFs.exists(parent) && parent.getParent() != null) {
311           createOutputPath(parent);
312         }
313         outputFs.mkdirs(path);
314         // override the owner when non-null user/group is specified
315         outputFs.setOwner(path, filesUser, filesGroup);
316       }
317     }
318 
319     /**
320      * Try to preserve the file attributes selected by the user, copying them from the source file.
321      * This is only required when you are exporting as a user other than "hbase", or on a system
322      * that doesn't have an "hbase" user.
323      *
324      * This is not considered a blocking failure, since the user can later run chown/chmod manually
325      * with a user and group that are known to exist on the destination system.
326      */
327     private boolean preserveAttributes(final Path path, final FileStatus refStat) {
328       FileStatus stat;
329       try {
330         stat = outputFs.getFileStatus(path);
331       } catch (IOException e) {
332         LOG.warn("Unable to get the status for file=" + path);
333         return false;
334       }
335 
336       try {
337         if (filesMode > 0 && stat.getPermission().toShort() != filesMode) {
338           outputFs.setPermission(path, new FsPermission(filesMode));
339         } else if (refStat != null && !stat.getPermission().equals(refStat.getPermission())) {
340           outputFs.setPermission(path, refStat.getPermission());
341         }
342       } catch (IOException e) {
343         LOG.warn("Unable to set the permission for file="+ stat.getPath() +": "+ e.getMessage());
344         return false;
345       }
346 
347       boolean hasRefStat = (refStat != null);
348       String user = stringIsNotEmpty(filesUser) || !hasRefStat ? filesUser : refStat.getOwner();
349       String group = stringIsNotEmpty(filesGroup) || !hasRefStat ? filesGroup : refStat.getGroup();
350       if (stringIsNotEmpty(user) || stringIsNotEmpty(group)) {
351         try {
352           if (!(user.equals(stat.getOwner()) && group.equals(stat.getGroup()))) {
353             outputFs.setOwner(path, user, group);
354           }
355         } catch (IOException e) {
356           LOG.warn("Unable to set the owner/group for file="+ stat.getPath() +": "+ e.getMessage());
357           LOG.warn("The user/group may not exist on the destination cluster: user=" +
358                    user + " group=" + group);
359           return false;
360         }
361       }
362 
363       return true;
364     }
365 
366     private boolean stringIsNotEmpty(final String str) {
367       return str != null && str.length() > 0;
368     }
369 
370     private void copyData(final Context context,
371         final Path inputPath, final InputStream in,
372         final Path outputPath, final FSDataOutputStream out,
373         final long inputFileSize)
374         throws IOException {
375       final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) +
376                                    " (%.1f%%)";
377 
378       try {
379         byte[] buffer = new byte[bufferSize];
380         long totalBytesWritten = 0;
381         int reportBytes = 0;
382         int bytesRead;
383 
384         long stime = System.currentTimeMillis();
385         while ((bytesRead = in.read(buffer)) > 0) {
386           out.write(buffer, 0, bytesRead);
387           totalBytesWritten += bytesRead;
388           reportBytes += bytesRead;
389 
390           if (reportBytes >= REPORT_SIZE) {
391             context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
392             context.setStatus(String.format(statusMessage,
393                               StringUtils.humanReadableInt(totalBytesWritten),
394                               (totalBytesWritten/(float)inputFileSize) * 100.0f) +
395                               " from " + inputPath + " to " + outputPath);
396             reportBytes = 0;
397           }
398         }
399         long etime = System.currentTimeMillis();
400 
401         context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
402         context.setStatus(String.format(statusMessage,
403                           StringUtils.humanReadableInt(totalBytesWritten),
404                           (totalBytesWritten/(float)inputFileSize) * 100.0f) +
405                           " from " + inputPath + " to " + outputPath);
406 
407         // Verify that the number of bytes written matches the expected size
408         if (totalBytesWritten != inputFileSize) {
409           String msg = "number of bytes copied not matching copied=" + totalBytesWritten +
410                        " expected=" + inputFileSize + " for file=" + inputPath;
411           throw new IOException(msg);
412         }
413 
414         LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
415         LOG.info("size=" + totalBytesWritten +
416             " (" + StringUtils.humanReadableInt(totalBytesWritten) + ")" +
417             " time=" + StringUtils.formatTimeDiff(etime, stime) +
418             String.format(" %.3fM/sec", (totalBytesWritten / ((etime - stime)/1000.0))/1048576.0));
419         context.getCounter(Counter.FILES_COPIED).increment(1);
420       } catch (IOException e) {
421         LOG.error("Error copying " + inputPath + " to " + outputPath, e);
422         context.getCounter(Counter.COPY_FAILED).increment(1);
423         throw e;
424       }
425     }
426 
427     /**
428      * Try to open the "source" file.
429      * Throws an IOException if the communication with the inputFs fails or
430      * if the file is not found.
431      */
432     private FSDataInputStream openSourceFile(Context context, final SnapshotFileInfo fileInfo)
433         throws IOException {
434       try {
435         FileLink link = null;
436         switch (fileInfo.getType()) {
437           case HFILE:
438             Path inputPath = new Path(fileInfo.getHfile());
439             link = new HFileLink(inputRoot, inputArchive, inputPath);
440             break;
441           case WAL:
442             String serverName = fileInfo.getWalServer();
443             String logName = fileInfo.getWalName();
444             link = new HLogLink(inputRoot, serverName, logName);
445             break;
446           default:
447             throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
448         }
449         return link.open(inputFs);
450       } catch (IOException e) {
451         context.getCounter(Counter.MISSING_FILES).increment(1);
452         LOG.error("Unable to open source file=" + fileInfo.toString(), e);
453         throw e;
454       }
455     }
456 
457     private FileStatus getSourceFileStatus(Context context, final SnapshotFileInfo fileInfo)
458         throws IOException {
459       try {
460         FileLink link = null;
461         switch (fileInfo.getType()) {
462           case HFILE:
463             Path inputPath = new Path(fileInfo.getHfile());
464             link = new HFileLink(inputRoot, inputArchive, inputPath);
465             break;
466           case WAL:
467             link = new HLogLink(inputRoot, fileInfo.getWalServer(), fileInfo.getWalName());
468             break;
469           default:
470             throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
471         }
472         return link.getFileStatus(inputFs);
473       } catch (FileNotFoundException e) {
474         context.getCounter(Counter.MISSING_FILES).increment(1);
475         LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
476         throw e;
477       } catch (IOException e) {
478         LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
479         throw e;
480       }
481     }
482 
483     private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
484       try {
485         return fs.getFileChecksum(path);
486       } catch (IOException e) {
487         LOG.warn("Unable to get checksum for file=" + path, e);
488         return null;
489       }
490     }
491 
492     /**
493      * Check if the two files are equal by looking at the file length,
494      * and at the checksum (if the user has enabled checksum verification).
495      */
496     private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) {
497       // Not matching length
498       if (inputStat.getLen() != outputStat.getLen()) return false;
499 
500       // Consider the files equal, since the user disabled checksum verification
501       if (!verifyChecksum) return true;
502 
503       // If a checksum cannot be obtained, treat the files as different.
504       FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
505       if (inChecksum == null) return false;
506 
507       FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
508       if (outChecksum == null) return false;
509 
510       return inChecksum.equals(outChecksum);
511     }
512   }
513 
514   // ==========================================================================
515   //  Input Format
516   // ==========================================================================
517 
518   /**
519    * Extract the list of files (HFiles/HLogs) to copy using Map-Reduce.
520    * @return list of files referenced by the snapshot (pair of path and size)
521    */
522   private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configuration conf,
523       final FileSystem fs, final Path snapshotDir) throws IOException {
524     SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
525 
526     final List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<Pair<SnapshotFileInfo, Long>>();
527     final TableName table = TableName.valueOf(snapshotDesc.getTable());
528 
529     // Get snapshot files
530     LOG.info("Loading Snapshot '" + snapshotDesc.getName() + "' hfile list");
531     SnapshotReferenceUtil.visitReferencedFiles(conf, fs, snapshotDir, snapshotDesc,
532       new SnapshotReferenceUtil.SnapshotVisitor() {
533         @Override
534         public void storeFile(final HRegionInfo regionInfo, final String family,
535             final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
536           if (storeFile.hasReference()) {
537             // copied as part of the manifest
538           } else {
539             String region = regionInfo.getEncodedName();
540             String hfile = storeFile.getName();
541             Path path = HFileLink.createPath(table, region, family, hfile);
542 
543             SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
544               .setType(SnapshotFileInfo.Type.HFILE)
545               .setHfile(path.toString())
546               .build();
547 
548             long size;
549             if (storeFile.hasFileSize()) {
550               size = storeFile.getFileSize();
551             } else {
552               size = new HFileLink(conf, path).getFileStatus(fs).getLen();
553             }
554             files.add(new Pair<SnapshotFileInfo, Long>(fileInfo, size));
555           }
556         }
557 
558         @Override
559         public void logFile(final String server, final String logfile)
560             throws IOException {
561           SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
562             .setType(SnapshotFileInfo.Type.WAL)
563             .setWalServer(server)
564             .setWalName(logfile)
565             .build();
566 
567           long size = new HLogLink(conf, server, logfile).getFileStatus(fs).getLen();
568           files.add(new Pair<SnapshotFileInfo, Long>(fileInfo, size));
569         }
570     });
571 
572     return files;
573   }
574 
575   /**
576    * Given a list of file paths and sizes, create around ngroups groups that are as balanced as possible.
577    * The groups created will hold similar amounts of bytes.
578    * <p>
579    * The algorithm used is pretty straightforward: the file list is sorted by size, and then each
580    * group in turn takes the biggest file still available, iterating through the groups while
581    * alternating the direction.
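       * <p>
       * For example (sizes are illustrative), splitting files of sizes 7, 6, 5, 4, 3, 2, 1 into
       * 3 groups yields {7, 2, 1}, {6, 3} and {5, 4}, i.e. totals of 10, 9 and 9.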
582    */
583   static List<List<Pair<SnapshotFileInfo, Long>>> getBalancedSplits(
584       final List<Pair<SnapshotFileInfo, Long>> files, final int ngroups) {
585     // Sort files by size, from small to big
586     Collections.sort(files, new Comparator<Pair<SnapshotFileInfo, Long>>() {
587       public int compare(Pair<SnapshotFileInfo, Long> a, Pair<SnapshotFileInfo, Long> b) {
588         long r = a.getSecond() - b.getSecond();
589         return (r < 0) ? -1 : ((r > 0) ? 1 : 0);
590       }
591     });
592 
593     // create balanced groups
594     List<List<Pair<SnapshotFileInfo, Long>>> fileGroups =
595       new LinkedList<List<Pair<SnapshotFileInfo, Long>>>();
596     long[] sizeGroups = new long[ngroups];
597     int hi = files.size() - 1;
598     int lo = 0;
599 
600     List<Pair<SnapshotFileInfo, Long>> group;
601     int dir = 1;
602     int g = 0;
603 
604     while (hi >= lo) {
605       if (g == fileGroups.size()) {
606         group = new LinkedList<Pair<SnapshotFileInfo, Long>>();
607         fileGroups.add(group);
608       } else {
609         group = fileGroups.get(g);
610       }
611 
612       Pair<SnapshotFileInfo, Long> fileInfo = files.get(hi--);
613 
614       // add the hi one
615       sizeGroups[g] += fileInfo.getSecond();
616       group.add(fileInfo);
617 
618       // change direction when at the end or the beginning
619       g += dir;
620       if (g == ngroups) {
621         dir = -1;
622         g = ngroups - 1;
623       } else if (g < 0) {
624         dir = 1;
625         g = 0;
626       }
627     }
628 
629     if (LOG.isDebugEnabled()) {
630       for (int i = 0; i < sizeGroups.length; ++i) {
631         LOG.debug("export split=" + i + " size=" + StringUtils.humanReadableInt(sizeGroups[i]));
632       }
633     }
634 
635     return fileGroups;
636   }
637 
638   private static class ExportSnapshotInputFormat extends InputFormat<BytesWritable, NullWritable> {
639     @Override
640     public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit split,
641         TaskAttemptContext tac) throws IOException, InterruptedException {
642       return new ExportSnapshotRecordReader(((ExportSnapshotInputSplit)split).getSplitKeys());
643     }
644 
645     @Override
646     public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
647       Configuration conf = context.getConfiguration();
648       String snapshotName = conf.get(CONF_SNAPSHOT_NAME);
649       Path snapshotDir = new Path(conf.get(CONF_SNAPSHOT_DIR));
650       FileSystem fs = FileSystem.get(snapshotDir.toUri(), conf);
651 
652       List<Pair<SnapshotFileInfo, Long>> snapshotFiles = getSnapshotFiles(conf, fs, snapshotDir);
653       int mappers = conf.getInt(CONF_NUM_SPLITS, 0);
654       if (mappers == 0 && snapshotFiles.size() > 0) {
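            // No explicit split count requested: aim for roughly CONF_MAP_GROUP files per
            // mapper (default 10), capped at the number of files;
            // e.g. 45 files -> 1 + 45/10 = 5 mappers.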
655         mappers = 1 + (snapshotFiles.size() / conf.getInt(CONF_MAP_GROUP, 10));
656         mappers = Math.min(mappers, snapshotFiles.size());
657         conf.setInt(CONF_NUM_SPLITS, mappers);
658         conf.setInt(MR_NUM_MAPS, mappers);
659       }
660 
661       List<List<Pair<SnapshotFileInfo, Long>>> groups = getBalancedSplits(snapshotFiles, mappers);
662       List<InputSplit> splits = new ArrayList<InputSplit>(groups.size());
663       for (List<Pair<SnapshotFileInfo, Long>> files: groups) {
664         splits.add(new ExportSnapshotInputSplit(files));
665       }
666       return splits;
667     }
668 
669     private static class ExportSnapshotInputSplit extends InputSplit implements Writable {
670       private List<Pair<BytesWritable, Long>> files;
671       private long length;
672 
673       public ExportSnapshotInputSplit() {
674         this.files = null;
675       }
676 
677       public ExportSnapshotInputSplit(final List<Pair<SnapshotFileInfo, Long>> snapshotFiles) {
678         this.files = new ArrayList<Pair<BytesWritable, Long>>(snapshotFiles.size());
679         for (Pair<SnapshotFileInfo, Long> fileInfo: snapshotFiles) {
680           this.files.add(new Pair<BytesWritable, Long>(
681             new BytesWritable(fileInfo.getFirst().toByteArray()), fileInfo.getSecond()));
682           this.length += fileInfo.getSecond();
683         }
684       }
685 
686       private List<Pair<BytesWritable, Long>> getSplitKeys() {
687         return files;
688       }
689 
690       @Override
691       public long getLength() throws IOException, InterruptedException {
692         return length;
693       }
694 
695       @Override
696       public String[] getLocations() throws IOException, InterruptedException {
697         return new String[] {};
698       }
699 
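          // Serialized form of the split: an int file count followed by, for each file,
          // the SnapshotFileInfo bytes (as a BytesWritable) and its size as a long.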
700       @Override
701       public void readFields(DataInput in) throws IOException {
702         int count = in.readInt();
703         files = new ArrayList<Pair<BytesWritable, Long>>(count);
704         length = 0;
705         for (int i = 0; i < count; ++i) {
706           BytesWritable fileInfo = new BytesWritable();
707           fileInfo.readFields(in);
708           long size = in.readLong();
709           files.add(new Pair<BytesWritable, Long>(fileInfo, size));
710           length += size;
711         }
712       }
713 
714       @Override
715       public void write(DataOutput out) throws IOException {
716         out.writeInt(files.size());
717         for (final Pair<BytesWritable, Long> fileInfo: files) {
718           fileInfo.getFirst().write(out);
719           out.writeLong(fileInfo.getSecond());
720         }
721       }
722     }
723 
724     private static class ExportSnapshotRecordReader
725         extends RecordReader<BytesWritable, NullWritable> {
726       private final List<Pair<BytesWritable, Long>> files;
727       private long totalSize = 0;
728       private long procSize = 0;
729       private int index = -1;
730 
731       ExportSnapshotRecordReader(final List<Pair<BytesWritable, Long>> files) {
732         this.files = files;
733         for (Pair<BytesWritable, Long> fileInfo: files) {
734           totalSize += fileInfo.getSecond();
735         }
736       }
737 
738       @Override
739       public void close() { }
740 
741       @Override
742       public BytesWritable getCurrentKey() { return files.get(index).getFirst(); }
743 
744       @Override
745       public NullWritable getCurrentValue() { return NullWritable.get(); }
746 
747       @Override
748       public float getProgress() { return (float)procSize / totalSize; }
749 
750       @Override
751       public void initialize(InputSplit split, TaskAttemptContext tac) { }
752 
753       @Override
754       public boolean nextKeyValue() {
755         if (index >= 0) {
756           procSize += files.get(index).getSecond();
757         }
758         return(++index < files.size());
759       }
760     }
761   }
762 
763   // ==========================================================================
764   //  Tool
765   // ==========================================================================
766 
767   /**
768    * Run the Map-Reduce job that performs the file copy.
769    */
770   private void runCopyJob(final Path inputRoot, final Path outputRoot,
771       final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
772       final String filesUser, final String filesGroup, final int filesMode,
773       final int mappers, final int bandwidthMB)
774           throws IOException, InterruptedException, ClassNotFoundException {
775     Configuration conf = getConf();
776     if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
777     if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
778     if (mappers > 0) {
779       conf.setInt(CONF_NUM_SPLITS, mappers);
780       conf.setInt(MR_NUM_MAPS, mappers);
781     }
782     conf.setInt(CONF_FILES_MODE, filesMode);
783     conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
784     conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
785     conf.set(CONF_INPUT_ROOT, inputRoot.toString());
786     conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
787     conf.set(CONF_SNAPSHOT_NAME, snapshotName);
788     conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());
789 
790     Job job = new Job(conf);
791     job.setJobName("ExportSnapshot-" + snapshotName);
792     job.setJarByClass(ExportSnapshot.class);
793     TableMapReduceUtil.addDependencyJars(job);
794     job.setMapperClass(ExportMapper.class);
795     job.setInputFormatClass(ExportSnapshotInputFormat.class);
796     job.setOutputFormatClass(NullOutputFormat.class);
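        // Speculative execution is disabled so two attempts of the same map task never
        // write the same destination files concurrently; all the work happens in the
        // map phase, hence zero reduce tasks.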
797     job.setMapSpeculativeExecution(false);
798     job.setNumReduceTasks(0);
799 
800     // Acquire the delegation Tokens
801     TokenCache.obtainTokensForNamenodes(job.getCredentials(),
802       new Path[] { inputRoot, outputRoot }, conf);
803 
804     // Run the MR Job
805     if (!job.waitForCompletion(true)) {
806       // TODO: Replace the fixed string with job.getStatus().getFailureInfo()
807       // once it is available on all the supported versions.
808       throw new ExportSnapshotException("Copy Files Map-Reduce Job failed");
809     }
810   }
811 
812   private void verifySnapshot(final Configuration baseConf,
813       final FileSystem fs, final Path rootDir, final Path snapshotDir) throws IOException {
814     // Update the conf with the current root dir, since it may be a different cluster
815     Configuration conf = new Configuration(baseConf);
816     FSUtils.setRootDir(conf, rootDir);
817     FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
818     SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
819     SnapshotReferenceUtil.verifySnapshot(conf, fs, snapshotDir, snapshotDesc);
820   }
821 
822   /**
823    * Set path ownership.
824    */
825   private void setOwner(final FileSystem fs, final Path path, final String user,
826       final String group, final boolean recursive) throws IOException {
827     if (user != null || group != null) {
828       if (recursive && fs.isDirectory(path)) {
829         for (FileStatus child : fs.listStatus(path)) {
830           setOwner(fs, child.getPath(), user, group, recursive);
831         }
832       }
833       fs.setOwner(path, user, group);
834     }
835   }
836 
837   /**
838    * Execute the export snapshot by copying the snapshot metadata, hfiles and hlogs.
839    * @return 0 on success, and != 0 upon failure.
840    */
841   @Override
842   public int run(String[] args) throws IOException {
843     boolean verifyTarget = true;
844     boolean verifyChecksum = true;
845     String snapshotName = null;
846     String targetName = null;
847     boolean overwrite = false;
848     String filesGroup = null;
849     String filesUser = null;
850     Path outputRoot = null;
851     int bandwidthMB = Integer.MAX_VALUE;
852     int filesMode = 0;
853     int mappers = 0;
854 
855     Configuration conf = getConf();
856     Path inputRoot = FSUtils.getRootDir(conf);
857 
858     // Process command line args
859     for (int i = 0; i < args.length; i++) {
860       String cmd = args[i];
861       if (cmd.equals("-snapshot")) {
862         snapshotName = args[++i];
863       } else if (cmd.equals("-target")) {
864         targetName = args[++i];
865       } else if (cmd.equals("-copy-to")) {
866         outputRoot = new Path(args[++i]);
867       } else if (cmd.equals("-copy-from")) {
868         inputRoot = new Path(args[++i]);
869         FSUtils.setRootDir(conf, inputRoot);
870       } else if (cmd.equals("-no-checksum-verify")) {
871         verifyChecksum = false;
872       } else if (cmd.equals("-no-target-verify")) {
873         verifyTarget = false;
874       } else if (cmd.equals("-mappers")) {
875         mappers = Integer.parseInt(args[++i]);
876       } else if (cmd.equals("-chuser")) {
877         filesUser = args[++i];
878       } else if (cmd.equals("-chgroup")) {
879         filesGroup = args[++i];
880       } else if (cmd.equals("-bandwidth")) {
881         bandwidthMB = Integer.parseInt(args[++i]);
882       } else if (cmd.equals("-chmod")) {
883         filesMode = Integer.parseInt(args[++i], 8);
884       } else if (cmd.equals("-overwrite")) {
885         overwrite = true;
886       } else if (cmd.equals("-h") || cmd.equals("--help")) {
887         printUsageAndExit();
888       } else {
889         System.err.println("UNEXPECTED: " + cmd);
890         printUsageAndExit();
891       }
892     }
893 
894     // Check user options
895     if (snapshotName == null) {
896       System.err.println("Snapshot name not provided.");
897       printUsageAndExit();
898     }
899 
900     if (outputRoot == null) {
901       System.err.println("Destination file-system not provided.");
902       printUsageAndExit();
903     }
904 
905     if (targetName == null) {
906       targetName = snapshotName;
907     }
908 
909     conf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
910     FileSystem inputFs = FileSystem.get(inputRoot.toUri(), conf);
911     LOG.debug("inputFs=" + inputFs.getUri().toString() + " inputRoot=" + inputRoot);
912     conf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
913     FileSystem outputFs = FileSystem.get(outputRoot.toUri(), conf);
914     LOG.debug("outputFs=" + outputFs.getUri().toString() + " outputRoot=" + outputRoot.toString());
915 
916     boolean skipTmp = conf.getBoolean(CONF_SKIP_TMP, false);
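        // When snapshot.export.skip.tmp is true, the manifest is copied straight into the
        // final snapshot directory, skipping the working (tmp) directory and the rename step.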
917 
918     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, inputRoot);
919     Path snapshotTmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(targetName, outputRoot);
920     Path outputSnapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(targetName, outputRoot);
921     Path initialOutputSnapshotDir = skipTmp ? outputSnapshotDir : snapshotTmpDir;
922 
923     // Check if the snapshot already exists
924     if (outputFs.exists(outputSnapshotDir)) {
925       if (overwrite) {
926         if (!outputFs.delete(outputSnapshotDir, true)) {
927           System.err.println("Unable to remove existing snapshot directory: " + outputSnapshotDir);
928           return 1;
929         }
930       } else {
931         System.err.println("The snapshot '" + targetName +
932           "' already exists in the destination: " + outputSnapshotDir);
933         return 1;
934       }
935     }
936 
937     if (!skipTmp) {
938       // Check if the snapshot is already in progress
939       if (outputFs.exists(snapshotTmpDir)) {
940         if (overwrite) {
941           if (!outputFs.delete(snapshotTmpDir, true)) {
942             System.err.println("Unable to remove existing snapshot tmp directory: "+snapshotTmpDir);
943             return 1;
944           }
945         } else {
946           System.err.println("A snapshot with the same name '"+ targetName +"' may be in-progress");
947           System.err.println("Please check "+snapshotTmpDir+". If the snapshot has completed, ");
948           System.err.println("consider removing "+snapshotTmpDir+" by using the -overwrite option");
949           return 1;
950         }
951       }
952     }
953 
954     // Step 1 - Copy fs1:/.snapshot/<snapshot> to fs2:/.snapshot/.tmp/<snapshot>
955     // The snapshot references must be copied before the hfiles otherwise the cleaner
956     // will remove them because they are unreferenced.
957     try {
958       LOG.info("Copy Snapshot Manifest");
959       FileUtil.copy(inputFs, snapshotDir, outputFs, initialOutputSnapshotDir, false, false, conf);
960       if (filesUser != null || filesGroup != null) {
961         setOwner(outputFs, snapshotTmpDir, filesUser, filesGroup, true);
962       }
963     } catch (IOException e) {
964       throw new ExportSnapshotException("Failed to copy the snapshot directory: from=" +
965         snapshotDir + " to=" + initialOutputSnapshotDir, e);
966     }
967 
968     // Write a new .snapshotinfo if the target name is different from the source name
969     if (!targetName.equals(snapshotName)) {
970       SnapshotDescription snapshotDesc =
971         SnapshotDescriptionUtils.readSnapshotInfo(inputFs, snapshotDir)
972           .toBuilder()
973           .setName(targetName)
974           .build();
975       SnapshotDescriptionUtils.writeSnapshotInfo(snapshotDesc, snapshotTmpDir, outputFs);
976     }
977 
978     // Step 2 - Start MR Job to copy files
979     // The snapshot references must be copied before the files, otherwise the files get removed
980     // by the HFileArchiver, since they have no references.
981     try {
982       runCopyJob(inputRoot, outputRoot, snapshotName, snapshotDir, verifyChecksum,
983                  filesUser, filesGroup, filesMode, mappers, bandwidthMB);
984 
985       LOG.info("Finalize the Snapshot Export");
986       if (!skipTmp) {
987         // Step 3 - Rename fs2:/.snapshot/.tmp/<snapshot> fs2:/.snapshot/<snapshot>
988         if (!outputFs.rename(snapshotTmpDir, outputSnapshotDir)) {
989           throw new ExportSnapshotException("Unable to rename snapshot directory from=" +
990             snapshotTmpDir + " to=" + outputSnapshotDir);
991         }
992       }
993 
994       // Step 4 - Verify snapshot integrity
995       if (verifyTarget) {
996         LOG.info("Verify snapshot integrity");
997         verifySnapshot(conf, outputFs, outputRoot, outputSnapshotDir);
998       }
999 
1000       LOG.info("Export Completed: " + targetName);
1001       return 0;
1002     } catch (Exception e) {
1003       LOG.error("Snapshot export failed", e);
1004       if (!skipTmp) {
1005         outputFs.delete(snapshotTmpDir, true);
1006       }
1007       outputFs.delete(outputSnapshotDir, true);
1008       return 1;
1009     } finally {
1010       IOUtils.closeStream(inputFs);
1011       IOUtils.closeStream(outputFs);
1012     }
1013   }
1014 
1015   // ExportSnapshot
1016   private void printUsageAndExit() {
1017     System.err.printf("Usage: bin/hbase %s [options]%n", getClass().getName());
1018     System.err.println(" where [options] are:");
1019     System.err.println("  -h|-help                Show this help and exit.");
1020     System.err.println("  -snapshot NAME          Snapshot to restore.");
1021     System.err.println("  -copy-to NAME           Remote destination hdfs://");
1022     System.err.println("  -copy-from NAME         Input folder hdfs:// (default hbase.rootdir)");
1023     System.err.println("  -no-checksum-verify     Do not verify checksum, use name+length only.");
1024     System.err.println("  -no-target-verify       Do not verify the integrity of the \\" +
1025         "exported snapshot.");
1026     System.err.println("  -overwrite              Rewrite the snapshot manifest if already exists");
1027     System.err.println("  -chuser USERNAME        Change the owner of the files to the specified one.");
1028     System.err.println("  -chgroup GROUP          Change the group of the files to the specified one.");
1029     System.err.println("  -chmod MODE             Change the permission of the files to the specified one.");
1030     System.err.println("  -mappers                Number of mappers to use during the copy (mapreduce.job.maps).");
1031     System.err.println();
1032     System.err.println("Examples:");
1033     System.err.println("  hbase " + getClass().getName() + " \\");
1034     System.err.println("    -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase \\");
1035     System.err.println("    -chuser MyUser -chgroup MyGroup -chmod 700 -mappers 16");
1036     System.err.println();
1037     System.err.println("  hbase " + getClass().getName() + " \\");
1038     System.err.println("    -snapshot MySnapshot -copy-from hdfs://srv2:8082/hbase \\");
1039     System.err.println("    -copy-to hdfs://srv1:50070/hbase \\");
1040     System.exit(1);
1041   }
1042 
1043   /**
1044    * The guts of the {@link #main} method.
1045    * Call this method to avoid the {@link #main(String[])} System.exit.
1046    * @param args the command line arguments
1047    * @return errCode, 0 on success and non-zero on failure
1048    * @throws Exception
1049    */
1050   static int innerMain(final Configuration conf, final String [] args) throws Exception {
1051     return ToolRunner.run(conf, new ExportSnapshot(), args);
1052   }
1053 
1054   public static void main(String[] args) throws Exception {
1055     System.exit(innerMain(HBaseConfiguration.create(), args));
1056   }
1057 }