1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.snapshot;
20
21 import java.io.BufferedInputStream;
22 import java.io.FileNotFoundException;
23 import java.io.DataInput;
24 import java.io.DataOutput;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.net.URI;
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.Comparator;
31 import java.util.LinkedList;
32 import java.util.List;
33 import java.util.Random;
34
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.hbase.classification.InterfaceAudience;
38 import org.apache.hadoop.hbase.classification.InterfaceStability;
39 import org.apache.hadoop.conf.Configuration;
40 import org.apache.hadoop.conf.Configured;
41 import org.apache.hadoop.fs.FSDataInputStream;
42 import org.apache.hadoop.fs.FSDataOutputStream;
43 import org.apache.hadoop.fs.FileChecksum;
44 import org.apache.hadoop.fs.FileStatus;
45 import org.apache.hadoop.fs.FileSystem;
46 import org.apache.hadoop.fs.FileUtil;
47 import org.apache.hadoop.fs.Path;
48 import org.apache.hadoop.fs.permission.FsPermission;
49 import org.apache.hadoop.hbase.TableName;
50 import org.apache.hadoop.hbase.HBaseConfiguration;
51 import org.apache.hadoop.hbase.HConstants;
52 import org.apache.hadoop.hbase.HRegionInfo;
53 import org.apache.hadoop.hbase.io.FileLink;
54 import org.apache.hadoop.hbase.io.HFileLink;
55 import org.apache.hadoop.hbase.io.HLogLink;
56 import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
57 import org.apache.hadoop.hbase.mapreduce.JobUtil;
58 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
59 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
60 import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
61 import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
62 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
63 import org.apache.hadoop.hbase.util.FSUtils;
64 import org.apache.hadoop.hbase.util.Pair;
65 import org.apache.hadoop.io.BytesWritable;
66 import org.apache.hadoop.io.IOUtils;
67 import org.apache.hadoop.io.NullWritable;
68 import org.apache.hadoop.io.SequenceFile;
69 import org.apache.hadoop.io.Writable;
70 import org.apache.hadoop.mapreduce.Job;
71 import org.apache.hadoop.mapreduce.JobContext;
72 import org.apache.hadoop.mapreduce.Mapper;
73 import org.apache.hadoop.mapreduce.InputFormat;
74 import org.apache.hadoop.mapreduce.InputSplit;
75 import org.apache.hadoop.mapreduce.RecordReader;
76 import org.apache.hadoop.mapreduce.TaskAttemptContext;
77 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
78 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
79 import org.apache.hadoop.mapreduce.security.TokenCache;
80 import org.apache.hadoop.util.StringUtils;
81 import org.apache.hadoop.util.Tool;
82 import org.apache.hadoop.util.ToolRunner;
83
84
85
86
87
88
89
90
91 @InterfaceAudience.Public
92 @InterfaceStability.Evolving
93 public class ExportSnapshot extends Configured implements Tool {
94 public static final String NAME = "exportsnapshot";
95
96 private static final Log LOG = LogFactory.getLog(ExportSnapshot.class);
97
98 private static final String MR_NUM_MAPS = "mapreduce.job.maps";
99 private static final String CONF_NUM_SPLITS = "snapshot.export.format.splits";
100 private static final String CONF_SNAPSHOT_NAME = "snapshot.export.format.snapshot.name";
101 private static final String CONF_SNAPSHOT_DIR = "snapshot.export.format.snapshot.dir";
102 private static final String CONF_FILES_USER = "snapshot.export.files.attributes.user";
103 private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group";
104 private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode";
105 private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify";
106 private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root";
107 private static final String CONF_INPUT_ROOT = "snapshot.export.input.root";
108 private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
109 private static final String CONF_BUFFER_SIZE = "snapshot.export.buffer.size";
110 private static final String CONF_MAP_GROUP = "snapshot.export.default.map.group";
111 protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";
112
113 static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
114 static final String CONF_TEST_RETRY = "test.snapshot.export.failure.retry";
115
116 private static final String INPUT_FOLDER_PREFIX = "export-files.";
117
118
/**
 * Counters updated by the export mappers.
 * The declaration order is part of the contract (ordinals), do not reorder.
 */
public enum Counter {
  /** A source file could not be opened or its status could not be found. */
  MISSING_FILES,
  /** Files whose copy completed. */
  FILES_COPIED,
  /** Files not copied because an identical file already exists at the destination. */
  FILES_SKIPPED,
  /** Copies that ended with an error. */
  COPY_FAILED,
  /** Total length of the files handed to the copy. */
  BYTES_EXPECTED,
  /** Total length of the skipped files. */
  BYTES_SKIPPED,
  /** Bytes actually written to the destination. */
  BYTES_COPIED
}
123
124 private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
125 NullWritable, NullWritable> {
126 final static int REPORT_SIZE = 1 * 1024 * 1024;
127 final static int BUFFER_SIZE = 64 * 1024;
128
129 private boolean testFailures;
130 private Random random;
131
132 private boolean verifyChecksum;
133 private String filesGroup;
134 private String filesUser;
135 private short filesMode;
136 private int bufferSize;
137
138 private FileSystem outputFs;
139 private Path outputArchive;
140 private Path outputRoot;
141
142 private FileSystem inputFs;
143 private Path inputArchive;
144 private Path inputRoot;
145
146 @Override
147 public void setup(Context context) throws IOException {
148 Configuration conf = context.getConfiguration();
149 verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);
150
151 filesGroup = conf.get(CONF_FILES_GROUP);
152 filesUser = conf.get(CONF_FILES_USER);
153 filesMode = (short)conf.getInt(CONF_FILES_MODE, 0);
154 outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
155 inputRoot = new Path(conf.get(CONF_INPUT_ROOT));
156
157 inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
158 outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
159
160 testFailures = conf.getBoolean(CONF_TEST_FAILURE, false);
161
162 try {
163 conf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
164 inputFs = FileSystem.get(inputRoot.toUri(), conf);
165 } catch (IOException e) {
166 throw new IOException("Could not get the input FileSystem with root=" + inputRoot, e);
167 }
168
169 try {
170 conf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
171 outputFs = FileSystem.get(outputRoot.toUri(), conf);
172 } catch (IOException e) {
173 throw new IOException("Could not get the output FileSystem with root="+ outputRoot, e);
174 }
175
176
177 int defaultBlockSize = Math.max((int) outputFs.getDefaultBlockSize(), BUFFER_SIZE);
178 bufferSize = conf.getInt(CONF_BUFFER_SIZE, defaultBlockSize);
179 LOG.info("Using bufferSize=" + StringUtils.humanReadableInt(bufferSize));
180
181 for (Counter c : Counter.values()) {
182 context.getCounter(c).increment(0);
183 }
184 }
185
186 byte[] copyBytes(BytesWritable bw) {
187 byte[] result = new byte[bw.getLength()];
188 System.arraycopy(bw.getBytes(), 0, result, 0, bw.getLength());
189 return result;
190 }
191
192 @Override
193 protected void cleanup(Context context) {
194 IOUtils.closeStream(inputFs);
195 IOUtils.closeStream(outputFs);
196 }
197
198 @Override
199 public void map(BytesWritable key, NullWritable value, Context context)
200 throws InterruptedException, IOException {
201 SnapshotFileInfo inputInfo = SnapshotFileInfo.parseFrom(copyBytes(key));
202 Path outputPath = getOutputPath(inputInfo);
203
204 copyFile(context, inputInfo, outputPath);
205 }
206
207
208
209
210 private Path getOutputPath(final SnapshotFileInfo inputInfo) throws IOException {
211 Path path = null;
212 switch (inputInfo.getType()) {
213 case HFILE:
214 Path inputPath = new Path(inputInfo.getHfile());
215 String family = inputPath.getParent().getName();
216 TableName table =HFileLink.getReferencedTableName(inputPath.getName());
217 String region = HFileLink.getReferencedRegionName(inputPath.getName());
218 String hfile = HFileLink.getReferencedHFileName(inputPath.getName());
219 path = new Path(FSUtils.getTableDir(new Path("./"), table),
220 new Path(region, new Path(family, hfile)));
221 break;
222 case WAL:
223 Path oldLogsDir = new Path(outputRoot, HConstants.HREGION_OLDLOGDIR_NAME);
224 path = new Path(oldLogsDir, inputInfo.getWalName());
225 break;
226 default:
227 throw new IOException("Invalid File Type: " + inputInfo.getType().toString());
228 }
229 return new Path(outputArchive, path);
230 }
231
232
233
234
235 private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
236 throws IOException {
237 if (testFailures) {
238 if (context.getConfiguration().getBoolean(CONF_TEST_RETRY, false)) {
239 if (random == null) {
240 random = new Random();
241 }
242
243
244
245
246 if (random.nextFloat() < 0.03) {
247 throw new IOException("TEST RETRY FAILURE: Unable to copy input=" + inputInfo
248 + " time=" + System.currentTimeMillis());
249 }
250 } else {
251 context.getCounter(Counter.COPY_FAILED).increment(1);
252 throw new IOException("TEST FAILURE: Unable to copy input=" + inputInfo);
253 }
254 }
255 }
256
257 private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
258 final Path outputPath) throws IOException {
259 injectTestFailure(context, inputInfo);
260
261
262 FileStatus inputStat = getSourceFileStatus(context, inputInfo);
263
264
265 if (outputFs.exists(outputPath)) {
266 FileStatus outputStat = outputFs.getFileStatus(outputPath);
267 if (outputStat != null && sameFile(inputStat, outputStat)) {
268 LOG.info("Skip copy " + inputStat.getPath() + " to " + outputPath + ", same file.");
269 context.getCounter(Counter.FILES_SKIPPED).increment(1);
270 context.getCounter(Counter.BYTES_SKIPPED).increment(inputStat.getLen());
271 return;
272 }
273 }
274
275 InputStream in = openSourceFile(context, inputInfo);
276 int bandwidthMB = context.getConfiguration().getInt(CONF_BANDWIDTH_MB, 100);
277 if (Integer.MAX_VALUE != bandwidthMB) {
278 in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024 * 1024);
279 }
280
281 try {
282 context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen());
283
284
285 createOutputPath(outputPath.getParent());
286 FSDataOutputStream out = outputFs.create(outputPath, true);
287 try {
288 copyData(context, inputStat.getPath(), in, outputPath, out, inputStat.getLen());
289 } finally {
290 out.close();
291 }
292
293
294 if (!preserveAttributes(outputPath, inputStat)) {
295 LOG.warn("You may have to run manually chown on: " + outputPath);
296 }
297 } finally {
298 in.close();
299 }
300 }
301
302
303
304
305 private void createOutputPath(final Path path) throws IOException {
306 if (filesUser == null && filesGroup == null) {
307 outputFs.mkdirs(path);
308 } else {
309 Path parent = path.getParent();
310 if (!outputFs.exists(parent) && parent.getParent() != null) {
311 createOutputPath(parent);
312 }
313 outputFs.mkdirs(path);
314
315 outputFs.setOwner(path, filesUser, filesGroup);
316 }
317 }
318
319
320
321
322
323
324
325
326
327 private boolean preserveAttributes(final Path path, final FileStatus refStat) {
328 FileStatus stat;
329 try {
330 stat = outputFs.getFileStatus(path);
331 } catch (IOException e) {
332 LOG.warn("Unable to get the status for file=" + path);
333 return false;
334 }
335
336 try {
337 if (filesMode > 0 && stat.getPermission().toShort() != filesMode) {
338 outputFs.setPermission(path, new FsPermission(filesMode));
339 } else if (refStat != null && !stat.getPermission().equals(refStat.getPermission())) {
340 outputFs.setPermission(path, refStat.getPermission());
341 }
342 } catch (IOException e) {
343 LOG.warn("Unable to set the permission for file="+ stat.getPath() +": "+ e.getMessage());
344 return false;
345 }
346
347 boolean hasRefStat = (refStat != null);
348 String user = stringIsNotEmpty(filesUser) || !hasRefStat ? filesUser : refStat.getOwner();
349 String group = stringIsNotEmpty(filesGroup) || !hasRefStat ? filesGroup : refStat.getGroup();
350 if (stringIsNotEmpty(user) || stringIsNotEmpty(group)) {
351 try {
352 if (!(user.equals(stat.getOwner()) && group.equals(stat.getGroup()))) {
353 outputFs.setOwner(path, user, group);
354 }
355 } catch (IOException e) {
356 LOG.warn("Unable to set the owner/group for file="+ stat.getPath() +": "+ e.getMessage());
357 LOG.warn("The user/group may not exist on the destination cluster: user=" +
358 user + " group=" + group);
359 return false;
360 }
361 }
362
363 return true;
364 }
365
366 private boolean stringIsNotEmpty(final String str) {
367 return str != null && str.length() > 0;
368 }
369
370 private void copyData(final Context context,
371 final Path inputPath, final InputStream in,
372 final Path outputPath, final FSDataOutputStream out,
373 final long inputFileSize)
374 throws IOException {
375 final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) +
376 " (%.1f%%)";
377
378 try {
379 byte[] buffer = new byte[bufferSize];
380 long totalBytesWritten = 0;
381 int reportBytes = 0;
382 int bytesRead;
383
384 long stime = System.currentTimeMillis();
385 while ((bytesRead = in.read(buffer)) > 0) {
386 out.write(buffer, 0, bytesRead);
387 totalBytesWritten += bytesRead;
388 reportBytes += bytesRead;
389
390 if (reportBytes >= REPORT_SIZE) {
391 context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
392 context.setStatus(String.format(statusMessage,
393 StringUtils.humanReadableInt(totalBytesWritten),
394 (totalBytesWritten/(float)inputFileSize) * 100.0f) +
395 " from " + inputPath + " to " + outputPath);
396 reportBytes = 0;
397 }
398 }
399 long etime = System.currentTimeMillis();
400
401 context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
402 context.setStatus(String.format(statusMessage,
403 StringUtils.humanReadableInt(totalBytesWritten),
404 (totalBytesWritten/(float)inputFileSize) * 100.0f) +
405 " from " + inputPath + " to " + outputPath);
406
407
408 if (totalBytesWritten != inputFileSize) {
409 String msg = "number of bytes copied not matching copied=" + totalBytesWritten +
410 " expected=" + inputFileSize + " for file=" + inputPath;
411 throw new IOException(msg);
412 }
413
414 LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
415 LOG.info("size=" + totalBytesWritten +
416 " (" + StringUtils.humanReadableInt(totalBytesWritten) + ")" +
417 " time=" + StringUtils.formatTimeDiff(etime, stime) +
418 String.format(" %.3fM/sec", (totalBytesWritten / ((etime - stime)/1000.0))/1048576.0));
419 context.getCounter(Counter.FILES_COPIED).increment(1);
420 } catch (IOException e) {
421 LOG.error("Error copying " + inputPath + " to " + outputPath, e);
422 context.getCounter(Counter.COPY_FAILED).increment(1);
423 throw e;
424 }
425 }
426
427
428
429
430
431
432 private FSDataInputStream openSourceFile(Context context, final SnapshotFileInfo fileInfo)
433 throws IOException {
434 try {
435 FileLink link = null;
436 switch (fileInfo.getType()) {
437 case HFILE:
438 Path inputPath = new Path(fileInfo.getHfile());
439 link = new HFileLink(inputRoot, inputArchive, inputPath);
440 break;
441 case WAL:
442 String serverName = fileInfo.getWalServer();
443 String logName = fileInfo.getWalName();
444 link = new HLogLink(inputRoot, serverName, logName);
445 break;
446 default:
447 throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
448 }
449 return link.open(inputFs);
450 } catch (IOException e) {
451 context.getCounter(Counter.MISSING_FILES).increment(1);
452 LOG.error("Unable to open source file=" + fileInfo.toString(), e);
453 throw e;
454 }
455 }
456
457 private FileStatus getSourceFileStatus(Context context, final SnapshotFileInfo fileInfo)
458 throws IOException {
459 try {
460 FileLink link = null;
461 switch (fileInfo.getType()) {
462 case HFILE:
463 Path inputPath = new Path(fileInfo.getHfile());
464 link = new HFileLink(inputRoot, inputArchive, inputPath);
465 break;
466 case WAL:
467 link = new HLogLink(inputRoot, fileInfo.getWalServer(), fileInfo.getWalName());
468 break;
469 default:
470 throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
471 }
472 return link.getFileStatus(inputFs);
473 } catch (FileNotFoundException e) {
474 context.getCounter(Counter.MISSING_FILES).increment(1);
475 LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
476 throw e;
477 } catch (IOException e) {
478 LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
479 throw e;
480 }
481 }
482
483 private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
484 try {
485 return fs.getFileChecksum(path);
486 } catch (IOException e) {
487 LOG.warn("Unable to get checksum for file=" + path, e);
488 return null;
489 }
490 }
491
492
493
494
495
496 private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) {
497
498 if (inputStat.getLen() != outputStat.getLen()) return false;
499
500
501 if (!verifyChecksum) return true;
502
503
504 FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
505 if (inChecksum == null) return false;
506
507 FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
508 if (outChecksum == null) return false;
509
510 return inChecksum.equals(outChecksum);
511 }
512 }
513
514
515
516
517
518
519
520
521
522 private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configuration conf,
523 final FileSystem fs, final Path snapshotDir) throws IOException {
524 SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
525
526 final List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<Pair<SnapshotFileInfo, Long>>();
527 final TableName table = TableName.valueOf(snapshotDesc.getTable());
528
529
530 LOG.info("Loading Snapshot '" + snapshotDesc.getName() + "' hfile list");
531 SnapshotReferenceUtil.visitReferencedFiles(conf, fs, snapshotDir, snapshotDesc,
532 new SnapshotReferenceUtil.SnapshotVisitor() {
533 @Override
534 public void storeFile(final HRegionInfo regionInfo, final String family,
535 final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
536 if (storeFile.hasReference()) {
537
538 } else {
539 String region = regionInfo.getEncodedName();
540 String hfile = storeFile.getName();
541 Path path = HFileLink.createPath(table, region, family, hfile);
542
543 SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
544 .setType(SnapshotFileInfo.Type.HFILE)
545 .setHfile(path.toString())
546 .build();
547
548 long size;
549 if (storeFile.hasFileSize()) {
550 size = storeFile.getFileSize();
551 } else {
552 size = new HFileLink(conf, path).getFileStatus(fs).getLen();
553 }
554 files.add(new Pair<SnapshotFileInfo, Long>(fileInfo, size));
555 }
556 }
557
558 @Override
559 public void logFile (final String server, final String logfile)
560 throws IOException {
561 SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
562 .setType(SnapshotFileInfo.Type.WAL)
563 .setWalServer(server)
564 .setWalName(logfile)
565 .build();
566
567 long size = new HLogLink(conf, server, logfile).getFileStatus(fs).getLen();
568 files.add(new Pair<SnapshotFileInfo, Long>(fileInfo, size));
569 }
570 });
571
572 return files;
573 }
574
575
576
577
578
579
580
581
582
583 static List<List<Pair<SnapshotFileInfo, Long>>> getBalancedSplits(
584 final List<Pair<SnapshotFileInfo, Long>> files, final int ngroups) {
585
586 Collections.sort(files, new Comparator<Pair<SnapshotFileInfo, Long>>() {
587 public int compare(Pair<SnapshotFileInfo, Long> a, Pair<SnapshotFileInfo, Long> b) {
588 long r = a.getSecond() - b.getSecond();
589 return (r < 0) ? -1 : ((r > 0) ? 1 : 0);
590 }
591 });
592
593
594 List<List<Pair<SnapshotFileInfo, Long>>> fileGroups =
595 new LinkedList<List<Pair<SnapshotFileInfo, Long>>>();
596 long[] sizeGroups = new long[ngroups];
597 int hi = files.size() - 1;
598 int lo = 0;
599
600 List<Pair<SnapshotFileInfo, Long>> group;
601 int dir = 1;
602 int g = 0;
603
604 while (hi >= lo) {
605 if (g == fileGroups.size()) {
606 group = new LinkedList<Pair<SnapshotFileInfo, Long>>();
607 fileGroups.add(group);
608 } else {
609 group = fileGroups.get(g);
610 }
611
612 Pair<SnapshotFileInfo, Long> fileInfo = files.get(hi--);
613
614
615 sizeGroups[g] += fileInfo.getSecond();
616 group.add(fileInfo);
617
618
619 g += dir;
620 if (g == ngroups) {
621 dir = -1;
622 g = ngroups - 1;
623 } else if (g < 0) {
624 dir = 1;
625 g = 0;
626 }
627 }
628
629 if (LOG.isDebugEnabled()) {
630 for (int i = 0; i < sizeGroups.length; ++i) {
631 LOG.debug("export split=" + i + " size=" + StringUtils.humanReadableInt(sizeGroups[i]));
632 }
633 }
634
635 return fileGroups;
636 }
637
638 private static class ExportSnapshotInputFormat extends InputFormat<BytesWritable, NullWritable> {
639 @Override
640 public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit split,
641 TaskAttemptContext tac) throws IOException, InterruptedException {
642 return new ExportSnapshotRecordReader(((ExportSnapshotInputSplit)split).getSplitKeys());
643 }
644
645 @Override
646 public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
647 Configuration conf = context.getConfiguration();
648 String snapshotName = conf.get(CONF_SNAPSHOT_NAME);
649 Path snapshotDir = new Path(conf.get(CONF_SNAPSHOT_DIR));
650 FileSystem fs = FileSystem.get(snapshotDir.toUri(), conf);
651
652 List<Pair<SnapshotFileInfo, Long>> snapshotFiles = getSnapshotFiles(conf, fs, snapshotDir);
653 int mappers = conf.getInt(CONF_NUM_SPLITS, 0);
654 if (mappers == 0 && snapshotFiles.size() > 0) {
655 mappers = 1 + (snapshotFiles.size() / conf.getInt(CONF_MAP_GROUP, 10));
656 mappers = Math.min(mappers, snapshotFiles.size());
657 conf.setInt(CONF_NUM_SPLITS, mappers);
658 conf.setInt(MR_NUM_MAPS, mappers);
659 }
660
661 List<List<Pair<SnapshotFileInfo, Long>>> groups = getBalancedSplits(snapshotFiles, mappers);
662 List<InputSplit> splits = new ArrayList(groups.size());
663 for (List<Pair<SnapshotFileInfo, Long>> files: groups) {
664 splits.add(new ExportSnapshotInputSplit(files));
665 }
666 return splits;
667 }
668
669 private static class ExportSnapshotInputSplit extends InputSplit implements Writable {
670 private List<Pair<BytesWritable, Long>> files;
671 private long length;
672
673 public ExportSnapshotInputSplit() {
674 this.files = null;
675 }
676
677 public ExportSnapshotInputSplit(final List<Pair<SnapshotFileInfo, Long>> snapshotFiles) {
678 this.files = new ArrayList(snapshotFiles.size());
679 for (Pair<SnapshotFileInfo, Long> fileInfo: snapshotFiles) {
680 this.files.add(new Pair<BytesWritable, Long>(
681 new BytesWritable(fileInfo.getFirst().toByteArray()), fileInfo.getSecond()));
682 this.length += fileInfo.getSecond();
683 }
684 }
685
686 private List<Pair<BytesWritable, Long>> getSplitKeys() {
687 return files;
688 }
689
690 @Override
691 public long getLength() throws IOException, InterruptedException {
692 return length;
693 }
694
695 @Override
696 public String[] getLocations() throws IOException, InterruptedException {
697 return new String[] {};
698 }
699
700 @Override
701 public void readFields(DataInput in) throws IOException {
702 int count = in.readInt();
703 files = new ArrayList<Pair<BytesWritable, Long>>(count);
704 length = 0;
705 for (int i = 0; i < count; ++i) {
706 BytesWritable fileInfo = new BytesWritable();
707 fileInfo.readFields(in);
708 long size = in.readLong();
709 files.add(new Pair<BytesWritable, Long>(fileInfo, size));
710 length += size;
711 }
712 }
713
714 @Override
715 public void write(DataOutput out) throws IOException {
716 out.writeInt(files.size());
717 for (final Pair<BytesWritable, Long> fileInfo: files) {
718 fileInfo.getFirst().write(out);
719 out.writeLong(fileInfo.getSecond());
720 }
721 }
722 }
723
724 private static class ExportSnapshotRecordReader
725 extends RecordReader<BytesWritable, NullWritable> {
726 private final List<Pair<BytesWritable, Long>> files;
727 private long totalSize = 0;
728 private long procSize = 0;
729 private int index = -1;
730
731 ExportSnapshotRecordReader(final List<Pair<BytesWritable, Long>> files) {
732 this.files = files;
733 for (Pair<BytesWritable, Long> fileInfo: files) {
734 totalSize += fileInfo.getSecond();
735 }
736 }
737
738 @Override
739 public void close() { }
740
741 @Override
742 public BytesWritable getCurrentKey() { return files.get(index).getFirst(); }
743
744 @Override
745 public NullWritable getCurrentValue() { return NullWritable.get(); }
746
747 @Override
748 public float getProgress() { return (float)procSize / totalSize; }
749
750 @Override
751 public void initialize(InputSplit split, TaskAttemptContext tac) { }
752
753 @Override
754 public boolean nextKeyValue() {
755 if (index >= 0) {
756 procSize += files.get(index).getSecond();
757 }
758 return(++index < files.size());
759 }
760 }
761 }
762
763
764
765
766
767
768
769
770 private void runCopyJob(final Path inputRoot, final Path outputRoot,
771 final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
772 final String filesUser, final String filesGroup, final int filesMode,
773 final int mappers, final int bandwidthMB)
774 throws IOException, InterruptedException, ClassNotFoundException {
775 Configuration conf = getConf();
776 if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
777 if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
778 if (mappers > 0) {
779 conf.setInt(CONF_NUM_SPLITS, mappers);
780 conf.setInt(MR_NUM_MAPS, mappers);
781 }
782 conf.setInt(CONF_FILES_MODE, filesMode);
783 conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
784 conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
785 conf.set(CONF_INPUT_ROOT, inputRoot.toString());
786 conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
787 conf.set(CONF_SNAPSHOT_NAME, snapshotName);
788 conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());
789
790 Job job = new Job(conf);
791 job.setJobName("ExportSnapshot-" + snapshotName);
792 job.setJarByClass(ExportSnapshot.class);
793 TableMapReduceUtil.addDependencyJars(job);
794 job.setMapperClass(ExportMapper.class);
795 job.setInputFormatClass(ExportSnapshotInputFormat.class);
796 job.setOutputFormatClass(NullOutputFormat.class);
797 job.setMapSpeculativeExecution(false);
798 job.setNumReduceTasks(0);
799
800
801 TokenCache.obtainTokensForNamenodes(job.getCredentials(),
802 new Path[] { inputRoot, outputRoot }, conf);
803
804
805 if (!job.waitForCompletion(true)) {
806
807
808 throw new ExportSnapshotException("Copy Files Map-Reduce Job failed");
809 }
810 }
811
812 private void verifySnapshot(final Configuration baseConf,
813 final FileSystem fs, final Path rootDir, final Path snapshotDir) throws IOException {
814
815 Configuration conf = new Configuration(baseConf);
816 FSUtils.setRootDir(conf, rootDir);
817 FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
818 SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
819 SnapshotReferenceUtil.verifySnapshot(conf, fs, snapshotDir, snapshotDesc);
820 }
821
822
823
824
825 private void setOwner(final FileSystem fs, final Path path, final String user,
826 final String group, final boolean recursive) throws IOException {
827 if (user != null || group != null) {
828 if (recursive && fs.isDirectory(path)) {
829 for (FileStatus child : fs.listStatus(path)) {
830 setOwner(fs, child.getPath(), user, group, recursive);
831 }
832 }
833 fs.setOwner(path, user, group);
834 }
835 }
836
837
838
839
840
841 @Override
842 public int run(String[] args) throws IOException {
843 boolean verifyTarget = true;
844 boolean verifyChecksum = true;
845 String snapshotName = null;
846 String targetName = null;
847 boolean overwrite = false;
848 String filesGroup = null;
849 String filesUser = null;
850 Path outputRoot = null;
851 int bandwidthMB = Integer.MAX_VALUE;
852 int filesMode = 0;
853 int mappers = 0;
854
855 Configuration conf = getConf();
856 Path inputRoot = FSUtils.getRootDir(conf);
857
858
859 for (int i = 0; i < args.length; i++) {
860 String cmd = args[i];
861 if (cmd.equals("-snapshot")) {
862 snapshotName = args[++i];
863 } else if (cmd.equals("-target")) {
864 targetName = args[++i];
865 } else if (cmd.equals("-copy-to")) {
866 outputRoot = new Path(args[++i]);
867 } else if (cmd.equals("-copy-from")) {
868 inputRoot = new Path(args[++i]);
869 FSUtils.setRootDir(conf, inputRoot);
870 } else if (cmd.equals("-no-checksum-verify")) {
871 verifyChecksum = false;
872 } else if (cmd.equals("-no-target-verify")) {
873 verifyTarget = false;
874 } else if (cmd.equals("-mappers")) {
875 mappers = Integer.parseInt(args[++i]);
876 } else if (cmd.equals("-chuser")) {
877 filesUser = args[++i];
878 } else if (cmd.equals("-chgroup")) {
879 filesGroup = args[++i];
880 } else if (cmd.equals("-bandwidth")) {
881 bandwidthMB = Integer.parseInt(args[++i]);
882 } else if (cmd.equals("-chmod")) {
883 filesMode = Integer.parseInt(args[++i], 8);
884 } else if (cmd.equals("-overwrite")) {
885 overwrite = true;
886 } else if (cmd.equals("-h") || cmd.equals("--help")) {
887 printUsageAndExit();
888 } else {
889 System.err.println("UNEXPECTED: " + cmd);
890 printUsageAndExit();
891 }
892 }
893
894
895 if (snapshotName == null) {
896 System.err.println("Snapshot name not provided.");
897 printUsageAndExit();
898 }
899
900 if (outputRoot == null) {
901 System.err.println("Destination file-system not provided.");
902 printUsageAndExit();
903 }
904
905 if (targetName == null) {
906 targetName = snapshotName;
907 }
908
909 conf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
910 FileSystem inputFs = FileSystem.get(inputRoot.toUri(), conf);
911 LOG.debug("inputFs=" + inputFs.getUri().toString() + " inputRoot=" + inputRoot);
912 conf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
913 FileSystem outputFs = FileSystem.get(outputRoot.toUri(), conf);
914 LOG.debug("outputFs=" + outputFs.getUri().toString() + " outputRoot=" + outputRoot.toString());
915
916 boolean skipTmp = conf.getBoolean(CONF_SKIP_TMP, false);
917
918 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, inputRoot);
919 Path snapshotTmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(targetName, outputRoot);
920 Path outputSnapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(targetName, outputRoot);
921 Path initialOutputSnapshotDir = skipTmp ? outputSnapshotDir : snapshotTmpDir;
922
923
924 if (outputFs.exists(outputSnapshotDir)) {
925 if (overwrite) {
926 if (!outputFs.delete(outputSnapshotDir, true)) {
927 System.err.println("Unable to remove existing snapshot directory: " + outputSnapshotDir);
928 return 1;
929 }
930 } else {
931 System.err.println("The snapshot '" + targetName +
932 "' already exists in the destination: " + outputSnapshotDir);
933 return 1;
934 }
935 }
936
937 if (!skipTmp) {
938
939 if (outputFs.exists(snapshotTmpDir)) {
940 if (overwrite) {
941 if (!outputFs.delete(snapshotTmpDir, true)) {
942 System.err.println("Unable to remove existing snapshot tmp directory: "+snapshotTmpDir);
943 return 1;
944 }
945 } else {
946 System.err.println("A snapshot with the same name '"+ targetName +"' may be in-progress");
947 System.err.println("Please check "+snapshotTmpDir+". If the snapshot has completed, ");
948 System.err.println("consider removing "+snapshotTmpDir+" by using the -overwrite option");
949 return 1;
950 }
951 }
952 }
953
954
955
956
957 try {
958 LOG.info("Copy Snapshot Manifest");
959 FileUtil.copy(inputFs, snapshotDir, outputFs, initialOutputSnapshotDir, false, false, conf);
960 if (filesUser != null || filesGroup != null) {
961 setOwner(outputFs, snapshotTmpDir, filesUser, filesGroup, true);
962 }
963 } catch (IOException e) {
964 throw new ExportSnapshotException("Failed to copy the snapshot directory: from=" +
965 snapshotDir + " to=" + initialOutputSnapshotDir, e);
966 }
967
968
969 if (!targetName.equals(snapshotName)) {
970 SnapshotDescription snapshotDesc =
971 SnapshotDescriptionUtils.readSnapshotInfo(inputFs, snapshotDir)
972 .toBuilder()
973 .setName(targetName)
974 .build();
975 SnapshotDescriptionUtils.writeSnapshotInfo(snapshotDesc, snapshotTmpDir, outputFs);
976 }
977
978
979
980
981 try {
982 runCopyJob(inputRoot, outputRoot, snapshotName, snapshotDir, verifyChecksum,
983 filesUser, filesGroup, filesMode, mappers, bandwidthMB);
984
985 LOG.info("Finalize the Snapshot Export");
986 if (!skipTmp) {
987
988 if (!outputFs.rename(snapshotTmpDir, outputSnapshotDir)) {
989 throw new ExportSnapshotException("Unable to rename snapshot directory from=" +
990 snapshotTmpDir + " to=" + outputSnapshotDir);
991 }
992 }
993
994
995 if (verifyTarget) {
996 LOG.info("Verify snapshot integrity");
997 verifySnapshot(conf, outputFs, outputRoot, outputSnapshotDir);
998 }
999
1000 LOG.info("Export Completed: " + targetName);
1001 return 0;
1002 } catch (Exception e) {
1003 LOG.error("Snapshot export failed", e);
1004 if (!skipTmp) {
1005 outputFs.delete(snapshotTmpDir, true);
1006 }
1007 outputFs.delete(outputSnapshotDir, true);
1008 return 1;
1009 } finally {
1010 IOUtils.closeStream(inputFs);
1011 IOUtils.closeStream(outputFs);
1012 }
1013 }
1014
1015
1016 private void printUsageAndExit() {
1017 System.err.printf("Usage: bin/hbase %s [options]%n", getClass().getName());
1018 System.err.println(" where [options] are:");
1019 System.err.println(" -h|-help Show this help and exit.");
1020 System.err.println(" -snapshot NAME Snapshot to restore.");
1021 System.err.println(" -copy-to NAME Remote destination hdfs://");
1022 System.err.println(" -copy-from NAME Input folder hdfs:// (default hbase.rootdir)");
1023 System.err.println(" -no-checksum-verify Do not verify checksum, use name+length only.");
1024 System.err.println(" -no-target-verify Do not verify the integrity of the \\" +
1025 "exported snapshot.");
1026 System.err.println(" -overwrite Rewrite the snapshot manifest if already exists");
1027 System.err.println(" -chuser USERNAME Change the owner of the files to the specified one.");
1028 System.err.println(" -chgroup GROUP Change the group of the files to the specified one.");
1029 System.err.println(" -chmod MODE Change the permission of the files to the specified one.");
1030 System.err.println(" -mappers Number of mappers to use during the copy (mapreduce.job.maps).");
1031 System.err.println();
1032 System.err.println("Examples:");
1033 System.err.println(" hbase " + getClass().getName() + " \\");
1034 System.err.println(" -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase \\");
1035 System.err.println(" -chuser MyUser -chgroup MyGroup -chmod 700 -mappers 16");
1036 System.err.println();
1037 System.err.println(" hbase " + getClass().getName() + " \\");
1038 System.err.println(" -snapshot MySnapshot -copy-from hdfs://srv2:8082/hbase \\");
1039 System.err.println(" -copy-to hdfs://srv1:50070/hbase \\");
1040 System.exit(1);
1041 }
1042
1043
1044
1045
1046
1047
1048
1049
1050 static int innerMain(final Configuration conf, final String [] args) throws Exception {
1051 return ToolRunner.run(conf, new ExportSnapshot(), args);
1052 }
1053
1054 public static void main(String[] args) throws Exception {
1055 System.exit(innerMain(HBaseConfiguration.create(), args));
1056 }
1057 }