/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.snapshot;

import java.io.BufferedInputStream;
import java.io.FileNotFoundException;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.io.FileLink;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.HLogLink;
import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
import org.apache.hadoop.hbase.mapreduce.JobUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
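/**
 * Export the specified snapshot to a given FileSystem.
 *
 * The .snapshot/name folder is copied to the destination cluster
 * and then all the hfiles/hlogs are copied using a Map-Reduce Job in the .archive/ location.
 * When everything is done, the second cluster can restore the snapshot.
 *
 * Example invocation (the same options are printed by the usage help):
 * <pre>
 *   hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot \
 *     -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase -mappers 16
 * </pre>
 */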
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class ExportSnapshot extends Configured implements Tool {
  public static final String NAME = "exportsnapshot";
  /** Configuration prefix for overrides for the source filesystem */
  public static final String CONF_SOURCE_PREFIX = NAME + ".from.";
  /** Configuration prefix for overrides for the destination filesystem */
  public static final String CONF_DEST_PREFIX = NAME + ".to.";

  private static final Log LOG = LogFactory.getLog(ExportSnapshot.class);

  private static final String MR_NUM_MAPS = "mapreduce.job.maps";
  private static final String CONF_NUM_SPLITS = "snapshot.export.format.splits";
  private static final String CONF_SNAPSHOT_NAME = "snapshot.export.format.snapshot.name";
  private static final String CONF_SNAPSHOT_DIR = "snapshot.export.format.snapshot.dir";
  private static final String CONF_FILES_USER = "snapshot.export.files.attributes.user";
  private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group";
  private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode";
  private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify";
  private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root";
  private static final String CONF_INPUT_ROOT = "snapshot.export.input.root";
  private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
  private static final String CONF_BUFFER_SIZE = "snapshot.export.buffer.size";
  private static final String CONF_MAP_GROUP = "snapshot.export.default.map.group";
  protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";

  static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
  static final String CONF_TEST_RETRY = "test.snapshot.export.failure.retry";

  private static final String INPUT_FOLDER_PREFIX = "export-files.";

  // Export Map-Reduce Counters, to keep track of the progress
  public enum Counter {
    MISSING_FILES, FILES_COPIED, FILES_SKIPPED, COPY_FAILED,
    BYTES_EXPECTED, BYTES_SKIPPED, BYTES_COPIED
  }
  private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
                                                   NullWritable, NullWritable> {
    final static int REPORT_SIZE = 1 * 1024 * 1024;
    final static int BUFFER_SIZE = 64 * 1024;

    private boolean testFailures;
    private Random random;

    private boolean verifyChecksum;
    private String filesGroup;
    private String filesUser;
    private short filesMode;
    private int bufferSize;

    private FileSystem outputFs;
    private Path outputArchive;
    private Path outputRoot;

    private FileSystem inputFs;
    private Path inputArchive;
    private Path inputRoot;

    @Override
    public void setup(Context context) throws IOException {
      Configuration conf = context.getConfiguration();
      Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
      Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);

      verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);

      filesGroup = conf.get(CONF_FILES_GROUP);
      filesUser = conf.get(CONF_FILES_USER);
      filesMode = (short)conf.getInt(CONF_FILES_MODE, 0);
      outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
      inputRoot = new Path(conf.get(CONF_INPUT_ROOT));

      inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
      outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);

      testFailures = conf.getBoolean(CONF_TEST_FAILURE, false);

      try {
        srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
        inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
      } catch (IOException e) {
        throw new IOException("Could not get the input FileSystem with root=" + inputRoot, e);
      }

      try {
        destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
        outputFs = FileSystem.get(outputRoot.toUri(), destConf);
      } catch (IOException e) {
        throw new IOException("Could not get the output FileSystem with root=" + outputRoot, e);
      }

      int defaultBlockSize = Math.max((int) outputFs.getDefaultBlockSize(), BUFFER_SIZE);
      bufferSize = conf.getInt(CONF_BUFFER_SIZE, defaultBlockSize);
      LOG.info("Using bufferSize=" + StringUtils.humanReadableInt(bufferSize));

      for (Counter c : Counter.values()) {
        context.getCounter(c).increment(0);
      }
    }

    byte[] copyBytes(BytesWritable bw) {
      byte[] result = new byte[bw.getLength()];
      System.arraycopy(bw.getBytes(), 0, result, 0, bw.getLength());
      return result;
    }

    @Override
    protected void cleanup(Context context) {
      IOUtils.closeStream(inputFs);
      IOUtils.closeStream(outputFs);
    }

    @Override
    public void map(BytesWritable key, NullWritable value, Context context)
        throws InterruptedException, IOException {
      SnapshotFileInfo inputInfo = SnapshotFileInfo.parseFrom(copyBytes(key));
      Path outputPath = getOutputPath(inputInfo);

      copyFile(context, inputInfo, outputPath);
    }
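    /**
     * Returns the location where the inputPath will be copied.
     */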
    private Path getOutputPath(final SnapshotFileInfo inputInfo) throws IOException {
      Path path = null;
      switch (inputInfo.getType()) {
        case HFILE:
          Path inputPath = new Path(inputInfo.getHfile());
          String family = inputPath.getParent().getName();
          TableName table = HFileLink.getReferencedTableName(inputPath.getName());
          String region = HFileLink.getReferencedRegionName(inputPath.getName());
          String hfile = HFileLink.getReferencedHFileName(inputPath.getName());
          path = new Path(FSUtils.getTableDir(new Path("./"), table),
              new Path(region, new Path(family, hfile)));
          break;
        case WAL:
          Path oldLogsDir = new Path(outputRoot, HConstants.HREGION_OLDLOGDIR_NAME);
          path = new Path(oldLogsDir, inputInfo.getWalName());
          break;
        default:
          throw new IOException("Invalid File Type: " + inputInfo.getType().toString());
      }
      return new Path(outputArchive, path);
    }
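    /**
     * Used by the tests to inject failures: either an unrecoverable failure,
     * or (when CONF_TEST_RETRY is set) an intermittent failure that the job should retry.
     */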
    private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
        throws IOException {
      if (testFailures) {
        if (context.getConfiguration().getBoolean(CONF_TEST_RETRY, false)) {
          if (random == null) {
            random = new Random();
          }

          // Fail the copy a small percentage of the time, so that the task retry
          // logic gets exercised without making the copy fail permanently.
          if (random.nextFloat() < 0.03) {
            throw new IOException("TEST RETRY FAILURE: Unable to copy input=" + inputInfo
                + " time=" + System.currentTimeMillis());
          }
        } else {
          context.getCounter(Counter.COPY_FAILED).increment(1);
          throw new IOException("TEST FAILURE: Unable to copy input=" + inputInfo);
        }
      }
    }
    private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
        final Path outputPath) throws IOException {
      injectTestFailure(context, inputInfo);

      // Get the file information
      FileStatus inputStat = getSourceFileStatus(context, inputInfo);

      // Verify if the output file exists and is the same that we want to copy
      if (outputFs.exists(outputPath)) {
        FileStatus outputStat = outputFs.getFileStatus(outputPath);
        if (outputStat != null && sameFile(inputStat, outputStat)) {
          LOG.info("Skip copy " + inputStat.getPath() + " to " + outputPath + ", same file.");
          context.getCounter(Counter.FILES_SKIPPED).increment(1);
          context.getCounter(Counter.BYTES_SKIPPED).increment(inputStat.getLen());
          return;
        }
      }

      InputStream in = openSourceFile(context, inputInfo);
      int bandwidthMB = context.getConfiguration().getInt(CONF_BANDWIDTH_MB, 100);
      if (Integer.MAX_VALUE != bandwidthMB) {
        in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024 * 1024);
      }

      try {
        context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen());

        // Ensure that the output folder is there and copy the file
        createOutputPath(outputPath.getParent());
        FSDataOutputStream out = outputFs.create(outputPath, true);
        try {
          copyData(context, inputStat.getPath(), in, outputPath, out, inputStat.getLen());
        } finally {
          out.close();
        }

        // Try to preserve the source file attributes
        if (!preserveAttributes(outputPath, inputStat)) {
          LOG.warn("You may have to run manually chown on: " + outputPath);
        }
      } finally {
        in.close();
      }
    }
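    /**
     * Create the output folder and optionally set ownership.
     */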
    private void createOutputPath(final Path path) throws IOException {
      if (filesUser == null && filesGroup == null) {
        outputFs.mkdirs(path);
      } else {
        Path parent = path.getParent();
        if (!outputFs.exists(parent) && parent.getParent() != null) {
          createOutputPath(parent);
        }
        outputFs.mkdirs(path);

        outputFs.setOwner(path, filesUser, filesGroup);
      }
    }
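    /**
     * Try to preserve the files attribute selected by the user copying them from the source file.
     * This is only required when you are exporting as a different user than "hbase" or on a
     * system that doesn't have the "hbase" user.
     *
     * @param path directory/file to change the attributes of
     * @param refStat source file status to copy attributes from
     * @return false if some attribute (e.g. permission or owner/group) could not be set
     *         and the user should be warned about it.
     */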
    private boolean preserveAttributes(final Path path, final FileStatus refStat) {
      FileStatus stat;
      try {
        stat = outputFs.getFileStatus(path);
      } catch (IOException e) {
        LOG.warn("Unable to get the status for file=" + path);
        return false;
      }

      try {
        if (filesMode > 0 && stat.getPermission().toShort() != filesMode) {
          outputFs.setPermission(path, new FsPermission(filesMode));
        } else if (refStat != null && !stat.getPermission().equals(refStat.getPermission())) {
          outputFs.setPermission(path, refStat.getPermission());
        }
      } catch (IOException e) {
        LOG.warn("Unable to set the permission for file=" + stat.getPath() + ": " + e.getMessage());
        return false;
      }

      boolean hasRefStat = (refStat != null);
      String user = stringIsNotEmpty(filesUser) || !hasRefStat ? filesUser : refStat.getOwner();
      String group = stringIsNotEmpty(filesGroup) || !hasRefStat ? filesGroup : refStat.getGroup();
      if (stringIsNotEmpty(user) || stringIsNotEmpty(group)) {
        try {
          if (!(user.equals(stat.getOwner()) && group.equals(stat.getGroup()))) {
            outputFs.setOwner(path, user, group);
          }
        } catch (IOException e) {
          LOG.warn("Unable to set the owner/group for file=" + stat.getPath() + ": " + e.getMessage());
          LOG.warn("The user/group may not exist on the destination cluster: user=" +
              user + " group=" + group);
          return false;
        }
      }

      return true;
    }

    private boolean stringIsNotEmpty(final String str) {
      return str != null && str.length() > 0;
    }
    private void copyData(final Context context,
        final Path inputPath, final InputStream in,
        final Path outputPath, final FSDataOutputStream out,
        final long inputFileSize)
        throws IOException {
      final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) +
          " (%.1f%%)";

      try {
        byte[] buffer = new byte[bufferSize];
        long totalBytesWritten = 0;
        int reportBytes = 0;
        int bytesRead;

        long stime = System.currentTimeMillis();
        while ((bytesRead = in.read(buffer)) > 0) {
          out.write(buffer, 0, bytesRead);
          totalBytesWritten += bytesRead;
          reportBytes += bytesRead;

          if (reportBytes >= REPORT_SIZE) {
            context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
            context.setStatus(String.format(statusMessage,
                StringUtils.humanReadableInt(totalBytesWritten),
                (totalBytesWritten/(float)inputFileSize) * 100.0f) +
                " from " + inputPath + " to " + outputPath);
            reportBytes = 0;
          }
        }
        long etime = System.currentTimeMillis();

        context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
        context.setStatus(String.format(statusMessage,
            StringUtils.humanReadableInt(totalBytesWritten),
            (totalBytesWritten/(float)inputFileSize) * 100.0f) +
            " from " + inputPath + " to " + outputPath);

        // Verify that the number of bytes written matches the expected file size
        if (totalBytesWritten != inputFileSize) {
          String msg = "number of bytes copied not matching copied=" + totalBytesWritten +
              " expected=" + inputFileSize + " for file=" + inputPath;
          throw new IOException(msg);
        }

        LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
        LOG.info("size=" + totalBytesWritten +
            " (" + StringUtils.humanReadableInt(totalBytesWritten) + ")" +
            " time=" + StringUtils.formatTimeDiff(etime, stime) +
            String.format(" %.3fM/sec", (totalBytesWritten / ((etime - stime)/1000.0))/1048576.0));
        context.getCounter(Counter.FILES_COPIED).increment(1);
      } catch (IOException e) {
        LOG.error("Error copying " + inputPath + " to " + outputPath, e);
        context.getCounter(Counter.COPY_FAILED).increment(1);
        throw e;
      }
    }
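    /**
     * Try to open the "source" file.
     * Throws an IOException if the communication with the inputFs fails or
     * if the file is not found.
     */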
    private FSDataInputStream openSourceFile(Context context, final SnapshotFileInfo fileInfo)
        throws IOException {
      try {
        FileLink link = null;
        switch (fileInfo.getType()) {
          case HFILE:
            Path inputPath = new Path(fileInfo.getHfile());
            link = new HFileLink(inputRoot, inputArchive, inputPath);
            break;
          case WAL:
            String serverName = fileInfo.getWalServer();
            String logName = fileInfo.getWalName();
            link = new HLogLink(inputRoot, serverName, logName);
            break;
          default:
            throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
        }
        return link.open(inputFs);
      } catch (IOException e) {
        context.getCounter(Counter.MISSING_FILES).increment(1);
        LOG.error("Unable to open source file=" + fileInfo.toString(), e);
        throw e;
      }
    }

    private FileStatus getSourceFileStatus(Context context, final SnapshotFileInfo fileInfo)
        throws IOException {
      try {
        FileLink link = null;
        switch (fileInfo.getType()) {
          case HFILE:
            Path inputPath = new Path(fileInfo.getHfile());
            link = new HFileLink(inputRoot, inputArchive, inputPath);
            break;
          case WAL:
            link = new HLogLink(inputRoot, fileInfo.getWalServer(), fileInfo.getWalName());
            break;
          default:
            throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
        }
        return link.getFileStatus(inputFs);
      } catch (FileNotFoundException e) {
        context.getCounter(Counter.MISSING_FILES).increment(1);
        LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
        throw e;
      } catch (IOException e) {
        LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
        throw e;
      }
    }

    private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
      try {
        return fs.getFileChecksum(path);
      } catch (IOException e) {
        LOG.warn("Unable to get checksum for file=" + path, e);
        return null;
      }
    }
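    /**
     * Check if the two files are equal by looking at the file length,
     * and at the checksum (if the user has specified the verifyChecksum flag).
     */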
    private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) {
      // Not matching length
      if (inputStat.getLen() != outputStat.getLen()) return false;

      // Mark files as equal, since the user asked for no checksum verification
      if (!verifyChecksum) return true;

      // If checksums are not available, consider the files different (force a re-copy)
      FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
      if (inChecksum == null) return false;

      FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
      if (outChecksum == null) return false;

      return inChecksum.equals(outChecksum);
    }
  }
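  /**
   * Extract the list of files (HFiles/HLogs) to copy using Map-Reduce.
   * @return list of files referenced by the snapshot (pair of path and size)
   */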
  private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configuration conf,
      final FileSystem fs, final Path snapshotDir) throws IOException {
    SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);

    final List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<Pair<SnapshotFileInfo, Long>>();
    final TableName table = TableName.valueOf(snapshotDesc.getTable());

    // Get snapshot files
    LOG.info("Loading Snapshot '" + snapshotDesc.getName() + "' hfile list");
    SnapshotReferenceUtil.visitReferencedFiles(conf, fs, snapshotDir, snapshotDesc,
      new SnapshotReferenceUtil.SnapshotVisitor() {
        @Override
        public void storeFile(final HRegionInfo regionInfo, final String family,
            final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
          if (storeFile.hasReference()) {
            // copied as part of the manifest
          } else {
            String region = regionInfo.getEncodedName();
            String hfile = storeFile.getName();
            Path path = HFileLink.createPath(table, region, family, hfile);

            SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
              .setType(SnapshotFileInfo.Type.HFILE)
              .setHfile(path.toString())
              .build();

            long size;
            if (storeFile.hasFileSize()) {
              size = storeFile.getFileSize();
            } else {
              size = new HFileLink(conf, path).getFileStatus(fs).getLen();
            }
            files.add(new Pair<SnapshotFileInfo, Long>(fileInfo, size));
          }
        }

        @Override
        public void logFile(final String server, final String logfile)
            throws IOException {
          SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
            .setType(SnapshotFileInfo.Type.WAL)
            .setWalServer(server)
            .setWalName(logfile)
            .build();

          long size = new HLogLink(conf, server, logfile).getFileStatus(fs).getLen();
          files.add(new Pair<SnapshotFileInfo, Long>(fileInfo, size));
        }
      });

    return files;
  }
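  /**
   * Given a list of file paths and sizes, create around ngroups in as balanced a way as possible.
   * The groups created will have similar amounts of bytes.
   *
   * The algorithm used is pretty straightforward: the file list is sorted by size, and then each
   * group takes the biggest file available, iterating through the groups and alternating the
   * direction of iteration.
   */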
  static List<List<Pair<SnapshotFileInfo, Long>>> getBalancedSplits(
      final List<Pair<SnapshotFileInfo, Long>> files, final int ngroups) {
    // Sort files by size, from small to big
    Collections.sort(files, new Comparator<Pair<SnapshotFileInfo, Long>>() {
      public int compare(Pair<SnapshotFileInfo, Long> a, Pair<SnapshotFileInfo, Long> b) {
        long r = a.getSecond() - b.getSecond();
        return (r < 0) ? -1 : ((r > 0) ? 1 : 0);
      }
    });

    // Create balanced groups
    List<List<Pair<SnapshotFileInfo, Long>>> fileGroups =
        new LinkedList<List<Pair<SnapshotFileInfo, Long>>>();
    long[] sizeGroups = new long[ngroups];
    int hi = files.size() - 1;
    int lo = 0;

    List<Pair<SnapshotFileInfo, Long>> group;
    int dir = 1;
    int g = 0;

    while (hi >= lo) {
      if (g == fileGroups.size()) {
        group = new LinkedList<Pair<SnapshotFileInfo, Long>>();
        fileGroups.add(group);
      } else {
        group = fileGroups.get(g);
      }

      Pair<SnapshotFileInfo, Long> fileInfo = files.get(hi--);

      // Add the file to the current group
      sizeGroups[g] += fileInfo.getSecond();
      group.add(fileInfo);

      // Switch direction when the first or the last group is reached
      g += dir;
      if (g == ngroups) {
        dir = -1;
        g = ngroups - 1;
      } else if (g < 0) {
        dir = 1;
        g = 0;
      }
    }

    if (LOG.isDebugEnabled()) {
      for (int i = 0; i < sizeGroups.length; ++i) {
        LOG.debug("export split=" + i + " size=" + StringUtils.humanReadableInt(sizeGroups[i]));
      }
    }

    return fileGroups;
  }
  private static class ExportSnapshotInputFormat extends InputFormat<BytesWritable, NullWritable> {
    @Override
    public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit split,
        TaskAttemptContext tac) throws IOException, InterruptedException {
      return new ExportSnapshotRecordReader(((ExportSnapshotInputSplit)split).getSplitKeys());
    }

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();
      String snapshotName = conf.get(CONF_SNAPSHOT_NAME);
      Path snapshotDir = new Path(conf.get(CONF_SNAPSHOT_DIR));
      FileSystem fs = FileSystem.get(snapshotDir.toUri(), conf);

      List<Pair<SnapshotFileInfo, Long>> snapshotFiles = getSnapshotFiles(conf, fs, snapshotDir);
      int mappers = conf.getInt(CONF_NUM_SPLITS, 0);
      if (mappers == 0 && snapshotFiles.size() > 0) {
        mappers = 1 + (snapshotFiles.size() / conf.getInt(CONF_MAP_GROUP, 10));
        mappers = Math.min(mappers, snapshotFiles.size());
        conf.setInt(CONF_NUM_SPLITS, mappers);
        conf.setInt(MR_NUM_MAPS, mappers);
      }

      List<List<Pair<SnapshotFileInfo, Long>>> groups = getBalancedSplits(snapshotFiles, mappers);
      List<InputSplit> splits = new ArrayList<InputSplit>(groups.size());
      for (List<Pair<SnapshotFileInfo, Long>> files: groups) {
        splits.add(new ExportSnapshotInputSplit(files));
      }
      return splits;
    }

    private static class ExportSnapshotInputSplit extends InputSplit implements Writable {
      private List<Pair<BytesWritable, Long>> files;
      private long length;

      public ExportSnapshotInputSplit() {
        this.files = null;
      }

      public ExportSnapshotInputSplit(final List<Pair<SnapshotFileInfo, Long>> snapshotFiles) {
        this.files = new ArrayList<Pair<BytesWritable, Long>>(snapshotFiles.size());
        for (Pair<SnapshotFileInfo, Long> fileInfo: snapshotFiles) {
          this.files.add(new Pair<BytesWritable, Long>(
            new BytesWritable(fileInfo.getFirst().toByteArray()), fileInfo.getSecond()));
          this.length += fileInfo.getSecond();
        }
      }

      private List<Pair<BytesWritable, Long>> getSplitKeys() {
        return files;
      }

      @Override
      public long getLength() throws IOException, InterruptedException {
        return length;
      }

      @Override
      public String[] getLocations() throws IOException, InterruptedException {
        return new String[] {};
      }

      @Override
      public void readFields(DataInput in) throws IOException {
        int count = in.readInt();
        files = new ArrayList<Pair<BytesWritable, Long>>(count);
        length = 0;
        for (int i = 0; i < count; ++i) {
          BytesWritable fileInfo = new BytesWritable();
          fileInfo.readFields(in);
          long size = in.readLong();
          files.add(new Pair<BytesWritable, Long>(fileInfo, size));
          length += size;
        }
      }

      @Override
      public void write(DataOutput out) throws IOException {
        out.writeInt(files.size());
        for (final Pair<BytesWritable, Long> fileInfo: files) {
          fileInfo.getFirst().write(out);
          out.writeLong(fileInfo.getSecond());
        }
      }
    }

    private static class ExportSnapshotRecordReader
        extends RecordReader<BytesWritable, NullWritable> {
      private final List<Pair<BytesWritable, Long>> files;
      private long totalSize = 0;
      private long procSize = 0;
      private int index = -1;

      ExportSnapshotRecordReader(final List<Pair<BytesWritable, Long>> files) {
        this.files = files;
        for (Pair<BytesWritable, Long> fileInfo: files) {
          totalSize += fileInfo.getSecond();
        }
      }

      @Override
      public void close() { }

      @Override
      public BytesWritable getCurrentKey() { return files.get(index).getFirst(); }

      @Override
      public NullWritable getCurrentValue() { return NullWritable.get(); }

      @Override
      public float getProgress() { return (float)procSize / totalSize; }

      @Override
      public void initialize(InputSplit split, TaskAttemptContext tac) { }

      @Override
      public boolean nextKeyValue() {
        if (index >= 0) {
          procSize += files.get(index).getSecond();
        }
        return (++index < files.size());
      }
    }
  }
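  /**
   * Run Map-Reduce Job to perform the files copy.
   */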
  private void runCopyJob(final Path inputRoot, final Path outputRoot,
      final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
      final String filesUser, final String filesGroup, final int filesMode,
      final int mappers, final int bandwidthMB)
          throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
    if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
    if (mappers > 0) {
      conf.setInt(CONF_NUM_SPLITS, mappers);
      conf.setInt(MR_NUM_MAPS, mappers);
    }
    conf.setInt(CONF_FILES_MODE, filesMode);
    conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
    conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
    conf.set(CONF_INPUT_ROOT, inputRoot.toString());
    conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
    conf.set(CONF_SNAPSHOT_NAME, snapshotName);
    conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());

    Job job = new Job(conf);
    job.setJobName("ExportSnapshot-" + snapshotName);
    job.setJarByClass(ExportSnapshot.class);
    TableMapReduceUtil.addDependencyJars(job);
    job.setMapperClass(ExportMapper.class);
    job.setInputFormatClass(ExportSnapshotInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setMapSpeculativeExecution(false);
    job.setNumReduceTasks(0);

    // Acquire the delegation Tokens for the source and destination filesystems
    Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { inputRoot }, srcConf);
    Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outputRoot }, destConf);

    // Run the MR Job
    if (!job.waitForCompletion(true)) {
      throw new ExportSnapshotException("Copy Files Map-Reduce Job failed");
    }
  }

  private void verifySnapshot(final Configuration baseConf,
      final FileSystem fs, final Path rootDir, final Path snapshotDir) throws IOException {
    // Update the conf with the current root dir, since it may be a different cluster
    Configuration conf = new Configuration(baseConf);
    FSUtils.setRootDir(conf, rootDir);
    FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
    SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
    SnapshotReferenceUtil.verifySnapshot(conf, fs, snapshotDir, snapshotDesc);
  }
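  /**
   * Set path ownership, optionally recursing into sub-directories.
   */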
  private void setOwner(final FileSystem fs, final Path path, final String user,
      final String group, final boolean recursive) throws IOException {
    if (user != null || group != null) {
      if (recursive && fs.isDirectory(path)) {
        for (FileStatus child : fs.listStatus(path)) {
          setOwner(fs, child.getPath(), user, group, recursive);
        }
      }
      fs.setOwner(path, user, group);
    }
  }
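  /**
   * Execute the export snapshot by copying the snapshot metadata, hfiles and hlogs.
   * @return 0 on success, and != 0 upon failure.
   */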
  @Override
  public int run(String[] args) throws IOException {
    boolean verifyTarget = true;
    boolean verifyChecksum = true;
    String snapshotName = null;
    String targetName = null;
    boolean overwrite = false;
    String filesGroup = null;
    String filesUser = null;
    Path outputRoot = null;
    int bandwidthMB = Integer.MAX_VALUE;
    int filesMode = 0;
    int mappers = 0;

    Configuration conf = getConf();
    Path inputRoot = FSUtils.getRootDir(conf);

    // Process command line args
    for (int i = 0; i < args.length; i++) {
      String cmd = args[i];
      if (cmd.equals("-snapshot")) {
        snapshotName = args[++i];
      } else if (cmd.equals("-target")) {
        targetName = args[++i];
      } else if (cmd.equals("-copy-to")) {
        outputRoot = new Path(args[++i]);
      } else if (cmd.equals("-copy-from")) {
        inputRoot = new Path(args[++i]);
        FSUtils.setRootDir(conf, inputRoot);
      } else if (cmd.equals("-no-checksum-verify")) {
        verifyChecksum = false;
      } else if (cmd.equals("-no-target-verify")) {
        verifyTarget = false;
      } else if (cmd.equals("-mappers")) {
        mappers = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-chuser")) {
        filesUser = args[++i];
      } else if (cmd.equals("-chgroup")) {
        filesGroup = args[++i];
      } else if (cmd.equals("-bandwidth")) {
        bandwidthMB = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-chmod")) {
        filesMode = Integer.parseInt(args[++i], 8);
      } else if (cmd.equals("-overwrite")) {
        overwrite = true;
      } else if (cmd.equals("-h") || cmd.equals("--help")) {
        printUsageAndExit();
      } else {
        System.err.println("UNEXPECTED: " + cmd);
        printUsageAndExit();
      }
    }

    // Check user options
    if (snapshotName == null) {
      System.err.println("Snapshot name not provided.");
      printUsageAndExit();
    }

    if (outputRoot == null) {
      System.err.println("Destination file-system not provided.");
      printUsageAndExit();
    }

    if (targetName == null) {
      targetName = snapshotName;
    }

    Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
    srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
    FileSystem inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
    LOG.debug("inputFs=" + inputFs.getUri().toString() + " inputRoot=" + inputRoot);
    Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
    destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
    FileSystem outputFs = FileSystem.get(outputRoot.toUri(), destConf);
    LOG.debug("outputFs=" + outputFs.getUri().toString() + " outputRoot=" + outputRoot.toString());

    boolean skipTmp = conf.getBoolean(CONF_SKIP_TMP, false);

    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, inputRoot);
    Path snapshotTmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(targetName, outputRoot);
    Path outputSnapshotDir =
        SnapshotDescriptionUtils.getCompletedSnapshotDir(targetName, outputRoot);
    Path initialOutputSnapshotDir = skipTmp ? outputSnapshotDir : snapshotTmpDir;

    // Check if the snapshot already exists in the destination
    if (outputFs.exists(outputSnapshotDir)) {
      if (overwrite) {
        if (!outputFs.delete(outputSnapshotDir, true)) {
          System.err.println("Unable to remove existing snapshot directory: " + outputSnapshotDir);
          return 1;
        }
      } else {
        System.err.println("The snapshot '" + targetName +
            "' already exists in the destination: " + outputSnapshotDir);
        return 1;
      }
    }

    if (!skipTmp) {
      // Check if the snapshot is already in-progress in the destination
      if (outputFs.exists(snapshotTmpDir)) {
        if (overwrite) {
          if (!outputFs.delete(snapshotTmpDir, true)) {
            System.err.println("Unable to remove existing snapshot tmp directory: " + snapshotTmpDir);
            return 1;
          }
        } else {
          System.err.println("A snapshot with the same name '" + targetName + "' may be in-progress");
          System.err.println("Please check " + snapshotTmpDir + ". If the snapshot has completed, ");
          System.err.println("consider removing " + snapshotTmpDir + " by using the -overwrite option");
          return 1;
        }
      }
    }

    // Step 1 - Copy fs1:/.snapshot/<snapshot> to fs2:/.snapshot/.tmp/<snapshot>
    try {
      LOG.info("Copy Snapshot Manifest");
      FileUtil.copy(inputFs, snapshotDir, outputFs, initialOutputSnapshotDir, false, false, conf);
      if (filesUser != null || filesGroup != null) {
        setOwner(outputFs, snapshotTmpDir, filesUser, filesGroup, true);
      }
    } catch (IOException e) {
      throw new ExportSnapshotException("Failed to copy the snapshot directory: from=" +
          snapshotDir + " to=" + initialOutputSnapshotDir, e);
    }

    // Write a new .snapshotinfo if the target name is different from the source name
    if (!targetName.equals(snapshotName)) {
      SnapshotDescription snapshotDesc =
        SnapshotDescriptionUtils.readSnapshotInfo(inputFs, snapshotDir)
          .toBuilder()
          .setName(targetName)
          .build();
      SnapshotDescriptionUtils.writeSnapshotInfo(snapshotDesc, snapshotTmpDir, outputFs);
    }

    // Step 2 - Start MR Job to copy files
    // The snapshot references must be copied before the files,
    // otherwise the files get removed because they are unreferenced.
    try {
      runCopyJob(inputRoot, outputRoot, snapshotName, snapshotDir, verifyChecksum,
        filesUser, filesGroup, filesMode, mappers, bandwidthMB);

      LOG.info("Finalize the Snapshot Export");
      if (!skipTmp) {
        // Remove the tmp prefix, i.e. rename the temporary directory to the final location
        if (!outputFs.rename(snapshotTmpDir, outputSnapshotDir)) {
          throw new ExportSnapshotException("Unable to rename snapshot directory from=" +
              snapshotTmpDir + " to=" + outputSnapshotDir);
        }
      }

      // Run the verification on the target
      if (verifyTarget) {
        LOG.info("Verify snapshot integrity");
        verifySnapshot(destConf, outputFs, outputRoot, outputSnapshotDir);
      }

      LOG.info("Export Completed: " + targetName);
      return 0;
    } catch (Exception e) {
      LOG.error("Snapshot export failed", e);
      if (!skipTmp) {
        outputFs.delete(snapshotTmpDir, true);
      }
      outputFs.delete(outputSnapshotDir, true);
      return 1;
    } finally {
      IOUtils.closeStream(inputFs);
      IOUtils.closeStream(outputFs);
    }
  }
  private void printUsageAndExit() {
    System.err.printf("Usage: bin/hbase %s [options]%n", getClass().getName());
    System.err.println(" where [options] are:");
    System.err.println("  -h|--help               Show this help and exit.");
    System.err.println("  -snapshot NAME          Snapshot to export.");
    System.err.println("  -copy-to NAME           Remote destination hdfs://");
    System.err.println("  -copy-from NAME         Input folder hdfs:// (default hbase.rootdir)");
    System.err.println("  -no-checksum-verify     Do not verify checksum, use name+length only.");
    System.err.println("  -no-target-verify       Do not verify the integrity of the exported snapshot.");
    System.err.println("  -overwrite              Rewrite the snapshot manifest if already exists");
    System.err.println("  -chuser USERNAME        Change the owner of the files to the specified one.");
    System.err.println("  -chgroup GROUP          Change the group of the files to the specified one.");
    System.err.println("  -chmod MODE             Change the permission of the files to the specified one.");
    System.err.println("  -mappers                Number of mappers to use during the copy (mapreduce.job.maps).");
    System.err.println();
    System.err.println("Examples:");
    System.err.println("  hbase " + getClass().getName() + " \\");
    System.err.println("    -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase \\");
    System.err.println("    -chuser MyUser -chgroup MyGroup -chmod 700 -mappers 16");
    System.err.println();
    System.err.println("  hbase " + getClass().getName() + " \\");
    System.err.println("    -snapshot MySnapshot -copy-from hdfs://srv2:8082/hbase \\");
    System.err.println("    -copy-to hdfs://srv1:50070/hbase");
    System.exit(1);
  }
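  /**
   * The guts of the {@link #main} method.
   * Call this method to avoid the {@link #main(String[])} System.exit.
   * @return errCode
   */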
  static int innerMain(final Configuration conf, final String [] args) throws Exception {
    return ToolRunner.run(conf, new ExportSnapshot(), args);
  }

  public static void main(String[] args) throws Exception {
    System.exit(innerMain(HBaseConfiguration.create(), args));
  }
}