1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import java.io.IOException;
22 import java.util.HashMap;
23 import java.util.Map;
24 import java.util.Random;
25
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.conf.Configured;
30 import org.apache.hadoop.fs.FileSystem;
31 import org.apache.hadoop.fs.Path;
32 import org.apache.hadoop.hbase.HBaseConfiguration;
33 import org.apache.hadoop.hbase.HConstants;
34 import org.apache.hadoop.hbase.TableName;
35 import org.apache.hadoop.hbase.classification.InterfaceAudience;
36 import org.apache.hadoop.hbase.classification.InterfaceStability;
37 import org.apache.hadoop.hbase.client.HTable;
38 import org.apache.hadoop.hbase.client.Scan;
39 import org.apache.hadoop.hbase.util.Bytes;
40 import org.apache.hadoop.mapreduce.Job;
41 import org.apache.hadoop.util.GenericOptionsParser;
42 import org.apache.hadoop.util.Tool;
43 import org.apache.hadoop.util.ToolRunner;
44
45
46
47
48
49
50 @InterfaceAudience.Public
51 @InterfaceStability.Stable
52 public class CopyTable extends Configured implements Tool {
53 private static final Log LOG = LogFactory.getLog(CopyTable.class);
54
55 final static String NAME = "copytable";
56 long startTime = 0;
57 long endTime = HConstants.LATEST_TIMESTAMP;
58 int batch = Integer.MAX_VALUE;
59 int cacheRow = -1;
60 int versions = -1;
61 String tableName = null;
62 String startRow = null;
63 String stopRow = null;
64 String dstTableName = null;
65 String peerAddress = null;
66 String families = null;
67 boolean allCells = false;
68
69 boolean bulkload = false;
70 Path bulkloadDir = null;
71
72 private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
73
74
75
76
77 @Deprecated
78 static long startTime_ = 0;
79 @Deprecated
80 static long endTime_ = 0;
81 @Deprecated
82 static int versions_ = -1;
83 @Deprecated
84 static String tableName_ = null;
85 @Deprecated
86 static String startRow_ = null;
87 @Deprecated
88 static String stopRow_ = null;
89 @Deprecated
90 static String newTableName_ = null;
91 @Deprecated
92 static String peerAddress_ = null;
93 @Deprecated
94 static String families_ = null;
95 @Deprecated
96 static boolean allCells_ = false;
97
98 public CopyTable(Configuration conf) {
99 super(conf);
100 }
101
102
103
104
105
106
107
108
109
110
111 @Deprecated
112 public static Job createSubmittableJob(Configuration conf, String[] args)
113 throws IOException {
114 if (!deprecatedDoCommandLine(args)) {
115 return null;
116 }
117 Job job = new Job(conf, NAME + "_" + tableName_);
118 job.setJarByClass(CopyTable.class);
119 Scan scan = new Scan();
120 scan.setCacheBlocks(false);
121 if (startTime_ != 0) {
122 scan.setTimeRange(startTime_,
123 endTime_ == 0 ? HConstants.LATEST_TIMESTAMP : endTime_);
124 }
125 if (allCells_) {
126 scan.setRaw(true);
127 }
128 if (versions_ >= 0) {
129 scan.setMaxVersions(versions_);
130 }
131 if (startRow_ != null) {
132 scan.setStartRow(Bytes.toBytes(startRow_));
133 }
134 if (stopRow_ != null) {
135 scan.setStopRow(Bytes.toBytes(stopRow_));
136 }
137 if(families_ != null) {
138 String[] fams = families_.split(",");
139 Map<String,String> cfRenameMap = new HashMap<String,String>();
140 for(String fam : fams) {
141 String sourceCf;
142 if(fam.contains(":")) {
143
144 String[] srcAndDest = fam.split(":", 2);
145 sourceCf = srcAndDest[0];
146 String destCf = srcAndDest[1];
147 cfRenameMap.put(sourceCf, destCf);
148 } else {
149
150 sourceCf = fam;
151 }
152 scan.addFamily(Bytes.toBytes(sourceCf));
153 }
154 Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
155 }
156 TableMapReduceUtil.initTableMapperJob(tableName_, scan,
157 Import.Importer.class, null, null, job);
158 TableMapReduceUtil.initTableReducerJob(
159 newTableName_ == null ? tableName_ : newTableName_, null, job,
160 null, peerAddress_, null, null);
161 job.setNumReduceTasks(0);
162 return job;
163 }
164
165 private static boolean deprecatedDoCommandLine(final String[] args) {
166
167
168 if (args.length < 1) {
169 printUsage(null);
170 return false;
171 }
172 try {
173 for (int i = 0; i < args.length; i++) {
174 String cmd = args[i];
175 if (cmd.equals("-h") || cmd.startsWith("--h")) {
176 printUsage(null);
177 return false;
178 }
179 final String startRowArgKey = "--startrow=";
180 if (cmd.startsWith(startRowArgKey)) {
181 startRow_ = cmd.substring(startRowArgKey.length());
182 continue;
183 }
184 final String stopRowArgKey = "--stoprow=";
185 if (cmd.startsWith(stopRowArgKey)) {
186 stopRow_ = cmd.substring(stopRowArgKey.length());
187 continue;
188 }
189 final String startTimeArgKey = "--starttime=";
190 if (cmd.startsWith(startTimeArgKey)) {
191 startTime_ = Long.parseLong(cmd.substring(startTimeArgKey.length()));
192 continue;
193 }
194 final String endTimeArgKey = "--endtime=";
195 if (cmd.startsWith(endTimeArgKey)) {
196 endTime_ = Long.parseLong(cmd.substring(endTimeArgKey.length()));
197 continue;
198 }
199 final String versionsArgKey = "--versions=";
200 if (cmd.startsWith(versionsArgKey)) {
201 versions_ = Integer.parseInt(cmd.substring(versionsArgKey.length()));
202 continue;
203 }
204 final String newNameArgKey = "--new.name=";
205 if (cmd.startsWith(newNameArgKey)) {
206 newTableName_ = cmd.substring(newNameArgKey.length());
207 continue;
208 }
209 final String peerAdrArgKey = "--peer.adr=";
210 if (cmd.startsWith(peerAdrArgKey)) {
211 peerAddress_ = cmd.substring(peerAdrArgKey.length());
212 continue;
213 }
214 final String familiesArgKey = "--families=";
215 if (cmd.startsWith(familiesArgKey)) {
216 families_ = cmd.substring(familiesArgKey.length());
217 continue;
218 }
219 if (cmd.startsWith("--all.cells")) {
220 allCells_ = true;
221 continue;
222 }
223 if (i == args.length-1) {
224 tableName_ = cmd;
225 } else {
226 printUsage("Invalid argument '" + cmd + "'" );
227 return false;
228 }
229 }
230 if (newTableName_ == null && peerAddress_ == null) {
231 printUsage("At least a new table name or a " +
232 "peer address must be specified");
233 return false;
234 }
235 if ((endTime_ != 0) && (startTime_ > endTime_)) {
236 printUsage("Invalid time range filter: starttime=" + startTime_ + " > endtime="
237 + endTime_);
238 return false;
239 }
240 } catch (Exception e) {
241 e.printStackTrace();
242 printUsage("Can't start because " + e.getMessage());
243 return false;
244 }
245 return true;
246 }
247
248
249
250
251
252
253
254
255 public Job createSubmittableJob(String[] args)
256 throws IOException {
257 if (!doCommandLine(args)) {
258 return null;
259 }
260
261 Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
262 job.setJarByClass(CopyTable.class);
263 Scan scan = new Scan();
264 scan.setBatch(batch);
265 scan.setCacheBlocks(false);
266
267 if (cacheRow > 0) {
268 scan.setCaching(cacheRow);
269 } else {
270 scan.setCaching(getConf().getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 100));
271 }
272
273 scan.setTimeRange(startTime, endTime);
274 if (allCells) {
275 scan.setRaw(true);
276 }
277 if (versions >= 0) {
278 scan.setMaxVersions(versions);
279 }
280
281 if (startRow != null) {
282 scan.setStartRow(Bytes.toBytes(startRow));
283 }
284
285 if (stopRow != null) {
286 scan.setStopRow(Bytes.toBytes(stopRow));
287 }
288
289 if(families != null) {
290 String[] fams = families.split(",");
291 Map<String,String> cfRenameMap = new HashMap<String,String>();
292 for(String fam : fams) {
293 String sourceCf;
294 if(fam.contains(":")) {
295
296 String[] srcAndDest = fam.split(":", 2);
297 sourceCf = srcAndDest[0];
298 String destCf = srcAndDest[1];
299 cfRenameMap.put(sourceCf, destCf);
300 } else {
301
302 sourceCf = fam;
303 }
304 scan.addFamily(Bytes.toBytes(sourceCf));
305 }
306 Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
307 }
308 job.setNumReduceTasks(0);
309
310 if (bulkload) {
311 TableMapReduceUtil.initTableMapperJob(tableName, scan, Import.KeyValueImporter.class, null,
312 null, job);
313
314
315 TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));
316
317 FileSystem fs = FileSystem.get(getConf());
318 Random rand = new Random();
319 Path root = new Path(fs.getWorkingDirectory(), "copytable");
320 fs.mkdirs(root);
321 while (true) {
322 bulkloadDir = new Path(root, "" + rand.nextLong());
323 if (!fs.exists(bulkloadDir)) {
324 break;
325 }
326 }
327
328 System.out.println("HFiles will be stored at " + this.bulkloadDir);
329 HFileOutputFormat2.setOutputPath(job, bulkloadDir);
330 HTable htable = new HTable(getConf(), TableName.valueOf(dstTableName));
331 try {
332 HFileOutputFormat2.configureIncrementalLoadMap(job, htable);
333 } finally {
334 htable.close();
335 }
336 } else {
337 TableMapReduceUtil.initTableMapperJob(tableName, scan,
338 Import.Importer.class, null, null, job);
339
340 TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress, null,
341 null);
342 }
343
344 return job;
345 }
346
347
348
349
350 private static void printUsage(final String errorMsg) {
351 if (errorMsg != null && errorMsg.length() > 0) {
352 System.err.println("ERROR: " + errorMsg);
353 }
354 System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
355 "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
356 System.err.println();
357 System.err.println("Options:");
358 System.err.println(" rs.class hbase.regionserver.class of the peer cluster");
359 System.err.println(" specify if different from current cluster");
360 System.err.println(" rs.impl hbase.regionserver.impl of the peer cluster");
361 System.err.println(" startrow the start row");
362 System.err.println(" stoprow the stop row");
363 System.err.println(" starttime beginning of the time range (unixtime in millis)");
364 System.err.println(" without endtime means from starttime to forever");
365 System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
366 System.err.println(" versions number of cell versions to copy");
367 System.err.println(" new.name new table's name");
368 System.err.println(" peer.adr Address of the peer cluster given in the format");
369 System.err.println(" hbase.zookeeer.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent");
370 System.err.println(" families comma-separated list of families to copy");
371 System.err.println(" To copy from cf1 to cf2, give sourceCfName:destCfName. ");
372 System.err.println(" To keep the same name, just give \"cfName\"");
373 System.err.println(" all.cells also copy delete markers and deleted cells");
374 System.err.println(" bulkload Write input into HFiles and bulk load to the destination "
375 + "table");
376 System.err.println();
377 System.err.println("Args:");
378 System.err.println(" tablename Name of the table to copy");
379 System.err.println();
380 System.err.println("Examples:");
381 System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
382 System.err.println(" $ bin/hbase " +
383 "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
384 "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
385 System.err.println("For performance consider the following general options:\n"
386 + "-Dhbase.client.scanner.caching=100\n"
387 + "-Dmapred.map.tasks.speculative.execution=false");
388 }
389
390 private boolean doCommandLine(final String[] args) {
391
392
393 if (args.length < 1) {
394 printUsage(null);
395 return false;
396 }
397 try {
398 for (int i = 0; i < args.length; i++) {
399 String cmd = args[i];
400 if (cmd.equals("-h") || cmd.startsWith("--h")) {
401 printUsage(null);
402 return false;
403 }
404
405 final String startRowArgKey = "--startrow=";
406 if (cmd.startsWith(startRowArgKey)) {
407 startRow = cmd.substring(startRowArgKey.length());
408 continue;
409 }
410
411 final String stopRowArgKey = "--stoprow=";
412 if (cmd.startsWith(stopRowArgKey)) {
413 stopRow = cmd.substring(stopRowArgKey.length());
414 continue;
415 }
416
417 final String startTimeArgKey = "--starttime=";
418 if (cmd.startsWith(startTimeArgKey)) {
419 startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
420 continue;
421 }
422
423 final String endTimeArgKey = "--endtime=";
424 if (cmd.startsWith(endTimeArgKey)) {
425 endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
426 continue;
427 }
428
429 final String batchArgKey = "--batch=";
430 if (cmd.startsWith(batchArgKey)) {
431 batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
432 continue;
433 }
434
435 final String cacheRowArgKey = "--cacheRow=";
436 if (cmd.startsWith(cacheRowArgKey)) {
437 cacheRow = Integer.parseInt(cmd.substring(cacheRowArgKey.length()));
438 continue;
439 }
440
441 final String versionsArgKey = "--versions=";
442 if (cmd.startsWith(versionsArgKey)) {
443 versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
444 continue;
445 }
446
447 final String newNameArgKey = "--new.name=";
448 if (cmd.startsWith(newNameArgKey)) {
449 dstTableName = cmd.substring(newNameArgKey.length());
450 continue;
451 }
452
453 final String peerAdrArgKey = "--peer.adr=";
454 if (cmd.startsWith(peerAdrArgKey)) {
455 peerAddress = cmd.substring(peerAdrArgKey.length());
456 continue;
457 }
458
459 final String familiesArgKey = "--families=";
460 if (cmd.startsWith(familiesArgKey)) {
461 families = cmd.substring(familiesArgKey.length());
462 continue;
463 }
464
465 if (cmd.startsWith("--all.cells")) {
466 allCells = true;
467 continue;
468 }
469
470 if (cmd.startsWith("--bulkload")) {
471 bulkload = true;
472 continue;
473 }
474
475 if (i == args.length-1) {
476 tableName = cmd;
477 } else {
478 printUsage("Invalid argument '" + cmd + "'" );
479 return false;
480 }
481 }
482 if (dstTableName == null && peerAddress == null) {
483 printUsage("At least a new table name or a " +
484 "peer address must be specified");
485 return false;
486 }
487 if ((endTime != 0) && (startTime > endTime)) {
488 printUsage("Invalid time range filter: starttime=" + startTime + " > endtime=" + endTime);
489 return false;
490 }
491
492 if (bulkload && peerAddress != null) {
493 printUsage("Remote bulkload is not supported!");
494 return false;
495 }
496
497
498 if (dstTableName == null) {
499 dstTableName = tableName;
500 }
501 } catch (Exception e) {
502 e.printStackTrace();
503 printUsage("Can't start because " + e.getMessage());
504 return false;
505 }
506 return true;
507 }
508
509
510
511
512
513
514
515 public static void main(String[] args) throws Exception {
516 int ret = ToolRunner.run(new CopyTable(HBaseConfiguration.create()), args);
517 System.exit(ret);
518 }
519
520 @Override
521 public int run(String[] args) throws Exception {
522 String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
523 Job job = createSubmittableJob(otherArgs);
524 if (job == null) return 1;
525 if (!job.waitForCompletion(true)) {
526 LOG.info("Map-reduce job failed!");
527 if (bulkload) {
528 LOG.info("Files are not bulkloaded!");
529 }
530 return 1;
531 }
532 int code = 0;
533 if (bulkload) {
534 code = new LoadIncrementalHFiles(this.getConf()).run(new String[]{this.bulkloadDir.toString(),
535 this.dstTableName});
536 if (code == 0) {
537
538
539 FileSystem fs = FileSystem.get(this.getConf());
540 if (!fs.delete(this.bulkloadDir, true)) {
541 LOG.error("Deleting folder " + bulkloadDir + " failed!");
542 code = 1;
543 }
544 }
545 }
546 return code;
547 }
548 }