/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Tool used to copy a table to another one, which can be on a different cluster.
 * It is also configurable with a start and end time, as well as a specification
 * of the region server implementation if different from the local cluster.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class CopyTable extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CopyTable.class);

  final static String NAME = "copytable";
  long startTime = 0;
  long endTime = 0;
  int versions = -1;
  String tableName = null;
  String startRow = null;
  String stopRow = null;
  String dstTableName = null;
  String peerAddress = null;
  String families = null;
  boolean allCells = false;

  boolean bulkload = false;
  Path bulkloadDir = null;

  private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";

  public CopyTable(Configuration conf) {
    super(conf);
  }
  /**
   * Sets up the actual job.
   *
   * @param args The command line parameters.
   * @return The newly created job, or null if the arguments were invalid.
   * @throws IOException When setting up the job fails.
   */
  public Job createSubmittableJob(String[] args)
  throws IOException {
    if (!doCommandLine(args)) {
      return null;
    }

    Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(CopyTable.class);
    Scan scan = new Scan();
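    // This is a one-pass scan of the whole table, so keep its blocks out of the
    // region server block cache.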
    scan.setCacheBlocks(false);
    if (startTime != 0) {
      scan.setTimeRange(startTime,
          endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
    }
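    // A raw scan also returns delete markers and deleted cells that have not yet
    // been garbage collected.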
    if (allCells) {
      scan.setRaw(true);
    }
    if (versions >= 0) {
      scan.setMaxVersions(versions);
    }

    if (startRow != null) {
      scan.setStartRow(Bytes.toBytes(startRow));
    }

    if (stopRow != null) {
      scan.setStopRow(Bytes.toBytes(stopRow));
    }

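    // Parse --families: each entry is either a plain family name, or
    // "sourceCf:destCf" to rename the family on the destination table.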
    if (families != null) {
      String[] fams = families.split(",");
      Map<String, String> cfRenameMap = new HashMap<String, String>();
      for (String fam : fams) {
        String sourceCf;
        if (fam.contains(":")) {
          // fam looks like "sourceCfName:destCfName"
          String[] srcAndDest = fam.split(":", 2);
          sourceCf = srcAndDest[0];
          String destCf = srcAndDest[1];
          cfRenameMap.put(sourceCf, destCf);
        } else {
          // fam is just "sourceCfName"
          sourceCf = fam;
        }
        scan.addFamily(Bytes.toBytes(sourceCf));
      }
      Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
    }
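
    // This is a map-only job: the mappers emit the copied rows themselves,
    // so no reduce phase is needed.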
    job.setNumReduceTasks(0);

    if (bulkload) {
      TableMapReduceUtil.initTableMapperJob(tableName, scan, Import.KeyValueImporter.class, null,
          null, job);

      // Split the input by the destination table's regions so that the map
      // output can be bulk loaded.
      TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));

      FileSystem fs = FileSystem.get(getConf());
      Random rand = new Random();
      Path root = new Path(fs.getWorkingDirectory(), "copytable");
      fs.mkdirs(root);
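      // Stage the HFiles in a random, not-yet-existing subdirectory under the
      // staging root.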
      while (true) {
        bulkloadDir = new Path(root, "" + rand.nextLong());
        if (!fs.exists(bulkloadDir)) {
          break;
        }
      }

      System.out.println("HFiles will be stored at " + this.bulkloadDir);
      HFileOutputFormat2.setOutputPath(job, bulkloadDir);
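      // Open the destination table so the HFile output can be configured to
      // match its column family layout.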
      HTable htable = new HTable(getConf(), TableName.valueOf(dstTableName));
      try {
        HFileOutputFormat2.configureIncrementalLoadMap(job, htable);
      } finally {
        htable.close();
      }
    } else {
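      // Without --bulkload, the mappers write puts directly to the destination
      // table, which may live on a peer cluster when --peer.adr is given.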
      TableMapReduceUtil.initTableMapperJob(tableName, scan,
          Import.Importer.class, null, null, job);

      TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress, null,
          null);
    }

    return job;
  }

  /*
   * @param errorMsg Error message. Can be null.
   */
  private static void printUsage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
        "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" rs.class     hbase.regionserver.class of the peer cluster");
    System.err.println("              specify if different from current cluster");
    System.err.println(" rs.impl      hbase.regionserver.impl of the peer cluster");
    System.err.println(" startrow     the start row");
    System.err.println(" stoprow      the stop row");
    System.err.println(" starttime    beginning of the time range (unixtime in millis)");
    System.err.println("              without endtime means from starttime to forever");
    System.err.println(" endtime      end of the time range. Ignored if no starttime specified.");
    System.err.println(" versions     number of cell versions to copy");
    System.err.println(" new.name     new table's name");
    System.err.println(" peer.adr     Address of the peer cluster given in the format");
    System.err.println("              hbase.zookeeper.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent");
    System.err.println(" families     comma-separated list of families to copy");
    System.err.println("              To copy from cf1 to cf2, give sourceCfName:destCfName.");
    System.err.println("              To keep the same name, just give \"cfName\"");
    System.err.println(" all.cells    also copy delete markers and deleted cells");
    System.err.println(" bulkload     Write input into HFiles and bulk load to the destination "
        + "table");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" tablename    Name of the table to copy");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
    System.err.println(" $ bin/hbase " +
        "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
        "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable");
    System.err.println("For performance consider the following general options:\n"
        + "-Dhbase.client.scanner.caching=100\n"
        + "-Dmapred.map.tasks.speculative.execution=false");
  }

  private boolean doCommandLine(final String[] args) {
    // Process command-line args.
    if (args.length < 1) {
      printUsage(null);
      return false;
    }
    try {
      for (int i = 0; i < args.length; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }

        final String startRowArgKey = "--startrow=";
        if (cmd.startsWith(startRowArgKey)) {
          startRow = cmd.substring(startRowArgKey.length());
          continue;
        }

        final String stopRowArgKey = "--stoprow=";
        if (cmd.startsWith(stopRowArgKey)) {
          stopRow = cmd.substring(stopRowArgKey.length());
          continue;
        }

        final String startTimeArgKey = "--starttime=";
        if (cmd.startsWith(startTimeArgKey)) {
          startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
          continue;
        }

        final String endTimeArgKey = "--endtime=";
        if (cmd.startsWith(endTimeArgKey)) {
          endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
          continue;
        }

        final String versionsArgKey = "--versions=";
        if (cmd.startsWith(versionsArgKey)) {
          versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
          continue;
        }

        final String newNameArgKey = "--new.name=";
        if (cmd.startsWith(newNameArgKey)) {
          dstTableName = cmd.substring(newNameArgKey.length());
          continue;
        }

        final String peerAdrArgKey = "--peer.adr=";
        if (cmd.startsWith(peerAdrArgKey)) {
          peerAddress = cmd.substring(peerAdrArgKey.length());
          continue;
        }

        final String familiesArgKey = "--families=";
        if (cmd.startsWith(familiesArgKey)) {
          families = cmd.substring(familiesArgKey.length());
          continue;
        }

        if (cmd.startsWith("--all.cells")) {
          allCells = true;
          continue;
        }

        if (cmd.startsWith("--bulkload")) {
          bulkload = true;
          continue;
        }

        if (i == args.length - 1) {
          tableName = cmd;
        } else {
          printUsage("Invalid argument '" + cmd + "'");
          return false;
        }
      }
      if (dstTableName == null && peerAddress == null) {
        printUsage("At least a new table name or a " +
            "peer address must be specified");
        return false;
      }
      if ((endTime != 0) && (startTime > endTime)) {
        printUsage("Invalid time range filter: starttime=" + startTime + " > endtime=" + endTime);
        return false;
      }

      if (bulkload && peerAddress != null) {
        printUsage("Remote bulkload is not supported!");
        return false;
      }

      // Default the destination table name to the source table name when
      // --new.name was not given.
      if (dstTableName == null) {
        dstTableName = tableName;
      }
    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }

  /**
   * Main entry point.
   *
   * @param args The command line parameters.
   * @throws Exception When running the job fails.
   */
  public static void main(String[] args) throws Exception {
    int ret = ToolRunner.run(new CopyTable(HBaseConfiguration.create()), args);
    System.exit(ret);
  }

  @Override
  public int run(String[] args) throws Exception {
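    // Let GenericOptionsParser consume the generic Hadoop options
    // (e.g. -D key=value) before parsing CopyTable's own flags.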
    String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    Job job = createSubmittableJob(otherArgs);
    if (job == null) return 1;
    if (!job.waitForCompletion(true)) {
      LOG.error("Map-reduce job failed!");
      if (bulkload) {
        LOG.error("Files are not bulkloaded!");
      }
      return 1;
    }
    int code = 0;
    if (bulkload) {
      code = new LoadIncrementalHFiles(this.getConf()).run(new String[]{this.bulkloadDir.toString(),
          this.dstTableName});
      if (code == 0) {
        // Delete the staging directory only after LoadIncrementalHFiles succeeded,
        // so that a failed bulk load can be rerun from the same HFiles.
        FileSystem fs = FileSystem.get(this.getConf());
        if (!fs.delete(this.bulkloadDir, true)) {
          LOG.error("Deleting folder " + bulkloadDir + " failed!");
          code = 1;
        }
      }
    }
    return code;
  }
}