1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import org.apache.hadoop.classification.InterfaceAudience;
22 import org.apache.hadoop.classification.InterfaceStability;
23 import org.apache.hadoop.conf.Configuration;
24 import org.apache.hadoop.hbase.HBaseConfiguration;
25 import org.apache.hadoop.hbase.HConstants;
26 import org.apache.hadoop.hbase.util.Bytes;
27 import org.apache.hadoop.hbase.client.Scan;
28 import org.apache.hadoop.mapreduce.Job;
29 import org.apache.hadoop.util.GenericOptionsParser;
30
31 import java.io.IOException;
32 import java.util.HashMap;
33 import java.util.Map;
34
35
36
37
38
39
40 @InterfaceAudience.Public
41 @InterfaceStability.Stable
42 public class CopyTable {
43
44 final static String NAME = "copytable";
45 static long startTime = 0;
46 static long endTime = 0;
47 static int versions = -1;
48 static String tableName = null;
49 static String newTableName = null;
50 static String peerAddress = null;
51 static String families = null;
52 static boolean allCells = false;
53
54
55
56
57
58
59
60
61
62 public static Job createSubmittableJob(Configuration conf, String[] args)
63 throws IOException {
64 if (!doCommandLine(args)) {
65 return null;
66 }
67 Job job = new Job(conf, NAME + "_" + tableName);
68 job.setJarByClass(CopyTable.class);
69 Scan scan = new Scan();
70 scan.setCacheBlocks(false);
71 if (startTime != 0) {
72 scan.setTimeRange(startTime,
73 endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
74 }
75 if (allCells) {
76 scan.setRaw(true);
77 }
78 if (versions >= 0) {
79 scan.setMaxVersions(versions);
80 }
81 if(families != null) {
82 String[] fams = families.split(",");
83 Map<String,String> cfRenameMap = new HashMap<String,String>();
84 for(String fam : fams) {
85 String sourceCf;
86 if(fam.contains(":")) {
87
88 String[] srcAndDest = fam.split(":", 2);
89 sourceCf = srcAndDest[0];
90 String destCf = srcAndDest[1];
91 cfRenameMap.put(sourceCf, destCf);
92 } else {
93
94 sourceCf = fam;
95 }
96 scan.addFamily(Bytes.toBytes(sourceCf));
97 }
98 Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
99 }
100 TableMapReduceUtil.initTableMapperJob(tableName, scan,
101 Import.Importer.class, null, null, job);
102 TableMapReduceUtil.initTableReducerJob(
103 newTableName == null ? tableName : newTableName, null, job,
104 null, peerAddress, null, null);
105 job.setNumReduceTasks(0);
106 return job;
107 }
108
109
110
111
112 private static void printUsage(final String errorMsg) {
113 if (errorMsg != null && errorMsg.length() > 0) {
114 System.err.println("ERROR: " + errorMsg);
115 }
116 System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
117 "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
118 System.err.println();
119 System.err.println("Options:");
120 System.err.println(" rs.class hbase.regionserver.class of the peer cluster");
121 System.err.println(" specify if different from current cluster");
122 System.err.println(" rs.impl hbase.regionserver.impl of the peer cluster");
123 System.err.println(" starttime beginning of the time range (unixtime in millis)");
124 System.err.println(" without endtime means from starttime to forever");
125 System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
126 System.err.println(" versions number of cell versions to copy");
127 System.err.println(" new.name new table's name");
128 System.err.println(" peer.adr Address of the peer cluster given in the format");
129 System.err.println(" hbase.zookeeer.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent");
130 System.err.println(" families comma-separated list of families to copy");
131 System.err.println(" To copy from cf1 to cf2, give sourceCfName:destCfName. ");
132 System.err.println(" To keep the same name, just give \"cfName\"");
133 System.err.println(" all.cells also copy delete markers and deleted cells");
134 System.err.println();
135 System.err.println("Args:");
136 System.err.println(" tablename Name of the table to copy");
137 System.err.println();
138 System.err.println("Examples:");
139 System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
140 System.err.println(" $ bin/hbase " +
141 "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
142 "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
143 System.err.println("For performance consider the following general options:\n"
144 + "-Dhbase.client.scanner.caching=100\n"
145 + "-Dmapred.map.tasks.speculative.execution=false");
146 }
147
148 private static boolean doCommandLine(final String[] args) {
149
150
151 if (args.length < 1) {
152 printUsage(null);
153 return false;
154 }
155 try {
156 for (int i = 0; i < args.length; i++) {
157 String cmd = args[i];
158 if (cmd.equals("-h") || cmd.startsWith("--h")) {
159 printUsage(null);
160 return false;
161 }
162
163 final String startTimeArgKey = "--starttime=";
164 if (cmd.startsWith(startTimeArgKey)) {
165 startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
166 continue;
167 }
168
169 final String endTimeArgKey = "--endtime=";
170 if (cmd.startsWith(endTimeArgKey)) {
171 endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
172 continue;
173 }
174
175 final String versionsArgKey = "--versions=";
176 if (cmd.startsWith(versionsArgKey)) {
177 versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
178 continue;
179 }
180
181 final String newNameArgKey = "--new.name=";
182 if (cmd.startsWith(newNameArgKey)) {
183 newTableName = cmd.substring(newNameArgKey.length());
184 continue;
185 }
186
187 final String peerAdrArgKey = "--peer.adr=";
188 if (cmd.startsWith(peerAdrArgKey)) {
189 peerAddress = cmd.substring(peerAdrArgKey.length());
190 continue;
191 }
192
193 final String familiesArgKey = "--families=";
194 if (cmd.startsWith(familiesArgKey)) {
195 families = cmd.substring(familiesArgKey.length());
196 continue;
197 }
198
199 if (cmd.startsWith("--all.cells")) {
200 allCells = true;
201 continue;
202 }
203
204 if (i == args.length-1) {
205 tableName = cmd;
206 } else {
207 printUsage("Invalid argument '" + cmd + "'" );
208 return false;
209 }
210 }
211 if (newTableName == null && peerAddress == null) {
212 printUsage("At least a new table name or a " +
213 "peer address must be specified");
214 return false;
215 }
216 if (startTime > endTime) {
217 printUsage("Invalid time range filter: starttime=" + startTime + " > endtime=" + endTime);
218 return false;
219 }
220 } catch (Exception e) {
221 e.printStackTrace();
222 printUsage("Can't start because " + e.getMessage());
223 return false;
224 }
225 return true;
226 }
227
228
229
230
231
232
233
234 public static void main(String[] args) throws Exception {
235 Configuration conf = HBaseConfiguration.create();
236 String[] otherArgs =
237 new GenericOptionsParser(conf, args).getRemainingArgs();
238 Job job = createSubmittableJob(conf, otherArgs);
239 if (job != null) {
240 System.exit(job.waitForCompletion(true) ? 0 : 1);
241 }
242 }
243 }