View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import org.apache.hadoop.classification.InterfaceAudience;
22  import org.apache.hadoop.classification.InterfaceStability;
23  import org.apache.hadoop.conf.Configuration;
24  import org.apache.hadoop.hbase.HBaseConfiguration;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.util.Bytes;
27  import org.apache.hadoop.hbase.client.Scan;
28  import org.apache.hadoop.mapreduce.Job;
29  import org.apache.hadoop.util.GenericOptionsParser;
30  
31  import java.io.IOException;
32  import java.util.HashMap;
33  import java.util.Map;
34  
35  /**
36   * Tool used to copy a table to another one which can be on a different setup.
37   * It is also configurable with a start and time as well as a specification
38   * of the region server implementation if different from the local cluster.
39   */
40  @InterfaceAudience.Public
41  @InterfaceStability.Stable
42  public class CopyTable {
43  
44    final static String NAME = "copytable";
45    static long startTime = 0;
46    static long endTime = 0;
47    static int versions = -1;
48    static String tableName = null;
49    static String newTableName = null;
50    static String peerAddress = null;
51    static String families = null;
52    static boolean allCells = false;
53  
54    /**
55     * Sets up the actual job.
56     *
57     * @param conf  The current configuration.
58     * @param args  The command line parameters.
59     * @return The newly created job.
60     * @throws IOException When setting up the job fails.
61     */
62    public static Job createSubmittableJob(Configuration conf, String[] args)
63    throws IOException {
64      if (!doCommandLine(args)) {
65        return null;
66      }
67      Job job = new Job(conf, NAME + "_" + tableName);
68      job.setJarByClass(CopyTable.class);
69      Scan scan = new Scan();
70      scan.setCacheBlocks(false);
71      if (startTime != 0) {
72        scan.setTimeRange(startTime,
73            endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
74      }
75      if (allCells) {
76        scan.setRaw(true);
77      }
78      if (versions >= 0) {
79        scan.setMaxVersions(versions);
80      }
81      if(families != null) {
82        String[] fams = families.split(",");
83        Map<String,String> cfRenameMap = new HashMap<String,String>();
84        for(String fam : fams) {
85          String sourceCf;
86          if(fam.contains(":")) { 
87              // fam looks like "sourceCfName:destCfName"
88              String[] srcAndDest = fam.split(":", 2);
89              sourceCf = srcAndDest[0];
90              String destCf = srcAndDest[1];
91              cfRenameMap.put(sourceCf, destCf);
92          } else {
93              // fam is just "sourceCf"
94              sourceCf = fam; 
95          }
96          scan.addFamily(Bytes.toBytes(sourceCf));
97        }
98        Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
99      }
100     TableMapReduceUtil.initTableMapperJob(tableName, scan,
101         Import.Importer.class, null, null, job);
102     TableMapReduceUtil.initTableReducerJob(
103         newTableName == null ? tableName : newTableName, null, job,
104         null, peerAddress, null, null);
105     job.setNumReduceTasks(0);
106     return job;
107   }
108 
109   /*
110    * @param errorMsg Error message.  Can be null.
111    */
112   private static void printUsage(final String errorMsg) {
113     if (errorMsg != null && errorMsg.length() > 0) {
114       System.err.println("ERROR: " + errorMsg);
115     }
116     System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
117         "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
118     System.err.println();
119     System.err.println("Options:");
120     System.err.println(" rs.class     hbase.regionserver.class of the peer cluster");
121     System.err.println("              specify if different from current cluster");
122     System.err.println(" rs.impl      hbase.regionserver.impl of the peer cluster");
123     System.err.println(" starttime    beginning of the time range (unixtime in millis)");
124     System.err.println("              without endtime means from starttime to forever");
125     System.err.println(" endtime      end of the time range.  Ignored if no starttime specified.");
126     System.err.println(" versions     number of cell versions to copy");
127     System.err.println(" new.name     new table's name");
128     System.err.println(" peer.adr     Address of the peer cluster given in the format");
129     System.err.println("              hbase.zookeeer.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent");
130     System.err.println(" families     comma-separated list of families to copy");
131     System.err.println("              To copy from cf1 to cf2, give sourceCfName:destCfName. ");
132     System.err.println("              To keep the same name, just give \"cfName\"");
133     System.err.println(" all.cells    also copy delete markers and deleted cells");
134     System.err.println();
135     System.err.println("Args:");
136     System.err.println(" tablename    Name of the table to copy");
137     System.err.println();
138     System.err.println("Examples:");
139     System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
140     System.err.println(" $ bin/hbase " +
141         "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
142         "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
143     System.err.println("For performance consider the following general options:\n"
144         + "-Dhbase.client.scanner.caching=100\n"
145         + "-Dmapred.map.tasks.speculative.execution=false");
146   }
147 
148   private static boolean doCommandLine(final String[] args) {
149     // Process command-line args. TODO: Better cmd-line processing
150     // (but hopefully something not as painful as cli options).
151     if (args.length < 1) {
152       printUsage(null);
153       return false;
154     }
155     try {
156       for (int i = 0; i < args.length; i++) {
157         String cmd = args[i];
158         if (cmd.equals("-h") || cmd.startsWith("--h")) {
159           printUsage(null);
160           return false;
161         }
162 
163         final String startTimeArgKey = "--starttime=";
164         if (cmd.startsWith(startTimeArgKey)) {
165           startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
166           continue;
167         }
168 
169         final String endTimeArgKey = "--endtime=";
170         if (cmd.startsWith(endTimeArgKey)) {
171           endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
172           continue;
173         }
174 
175         final String versionsArgKey = "--versions=";
176         if (cmd.startsWith(versionsArgKey)) {
177           versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
178           continue;
179         }
180 
181         final String newNameArgKey = "--new.name=";
182         if (cmd.startsWith(newNameArgKey)) {
183           newTableName = cmd.substring(newNameArgKey.length());
184           continue;
185         }
186 
187         final String peerAdrArgKey = "--peer.adr=";
188         if (cmd.startsWith(peerAdrArgKey)) {
189           peerAddress = cmd.substring(peerAdrArgKey.length());
190           continue;
191         }
192 
193         final String familiesArgKey = "--families=";
194         if (cmd.startsWith(familiesArgKey)) {
195           families = cmd.substring(familiesArgKey.length());
196           continue;
197         }
198 
199         if (cmd.startsWith("--all.cells")) {
200           allCells = true;
201           continue;
202         }
203 
204         if (i == args.length-1) {
205           tableName = cmd;
206         } else {
207           printUsage("Invalid argument '" + cmd + "'" );
208           return false;
209         }
210       }
211       if (newTableName == null && peerAddress == null) {
212         printUsage("At least a new table name or a " +
213             "peer address must be specified");
214         return false;
215       }
216       if (startTime > endTime) {
217         printUsage("Invalid time range filter: starttime=" + startTime + " >  endtime=" + endTime);
218         return false;
219       }
220     } catch (Exception e) {
221       e.printStackTrace();
222       printUsage("Can't start because " + e.getMessage());
223       return false;
224     }
225     return true;
226   }
227 
228   /**
229    * Main entry point.
230    *
231    * @param args  The command line parameters.
232    * @throws Exception When running the job fails.
233    */
234   public static void main(String[] args) throws Exception {
235     Configuration conf = HBaseConfiguration.create();
236     String[] otherArgs =
237       new GenericOptionsParser(conf, args).getRemainingArgs();
238     Job job = createSubmittableJob(conf, otherArgs);
239     if (job != null) {
240       System.exit(job.waitForCompletion(true) ? 0 : 1);
241     }
242   }
243 }