View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapreduce;
21  
22  import org.apache.hadoop.conf.Configuration;
23  import org.apache.hadoop.hbase.HBaseConfiguration;
24  import org.apache.hadoop.hbase.HConstants;
25  import org.apache.hadoop.hbase.util.Bytes;
26  import org.apache.hadoop.hbase.client.Scan;
27  import org.apache.hadoop.mapreduce.Job;
28  
29  import java.io.IOException;
30  
31  /**
32   * Tool used to copy a table to another one which can be on a different setup.
33   * It is also configurable with a start and time as well as a specification
34   * of the region server implementation if different from the local cluster.
35   */
36  public class CopyTable {
37  
38    final static String NAME = "copytable";
39    static String rsClass = null;
40    static String rsImpl = null;
41    static long startTime = 0;
42    static long endTime = 0;
43    static String tableName = null;
44    static String newTableName = null;
45    static String peerAddress = null;
46    static String families = null;
47  
48    /**
49     * Sets up the actual job.
50     *
51     * @param conf  The current configuration.
52     * @param args  The command line parameters.
53     * @return The newly created job.
54     * @throws IOException When setting up the job fails.
55     */
56    public static Job createSubmittableJob(Configuration conf, String[] args)
57    throws IOException {
58      if (!doCommandLine(args)) {
59        return null;
60      }
61      Job job = new Job(conf, NAME + "_" + tableName);
62      job.setJarByClass(CopyTable.class);
63      Scan scan = new Scan();
64      if (startTime != 0) {
65        scan.setTimeRange(startTime,
66            endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
67      }
68      if(families != null) {
69        String[] fams = families.split(",");
70        for(String fam : fams) {
71          scan.addFamily(Bytes.toBytes(fam));
72        }
73      }
74      TableMapReduceUtil.initTableMapperJob(tableName, scan,
75          Import.Importer.class, null, null, job);
76      TableMapReduceUtil.initTableReducerJob(
77          newTableName == null ? tableName : newTableName, null, job,
78          null, peerAddress, rsClass, rsImpl);
79      job.setNumReduceTasks(0);
80      return job;
81    }
82  
83    /*
84     * @param errorMsg Error message.  Can be null.
85     */
86    private static void printUsage(final String errorMsg) {
87      if (errorMsg != null && errorMsg.length() > 0) {
88        System.err.println("ERROR: " + errorMsg);
89      }
90      System.err.println("Usage: CopyTable [--rs.class=CLASS] " +
91          "[--rs.impl=IMPL] [--starttime=X] [--endtime=Y] " +
92          "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
93      System.err.println();
94      System.err.println("Options:");
95      System.err.println(" rs.class     hbase.regionserver.class of the peer cluster");
96      System.err.println("              specify if different from current cluster");
97      System.err.println(" rs.impl      hbase.regionserver.impl of the peer cluster");
98      System.err.println(" starttime    beginning of the time range");
99      System.err.println("              without endtime means from starttime to forever");
100     System.err.println(" endtime      end of the time range");
101     System.err.println(" new.name     new table's name");
102     System.err.println(" peer.adr     Address of the peer cluster given in the format");
103     System.err.println("              hbase.zookeeer.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent");
104     System.err.println(" families     comma-seperated list of families to copy");
105     System.err.println();
106     System.err.println("Args:");
107     System.err.println(" tablename    Name of the table to copy");
108     System.err.println();
109     System.err.println("Examples:");
110     System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
111     System.err.println(" $ bin/hbase " +
112         "org.apache.hadoop.hbase.mapreduce.CopyTable --rs.class=org.apache.hadoop.hbase.ipc.ReplicationRegionInterface " +
113         "--rs.impl=org.apache.hadoop.hbase.regionserver.replication.ReplicationRegionServer --starttime=1265875194289 --endtime=1265878794289 " +
114         "--peer.adr=server1,server2,server3:2181:/hbase TestTable ");
115   }
116 
117   private static boolean doCommandLine(final String[] args) {
118     // Process command-line args. TODO: Better cmd-line processing
119     // (but hopefully something not as painful as cli options).
120     if (args.length < 1) {
121       printUsage(null);
122       return false;
123     }
124     try {
125       for (int i = 0; i < args.length; i++) {
126         String cmd = args[i];
127         if (cmd.equals("-h") || cmd.startsWith("--h")) {
128           printUsage(null);
129           return false;
130         }
131 
132         final String rsClassArgKey = "--rs.class=";
133         if (cmd.startsWith(rsClassArgKey)) {
134           rsClass = cmd.substring(rsClassArgKey.length());
135           continue;
136         }
137 
138         final String rsImplArgKey = "--rs.impl=";
139         if (cmd.startsWith(rsImplArgKey)) {
140           rsImpl = cmd.substring(rsImplArgKey.length());
141           continue;
142         }
143 
144         final String startTimeArgKey = "--starttime=";
145         if (cmd.startsWith(startTimeArgKey)) {
146           startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
147           continue;
148         }
149 
150         final String endTimeArgKey = "--endtime=";
151         if (cmd.startsWith(endTimeArgKey)) {
152           endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
153           continue;
154         }
155 
156         final String newNameArgKey = "--new.name=";
157         if (cmd.startsWith(newNameArgKey)) {
158           newTableName = cmd.substring(newNameArgKey.length());
159           continue;
160         }
161 
162         final String peerAdrArgKey = "--peer.adr=";
163         if (cmd.startsWith(peerAdrArgKey)) {
164           peerAddress = cmd.substring(peerAdrArgKey.length());
165           continue;
166         }
167 
168         final String familiesArgKey = "--families=";
169         if (cmd.startsWith(familiesArgKey)) {
170           families = cmd.substring(familiesArgKey.length());
171           continue;
172         }
173 
174         if (i == args.length-1) {
175           tableName = cmd;
176         }
177       }
178       if (newTableName == null && peerAddress == null) {
179         printUsage("At least a new table name or a " +
180             "peer address must be specified");
181         return false;
182       }
183     } catch (Exception e) {
184       e.printStackTrace();
185       printUsage("Can't start because " + e.getMessage());
186       return false;
187     }
188     return true;
189   }
190 
191   /**
192    * Main entry point.
193    *
194    * @param args  The command line parameters.
195    * @throws Exception When running the job fails.
196    */
197   public static void main(String[] args) throws Exception {
198     Configuration conf = HBaseConfiguration.create();
199     Job job = createSubmittableJob(conf, args);
200     if (job != null) {
201       System.exit(job.waitForCompletion(true) ? 0 : 1);
202     }
203   }
204 }