View Javadoc

1   /**
2    * Copyright 2009 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapreduce;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.conf.Configuration;
25  import org.apache.hadoop.fs.Path;
26  import org.apache.hadoop.hbase.HBaseConfiguration;
27  import org.apache.hadoop.hbase.client.Result;
28  import org.apache.hadoop.hbase.client.Scan;
29  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
30  import org.apache.hadoop.hbase.util.Bytes;
31  import org.apache.hadoop.mapreduce.Job;
32  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
33  import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
34  import org.apache.hadoop.util.GenericOptionsParser;
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  
38  /**
39   * Export an HBase table.
40   * Writes content to sequence files up in HDFS.  Use {@link Import} to read it
41   * back in again.
42   */
43  public class Export {
44    private static final Log LOG = LogFactory.getLog(Export.class);
45    final static String NAME = "export";
46  
47    /**
48     * Mapper.
49     */
50    static class Exporter
51    extends TableMapper<ImmutableBytesWritable, Result> {
52      /**
53       * @param row  The current table row key.
54       * @param value  The columns.
55       * @param context  The current context.
56       * @throws IOException When something is broken with the data.
57       * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
58       *   org.apache.hadoop.mapreduce.Mapper.Context)
59       */
60      @Override
61      public void map(ImmutableBytesWritable row, Result value,
62        Context context)
63      throws IOException {
64        try {
65          context.write(row, value);
66        } catch (InterruptedException e) {
67          e.printStackTrace();
68        }
69      }
70    }
71  
72    /**
73     * Sets up the actual job.
74     *
75     * @param conf  The current configuration.
76     * @param args  The command line parameters.
77     * @return The newly created job.
78     * @throws IOException When setting up the job fails.
79     */
80    public static Job createSubmittableJob(Configuration conf, String[] args)
81    throws IOException {
82      String tableName = args[0];
83      Path outputDir = new Path(args[1]);
84      Job job = new Job(conf, NAME + "_" + tableName);
85      job.setJobName(NAME + "_" + tableName);
86      job.setJarByClass(Exporter.class);
87      // TODO: Allow passing filter and subset of rows/columns.
88      Scan s = new Scan();
89      // Optional arguments.
90      int versions = args.length > 2? Integer.parseInt(args[2]): 1;
91      s.setMaxVersions(versions);
92      long startTime = args.length > 3? Long.parseLong(args[3]): 0L;
93      long endTime = args.length > 4? Long.parseLong(args[4]): Long.MAX_VALUE;
94      s.setTimeRange(startTime, endTime);
95      s.setCacheBlocks(false);
96      if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
97        s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
98      }
99      LOG.info("verisons=" + versions + ", starttime=" + startTime +
100       ", endtime=" + endTime);
101     TableMapReduceUtil.initTableMapperJob(tableName, s, Exporter.class, null,
102       null, job);
103     // No reducers.  Just write straight to output files.
104     job.setNumReduceTasks(0);
105     job.setOutputFormatClass(SequenceFileOutputFormat.class);
106     job.setOutputKeyClass(ImmutableBytesWritable.class);
107     job.setOutputValueClass(Result.class);
108     FileOutputFormat.setOutputPath(job, outputDir);
109     return job;
110   }
111 
112   /*
113    * @param errorMsg Error message.  Can be null.
114    */
115   private static void usage(final String errorMsg) {
116     if (errorMsg != null && errorMsg.length() > 0) {
117       System.err.println("ERROR: " + errorMsg);
118     }
119     System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
120       "[<starttime> [<endtime>]]]\n");
121     System.err.println("  Note: -D properties will be applied to the conf used. ");
122     System.err.println("  For example: ");
123     System.err.println("   -D mapred.output.compress=true");
124     System.err.println("   -D mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec");
125     System.err.println("   -D mapred.output.compression.type=BLOCK");
126     System.err.println("  Additionally, the following SCAN properties can be specified");
127     System.err.println("  to control/limit what is exported..");
128     System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
129   }
130 
131   /**
132    * Main entry point.
133    *
134    * @param args  The command line parameters.
135    * @throws Exception When running the job fails.
136    */
137   public static void main(String[] args) throws Exception {
138     Configuration conf = HBaseConfiguration.create();
139     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
140     if (otherArgs.length < 2) {
141       usage("Wrong number of arguments: " + otherArgs.length);
142       System.exit(-1);
143     }
144     Job job = createSubmittableJob(conf, otherArgs);
145     System.exit(job.waitForCompletion(true)? 0 : 1);
146   }
147 }