/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
 * while the output value <u>must</u> be either a {@link Put} or a
 * {@link Delete} instance.
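 * <p>
 * A minimal reducer job setup might look like the following sketch; the table
 * name and the key/value classes are illustrative, not prescribed by this class:
 * <pre>{@code
 * Job job = Job.getInstance(HBaseConfiguration.create());
 * job.setOutputFormatClass(TableOutputFormat.class);
 * job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "mytable");
 * job.setOutputKeyClass(ImmutableBytesWritable.class);
 * job.setOutputValueClass(Mutation.class);
 * }</pre>
 * In practice, {@link TableMapReduceUtil} performs this wiring for you.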
 *
 * @param <KEY>  The type of the key. Ignored in this class.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
implements Configurable {

  private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);

  /**
   * Prefix for configuration property overrides to apply in {@link #setConf(Configuration)}.
   * For keys matching this prefix, the prefix is stripped, and the value is set in the
   * configuration with the resulting key, i.e. the entry "hbase.mapred.output.key1 = value1"
   * would be set in the configuration as "key1 = value1". Use this to set properties
   * which should only be applied to the {@code TableOutputFormat} configuration and not the
   * input configuration.
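   * <p>
   * For example, to point only the output at a different ZooKeeper quorum
   * (the host name here is hypothetical):
   * <pre>{@code
   * conf.set("hbase.mapred.output.hbase.zookeeper.quorum", "peer-zk-host");
   * }</pre>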
   */
  public static final String OUTPUT_CONF_PREFIX = "hbase.mapred.output.";

  /** Job parameter that specifies the output table. */
  public static final String OUTPUT_TABLE = OUTPUT_CONF_PREFIX + "outputtable";

  /**
   * Optional job parameter to specify a peer cluster.
   * Used to specify a remote cluster when copying between HBase clusters (the
   * source cluster is picked up from <code>hbase-site.xml</code>).
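   * <p>
   * The value is an HBase cluster key, typically of the form
   * {@code hbase.zookeeper.quorum:hbase.zookeeper.property.clientPort:zookeeper.znode.parent},
   * e.g. {@code server1,server2,server3:2181:/hbase} (host names illustrative).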
   * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
   */
  public static final String QUORUM_ADDRESS = OUTPUT_CONF_PREFIX + "quorum";

  /** Optional job parameter to specify the peer cluster's ZooKeeper client port. */
  public static final String QUORUM_PORT = OUTPUT_CONF_PREFIX + "port";

  /** Optional specification of the region server class name of the peer cluster. */
  public static final String REGION_SERVER_CLASS = OUTPUT_CONF_PREFIX + "rs.class";
  /** Optional specification of the region server implementation name of the peer cluster. */
  public static final String REGION_SERVER_IMPL = OUTPUT_CONF_PREFIX + "rs.impl";

  /** The configuration. */
  private Configuration conf = null;

  private HTable table;

  /**
   * Writes the reducer output to an HBase table.
   *
   * @param <KEY>  The type of the key.
   */
  protected static class TableRecordWriter<KEY>
  extends RecordWriter<KEY, Mutation> {

    /** The table to write to. */
    private HTable table;

    /**
     * Instantiate a TableRecordWriter with the given {@link HTable} to write to.
     *
     * @param table  The table to write to.
     */
    public TableRecordWriter(HTable table) {
      this.table = table;
    }

    /**
     * Closes the writer, flushing any buffered mutations to the table.
     *
     * @param context  The context.
     * @throws IOException When closing the writer fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
     */
    @Override
    public void close(TaskAttemptContext context)
    throws IOException {
      table.close();
    }

    /**
     * Writes a key/value pair into the table.
     *
     * @param key  The key.
     * @param value  The value.
     * @throws IOException When writing fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
     */
    @Override
    public void write(KEY key, Mutation value)
    throws IOException {
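      // Copy the mutation before handing it off: the table buffers edits
      // while autoflush is disabled, and the caller may reuse the instance.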
      if (value instanceof Put) {
        this.table.put(new Put((Put) value));
      } else if (value instanceof Delete) {
        this.table.delete(new Delete((Delete) value));
      } else {
        throw new IOException("Pass a Delete or a Put");
      }
    }
  }

  /**
   * Creates a new record writer.
   *
   * @param context  The current task context.
   * @return The newly created writer instance.
   * @throws IOException When creating the writer fails.
   * @throws InterruptedException When the job is cancelled.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public RecordWriter<KEY, Mutation> getRecordWriter(
    TaskAttemptContext context)
  throws IOException, InterruptedException {
    return new TableRecordWriter<KEY>(this.table);
  }

  /**
   * Checks if the output target exists.
   *
   * @param context  The current context.
   * @throws IOException When the check fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext)
   */
  @Override
  public void checkOutputSpecs(JobContext context) throws IOException,
      InterruptedException {
    // TODO Check if the table exists?
  }

  /**
   * Returns the output committer.
   *
   * @param context  The current context.
   * @return The committer.
   * @throws IOException When creating the committer fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getOutputCommitter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
  throws IOException, InterruptedException {
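    // TableOutputCommitter is a no-op committer: the record writer pushes
    // mutations straight to the table, so there is nothing to commit per task.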
    return new TableOutputCommitter();
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

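  /**
   * Sets the configuration. Resolves the output table name, applies any
   * {@link #OUTPUT_CONF_PREFIX}-prefixed overrides and peer-cluster settings,
   * and opens the table with autoflush disabled so that mutations are buffered.
   *
   * @param otherConf  The configuration to take the output settings from.
   * @throws IllegalArgumentException When no output table is specified.
   */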
  @Override
  public void setConf(Configuration otherConf) {
    String tableName = otherConf.get(OUTPUT_TABLE);
    if (tableName == null || tableName.isEmpty()) {
      throw new IllegalArgumentException("Must specify table name");
    }

    String address = otherConf.get(QUORUM_ADDRESS);
    int zkClientPort = otherConf.getInt(QUORUM_PORT, 0);
    String serverClass = otherConf.get(REGION_SERVER_CLASS);
    String serverImpl = otherConf.get(REGION_SERVER_IMPL);

    try {
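      // createClusterConf copies otherConf, points it at the peer quorum when
      // one is given, and applies any OUTPUT_CONF_PREFIX-prefixed overrides.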
      this.conf = HBaseConfiguration.createClusterConf(otherConf, address, OUTPUT_CONF_PREFIX);

      if (serverClass != null) {
        this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
      }
      if (zkClientPort != 0) {
        this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
      }
      this.table = new HTable(this.conf, tableName);
      this.table.setAutoFlush(false, true);
      LOG.info("Created table instance for " + tableName);
    } catch (IOException e) {
      LOG.error(e);
      throw new RuntimeException(e);
    }
  }
}