View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.IOException;
22  import java.util.HashMap;
23  import java.util.Map;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.classification.InterfaceAudience;
28  import org.apache.hadoop.classification.InterfaceStability;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.hbase.HBaseConfiguration;
31  import org.apache.hadoop.hbase.client.Delete;
32  import org.apache.hadoop.hbase.client.HTable;
33  import org.apache.hadoop.hbase.client.Mutation;
34  import org.apache.hadoop.hbase.client.Put;
35  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
36  import org.apache.hadoop.hbase.util.Bytes;
37  import org.apache.hadoop.mapreduce.JobContext;
38  import org.apache.hadoop.mapreduce.OutputCommitter;
39  import org.apache.hadoop.mapreduce.OutputFormat;
40  import org.apache.hadoop.mapreduce.RecordWriter;
41  import org.apache.hadoop.mapreduce.TaskAttemptContext;
42  
43  /**
44   * <p>
45   * Hadoop output format that writes to one or more HBase tables. The key is
46   * taken to be the table name while the output value <em>must</em> be either a
47   * {@link Put} or a {@link Delete} instance. All tables must already exist, and
48   * all Puts and Deletes must reference only valid column families.
49   * </p>
50   *
51   * <p>
52   * Write-ahead logging (HLog) for Puts can be disabled by setting
53   * {@link #WAL_PROPERTY} to {@link #WAL_OFF}. Default value is {@link #WAL_ON}.
54   * Note that disabling write-ahead logging is only appropriate for jobs where
55   * loss of data due to region server failure can be tolerated (for example,
56   * because it is easy to rerun a bulk import).
57   * </p>
58   */
59  @InterfaceAudience.Public
60  @InterfaceStability.Stable
61  public class MultiTableOutputFormat extends OutputFormat<ImmutableBytesWritable, Mutation> {
62    /** Set this to {@link #WAL_OFF} to turn off write-ahead logging (HLog) */
63    public static final String WAL_PROPERTY = "hbase.mapreduce.multitableoutputformat.wal";
64    /** Property value to use write-ahead logging */
65    public static final boolean WAL_ON = true;
66    /** Property value to disable write-ahead logging */
67    public static final boolean WAL_OFF = false;
68    /**
69     * Record writer for outputting to multiple HTables.
70     */
71    protected static class MultiTableRecordWriter extends
72        RecordWriter<ImmutableBytesWritable, Mutation> {
73      private static final Log LOG = LogFactory.getLog(MultiTableRecordWriter.class);
74      Map<ImmutableBytesWritable, HTable> tables;
75      Configuration conf;
76      boolean useWriteAheadLogging;
77  
78      /**
79       * @param conf
80       *          HBaseConfiguration to used
81       * @param useWriteAheadLogging
82       *          whether to use write ahead logging. This can be turned off (
83       *          <tt>false</tt>) to improve performance when bulk loading data.
84       */
85      public MultiTableRecordWriter(Configuration conf,
86          boolean useWriteAheadLogging) {
87        LOG.debug("Created new MultiTableRecordReader with WAL "
88            + (useWriteAheadLogging ? "on" : "off"));
89        this.tables = new HashMap<ImmutableBytesWritable, HTable>();
90        this.conf = conf;
91        this.useWriteAheadLogging = useWriteAheadLogging;
92      }
93  
94      /**
95       * @param tableName
96       *          the name of the table, as a string
97       * @return the named table
98       * @throws IOException
99       *           if there is a problem opening a table
100      */
101     HTable getTable(ImmutableBytesWritable tableName) throws IOException {
102       if (!tables.containsKey(tableName)) {
103         LOG.debug("Opening HTable \"" + Bytes.toString(tableName.get())+ "\" for writing");
104         HTable table = new HTable(conf, tableName.get());
105         table.setAutoFlush(false);
106         tables.put(tableName, table);
107       }
108       return tables.get(tableName);
109     }
110 
111     @Override
112     public void close(TaskAttemptContext context) throws IOException {
113       for (HTable table : tables.values()) {
114         table.flushCommits();
115       }
116     }
117 
118     /**
119      * Writes an action (Put or Delete) to the specified table.
120      *
121      * @param tableName
122      *          the table being updated.
123      * @param action
124      *          the update, either a put or a delete.
125      * @throws IllegalArgumentException
126      *          if the action is not a put or a delete.
127      */
128     @Override
129     public void write(ImmutableBytesWritable tableName, Mutation action) throws IOException {
130       HTable table = getTable(tableName);
131       // The actions are not immutable, so we defensively copy them
132       if (action instanceof Put) {
133         Put put = new Put((Put) action);
134         put.setWriteToWAL(useWriteAheadLogging);
135         table.put(put);
136       } else if (action instanceof Delete) {
137         Delete delete = new Delete((Delete) action);
138         table.delete(delete);
139       } else
140         throw new IllegalArgumentException(
141             "action must be either Delete or Put");
142     }
143   }
144 
145   @Override
146   public void checkOutputSpecs(JobContext context) throws IOException,
147       InterruptedException {
148     // we can't know ahead of time if it's going to blow up when the user
149     // passes a table name that doesn't exist, so nothing useful here.
150   }
151 
152   @Override
153   public OutputCommitter getOutputCommitter(TaskAttemptContext context)
154       throws IOException, InterruptedException {
155     return new TableOutputCommitter();
156   }
157 
158   @Override
159   public RecordWriter<ImmutableBytesWritable, Mutation> getRecordWriter(TaskAttemptContext context)
160       throws IOException, InterruptedException {
161     Configuration conf = context.getConfiguration();
162     return new MultiTableRecordWriter(HBaseConfiguration.create(conf),
163         conf.getBoolean(WAL_PROPERTY, WAL_ON));
164   }
165 
166 }