/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Writes HFiles. Passed KeyValues must arrive in order.
 * Writes current time as the sequence id for the file. Sets the major compacted
 * attribute on created HFiles. Calling write(null,null) will forcibly roll
 * all HFiles being written.
 * <p>
 * Using this class as part of a MapReduce job is best done
 * using {@link #configureIncrementalLoad(Job, HTable)}.
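 * <p>
 * A minimal driver sketch for that flow; the driver and mapper classes, paths, and table
 * name below are illustrative placeholders, not part of this API:
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * Job job = Job.getInstance(conf, "hfile-prepare");             // hypothetical job name
 * job.setJarByClass(MyDriver.class);                            // hypothetical driver class
 * FileInputFormat.addInputPath(job, new Path("/in/rows"));      // hypothetical input path
 * FileOutputFormat.setOutputPath(job, new Path("/out/hfiles")); // hypothetical output path
 * job.setMapperClass(MyKeyValueMapper.class);                   // hypothetical mapper emitting KeyValue
 * job.setMapOutputKeyClass(ImmutableBytesWritable.class);
 * job.setMapOutputValueClass(KeyValue.class);
 * HTable table = new HTable(conf, "my_table");
 * HFileOutputFormat.configureIncrementalLoad(job, table);       // wires partitioner, reducer, output format
 * job.waitForCompletion(true);
 * }</pre>
 * The written HFiles can then be handed to LoadIncrementalHFiles to complete the bulk load.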
 * @see KeyValueSortReducer
 * @deprecated use {@link HFileOutputFormat2} instead.
 */
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
  static Log LOG = LogFactory.getLog(HFileOutputFormat.class);

  public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
      final TaskAttemptContext context) throws IOException, InterruptedException {
    return HFileOutputFormat2.createRecordWriter(context);
  }

  /**
   * Configure a MapReduce Job to perform an incremental load into the given
   * table. This
   * <ul>
   *   <li>Inspects the table to configure a total order partitioner</li>
   *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
   *   <li>Sets the number of reduce tasks to match the current number of regions</li>
   *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
   *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
   *     PutSortReducer)</li>
   * </ul>
   * The user should be sure to set the map output value class to either KeyValue or Put before
   * running this function.
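   * <p>
   * For example, a job whose mapper emits Put values could be prepared as follows before the
   * call (the variable names are illustrative, not prescribed by this class):
   * <pre>{@code
   * job.setMapOutputKeyClass(ImmutableBytesWritable.class);
   * job.setMapOutputValueClass(Put.class);       // PutSortReducer will be chosen
   * HFileOutputFormat.configureIncrementalLoad(job, table);
   * }</pre>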
   */
  public static void configureIncrementalLoad(Job job, HTable table)
      throws IOException {
    HFileOutputFormat2.configureIncrementalLoad(job, table, HFileOutputFormat.class);
  }

  /**
   * Runs inside the task to deserialize the column family to compression algorithm
   * map from the configuration.
   *
   * Package-private for unit tests only.
   *
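   * <p>
   * A sketch of how the returned map might be inspected inside a task (the logging is
   * illustrative only):
   * <pre>{@code
   * Map<byte[], String> compression = createFamilyCompressionMap(conf);
   * for (Map.Entry<byte[], String> entry : compression.entrySet()) {
   *   LOG.info(Bytes.toString(entry.getKey()) + " -> " + entry.getValue());
   * }
   * }</pre>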
   * @return a map from column family to the name of the configured compression
   *         algorithm
   */
  static Map<byte[], String> createFamilyCompressionMap(Configuration conf) {
    return HFileOutputFormat2.createFamilyCompressionMap(conf);
  }

  /**
   * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
   * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
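   * <p>
   * A sketch of the intended use (the split points below are arbitrary examples); with two
   * split points, TotalOrderPartitioner produces three partitions, so the job needs three
   * reducers:
   * <pre>{@code
   * List<ImmutableBytesWritable> splitPoints = Arrays.asList(
   *     new ImmutableBytesWritable(Bytes.toBytes("row-b")),
   *     new ImmutableBytesWritable(Bytes.toBytes("row-d")));
   * configurePartitioner(job, splitPoints);
   * // reducer 0 gets keys before "row-b", reducer 1 gets ["row-b", "row-d"), reducer 2 the rest
   * }</pre>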
   */
  static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
      throws IOException {
    HFileOutputFormat2.configurePartitioner(job, splitPoints);
  }

  /**
   * Serialize the column family to compression algorithm map into the given configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * Package-private for unit tests only.
   *
   * @throws IOException
   *           on failure to read column family descriptors
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
      value="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
  static void configureCompression(HTable table, Configuration conf) throws IOException {
    HFileOutputFormat2.configureCompression(table, conf);
  }

  /**
   * Serialize the column family to bloom type map into the given configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @throws IOException
   *           on failure to read column family descriptors
   */
  static void configureBloomType(HTable table, Configuration conf) throws IOException {
    HFileOutputFormat2.configureBloomType(table, conf);
  }
}