/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Writes HFiles. Passed KeyValues must arrive in order.
 * Writes current time as the sequence id for the file. Sets the major compacted
 * attribute on created hfiles. Calling write(null, null) will forcibly roll
 * all HFiles being written.
 * <p>
 * Using this class as part of a MapReduce job is best done
 * using {@link #configureIncrementalLoad(Job, HTable)}.
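 * <p>
 * A minimal driver sketch is shown below; the driver class, mapper class, table name, and
 * paths are illustrative placeholders, not part of this API.
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * Job job = Job.getInstance(conf, "hfile-bulk-load"); // assumes the Hadoop 2 Job API
 * job.setJarByClass(MyBulkLoadDriver.class);          // hypothetical driver class
 * job.setMapperClass(MyKeyValueMapper.class);         // hypothetical mapper emitting KeyValues
 * job.setMapOutputKeyClass(ImmutableBytesWritable.class);
 * job.setMapOutputValueClass(KeyValue.class);
 * FileInputFormat.addInputPath(job, new Path("/input"));    // illustrative input path
 * FileOutputFormat.setOutputPath(job, new Path("/hfiles")); // illustrative HFile output path
 * HTable table = new HTable(conf, "my_table");              // illustrative table name
 * HFileOutputFormat.configureIncrementalLoad(job, table);   // sets partitioner, reducer, output format
 * job.waitForCompletion(true);
 * }</pre>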
 * @see KeyValueSortReducer
 * @deprecated use {@link HFileOutputFormat2} instead.
 */
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
  static Log LOG = LogFactory.getLog(HFileOutputFormat.class);

  @Override
  public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
      final TaskAttemptContext context) throws IOException, InterruptedException {
    return HFileOutputFormat2.createRecordWriter(context);
  }

  /**
   * Configure a MapReduce Job to perform an incremental load into the given
   * table. This
   * <ul>
   *   <li>Inspects the table to configure a total order partitioner</li>
   *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
   *   <li>Sets the number of reduce tasks to match the current number of regions</li>
   *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
   *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
   *     PutSortReducer)</li>
   * </ul>
   * The user should be sure to set the map output value class to either KeyValue or Put before
   * running this function.
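   * <p>
   * A hedged sketch of that precondition ({@code job} and {@code table} are assumed to be
   * already constructed, as in the class-level example):
   * <pre>{@code
   * job.setMapOutputKeyClass(ImmutableBytesWritable.class);
   * job.setMapOutputValueClass(KeyValue.class); // Put.class would select PutSortReducer instead
   * HFileOutputFormat.configureIncrementalLoad(job, table);
   * }</pre>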
   */
  public static void configureIncrementalLoad(Job job, HTable table)
      throws IOException {
    HFileOutputFormat2.configureIncrementalLoad(job, table, HFileOutputFormat.class);
  }

  /**
   * Runs inside the task to deserialize the column family to compression algorithm
   * map from the configuration.
   *
   * Package-private for unit tests only.
   *
   * @return a map from column family to the name of the configured compression
   *         algorithm
   */
  static Map<byte[], String> createFamilyCompressionMap(Configuration conf) {
    return HFileOutputFormat2.createFamilyCompressionMap(conf);
  }

  /**
   * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
   * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
   */
  static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
      throws IOException {
    HFileOutputFormat2.configurePartitioner(job, splitPoints);
  }

  /**
   * Serialize column family to compression algorithm map to configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * Package-private for unit tests only.
   *
   * @throws IOException
   *           on failure to read column family descriptors
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
      value="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
  static void configureCompression(HTable table, Configuration conf) throws IOException {
    HFileOutputFormat2.configureCompression(table, conf);
  }

  /**
   * Serialize column family to bloom type map to configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @throws IOException
   *           on failure to read column family descriptors
   */
  static void configureBloomType(HTable table, Configuration conf) throws IOException {
    HFileOutputFormat2.configureBloomType(table, conf);
  }
}