1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import java.io.IOException;
22 import java.util.List;
23 import java.util.Map;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.hadoop.hbase.classification.InterfaceAudience;
28 import org.apache.hadoop.hbase.classification.InterfaceStability;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.hbase.KeyValue;
31 import org.apache.hadoop.hbase.client.HTable;
32 import org.apache.hadoop.hbase.client.Table;
33 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
34 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
35 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
36 import org.apache.hadoop.hbase.regionserver.BloomType;
37 import org.apache.hadoop.mapreduce.Job;
38 import org.apache.hadoop.mapreduce.RecordWriter;
39 import org.apache.hadoop.mapreduce.TaskAttemptContext;
40 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
41
42 import com.google.common.annotations.VisibleForTesting;
43
44 /**
45 * Writes HFiles. Passed KeyValues must arrive in order.
46 * Writes current time as the sequence id for the file. Sets the major compacted
47 * attribute on created hfiles. Calling write(null,null) will forcibly roll
48 * all HFiles being written.
49 * <p>
50 * Using this class as part of a MapReduce job is best done
51 * using {@link #configureIncrementalLoad(Job, HTable)}.
52 * @see KeyValueSortReducer
53 * @deprecated use {@link HFileOutputFormat2} instead.
54 */
55 @Deprecated
56 @InterfaceAudience.Public
57 @InterfaceStability.Stable
58 public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
59 static Log LOG = LogFactory.getLog(HFileOutputFormat.class);
60
61 // This constant is public since the client can modify this when setting
62 // up their conf object and thus refer to this symbol.
63 // It is present for backwards compatibility reasons. Use it only to
64 // override the auto-detection of datablock encoding.
65 public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
66 HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY;
67
68 @Override
69 public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
70 final TaskAttemptContext context) throws IOException, InterruptedException {
71 return HFileOutputFormat2.createRecordWriter(context);
72 }
73
74 /**
75 * Configure a MapReduce Job to perform an incremental load into the given
76 * table. This
77 * <ul>
78 * <li>Inspects the table to configure a total order partitioner</li>
79 * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
80 * <li>Sets the number of reduce tasks to match the current number of regions</li>
81 * <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
82 * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
83 * PutSortReducer)</li>
84 * </ul>
85 * The user should be sure to set the map output value class to either KeyValue or Put before
86 * running this function.
87 */
88 public static void configureIncrementalLoad(Job job, HTable table)
89 throws IOException {
90 HFileOutputFormat2.configureIncrementalLoad(job, table, table);
91 }
92
93 /**
94 * Runs inside the task to deserialize column family to compression algorithm
95 * map from the configuration.
96 *
97 * @param conf to read the serialized values from
98 * @return a map from column family to the configured compression algorithm
99 */
100 @VisibleForTesting
101 static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
102 conf) {
103 return HFileOutputFormat2.createFamilyCompressionMap(conf);
104 }
105
106 /**
107 * Runs inside the task to deserialize column family to bloom filter type
108 * map from the configuration.
109 *
110 * @param conf to read the serialized values from
111 * @return a map from column family to the the configured bloom filter type
112 */
113 @VisibleForTesting
114 static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
115 return HFileOutputFormat2.createFamilyBloomTypeMap(conf);
116 }
117
118 /**
119 * Runs inside the task to deserialize column family to block size
120 * map from the configuration.
121 *
122 * @param conf to read the serialized values from
123 * @return a map from column family to the configured block size
124 */
125 @VisibleForTesting
126 static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
127 return HFileOutputFormat2.createFamilyBlockSizeMap(conf);
128 }
129
130 /**
131 * Runs inside the task to deserialize column family to data block encoding
132 * type map from the configuration.
133 *
134 * @param conf to read the serialized values from
135 * @return a map from column family to HFileDataBlockEncoder for the
136 * configured data block type for the family
137 */
138 @VisibleForTesting
139 static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
140 Configuration conf) {
141 return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);
142 }
143
144 /**
145 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
146 * <code>splitPoints</code>. Cleans up the partitions file after job exists.
147 */
148 static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
149 throws IOException {
150 HFileOutputFormat2.configurePartitioner(job, splitPoints);
151 }
152
153 /**
154 * Serialize column family to compression algorithm map to configuration.
155 * Invoked while configuring the MR job for incremental load.
156 *
157 * @param table to read the properties from
158 * @param conf to persist serialized values into
159 * @throws IOException
160 * on failure to read column family descriptors
161 */
162 @edu.umd.cs.findbugs.annotations.SuppressWarnings(
163 value="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
164 @VisibleForTesting
165 static void configureCompression(Table table, Configuration conf) throws IOException {
166 HFileOutputFormat2.configureCompression(table, conf);
167 }
168
169 /**
170 * Serialize column family to block size map to configuration.
171 * Invoked while configuring the MR job for incremental load.
172 *
173 * @param table to read the properties from
174 * @param conf to persist serialized values into
175 * @throws IOException
176 * on failure to read column family descriptors
177 */
178 @VisibleForTesting
179 static void configureBlockSize(Table table, Configuration conf) throws IOException {
180 HFileOutputFormat2.configureBlockSize(table, conf);
181 }
182
183 /**
184 * Serialize column family to bloom type map to configuration.
185 * Invoked while configuring the MR job for incremental load.
186 *
187 * @param table to read the properties from
188 * @param conf to persist serialized values into
189 * @throws IOException
190 * on failure to read column family descriptors
191 */
192 @VisibleForTesting
193 static void configureBloomType(Table table, Configuration conf) throws IOException {
194 HFileOutputFormat2.configureBloomType(table, conf);
195 }
196
197 /**
198 * Serialize column family to data block encoding map to configuration.
199 * Invoked while configuring the MR job for incremental load.
200 *
201 * @param table to read the properties from
202 * @param conf to persist serialized values into
203 * @throws IOException
204 * on failure to read column family descriptors
205 */
206 @VisibleForTesting
207 static void configureDataBlockEncoding(Table table,
208 Configuration conf) throws IOException {
209 HFileOutputFormat2.configureDataBlockEncoding(table, conf);
210 }
211 }