/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.TokenUtil;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

import java.io.IOException;
import java.util.Collection;
import java.util.Map;

/**
 * Utility for {@link TableMap} and {@link TableReduce}.
 */
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
@SuppressWarnings({ "rawtypes", "unchecked" })
public class TableMapReduceUtil {

  /**
   * Use this before submitting a TableMap job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   */
  public static void initTableMapJob(String table, String columns,
      Class<? extends TableMap> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, JobConf job) {
    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
      true, TableInputFormat.class);
  }

  public static void initTableMapJob(String table, String columns,
      Class<? extends TableMap> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, JobConf job, boolean addDependencyJars) {
    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableInputFormat.class);
  }

  /**
   * Use this before submitting a TableMap job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormat  The input format class to use for reading from the table.
   */
  public static void initTableMapJob(String table, String columns,
      Class<? extends TableMap> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, JobConf job, boolean addDependencyJars,
      Class<? extends InputFormat> inputFormat) {

    job.setInputFormat(inputFormat);
    job.setMapOutputValueClass(outputValueClass);
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    job.setStrings("io.serializations", job.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    FileInputFormat.addInputPaths(job, table);
    job.set(TableInputFormat.COLUMN_LIST, columns);
    if (addDependencyJars) {
      try {
        addDependencyJars(job);
      } catch (IOException e) {
        // log and keep going; the job may still run if the jars are already on the cluster
        e.printStackTrace();
      }
    }
    try {
      initCredentials(job);
    } catch (IOException ioe) {
      // log and keep going; credentials are only required on secure clusters
      ioe.printStackTrace();
    }
  }
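
  /*
   * A minimal usage sketch (illustrative only; the driver class "MyDriver", the mapper class
   * "MyTableMap" and the table/column names below are hypothetical, not part of this class):
   *
   *   JobConf job = new JobConf(HBaseConfiguration.create(), MyDriver.class);
   *   TableMapReduceUtil.initTableMapJob("mytable", "info:qual",
   *       MyTableMap.class, ImmutableBytesWritable.class, Result.class, job);
   *   job.setNumReduceTasks(0);
   *   JobClient.runJob(job);
   */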

  /**
   * Sets up the job for reading from one or more table snapshots, with one or more scans
   * per snapshot. It bypasses the HBase servers and reads directly from the snapshot files.
   *
   * @param snapshotScans map of snapshot name to scans on that snapshot.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory to restore the snapshots into.
   */
  public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
      Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
      JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
    MultiTableSnapshotInputFormat.setInput(job, snapshotScans, tmpRestoreDir);

    job.setInputFormat(MultiTableSnapshotInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    if (addDependencyJars) {
      addDependencyJars(job);
    }

    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
  }
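
  /*
   * A minimal usage sketch (illustrative only; the snapshot names, the mapper class "MyTableMap"
   * and the restore directory below are hypothetical, not part of this class):
   *
   *   Map<String, Collection<Scan>> scans = new HashMap<String, Collection<Scan>>();
   *   scans.put("snapshot1", Collections.singletonList(new Scan()));
   *   scans.put("snapshot2", Collections.singletonList(new Scan()));
   *   TableMapReduceUtil.initMultiTableSnapshotMapperJob(scans, MyTableMap.class,
   *       ImmutableBytesWritable.class, Result.class, job, true, new Path("/tmp/snapshot-restore"));
   */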

  /**
   * Sets up the job for reading from a table snapshot. It bypasses the HBase servers
   * and reads directly from the snapshot files.
   *
   * @param snapshotName The name of the snapshot (of a table) to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current
   *          user should have write permissions to this directory, and it should not be a
   *          subdirectory of the HBase root dir. After the job is finished, the restore
   *          directory can be deleted.
   * @throws IOException When setting up the details fails.
   * @see TableSnapshotInputFormat
   */
  public static void initTableSnapshotMapJob(String snapshotName, String columns,
      Class<? extends TableMap> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, JobConf job,
      boolean addDependencyJars, Path tmpRestoreDir)
      throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
    initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableSnapshotInputFormat.class);
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
  }
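
  /*
   * A minimal usage sketch (illustrative only; the snapshot name, the mapper class "MyTableMap"
   * and the restore directory below are hypothetical, not part of this class). The restore
   * directory should be writable by the current user and live on the same FileSystem as the
   * HBase root dir, but outside of it:
   *
   *   TableMapReduceUtil.initTableSnapshotMapJob("mysnapshot", "info:qual",
   *       MyTableMap.class, ImmutableBytesWritable.class, Result.class, job,
   *       true, new Path("/tmp/snapshot-restore"));
   */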

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
      Class<? extends TableReduce> reducer, JobConf job)
      throws IOException {
    initTableReduceJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
      Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
      throws IOException {
    initTableReduceJob(table, reducer, job, partitioner, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
      Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
      boolean addDependencyJars) throws IOException {
    job.setOutputFormat(TableOutputFormat.class);
    job.setReducerClass(reducer);
    job.set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    job.setStrings("io.serializations", job.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaReader.getRegionCount(HBaseConfiguration.create(job), table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }
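
  /*
   * A minimal usage sketch (illustrative only; the reducer class "MyTableReduce" and the table
   * name below are hypothetical, not part of this class). The reducer emits Puts that
   * TableOutputFormat writes back to the named table:
   *
   *   TableMapReduceUtil.initTableReduceJob("outputtable", MyTableReduce.class, job);
   */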

  public static void initCredentials(JobConf job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job);
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation-token related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      HConnection conn = HConnectionManager.createConnection(job);
      try {
        // obtain an HBase authentication token for the current user and add it to the job
        User user = userProvider.getCurrent();
        TokenUtil.addTokenForJob(conn, job, user);
      } catch (InterruptedException ie) {
        ie.printStackTrace();
        Thread.currentThread().interrupt();
      } finally {
        conn.close();
      }
    }
  }

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, JobConf job)
      throws IOException {
    int regions = MetaReader.getRegionCount(HBaseConfiguration.create(job), table);
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Ensures that the given number of map tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumMapTasks(String table, JobConf job)
      throws IOException {
    int regions = MetaReader.getRegionCount(HBaseConfiguration.create(job), table);
    if (job.getNumMapTasks() > regions)
      job.setNumMapTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, JobConf job)
      throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(HBaseConfiguration.create(job), table));
  }

  /**
   * Sets the number of map tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumMapTasks(String table, JobConf job)
      throws IOException {
    job.setNumMapTasks(MetaReader.getRegionCount(HBaseConfiguration.create(job), table));
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job configuration to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   *          iteration.
   */
  public static void setScannerCaching(JobConf job, int batchSize) {
    job.setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * Ships the HBase dependency jars and the jars for the job's own configured
   * classes to the cluster via the distributed cache (tmpjars).
   *
   * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
   */
  public static void addDependencyJars(JobConf job) throws IOException {
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
      job,
      // pull the job's own key/value, partitioner, input/output format and combiner classes
      // so that the jars containing them are shipped as well
      job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(),
      job.getOutputKeyClass(),
      job.getOutputValueClass(),
      job.getPartitionerClass(),
      job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
      job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
      job.getCombinerClass());
  }
}