/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.StringUtils;

import com.google.protobuf.InvalidProtocolBufferException;

/**
 * Utility for {@link TableMapper} and {@link TableReducer}
 */
@SuppressWarnings("unchecked")
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
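
  /*
   * Typical driver usage (a minimal sketch, not part of this class): set up a
   * scan-backed mapper and a table-writing reducer on the same Job. MyMapper,
   * MyReducer and the table name "mytable" are hypothetical placeholders.
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   Job job = new Job(conf, "example-table-job");
   *   Scan scan = new Scan();
   *   TableMapReduceUtil.initTableMapperJob("mytable", scan, MyMapper.class,
   *       ImmutableBytesWritable.class, Put.class, job);
   *   TableMapReduceUtil.initTableReducerJob("mytable", MyReducer.class, job);
   *   job.waitForCompletion(true);
   */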

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format class to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    if (Put.class.equals(outputValueClass)) {
      job.setCombinerClass(PutCombiner.class);
    }
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format class to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job.
   *
   * @param scans  The list of Scan objects to read from.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job.
   *
   * @param scans  The list of Scan objects to read from.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }
  }
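
  /*
   * A minimal multi-table sketch (hypothetical mapper, key/value and table
   * names): with MultiTableInputFormat each Scan is expected to name the table
   * it reads from through the Scan.SCAN_ATTRIBUTES_TABLE_NAME attribute,
   * assuming that attribute is honored by the MultiTableInputFormat shipped
   * with this version.
   *
   *   Scan scan1 = new Scan();
   *   scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes("table1"));
   *   Scan scan2 = new Scan();
   *   scan2.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes("table2"));
   *   TableMapReduceUtil.initTableMapperJob(Arrays.asList(scan1, scan2),
   *       MyMapper.class, Text.class, IntWritable.class, job);
   */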

  public static void initCredentials(Job job) throws IOException {
    if (User.isHBaseSecurityEnabled(job.getConfiguration())) {
      try {
        // If the job is configured to write to a remote peer cluster, obtain
        // an authentication token for that cluster as well.
        String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
        if (quorumAddress != null) {
          Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
          ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
          User.getCurrent().obtainAuthTokenForJob(peerConf, job);
        }
        User.getCurrent().obtainAuthTokenForJob(job.getConfiguration(), job);
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.interrupted();
      }
    }
  }

  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */
  static String convertScanToString(Scan scan) throws IOException {
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    return Base64.encodeBytes(proto.toByteArray());
  }

  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */
  static Scan convertStringToScan(String base64) throws IOException {
    byte[] decoded = Base64.decode(base64);
    ClientProtos.Scan scan;
    try {
      scan = ClientProtos.Scan.parseFrom(decoded);
    } catch (InvalidProtocolBufferException ipbe) {
      throw new IOException(ipbe);
    }

    return ProtobufUtil.toScan(scan);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *   output to the cluster that is designated in <code>hbase-site.xml</code>.
   *   Set this String to the zookeeper ensemble of an alternate remote cluster
   *   when you would have the reduce write to a cluster other than the default,
   *   e.g. when copying tables between clusters: the source is designated by
   *   <code>hbase-site.xml</code> and this param carries the cluster key of the
   *   remote cluster. Pass it in the form
   *   <code>&lt;zookeeper quorum&gt;:&lt;client port&gt;:&lt;znode parent&gt;</code>,
   *   such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *   output to the cluster that is designated in <code>hbase-site.xml</code>.
   *   Set this String to the zookeeper ensemble of an alternate remote cluster
   *   when you would have the reduce write to a cluster other than the default.
   *   Pass the cluster key in the form
   *   <code>&lt;zookeeper quorum&gt;:&lt;client port&gt;:&lt;znode parent&gt;</code>,
   *   such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());

    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format.
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaReader.getRegionCount(conf, table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }
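
  /*
   * A sketch of writing the reduce output to a remote cluster (hypothetical
   * quorum, reducer and table name): the quorum address is the remote cluster
   * key, i.e. its zookeeper ensemble, client port and znode parent.
   *
   *   TableMapReduceUtil.initTableReducerJob("copyTarget", MyReducer.class, job,
   *       null, "remote1,remote2,remote3:2181:/hbase", null, null);
   */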

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    int regions = MetaReader.getRegionCount(job.getConfiguration(), table);
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(job.getConfiguration(), table));
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job  The current job to adjust.
   * @param batchSize  The number of rows to return in batch with each scanner
   *   iteration.
   */
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */
  public static void addDependencyJars(Job job) throws IOException {
    try {
      addDependencyJars(job.getConfiguration(),
          // explicitly pull a class from each HBase module
          org.apache.hadoop.hbase.HConstants.class,
          org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class,
          org.apache.hadoop.hbase.client.Put.class,
          org.apache.hadoop.hbase.CompatibilityFactory.class,
          // pull necessary third-party dependencies
          org.apache.zookeeper.ZooKeeper.class,
          com.google.protobuf.Message.class,
          com.google.common.collect.Lists.class,
          org.cloudera.htrace.Trace.class,
          // classes the job itself is configured with
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   */
  public static void addDependencyJars(Configuration conf,
      Class... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);

    Set<String> jars = new HashSet<String>();

    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // Add jars containing the specified classes
    for (Class clazz : classes) {
      if (clazz == null) continue;

      String pathStr = findOrCreateJar(clazz);
      if (pathStr == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      Path path = new Path(pathStr);
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.makeQualified(localFs).toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars",
        StringUtils.arrayToString(jars.toArray(new String[0])));
  }
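
  /*
   * A sketch of shipping an application-specific dependency in addition to the
   * defaults pulled in by addDependencyJars(Job); com.example.MyUtil is a
   * hypothetical class, not part of HBase:
   *
   *   TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
   *       com.example.MyUtil.class);
   */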

  /**
   * Finds the Jar for a class or creates it if it doesn't exist. If
   * org.apache.hadoop.util.JarFinder is available (Hadoop 0.23+), it is used;
   * when the class lives in a directory on the classpath, JarFinder creates a
   * Jar on the fly from the contents of that directory (in the system
   * temporary directory) and returns the path to it.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */
  private static String findOrCreateJar(Class<?> my_class)
  throws IOException {
    try {
      Class<?> jarFinder = Class.forName("org.apache.hadoop.util.JarFinder");
      // Hadoop 0.23+ ships a JarFinder class that will create the jar if it
      // doesn't exist. Invoke it reflectively so this code still compiles and
      // runs against older Hadoop versions that lack it.
      Method m = jarFinder.getMethod("getJar", Class.class);
      return (String) m.invoke(null, my_class);
    } catch (InvocationTargetException ite) {
      // JarFinder was found and called, but threw its own exception
      throw new IOException(ite.getCause());
    } catch (Exception e) {
      // ignore all other exceptions (reflection failures); fall back below
    }

    LOG.debug("New JarFinder: org.apache.hadoop.util.JarFinder.getJar " +
        "not available. Falling back to backported JarFinder");

    // Use the JarFinder backported into HBase
    // (org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder) when the
    // Hadoop on the classpath does not provide one.
    return JarFinder.getJar(my_class);
  }
}