package org.apache.hadoop.hbase.mapreduce;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenSelector;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;

import com.google.protobuf.InvalidProtocolBufferException;
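/**
 * Utility for {@link TableMapper} and {@link TableReducer}: helps set up
 * MapReduce jobs that read from and/or write to HBase tables, configures the
 * relevant input/output formats and serializations, ships dependency jars via
 * the distributed cache, and obtains security credentials when needed.
 *
 * A minimal usage sketch of a table-scanning job; the table, mapper and
 * reducer names below are illustrative only and not part of this API:
 *
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * Job job = new Job(conf, "example-count");
 * Scan scan = new Scan();
 * scan.setCaching(500);
 * // MyMapper extends TableMapper, MyReducer extends TableReducer
 * TableMapReduceUtil.initTableMapperJob("myTable", scan, MyMapper.class,
 *     Text.class, IntWritable.class, job);
 * TableMapReduceUtil.initTableReducerJob("myOutputTable", MyReducer.class, job);
 * </pre>
 */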
@SuppressWarnings({ "rawtypes", "unchecked" })
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
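  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */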
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
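  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */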
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
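  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format class to use.
   * @throws IOException When setting up the details fails.
   */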
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    if (Put.class.equals(outputValueClass)) {
      job.setCombinerClass(PutCombiner.class);
    }
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }
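  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format class to use.
   * @throws IOException When setting up the details fails.
   */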
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }
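  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */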
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }
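  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */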
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }
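  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job.
   *
   * @param scans  The list of {@link Scan} objects to read from.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */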
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }
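  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job.
   *
   * @param scans  The list of {@link Scan} objects to read from.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */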
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }
  }
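  /**
   * Obtain the security credentials needed by the job: the HDFS delegation
   * token location (when Hadoop security is enabled) and HBase authentication
   * tokens for the local and, if configured, the remote output cluster.
   */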
  public static void initCredentials(Job job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        // init credentials for remote cluster
        String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
        User user = userProvider.getCurrent();
        if (quorumAddress != null) {
          Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
          ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
          obtainAuthTokenForJob(job, peerConf, user);
        }

        obtainAuthTokenForJob(job, job.getConfiguration(), user);
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status rather than swallowing it
        Thread.currentThread().interrupt();
      }
    }
  }
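  /**
   * Add an HBase authentication token for the given cluster to the job's
   * credentials, obtaining a new token only if the user does not already
   * hold one for that cluster.
   */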
  private static void obtainAuthTokenForJob(Job job, Configuration conf, User user)
      throws IOException, InterruptedException {
    Token<AuthenticationTokenIdentifier> authToken = getAuthToken(conf, user);
    if (authToken == null) {
      user.obtainAuthTokenForJob(conf, job);
    } else {
      job.getCredentials().addToken(authToken.getService(), authToken);
    }
  }
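  /**
   * Get the authentication token of the user for the cluster specified in
   * the configuration.
   *
   * @return null if the user does not have the token, otherwise the auth
   * token for the cluster.
   */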
  private static Token<AuthenticationTokenIdentifier> getAuthToken(Configuration conf, User user)
      throws IOException, InterruptedException {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null);
    try {
      String clusterId = ZKClusterId.readClusterIdZNode(zkw);
      return new AuthenticationTokenSelector().selectToken(new Text(clusterId),
          user.getUGI().getTokens());
    } catch (KeeperException e) {
      throw new IOException(e);
    } finally {
      zkw.close();
    }
  }
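  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */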
  static String convertScanToString(Scan scan) throws IOException {
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    return Base64.encodeBytes(proto.toByteArray());
  }
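  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */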
  static Scan convertStringToScan(String base64) throws IOException {
    byte [] decoded = Base64.decode(base64);
    ClientProtos.Scan scan;
    try {
      scan = ClientProtos.Scan.parseFrom(decoded);
    } catch (InvalidProtocolBufferException ipbe) {
      throw new IOException(ipbe);
    }

    return ProtobufUtil.toScan(scan);
  }
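  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */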
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }
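  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   * default partitioner.
   * @throws IOException When determining the region count fails.
   */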
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }
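  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   * default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   * output to the cluster that is designated in <code>hbase-site.xml</code>.
   * Set this String to the zookeeper ensemble of an alternate remote cluster
   * when you would have the reduce write to a cluster other than the default,
   * e.g. when copying tables between clusters. The format to pass is
   * <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>,
   * such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @throws IOException When determining the region count fails.
   */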
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }
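  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * See {@link #initTableReducerJob(String, Class, Job, Class, String, String, String)}
   * for a description of the <code>partitioner</code>, <code>quorumAddress</code>,
   * <code>serverClass</code> and <code>serverImpl</code> parameters.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */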
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format of the quorum address.
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaReader.getRegionCount(conf, table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }
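  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */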
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    int regions = MetaReader.getRegionCount(job.getConfiguration(), table);
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  }
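  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */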
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(job.getConfiguration(), table));
  }
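  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job  The current job to adjust.
   * @param batchSize  The number of rows to return in batch with each scanner
   * iteration.
   */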
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }
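  /**
   * Add HBase and its dependencies (only) to the job configuration.
   * <p>
   * This is intended as a low-level API, facilitating code reuse between this
   * class and its mapred counterpart. It is also of use to external tools
   * that need to build a MapReduce job that interacts with HBase but want
   * fine-grained control over the jars shipped to the cluster.
   * </p>
   * @param conf  The Configuration object to extend with dependencies.
   */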
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
        // explicitly pull a class from each HBase module
        org.apache.hadoop.hbase.HConstants.class,
        org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class,
        org.apache.hadoop.hbase.client.Put.class,
        org.apache.hadoop.hbase.CompatibilityFactory.class,
        org.apache.hadoop.hbase.mapreduce.TableMapper.class,
        // pull necessary third-party dependencies
        org.apache.zookeeper.ZooKeeper.class,
        org.jboss.netty.channel.ChannelFactory.class,
        com.google.protobuf.Message.class,
        com.google.common.collect.Lists.class,
        org.cloudera.htrace.Trace.class);
  }
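  /**
   * Returns a classpath string built from the content of the "tmpjars" value
   * in <code>conf</code>.
   */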
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'; strip the scheme.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }
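  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */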
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // when making changes here, consider also mapred.TableMapReduceUtil
          // pull job classes
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }
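  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   */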
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can
    // avoid creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
                 " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
                 + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
  }
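  /**
   * Finds the Jar for a class, or creates it if it doesn't exist. If the
   * class is in a directory on the classpath, a Jar is created on the fly
   * from the contents of that directory (in the system temporary directory).
   * Otherwise, an existing jar that contains a class of the same name is
   * returned. Maintains a mapping from jar contents to the jar used.
   *
   * @param my_class  the class to find.
   * @param fs  the FileSystem with which to qualify the returned path.
   * @param packagedClasses  a map of class name to containing jar.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */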
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
  throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }
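  /**
   * Add entries to <code>packagedClasses</code> corresponding to class files
   * contained in <code>jar</code>.
   *
   * @param jar  The jar whose content to list.
   * @param packagedClasses  map[class -> jar]
   */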
  private static void updateMap(String jar, Map<String, String> packagedClasses)
      throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }
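  /**
   * Find a jar that contains a class of the same name, if any. It will return
   * a jar file, even if that is not the first thing on the class path that
   * has a class with the same name. Looks first on the classpath and then in
   * the <code>packagedClasses</code> map.
   *
   * @param my_class  the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */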
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    // first search the classpath
    for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
      URL url = itr.nextElement();
      if ("jar".equals(url.getProtocol())) {
        String toReturn = url.getPath();
        if (toReturn.startsWith("file:")) {
          toReturn = toReturn.substring("file:".length());
        }
        // URLDecoder is a misnamed class, since it actually decodes
        // x-www-form-urlencoded MIME type rather than actual
        // URL encoding (which the file path has). Therefore it would
        // decode +s to ' 's which is incorrect (spaces are actually
        // either unencoded or encoded as "%20"). Replace +s first, so
        // that they are kept sacred during the decoding process.
        toReturn = toReturn.replaceAll("\\+", "%2B");
        toReturn = URLDecoder.decode(toReturn, "UTF-8");
        return toReturn.replaceAll("!.*$", "");
      }
    }

    // now look in any jars we've packaged using JarFinder; returns null
    // when no jar is found.
    return packagedClasses.get(class_file);
  }
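  /**
   * Invoke 'getJar' on org.apache.hadoop.util.JarFinder if it is available on
   * the classpath; otherwise fall back to the backported {@link JarFinder}
   * shipped with HBase.
   *
   * @param my_class  the class to find.
   * @return a jar file that contains the class, or null.
   */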
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // function was properly called, but threw its own exception. Unwrap it
      // and pass it on.
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // toss all other exceptions, related to reflection failure
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}