/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenSelector;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;

import com.google.protobuf.InvalidProtocolBufferException;

/**
 * Utility for {@link TableMapper} and {@link TableReducer}.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass the input format class to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize hbase auth credentials for the job
   * @param inputFormatClass the input format class to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, boolean initCredentials,
      Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    if (Put.class.equals(outputValueClass)) {
      job.setCombinerClass(PutCombiner.class);
    }
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    if (initCredentials) {
      initCredentials(job);
    }
  }
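
  // Illustrative usage sketch (an addition, not part of the original class):
  // wiring a typical scan-driven map job. "MyMapper" and "mytable" are
  // hypothetical names.
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   Job job = new Job(conf, "scan-mytable");
  //   Scan scan = new Scan();
  //   scan.setCaching(500);        // the default of 1 is far too small for MR jobs
  //   scan.setCacheBlocks(false);  // don't fill the region server block cache
  //   TableMapReduceUtil.initTableMapperJob("mytable", scan, MyMapper.class,
  //       Text.class, IntWritable.class, job);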

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass the input format class to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Sets up the job for reading from a table snapshot. It bypasses the HBase
   * servers and reads directly from the snapshot files in the file system.
   *
   * @param snapshotName The name of the snapshot (of a table) to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory to restore the snapshot into. The
   *          current user should have write permission to it, and it should not
   *          be a subdirectory of the HBase root directory. It can be deleted
   *          after the job finishes.
   * @throws IOException When setting up the details fails.
   * @see TableSnapshotInputFormat
   */
  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Path tmpRestoreDir)
  throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
    initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }
  }
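
  // Illustrative sketch (an addition): building the scan list for a
  // multi-table job. Each scan carries its table name as a scan attribute so
  // MultiTableInputFormat can route it; the table names and "MyMapper" are
  // hypothetical.
  //
  //   List<Scan> scans = new ArrayList<Scan>();
  //   for (String tableName : new String[] { "table1", "table2" }) {
  //     Scan scan = new Scan();
  //     scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
  //     scans.add(scan);
  //   }
  //   TableMapReduceUtil.initTableMapperJob(scans, MyMapper.class,
  //       ImmutableBytesWritable.class, Result.class, job);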

  /**
   * Obtains an HBase authentication token for the current user, for the
   * cluster the job runs against and, if configured, for the peer cluster
   * named by {@link TableOutputFormat#QUORUM_ADDRESS}, and adds the tokens
   * to the job's credentials.
   *
   * @param job The job that requires the permission.
   * @throws IOException When the authentication token cannot be obtained.
   */
  public static void initCredentials(Job job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        // init credentials for the remote cluster, if one is configured
        String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
        User user = userProvider.getCurrent();
        if (quorumAddress != null) {
          Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
          ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
          obtainAuthTokenForJob(job, peerConf, user);
        }

        obtainAuthTokenForJob(job, job.getConfiguration(), user);
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.currentThread().interrupt();  // restore the interrupt status
      }
    }
  }

  /**
   * Obtain an authentication token, for the specified cluster, on behalf of
   * the current user and add it to the credentials for the given map reduce
   * job.
   *
   * The quorumAddress is the key to the ZK ensemble, which contains:
   * hbase.zookeeper.quorum, hbase.zookeeper.client.port and
   * zookeeper.znode.parent
   *
   * @param job The job that requires the permission.
   * @param quorumAddress string that contains the 3 required configurations
   * @throws IOException When the authentication token cannot be obtained.
   */
  public static void initCredentialsForCluster(Job job, String quorumAddress)
      throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
        ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
        obtainAuthTokenForJob(job, peerConf, userProvider.getCurrent());
      } catch (InterruptedException e) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.currentThread().interrupt();  // restore the interrupt status
      }
    }
  }

  private static void obtainAuthTokenForJob(Job job, Configuration conf, User user)
      throws IOException, InterruptedException {
    // Reuse a token the user already holds for this cluster if there is one;
    // otherwise ask the cluster for a fresh token.
    Token<AuthenticationTokenIdentifier> authToken = getAuthToken(conf, user);
    if (authToken == null) {
      user.obtainAuthTokenForJob(conf, job);
    } else {
      job.getCredentials().addToken(authToken.getService(), authToken);
    }
  }

  /**
   * Get the authentication token of the user for the cluster specified in
   * the configuration.
   * @return null if the user does not have the token, otherwise the auth
   *         token for the cluster.
   */
  private static Token<AuthenticationTokenIdentifier> getAuthToken(Configuration conf, User user)
      throws IOException, InterruptedException {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null);
    try {
      // The token's service name is the cluster id, read from ZooKeeper.
      String clusterId = ZKClusterId.readClusterIdZNode(zkw);
      return new AuthenticationTokenSelector().selectToken(new Text(clusterId),
          user.getUGI().getTokens());
    } catch (KeeperException e) {
      throw new IOException(e);
    } finally {
      zkw.close();
    }
  }

  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */
  static String convertScanToString(Scan scan) throws IOException {
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    return Base64.encodeBytes(proto.toByteArray());
  }

  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */
  static Scan convertStringToScan(String base64) throws IOException {
    byte [] decoded = Base64.decode(base64);
    ClientProtos.Scan scan;
    try {
      scan = ClientProtos.Scan.parseFrom(decoded);
    } catch (InvalidProtocolBufferException ipbe) {
      throw new IOException(ipbe);
    }

    return ProtobufUtil.toScan(scan);
  }
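
  // Sketch of the round trip these two helpers provide (an illustration, not
  // original code): a Scan survives the trip through the job configuration
  // as a Base64-encoded protobuf.
  //
  //   Scan scan = new Scan();
  //   String serialized = convertScanToString(scan);  // stored under TableInputFormat.SCAN
  //   Scan copy = convertStringToScan(serialized);    // recovered inside the tasks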

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *          output to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote
   *          cluster when you would have the reduce write to a cluster other
   *          than the default; e.g. when copying tables between clusters, the
   *          source would be designated by <code>hbase-site.xml</code> and this
   *          param would have the ensemble address of the remote cluster. Pass
   *          <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *          output to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote
   *          cluster when you would have the reduce write to a cluster other
   *          than the default; e.g. when copying tables between clusters, the
   *          source would be designated by <code>hbase-site.xml</code> and this
   *          param would have the ensemble address of the remote cluster. Pass
   *          <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaReader.getRegionCount(conf, table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }
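
  // Illustrative sketch (an addition): a reduce-to-table job that partitions
  // by region and writes to a second cluster. "MyReducer", the table name and
  // the quorum address are hypothetical.
  //
  //   TableMapReduceUtil.initTableReducerJob("outputTable", MyReducer.class, job,
  //       HRegionPartitioner.class, "zk1,zk2,zk3:2181:/hbase", null, null);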

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    int regions = MetaReader.getRegionCount(job.getConfiguration(), table);
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(job.getConfiguration(), table));
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense
   * of requiring more heap to contain the cached rows.
   *
   * @param job The current job to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   *          iteration.
   */
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * Add HBase and its dependencies (only) to the job configuration.
   * <p>
   * This is intended as a low-level API, facilitating code reuse between this
   * class and its mapred counterpart. It is also of use to external tools
   * that need to build a MapReduce job that interacts with HBase but want
   * fine-grained control over the jars shipped to the cluster.
   * </p>
   * @param conf The Configuration object to extend with dependencies.
   * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
   */
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
      // explicitly pull a class from each module
      org.apache.hadoop.hbase.HConstants.class,                      // hbase-common
      org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // hbase-protocol
      org.apache.hadoop.hbase.client.Put.class,                      // hbase-client
      org.apache.hadoop.hbase.CompatibilityFactory.class,            // hbase-hadoop-compat
      org.apache.hadoop.hbase.mapreduce.TableMapper.class,           // hbase-server
      // pull necessary dependencies
      org.apache.zookeeper.ZooKeeper.class,
      org.jboss.netty.channel.ChannelFactory.class,
      com.google.protobuf.Message.class,
      com.google.common.collect.Lists.class,
      org.cloudera.htrace.Trace.class);
  }

  /**
   * Returns a classpath string built from the content of the "tmpjars" value
   * in <code>conf</code>. Also exposed to shell scripts via
   * <code>bin/hbase mapredcp</code>.
   */
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'; strip the scheme.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }

  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // when making changes here, consider also mapred.TableMapReduceUtil
          // pull job classes
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   */
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can
    // avoid creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
                 " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
                 + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
  }
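
  // Illustrative sketch (an addition): shipping an extra, job-specific
  // dependency alongside the HBase jars; "MyCustomFilter" is a hypothetical
  // user class.
  //
  //   TableMapReduceUtil.addDependencyJars(job);  // HBase jars + job classes
  //   TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
  //       MyCustomFilter.class);                  // anything extra the tasks need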

  /**
   * If org.apache.hadoop.util.JarFinder is available (0.23+ hadoop), finds
   * the Jar for a class or creates it if it doesn't exist. If the class is
   * in a directory in the classpath, it creates a Jar on the fly with the
   * contents of the directory and returns the path to that Jar. If a Jar is
   * created, it is created in the system temporary directory. Otherwise,
   * returns an existing jar that contains a class of the same name.
   * Maintains a mapping from jar contents to the tmp jar created.
   *
   * @param my_class the class to find.
   * @param fs the FileSystem with which to qualify the returned path.
   * @param packagedClasses a map of class name to path.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
  throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }

  /**
   * Add entries to <code>packagedClasses</code> corresponding to class files
   * contained in <code>jar</code>.
   *
   * @param jar The jar whose content to list.
   * @param packagedClasses map[class -> jar]
   */
  private static void updateMap(String jar, Map<String, String> packagedClasses)
      throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }

  /**
   * Find a jar that contains a class of the same name, if any. It will
   * return a jar file, even if that is not the first thing on the class path
   * that has a class with the same name. Looks first on the classpath and
   * then in the <code>packagedClasses</code> map.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    // first search the classpath
    for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
      URL url = itr.nextElement();
      if ("jar".equals(url.getProtocol())) {
        String toReturn = url.getPath();
        if (toReturn.startsWith("file:")) {
          toReturn = toReturn.substring("file:".length());
        }
        // URLDecoder is a misnamed class, since it actually decodes
        // x-www-form-urlencoded MIME type rather than actual
        // URL encoding (which the file path has). Therefore it would
        // decode +s to ' 's which is incorrect (spaces are actually
        // either unencoded or encoded as "%20"). Replace +s first, so
        // that they are kept sacred during the decoding process.
        toReturn = toReturn.replaceAll("\\+", "%2B");
        toReturn = URLDecoder.decode(toReturn, "UTF-8");
        return toReturn.replaceAll("!.*$", "");
      }
    }

    // now look in any jars we've packaged using JarFinder. Returns null
    // when no jar is found.
    return packagedClasses.get(class_file);
  }

  /**
   * Invoke 'getJar' on a JarFinder implementation. Uses the Hadoop
   * org.apache.hadoop.util.JarFinder via reflection when it is on the
   * classpath (0.23+ hadoop), and falls back on the backported copy
   * otherwise.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   */
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // function was properly called, but threw its own exception;
      // unwrap it and pass it on.
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // toss all other exceptions, related to reflection failure
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}