/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenSelector;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
import org.cliffc.high_scale_lib.Counter;

import com.google.protobuf.InvalidProtocolBufferException;

/**
 * Utility for {@link TableMapper} and {@link TableReducer}
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
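
  /*
   * A minimal end-to-end usage sketch. The table names and the MySummaryJob, MyMapper and
   * MyReducer classes are hypothetical placeholders for a user's own job classes.
   *
   *   Configuration config = HBaseConfiguration.create();
   *   Job job = new Job(config, "ExampleSummaryJob");
   *   job.setJarByClass(MySummaryJob.class);
   *   Scan scan = new Scan();
   *   scan.setCaching(500);        // larger scanner caching for MapReduce scans
   *   scan.setCacheBlocks(false);  // don't fill the region server block cache
   *   TableMapReduceUtil.initTableMapperJob("source_table", scan,
   *       MyMapper.class, Text.class, IntWritable.class, job);
   *   TableMapReduceUtil.initTableReducerJob("target_table", MyReducer.class, job);
   *   job.waitForCompletion(true);
   */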

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The class of the input format to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize hbase auth credentials for the job
   * @param inputFormatClass  The class of the input format to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, boolean initCredentials,
      Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    if (Put.class.equals(outputValueClass)) {
      job.setCombinerClass(PutCombiner.class);
    }
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    if (initCredentials) {
      initCredentials(job);
    }
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The class of the input format to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Sets up the job for reading from a table snapshot. It bypasses hbase servers
   * and reads directly from snapshot files.
   *
   * @param snapshotName The name of the snapshot (of a table) to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current
   *   user should have write permissions to this directory, and it should not be a
   *   subdirectory of rootdir. After the job is finished, the restore directory can be deleted.
   * @throws IOException When setting up the details fails.
   * @see TableSnapshotInputFormat
   */
  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Path tmpRestoreDir)
  throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
    initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);

    // Reading snapshot files directly pulls server-side code into the tasks; also ship the
    // jar containing the high_scale_lib Counter class that this code depends on.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Counter.class);
  }
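
  /*
   * A usage sketch for a snapshot-backed mapper job; "my_snapshot", the restore directory,
   * and MyMapper are hypothetical placeholders:
   *
   *   TableMapReduceUtil.initTableSnapshotMapperJob("my_snapshot", new Scan(),
   *       MyMapper.class, Text.class, IntWritable.class, job,
   *       true, new Path("/tmp/snapshot_restore"));
   */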

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *   all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *   all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }
  }

  public static void initCredentials(Job job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        // init credentials for the remote cluster, if one is configured for output
        String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
        User user = userProvider.getCurrent();
        if (quorumAddress != null) {
          Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
          ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
          obtainAuthTokenForJob(job, peerConf, user);
        }

        obtainAuthTokenForJob(job, job.getConfiguration(), user);
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Obtain an authentication token, for the specified cluster, on behalf of the current user
   * and add it to the credentials for the given map reduce job.
   *
   * The quorumAddress is the key to the ZK ensemble, which contains:
   * hbase.zookeeper.quorum, hbase.zookeeper.client.port and zookeeper.znode.parent
   *
   * @param job The job that requires the permission.
   * @param quorumAddress string that contains the 3 required configurations
   * @throws IOException When the authentication token cannot be obtained.
   */
  public static void initCredentialsForCluster(Job job, String quorumAddress)
      throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
        ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
        obtainAuthTokenForJob(job, peerConf, userProvider.getCurrent());
      } catch (InterruptedException e) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status instead of clearing it
        Thread.currentThread().interrupt();
      }
    }
  }

  private static void obtainAuthTokenForJob(Job job, Configuration conf, User user)
      throws IOException, InterruptedException {
    Token<AuthenticationTokenIdentifier> authToken = getAuthToken(conf, user);
    if (authToken == null) {
      user.obtainAuthTokenForJob(conf, job);
    } else {
      job.getCredentials().addToken(authToken.getService(), authToken);
    }
  }

  /**
   * Get the authentication token of the user for the cluster specified in the configuration.
   * @return null if the user does not have the token, otherwise the auth token for the cluster.
   */
  private static Token<AuthenticationTokenIdentifier> getAuthToken(Configuration conf, User user)
      throws IOException, InterruptedException {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null);
    try {
      String clusterId = ZKClusterId.readClusterIdZNode(zkw);
      return new AuthenticationTokenSelector().selectToken(new Text(clusterId),
          user.getUGI().getTokens());
    } catch (KeeperException e) {
      throw new IOException(e);
    } finally {
      zkw.close();
    }
  }

  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */
  static String convertScanToString(Scan scan) throws IOException {
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    return Base64.encodeBytes(proto.toByteArray());
  }

  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */
  static Scan convertStringToScan(String base64) throws IOException {
    byte [] decoded = Base64.decode(base64);
    ClientProtos.Scan scan;
    try {
      scan = ClientProtos.Scan.parseFrom(decoded);
    } catch (InvalidProtocolBufferException ipbe) {
      throw new IOException(ipbe);
    }

    return ProtobufUtil.toScan(scan);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *   output to the cluster that is designated in <code>hbase-site.xml</code>.
   *   Set this String to the zookeeper ensemble of an alternate remote cluster
   *   when you would have the reduce write a cluster that is other than the
   *   default; e.g. copying tables between clusters, the source would be
   *   designated by <code>hbase-site.xml</code> and this param would have the
   *   ensemble address of the remote cluster.  The format to pass is particular.
   *   Pass <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   *   such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *   output to the cluster that is designated in <code>hbase-site.xml</code>.
   *   Set this String to the zookeeper ensemble of an alternate remote cluster
   *   when you would have the reduce write a cluster that is other than the
   *   default; e.g. copying tables between clusters, the source would be
   *   designated by <code>hbase-site.xml</code> and this param would have the
   *   ensemble address of the remote cluster.  The format to pass is particular.
   *   Pass <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   *   such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaReader.getRegionCount(conf, table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }
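
  /*
   * A usage sketch for writing reducer output to a remote peer cluster; the ensemble
   * address and the MyReducer class are hypothetical placeholders:
   *
   *   TableMapReduceUtil.initTableReducerJob("copy_target", MyReducer.class, job,
   *       null, "zk1,zk2,zk3:2181:/hbase", null, null);
   */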

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    int regions = MetaReader.getRegionCount(job.getConfiguration(), table);
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(job.getConfiguration(), table));
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   *   iteration.
   */
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * Add HBase and its dependencies (only) to the job configuration.
   * <p>
   * This is intended as a low-level API, facilitating code reuse between this
   * class and its mapred counterpart. It is also of use to external tools that
   * need to build a MapReduce job that interacts with HBase but want
   * fine-grained control over the jars shipped to the cluster.
   * </p>
   * @param conf The Configuration object to extend with dependencies.
   * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
   */
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
      // explicitly pull a class from each module
      org.apache.hadoop.hbase.HConstants.class,                      // hbase-common
      org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // hbase-protocol
      org.apache.hadoop.hbase.client.Put.class,                      // hbase-client
      org.apache.hadoop.hbase.CompatibilityFactory.class,            // hbase-hadoop-compat
      org.apache.hadoop.hbase.mapreduce.TableMapper.class,           // hbase-server
      // pull necessary dependencies
      org.apache.zookeeper.ZooKeeper.class,
      org.jboss.netty.channel.ChannelFactory.class,
      com.google.protobuf.Message.class,
      com.google.common.collect.Lists.class,
      org.cloudera.htrace.Trace.class);
  }

  /**
   * Returns a classpath string built from the content of the "tmpjars" value
   * in {@code conf}.
   */
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'; strip the scheme prefix.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }

  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // when making changes here, consider also mapred.TableMapReduceUtil
          // pull job classes
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   */
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
                 " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
                 + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
  }
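
  /*
   * A usage sketch: in addition to addDependencyJars(Job), this variant can ship jars for
   * extra user classes (MyCustomFilter is a hypothetical placeholder):
   *
   *   TableMapReduceUtil.addDependencyJars(job.getConfiguration(), MyCustomFilter.class);
   */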

  /**
   * If org.apache.hadoop.util.JarFinder is available (0.23+ hadoop), finds
   * the Jar for a class or creates it if it doesn't exist. If the class is in
   * a directory in the classpath, it creates a Jar on the fly with the
   * contents of the directory and returns the path to that Jar. If a Jar is
   * created, it is created in the system temporary directory. Otherwise,
   * returns an existing jar that contains a class of the same name. Maintains
   * a mapping from jar contents to the tmp jar created.
   * @param my_class the class to find.
   * @param fs the FileSystem with which to qualify the returned path.
   * @param packagedClasses a map of class name to path.
   * @return a jar file that contains the class.
   * @throws IOException
   */
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
  throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }

  /**
   * Add entries to <code>packagedClasses</code> corresponding to class files
   * contained in <code>jar</code>.
   * @param jar The jar whose content to list.
   * @param packagedClasses map[class -> jar]
   */
  private static void updateMap(String jar, Map<String, String> packagedClasses)
      throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }

  /**
   * Find a jar that contains a class of the same name, if any. It will return
   * a jar file, even if that is not the first thing on the class path that
   * has a class with the same name. Looks first on the classpath and then in
   * the <code>packagedClasses</code> map.
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    // first search the classpath
    for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
      URL url = itr.nextElement();
      if ("jar".equals(url.getProtocol())) {
        String toReturn = url.getPath();
        if (toReturn.startsWith("file:")) {
          toReturn = toReturn.substring("file:".length());
        }
        // URLDecoder is a misnamed class, since it actually decodes
        // x-www-form-urlencoded MIME type rather than actual
        // URL encoding (which the file path has). Therefore it would
        // decode +s to ' 's which is incorrect (spaces are actually
        // either unencoded or encoded as "%20"). Replace +s first, so
        // that they are kept as +s and not converted to ' 's
        toReturn = toReturn.replaceAll("\\+", "%2B");
        toReturn = URLDecoder.decode(toReturn, "UTF-8");
        return toReturn.replaceAll("!.*$", "");
      }
    }

    // now look in any jars we've packaged using JarFinder. Returns null
    // when no jar is found.
    return packagedClasses.get(class_file);
  }

  /**
   * Invoke 'getJar' on a JarFinder implementation. Prefers Hadoop's own
   * org.apache.hadoop.util.JarFinder when it is on the classpath and falls
   * back to the backported JarFinder otherwise.
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   */
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // the method was invoked correctly but threw its own exception;
      // unwrap it and pass it on.
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // toss all other exceptions, related to reflection failure
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}