/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.TokenUtil;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.StringUtils;

import com.google.protobuf.InvalidProtocolBufferException;
import com.yammer.metrics.core.MetricsRegistry;

/**
 * Utility for {@link TableMapper} and {@link TableReducer}.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
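
  // Usage sketch: a minimal driver for a scan-backed map-only job. MyMapper,
  // the table name and the output types below are placeholders, not part of
  // this class:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   Job job = Job.getInstance(conf, "count-rows");
  //   Scan scan = new Scan();
  //   scan.setCaching(500);        // larger caching is typical for MR scans
  //   scan.setCacheBlocks(false);  // MR scans should not fill the block cache
  //   TableMapReduceUtil.initTableMapperJob("mytable", scan, MyMapper.class,
  //       Text.class, IntWritable.class, job);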

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(TableName table,
      Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass,
      Job job) throws IOException {
    initTableMapperJob(table.getNameAsString(),
        scan,
        mapper,
        outputKeyClass,
        outputValueClass,
        job,
        true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize hbase auth credentials for the job
   * @param inputFormatClass the input format
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, boolean initCredentials,
      Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    if (Put.class.equals(outputValueClass)) {
      job.setCombinerClass(PutCombiner.class);
    }
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    if (initCredentials) {
      initCredentials(job);
    }
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Enable a basic on-heap cache for these jobs. Any BlockCache implementation
   * based on direct memory will likely cause the map tasks to OOM when opening
   * the region. This is done here instead of in TableSnapshotRegionRecordReader
   * in case an advanced user wants to override this behavior in their job.
   */
  public static void resetCacheConfig(Configuration conf) {
    conf.setFloat(
        HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
    conf.setFloat(HConstants.BUCKET_CACHE_SIZE_KEY, 0f);
    conf.unset(HConstants.BUCKET_CACHE_IOENGINE_KEY);
  }

  /**
   * Sets up the job for reading from a table snapshot. It bypasses hbase
   * servers and reads directly from snapshot files.
   *
   * @param snapshotName The name of the snapshot (of a table) to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory to copy the snapshot files into.
   * The current user should have write permissions to this directory, and it
   * should not be a subdirectory of rootdir. After the job is finished, the
   * restore directory can be deleted.
   * @throws IOException When setting up the details fails.
   * @see TableSnapshotInputFormat
   */
  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Path tmpRestoreDir)
  throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
    initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
    // ship the yammer metrics jar as well; reading snapshot files directly
    // exercises server-side code that needs it at runtime
    addDependencyJars(job.getConfiguration(), MetricsRegistry.class);
    resetCacheConfig(job.getConfiguration());
  }
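
  // Usage sketch: reading a snapshot instead of the live table. The snapshot
  // name, restore directory and mapper below are placeholders:
  //
  //   Job job = Job.getInstance(HBaseConfiguration.create(), "snapshot-scan");
  //   Path restoreDir = new Path("/tmp/snapshot-restore");  // not under hbase.rootdir
  //   TableMapReduceUtil.initTableSnapshotMapperJob("my_snapshot", new Scan(),
  //       MyMapper.class, Text.class, IntWritable.class, job, true, restoreDir);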

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   * all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   * all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   * all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize hbase auth credentials for the job
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars,
      boolean initCredentials) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    if (initCredentials) {
      initCredentials(job);
    }
  }
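
  // Usage sketch: each Scan in the list must carry its source table name via
  // the Scan.SCAN_ATTRIBUTES_TABLE_NAME attribute so MultiTableInputFormat can
  // route it; the table names and mapper below are placeholders:
  //
  //   List<Scan> scans = new ArrayList<Scan>();
  //   for (String name : new String[] { "table1", "table2" }) {
  //     Scan scan = new Scan();
  //     scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(name));
  //     scans.add(scan);
  //   }
  //   TableMapReduceUtil.initTableMapperJob(scans, MyMapper.class,
  //       Text.class, IntWritable.class, job);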

  /**
   * Obtains authentication tokens for the job: one for the local cluster and,
   * if {@link TableOutputFormat#QUORUM_ADDRESS} is set, one for the remote
   * output cluster as well, and adds them to the job's credentials.
   */
  public static void initCredentials(Job job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        // init credentials for the remote cluster, if one is configured
        String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
        User user = userProvider.getCurrent();
        if (quorumAddress != null) {
          Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
          ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
          Connection peerConn = ConnectionFactory.createConnection(peerConf);
          try {
            TokenUtil.addTokenForJob(peerConn, user, job);
          } finally {
            peerConn.close();
          }
        }

        // obtain an authentication token for the local cluster
        Connection conn = ConnectionFactory.createConnection(job.getConfiguration());
        try {
          TokenUtil.addTokenForJob(conn, user, job);
        } finally {
          conn.close();
        }
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Obtain an authentication token, for the specified cluster, on behalf of
   * the current user and add it to the credentials for the given map reduce
   * job.
   *
   * The quorumAddress is the key to the ZK ensemble, which contains:
   * hbase.zookeeper.quorum, hbase.zookeeper.client.port and
   * zookeeper.znode.parent
   *
   * @param job The job that requires the permission.
   * @param quorumAddress string that contains the 3 required configurations
   * @throws IOException When the authentication token cannot be obtained.
   */
  public static void initCredentialsForCluster(Job job, String quorumAddress)
      throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
        ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
        Connection peerConn = ConnectionFactory.createConnection(peerConf);
        try {
          TokenUtil.addTokenForJob(peerConn, userProvider.getCurrent(), job);
        } finally {
          peerConn.close();
        }
      } catch (InterruptedException e) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status rather than clearing it
        Thread.currentThread().interrupt();
      }
    }
  }
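
  // Usage sketch: when a job also writes to a second secure cluster (e.g. a
  // CopyTable-style job), obtain a token for that cluster too; the quorum
  // string below is a placeholder in the <quorum>:<client port>:<znode parent>
  // cluster-key format:
  //
  //   TableMapReduceUtil.initCredentialsForCluster(job,
  //       "remotehost1,remotehost2,remotehost3:2181:/hbase");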

  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */
  static String convertScanToString(Scan scan) throws IOException {
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    return Base64.encodeBytes(proto.toByteArray());
  }

  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */
  static Scan convertStringToScan(String base64) throws IOException {
    byte [] decoded = Base64.decode(base64);
    ClientProtos.Scan scan;
    try {
      scan = ClientProtos.Scan.parseFrom(decoded);
    } catch (InvalidProtocolBufferException ipbe) {
      throw new IOException(ipbe);
    }

    return ProtobufUtil.toScan(scan);
  }
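
  // Round-trip sketch: the two helpers above are inverses, so a Scan can be
  // carried through the job configuration as text:
  //
  //   String serialized = convertScanToString(scan);
  //   conf.set(TableInputFormat.SCAN, serialized);
  //   Scan copy = convertStringToScan(conf.get(TableInputFormat.SCAN));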

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }
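
  // Usage sketch: a typical read-modify-write job that scans one table and
  // writes Puts to another; the table names, MyMapper and MyReducer are
  // placeholders:
  //
  //   TableMapReduceUtil.initTableMapperJob("source", new Scan(), MyMapper.class,
  //       ImmutableBytesWritable.class, Put.class, job);
  //   TableMapReduceUtil.initTableReducerJob("target", MyReducer.class, job);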

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   * default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   * default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   * output to the cluster that is designated in <code>hbase-site.xml</code>.
   * Set this String to the zookeeper ensemble of an alternate remote cluster
   * when you would have the reduce write a cluster that is other than the
   * default; e.g. copying tables between clusters, the source would be
   * designated by <code>hbase-site.xml</code> and this param would have the
   * ensemble address of the remote cluster. The format to pass is particular.
   * Pass <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   * such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   * carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   * default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   * output to the cluster that is designated in <code>hbase-site.xml</code>.
   * Set this String to the zookeeper ensemble of an alternate remote cluster
   * when you would have the reduce write a cluster that is other than the
   * default; e.g. copying tables between clusters, the source would be
   * designated by <code>hbase-site.xml</code> and this param would have the
   * ensemble address of the remote cluster. The format to pass is particular.
   * Pass <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   * such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *           job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table));
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    int regions =
        MetaTableAccessor.getRegionCount(job.getConfiguration(), TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    job.setNumReduceTasks(MetaTableAccessor.getRegionCount(job.getConfiguration(),
        TableName.valueOf(table)));
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   * iteration.
   */
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * Add HBase and its dependencies (only) to the job configuration.
   * <p>
   * This is intended as a low-level API, facilitating code reuse between this
   * class and its mapred counterpart. It also of use to external tools that
   * need to build a MapReduce job that interacts with HBase but want
   * fine-grained control over the jars shipped to the cluster.
   * </p>
   * @param conf The Configuration object to extend with dependencies.
   * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
   */
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
        // explicitly pull a class from each module
        org.apache.hadoop.hbase.HConstants.class,                      // hbase-common
        org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // hbase-protocol
        org.apache.hadoop.hbase.client.Put.class,                      // hbase-client
        org.apache.hadoop.hbase.CompatibilityFactory.class,            // hbase-hadoop-compat
        org.apache.hadoop.hbase.mapreduce.TableMapper.class,           // hbase-server
        // pull necessary dependencies
        org.apache.zookeeper.ZooKeeper.class,
        io.netty.channel.Channel.class,
        com.google.protobuf.Message.class,
        com.google.common.collect.Lists.class,
        org.apache.htrace.Trace.class);
  }

  /**
   * Returns a classpath string built from the content of the "tmpjars" value
   * in {@code conf}. Also exposed to shell scripts via `bin/hbase mapredcp`.
   */
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }
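
  // Usage sketch: from a shell, the equivalent classpath is exposed as
  // `bin/hbase mapredcp`, e.g.:
  //
  //   HADOOP_CLASSPATH=$(hbase mapredcp) hadoop jar myjob.jar MyDriver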

  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // when making changes here, consider also mapred.TableMapReduceUtil
          // pull job classes
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   */
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can
    // avoid creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
  }
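
  // Usage sketch: shipping a jar that contains a custom filter used by the
  // job's Scan; MyCustomFilter is a placeholder:
  //
  //   TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
  //       MyCustomFilter.class);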

  /**
   * Finds the Jar for a class or creates it if it doesn't exist. If the class
   * is in a directory in the classpath, it creates a Jar on the fly with the
   * contents of the directory and returns the path to that Jar. If a Jar is
   * created, it is created in the system temporary directory. Otherwise,
   * returns an existing jar that contains a class of the same name. Maintains
   * a mapping from jar contents to the tmp jar created.
   * @param my_class the class to find.
   * @param fs the FileSystem with which to qualify the returned path.
   * @param packagedClasses a map of class name to path.
   * @return a jar file that contains the class.
   * @throws IOException
   */
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
  throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }

  /**
   * Add entries to <code>packagedClasses</code> corresponding to class files
   * contained in <code>jar</code>.
   * @param jar The jar whose content to list.
   * @param packagedClasses map[class -> jar]
   */
  private static void updateMap(String jar, Map<String, String> packagedClasses)
      throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }

  /**
   * Find a jar that contains a class of the same name, if any. It will return
   * a jar file, even if that is not the first thing on the class path that
   * has a class with the same name. Looks first on the classpath and then in
   * the <code>packagedClasses</code> map.
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();

    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    if (loader != null) {
      // first search the classpath
      for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
        URL url = itr.nextElement();
        if ("jar".equals(url.getProtocol())) {
          String toReturn = url.getPath();
          if (toReturn.startsWith("file:")) {
            toReturn = toReturn.substring("file:".length());
          }
          // URLDecoder is a misnamed class, since it actually decodes
          // x-www-form-urlencoded MIME type rather than actual
          // URL encoding (which the file path has). Therefore it would
          // decode +s to ' 's which is incorrect (spaces are actually
          // either unencoded or encoded as "%20"). Replace +s first, so
          // that they are kept sacred during the decoding process.
          toReturn = toReturn.replaceAll("\\+", "%2B");
          toReturn = URLDecoder.decode(toReturn, "UTF-8");
          return toReturn.replaceAll("!.*$", "");
        }
      }
    }

    // now look in any jars we've packaged using JarFinder. Returns null when
    // no jar is found.
    return packagedClasses.get(class_file);
  }

  /**
   * Invoke 'getJar' on the JarFinder utility. Builds a jar from the classpath
   * resources when the class is not already packaged in one.
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   */
  private static String getJar(Class<?> my_class) {
    String ret = null;
    try {
      ret = JarFinder.getJar(my_class);
    } catch (Exception e) {
      // rethrow unchecked: a failure to build the jar is unrecoverable here
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}