/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import com.google.protobuf.InvalidProtocolBufferException;
import com.yammer.metrics.core.MetricsRegistry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.TokenUtil;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.StringUtils;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

/**
 * Utility for {@link TableMapper} and {@link TableReducer}
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
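
  // Example (a minimal sketch; the table name, mapper and key/value classes
  // are illustrative, not part of this API):
  //   Scan scan = new Scan();
  //   scan.setCaching(500);        // larger caching is typical for MapReduce scans
  //   scan.setCacheBlocks(false);  // avoid polluting the region server block cache
  //   Job job = new Job(conf, "ExampleRead");
  //   TableMapReduceUtil.initTableMapperJob("exampleTable", scan,
  //       MyMapper.class, Text.class, IntWritable.class, job);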

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass The class of the input format.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize hbase auth credentials for the job.
   * @param inputFormatClass The class of the input format.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, boolean initCredentials,
      Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    if (Put.class.equals(outputValueClass)) {
      job.setCombinerClass(PutCombiner.class);
    }
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    if (initCredentials) {
      initCredentials(job);
    }
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass The class of the input format.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Enable a basic on-heap cache for these jobs. Any BlockCache implementation based on
   * direct memory will likely cause the map tasks to OOM when opening the region. This
   * is done here instead of in the snapshot record reader in case an advanced user
   * wants to override this behavior in their job.
   */
  public static void resetCacheConfig(Configuration conf) {
    conf.setFloat(
        HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
    conf.setFloat("hbase.offheapcache.percentage", 0f);
    conf.setFloat("hbase.bucketcache.size", 0f);
    conf.unset("hbase.bucketcache.ioengine");
  }

  /**
   * Sets up the job for reading from one or more table snapshots, with one or more scans
   * per snapshot. It bypasses hbase servers and reads directly from snapshot files.
   *
   * @param snapshotScans     map of snapshot name to scans on that snapshot.
   * @param mapper            The mapper class to use.
   * @param outputKeyClass    The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job               The current job to adjust.  Make sure the passed job is
   *                          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *                          job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir     a temporary directory to restore the snapshot files into. The
   *                          current user should have write permissions to this directory,
   *                          and it should not be a subdirectory of rootdir.
   * @throws IOException When setting up the details fails.
   */
  public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
      Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
      Job job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
    MultiTableSnapshotInputFormat.setInput(job.getConfiguration(), snapshotScans, tmpRestoreDir);

    job.setInputFormatClass(MultiTableSnapshotInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));

    if (addDependencyJars) {
      addDependencyJars(job);
      addDependencyJars(job.getConfiguration(), MetricsRegistry.class);
    }

    resetCacheConfig(job.getConfiguration());
  }

  /**
   * Sets up the job for reading from a table snapshot. It bypasses hbase servers
   * and reads directly from snapshot files.
   *
   * @param snapshotName The name of the snapshot (of a table) to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current
   *          user should have write permissions to this directory, and it should not be a
   *          subdirectory of rootdir. After the job is finished, the restore directory can
   *          be deleted.
   * @throws IOException When setting up the details fails.
   * @see TableSnapshotInputFormat
   */
  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Path tmpRestoreDir)
  throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
    initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
    resetCacheConfig(job.getConfiguration());
  }
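
  // Example (a minimal sketch; the snapshot name, restore directory, mapper
  // and key/value classes are illustrative):
  //   Job job = new Job(conf, "ReadFromSnapshot");
  //   TableMapReduceUtil.initTableSnapshotMapperJob("mySnapshot", new Scan(),
  //       MyMapper.class, Text.class, IntWritable.class, job, true,
  //       new Path("/tmp/snapshot-restore"));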

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize hbase auth credentials for the job.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars,
      boolean initCredentials) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    if (initCredentials) {
      initCredentials(job);
    }
  }

  /**
   * Obtains authentication tokens for the job, if security is enabled, and
   * propagates any launcher-provided credentials to the MapReduce job.
   *
   * @param job The job that requires the permission.
   * @throws IOException When the authentication token cannot be obtained.
   */
  public static void initCredentials(Job job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        // init credentials for the remote cluster, if one is configured as the output
        String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
        User user = userProvider.getCurrent();
        if (quorumAddress != null) {
          Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
          ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
          HConnection peerConn = HConnectionManager.createConnection(peerConf);
          try {
            TokenUtil.addTokenForJob(peerConn, user, job);
          } finally {
            peerConn.close();
          }
        }

        // obtain an authentication token for the local (source) cluster
        HConnection conn = HConnectionManager.createConnection(job.getConfiguration());
        try {
          TokenUtil.addTokenForJob(conn, user, job);
        } finally {
          conn.close();
        }
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Obtain an authentication token, for the specified cluster, on behalf of the current user
   * and add it to the credentials for the given map reduce job.
   *
   * The quorumAddress is the key to the ZK ensemble, which contains:
   * hbase.zookeeper.quorum, hbase.zookeeper.client.port and zookeeper.znode.parent
   *
   * @param job The job that requires the permission.
   * @param quorumAddress string that contains the 3 required configurations
   * @throws IOException When the authentication token cannot be obtained.
   */
  public static void initCredentialsForCluster(Job job, String quorumAddress)
      throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
        ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
        HConnection peerConn = HConnectionManager.createConnection(peerConf);
        try {
          TokenUtil.addTokenForJob(peerConn, userProvider.getCurrent(), job);
        } finally {
          peerConn.close();
        }
      } catch (InterruptedException e) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupted status rather than swallowing it
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */
  static String convertScanToString(Scan scan) throws IOException {
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    return Base64.encodeBytes(proto.toByteArray());
  }

  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */
  static Scan convertStringToScan(String base64) throws IOException {
    byte [] decoded = Base64.decode(base64);
    ClientProtos.Scan scan;
    try {
      scan = ClientProtos.Scan.parseFrom(decoded);
    } catch (InvalidProtocolBufferException ipbe) {
      throw new IOException(ipbe);
    }

    return ProtobufUtil.toScan(scan);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
    Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }
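
  // Example (a minimal sketch; the table name and reducer class are illustrative):
  //   Job job = new Job(conf, "ExampleWrite");
  //   TableMapReduceUtil.initTableReducerJob("outputTable", MyReducer.class, job);
  //   job.setNumReduceTasks(1);  // adjust as appropriate for the target table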

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
    Class<? extends TableReducer> reducer, Job job,
    Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *          output to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote cluster
   *          when you would have the reduce write to a cluster that is other than the
   *          default; e.g. copying tables between clusters, the source would be
   *          designated by <code>hbase-site.xml</code> and this param would have the
   *          ensemble address of the remote cluster. The format to pass is
   *          <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
    Class<? extends TableReducer> reducer, Job job,
    Class partitioner, String quorumAddress, String serverClass,
    String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *          output to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote cluster
   *          when you would have the reduce write to a cluster that is other than the
   *          default; e.g. copying tables between clusters, the source would be
   *          designated by <code>hbase-site.xml</code> and this param would have the
   *          ensemble address of the remote cluster. The format to pass is
   *          <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
    Class<? extends TableReducer> reducer, Job job,
    Class partitioner, String quorumAddress, String serverClass,
    String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaReader.getRegionCount(conf, table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    int regions = MetaReader.getRegionCount(job.getConfiguration(), table);
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(job.getConfiguration(), table));
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   *          iteration.
   */
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * Add HBase and its dependencies (only) to the job configuration.
   * <p>
   * This is intended as a low-level API, facilitating code reuse between this
   * class and its mapred counterpart. It is also of use to external tools that
   * need to build a MapReduce job that interacts with HBase but want
   * fine-grained control over the jars shipped to the cluster.
   * </p>
   * @param conf The Configuration object to extend with dependencies.
   * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
   */
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
        // explicitly pull a class from each module
        org.apache.hadoop.hbase.HConstants.class,
        org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class,
        org.apache.hadoop.hbase.client.Put.class,
        org.apache.hadoop.hbase.CompatibilityFactory.class,
        org.apache.hadoop.hbase.mapreduce.TableMapper.class,
        // pull necessary dependencies
        org.apache.zookeeper.ZooKeeper.class,
        org.jboss.netty.channel.ChannelFactory.class,
        com.google.protobuf.Message.class,
        com.google.common.collect.Lists.class,
        org.cloudera.htrace.Trace.class,
        org.cliffc.high_scale_lib.Counter.class,
        com.yammer.metrics.core.MetricsRegistry.class);
  }

  /**
   * Returns a classpath string built from the content of the "tmpjars" value
   * in {@code conf}.
   */
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'; strip the scheme
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }
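
  // Example (a minimal sketch): build a classpath string from the dependency
  // jars after the configuration has been populated:
  //   addHBaseDependencyJars(conf);
  //   String cp = TableMapReduceUtil.buildDependencyClasspath(conf);
  //   // e.g. hand 'cp' to a launcher script via HADOOP_CLASSPATH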

  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // when making changes here, consider also mapred.TableMapReduceUtil
          // pull job classes
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   */
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
  }
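
  // Example (a minimal sketch; MyUdfLibraryClass stands in for any third-party
  // class a job depends on beyond the HBase and job classes handled above):
  //   TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
  //       MyUdfLibraryClass.class);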

  /**
   * Finds the Jar for a class or creates it if it doesn't exist. If the class
   * is in a directory in the classpath, it creates a Jar on the fly with the
   * contents of the directory and returns the path to that Jar. If a Jar is
   * created, it is created in the system temporary directory. Otherwise,
   * returns an existing jar that contains a class of the same name. Maintains
   * a mapping from jar contents to the tmp jar created.
   *
   * @param my_class the class to find.
   * @param fs the FileSystem with which to qualify the returned path.
   * @param packagedClasses a map of class name to path.
   * @return a jar file that contains the class.
   * @throws IOException
   */
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
  throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }

  /**
   * Add entries to <code>packagedClasses</code> corresponding to class files
   * contained in <code>jar</code>.
   *
   * @param jar The jar whose contents to list.
   * @param packagedClasses map[class -> jar]
   * @throws IOException When the jar cannot be read.
   */
  private static void updateMap(String jar, Map<String, String> packagedClasses)
      throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }

  /**
   * Find a jar that contains a class of the same name, if any. It will return
   * a jar file, even if that is not the first thing on the class path that
   * has a class with the same name. Looks first on the classpath and then in
   * the <code>packagedClasses</code> map.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();
    // convert the class name into a resource path, e.g. org/example/Foo.class
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    if (loader != null) {
      // first search the classpath
      for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
        URL url = itr.nextElement();
        if ("jar".equals(url.getProtocol())) {
          String toReturn = url.getPath();
          if (toReturn.startsWith("file:")) {
            toReturn = toReturn.substring("file:".length());
          }
          // URLDecoder is a misnamed class: it decodes the
          // x-www-form-urlencoded MIME type rather than %-encoded URIs, so it
          // would turn '+' characters into spaces. Re-encode any '+' in the
          // path before decoding, then strip the "!..." suffix that marks the
          // entry inside the jar.
          toReturn = toReturn.replaceAll("\\+", "%2B");
          toReturn = URLDecoder.decode(toReturn, "UTF-8");
          return toReturn.replaceAll("!.*$", "");
        }
      }
    }

    // now look in any jars we have packaged via JarFinder; returns null when
    // the class is not found there either.
    return packagedClasses.get(class_file);
  }

  /**
   * Invoke 'getJar' on a JarFinder implementation. Useful for some job
   * configuration contexts (HBASE-8140) and also for testing on MRv2. First
   * check if we have HADOOP-9426. Lacking that, fall back to the backport.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   */
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // function was properly called, but the invoked method threw its own
      // exception; rethrow its cause.
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // toss all other exceptions, related to reflection failure
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}