/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.TokenUtil;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.StringUtils;

import com.google.protobuf.InvalidProtocolBufferException;
/**
 * Utility for {@link TableMapper} and {@link TableReducer}.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
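
  /*
   * Illustrative sketch only: a typical driver wires a scan-driven map job
   * through this helper roughly as below. The table name "myTable" and the
   * MyDriver/MyMapper classes are hypothetical, and Result would need an
   * org.apache.hadoop.hbase.client.Result import in the driver.
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   Job job = new Job(conf, "myScanJob");
   *   job.setJarByClass(MyDriver.class);
   *   Scan scan = new Scan();
   *   scan.setCaching(500);        // larger caching is usual for MapReduce scans
   *   scan.setCacheBlocks(false);  // avoid polluting the region server block cache
   *   TableMapReduceUtil.initTableMapperJob("myTable", scan, MyMapper.class,
   *       ImmutableBytesWritable.class, Result.class, job);
   *   job.waitForCompletion(true);
   */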

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass the input format class to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize HBase auth credentials for the job.
   * @param inputFormatClass the input format class to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, boolean initCredentials,
      Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    if (Put.class.equals(outputValueClass)) {
      job.setCombinerClass(PutCombiner.class);
    }
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    if (initCredentials) {
      initCredentials(job);
    }
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass the input format class to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Enable a basic on-heap cache for these jobs. Map tasks reading region data
   * directly get no benefit from a block cache, and a BlockCache implementation
   * backed by direct or off-heap memory can cause the tasks to run out of memory
   * when opening regions, so disable the off-heap and bucket caches and fall back
   * to the default on-heap block cache size.
   */
  public static void resetCacheConfig(Configuration conf) {
    conf.setFloat(
        HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
    conf.setFloat("hbase.offheapcache.percentage", 0f);
    conf.setFloat("hbase.bucketcache.size", 0f);
    conf.unset("hbase.bucketcache.ioengine");
  }

  /**
   * Sets up the job for reading from a table snapshot. It bypasses the HBase
   * servers and reads directly from the snapshot files.
   *
   * @param snapshotName The name of the snapshot (of a table) to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current
   *          user should have write permissions to this directory, and it should not be a
   *          subdirectory of rootdir. It can be deleted after the job has finished.
   * @throws IOException When setting up the details fails.
   * @see TableSnapshotInputFormat
   */
  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Path tmpRestoreDir)
  throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
    initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
    resetCacheConfig(job.getConfiguration());
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize HBase auth credentials for the job.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars,
      boolean initCredentials) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    if (initCredentials) {
      initCredentials(job);
    }
  }

  /**
   * Obtains HBase (and related Hadoop) security credentials on behalf of the
   * current user and attaches them to the given job so that tasks can
   * authenticate against secure clusters.
   */
  public static void initCredentials(Job job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation-token related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        // init credentials for the remote (output) cluster, if one was configured
        String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
        User user = userProvider.getCurrent();
        if (quorumAddress != null) {
          Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
          ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
          HConnection peerConn = HConnectionManager.createConnection(peerConf);
          try {
            TokenUtil.addTokenForJob(peerConn, user, job);
          } finally {
            peerConn.close();
          }
        }

        // obtain an authentication token for the local (job) cluster as well
        HConnection conn = HConnectionManager.createConnection(job.getConfiguration());
        try {
          TokenUtil.addTokenForJob(conn, user, job);
        } finally {
          conn.close();
        }
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Obtain an authentication token, for the specified cluster, on behalf of the current user
   * and add it to the credentials for the given MapReduce job.
   *
   * The quorumAddress is the key to the ZK ensemble, which contains:
   * hbase.zookeeper.quorum, hbase.zookeeper.client.port and zookeeper.znode.parent.
   *
   * @param job The job that requires the permission.
   * @param quorumAddress string that contains the 3 required configurations.
   * @throws IOException When the authentication token cannot be obtained.
   */
  public static void initCredentialsForCluster(Job job, String quorumAddress)
      throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
        ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
        HConnection peerConn = HConnectionManager.createConnection(peerConf);
        try {
          TokenUtil.addTokenForJob(peerConn, userProvider.getCurrent(), job);
        } finally {
          peerConn.close();
        }
      } catch (InterruptedException e) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status instead of just clearing it
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */
  static String convertScanToString(Scan scan) throws IOException {
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    return Base64.encodeBytes(proto.toByteArray());
  }

  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */
  static Scan convertStringToScan(String base64) throws IOException {
    byte [] decoded = Base64.decode(base64);
    ClientProtos.Scan scan;
    try {
      scan = ClientProtos.Scan.parseFrom(decoded);
    } catch (InvalidProtocolBufferException ipbe) {
      throw new IOException(ipbe);
    }

    return ProtobufUtil.toScan(scan);
  }
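
  /*
   * Illustrative sketch only: the two helpers above are inverses of each other.
   * A Scan survives the round trip through the Base64-encoded protobuf form
   * that is stored under TableInputFormat.SCAN in the job configuration; the
   * column family "cf" below is a hypothetical example.
   *
   *   Scan scan = new Scan();
   *   scan.addFamily(Bytes.toBytes("cf"));
   *   String encoded = convertScanToString(scan);    // what the driver puts in the conf
   *   Scan restored = convertStringToScan(encoded);  // what the task side rebuilds
   */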

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *          output to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote cluster
   *          when you would have the reduce write to a cluster other than the default,
   *          e.g. when copying tables between clusters.  The format to pass is
   *          <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>,
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *          output to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote cluster
   *          when you would have the reduce write to a cluster other than the default,
   *          e.g. when copying tables between clusters.  The format to pass is
   *          <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>,
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the cluster key format.
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaReader.getRegionCount(conf, table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }
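
  /*
   * Illustrative sketch only: a driver that writes its reduce output back into
   * an HBase table would typically call the helper above as below. The table
   * name "targetTable" and the MyDriver/MyReducer classes are hypothetical;
   * MyReducer would extend TableReducer and emit Put/Delete mutations.
   *
   *   Job job = new Job(HBaseConfiguration.create(), "writeBackJob");
   *   job.setJarByClass(MyDriver.class);
   *   // HRegionPartitioner caps the reducer count at the table's region count
   *   TableMapReduceUtil.initTableReducerJob("targetTable", MyReducer.class, job,
   *       HRegionPartitioner.class);
   *   job.waitForCompletion(true);
   */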

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    int regions = MetaReader.getRegionCount(job.getConfiguration(), table);
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(job.getConfiguration(), table));
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster MapReduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   *          iteration.
   */
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * Add HBase and its dependencies (only) to the job configuration.
   * <p>
   * This is intended as a low-level API, facilitating code reuse between this
   * class and its mapred counterpart. It is also of use to external tools that
   * need to build a MapReduce job that interacts with HBase but want
   * fine-grained control over the jars shipped to the cluster.
   * </p>
   * @param conf The Configuration object to extend with dependencies.
   * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
   */
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
        // explicitly pull a class from each module
        org.apache.hadoop.hbase.HConstants.class,                      // hbase-common
        org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // hbase-protocol
        org.apache.hadoop.hbase.client.Put.class,                      // hbase-client
        org.apache.hadoop.hbase.CompatibilityFactory.class,            // hbase-hadoop-compat
        org.apache.hadoop.hbase.mapreduce.TableMapper.class,           // hbase-server
        // pull necessary third-party dependencies
        org.apache.zookeeper.ZooKeeper.class,
        org.jboss.netty.channel.ChannelFactory.class,
        com.google.protobuf.Message.class,
        com.google.common.collect.Lists.class,
        org.cloudera.htrace.Trace.class,
        org.cliffc.high_scale_lib.Counter.class,
        com.yammer.metrics.core.MetricsRegistry.class);
  }

  /**
   * Returns a classpath string built from the content of the "tmpjars" value
   * in the given {@code conf}.
   */
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'; strip the scheme prefix.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }

  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // when making changes here, consider also mapred.TableMapReduceUtil
          // pull the classes configured on the job itself
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   */
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable.
    jars.addAll(conf.getStringCollection("tmpjars"));

    // Record class -> jar mappings as we find them so that we can avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes.
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
  }

  /**
   * Finds the jar for a class, or creates it if it doesn't exist. If the class
   * is in a directory on the classpath, a jar is created on the fly from the
   * contents of that directory (in the system temporary directory) and its path
   * is returned. Otherwise, an existing jar containing a class of the same name
   * is returned. Maintains a mapping from jar contents to the jars used.
   *
   * @param my_class the class to find.
   * @param fs the FileSystem with which to qualify the returned path.
   * @param packagedClasses a map of class name to containing jar.
   * @return a jar file that contains the class, or null.
   * @throws IOException if searching the classpath fails.
   */
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
  throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }

  /**
   * Add entries to <code>packagedClasses</code> for the class files contained
   * in <code>jar</code>.
   *
   * @param jar The jar whose contents to list.
   * @param packagedClasses map[class -> jar]
   */
  private static void updateMap(String jar, Map<String, String> packagedClasses)
      throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }

  /**
   * Find a jar that contains a class of the same name, if any. It will return
   * a jar file, even if that is not the first thing on the classpath that has
   * a class with the same name. Looks first on the classpath and then in the
   * <code>packagedClasses</code> map.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException if searching the classpath fails.
   */
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();

    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    if (loader != null) {
      // first search the classpath
      for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
        URL url = itr.nextElement();
        if ("jar".equals(url.getProtocol())) {
          String toReturn = url.getPath();
          if (toReturn.startsWith("file:")) {
            toReturn = toReturn.substring("file:".length());
          }
          // URLDecoder is a misnamed class: it actually decodes the
          // x-www-form-urlencoded MIME type rather than real URL encoding
          // (which the file path has). It would therefore decode +s to ' 's,
          // which is incorrect (spaces are either unencoded or encoded as
          // "%20"). Replace +s first so that they survive the decoding.
          toReturn = toReturn.replaceAll("\\+", "%2B");
          toReturn = URLDecoder.decode(toReturn, "UTF-8");
          return toReturn.replaceAll("!.*$", "");
        }
      }
    }

    // now look among the jars we've already packaged; returns null when the
    // class is not found there either.
    return packagedClasses.get(class_file);
  }

  /**
   * Invoke 'getJar' on a JarFinder implementation. Prefers Hadoop's own
   * org.apache.hadoop.util.JarFinder when it is available on the classpath;
   * otherwise falls back to the backported copy shipped with HBase.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   */
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // getJar was properly invoked but threw its own exception; unwrap and rethrow it.
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // toss all other exceptions, related to reflection failure
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}