/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.TokenUtil;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.StringUtils;

import com.google.protobuf.InvalidProtocolBufferException;

/**
 * Utility for {@link TableMapper} and {@link TableReducer}.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
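
  /*
   * Illustrative sketch only (not part of the original class): a minimal driver
   * that wires a scan-based mapper job with the utility above. The table name
   * "access_logs" and the MyTableMapper class are hypothetical placeholders.
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   Job job = new Job(conf, "scan-access-logs");
   *   job.setJarByClass(MyTableMapper.class);
   *   Scan scan = new Scan();
   *   scan.setCaching(500);        // larger caching => fewer RPCs per map task
   *   scan.setCacheBlocks(false);  // don't pollute the region server block cache
   *   TableMapReduceUtil.initTableMapperJob("access_logs", scan,
   *       MyTableMapper.class, ImmutableBytesWritable.class, Result.class, job);
   *   job.setNumReduceTasks(0);    // map-only job
   *   System.exit(job.waitForCompletion(true) ? 0 : 1);
   */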

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The class of the input format.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize hbase auth credentials for the job
   * @param inputFormatClass the input format
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, boolean initCredentials,
      Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    if (Put.class.equals(outputValueClass)) {
      job.setCombinerClass(PutCombiner.class);
    }
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    if (initCredentials) {
      initCredentials(job);
    }
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The class of the input format.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Enable a basic on-heap cache for these jobs. Any BlockCache implementation based on
   * direct memory will likely cause the map tasks to OOM when opening the region. This
   * is done here instead of in TableSnapshotRegionRecordReader in case an advanced user
   * wants to override this behavior in their job.
   */
  public static void resetCacheConfig(Configuration conf) {
    conf.setFloat(
        HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
    conf.setFloat("hbase.offheapcache.percentage", 0f);
    conf.setFloat("hbase.bucketcache.size", 0f);
  }

  /**
   * Sets up the job for reading from a table snapshot. It bypasses hbase servers
   * and reads directly from snapshot files.
   *
   * @param snapshotName The name of the snapshot (of a table) to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current
   *          user should have write permissions to this directory, and it should not be a
   *          subdirectory of rootdir. After the job is finished, the restore directory can
   *          be deleted.
   * @throws IOException When setting up the details fails.
   * @see TableSnapshotInputFormat
   */
  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Path tmpRestoreDir)
  throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
    initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
    resetCacheConfig(job.getConfiguration());
  }
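
  /*
   * Illustrative sketch only (not part of the original class): reading from a
   * snapshot instead of a live table. The snapshot name "logs_snapshot", the
   * restore directory and the MyTableMapper class are hypothetical placeholders;
   * the restore directory must be writable by the submitting user and must not
   * live under the HBase root directory.
   *
   *   Job job = new Job(HBaseConfiguration.create(), "scan-logs-snapshot");
   *   job.setJarByClass(MyTableMapper.class);
   *   TableMapReduceUtil.initTableSnapshotMapperJob("logs_snapshot", new Scan(),
   *       MyTableMapper.class, ImmutableBytesWritable.class, Result.class, job,
   *       true, new Path("/tmp/snapshot-restore"));
   */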

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize hbase auth credentials for the job
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars,
      boolean initCredentials) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    if (initCredentials) {
      initCredentials(job);
    }
  }
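
  /*
   * Illustrative sketch only (not part of the original class): a multi-table
   * scan. With MultiTableInputFormat each Scan must carry the table it targets
   * via the Scan.SCAN_ATTRIBUTES_TABLE_NAME attribute. Table names "events" and
   * "audit" and the MyMultiTableMapper class are hypothetical placeholders.
   *
   *   List<Scan> scans = new ArrayList<Scan>();
   *   for (String tableName : new String[] { "events", "audit" }) {
   *     Scan scan = new Scan();
   *     scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
   *     scans.add(scan);
   *   }
   *   Job job = new Job(HBaseConfiguration.create(), "multi-table-scan");
   *   job.setJarByClass(MyMultiTableMapper.class);
   *   TableMapReduceUtil.initTableMapperJob(scans, MyMultiTableMapper.class,
   *       ImmutableBytesWritable.class, Result.class, job);
   */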

  /**
   * Obtains HBase and Hadoop security credentials for the current user and adds
   * them to the given job, so that tasks can authenticate when security is enabled.
   */
  public static void initCredentials(Job job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        // init credentials for the remote cluster, if one is configured
        String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
        User user = userProvider.getCurrent();
        if (quorumAddress != null) {
          Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
          ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
          HConnection peerConn = HConnectionManager.createConnection(peerConf);
          try {
            TokenUtil.addTokenForJob(peerConn, user, job);
          } finally {
            peerConn.close();
          }
        }

        // obtain an authentication token for the local cluster as well
        HConnection conn = HConnectionManager.createConnection(job.getConfiguration());
        try {
          TokenUtil.addTokenForJob(conn, user, job);
        } finally {
          conn.close();
        }
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Obtain an authentication token, for the specified cluster, on behalf of the current user
   * and add it to the credentials for the given map reduce job.
   *
   * The quorumAddress is the key to the ZK ensemble, which contains:
   * hbase.zookeeper.quorum, hbase.zookeeper.client.port and zookeeper.znode.parent
   *
   * @param job The job that requires the permission.
   * @param quorumAddress string that contains the 3 required configurations
   * @throws IOException When the authentication token cannot be obtained.
   */
  public static void initCredentialsForCluster(Job job, String quorumAddress)
      throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
        ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
        HConnection peerConn = HConnectionManager.createConnection(peerConf);
        try {
          TokenUtil.addTokenForJob(peerConn, userProvider.getCurrent(), job);
        } finally {
          peerConn.close();
        }
      } catch (InterruptedException e) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status rather than just clearing it
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */
  static String convertScanToString(Scan scan) throws IOException {
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    return Base64.encodeBytes(proto.toByteArray());
  }

  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */
  static Scan convertStringToScan(String base64) throws IOException {
    byte [] decoded = Base64.decode(base64);
    ClientProtos.Scan scan;
    try {
      scan = ClientProtos.Scan.parseFrom(decoded);
    } catch (InvalidProtocolBufferException ipbe) {
      throw new IOException(ipbe);
    }

    return ProtobufUtil.toScan(scan);
  }
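
  /*
   * Illustrative sketch only (not part of the original class): the two package-private
   * helpers above round-trip a Scan through its protobuf form and Base64, which is how
   * the scan is carried inside the job configuration (TableInputFormat.SCAN):
   *
   *   Scan scan = new Scan();
   *   scan.addFamily(Bytes.toBytes("cf"));
   *   String encoded = convertScanToString(scan);   // stored via conf.set(TableInputFormat.SCAN, ...)
   *   Scan restored = convertStringToScan(encoded); // what the input format reconstructs
   */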

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *          output to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote cluster
   *          when you would have the reduce write to a cluster that is other than the
   *          default; e.g. when copying tables between clusters, the source would be
   *          designated by <code>hbase-site.xml</code> and this param would have the
   *          ensemble address of the remote cluster. The format to pass is particular.
   *          Pass <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust. Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *          output to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote cluster
   *          when you would have the reduce write to a cluster that is other than the
   *          default; e.g. when copying tables between clusters, the source would be
   *          designated by <code>hbase-site.xml</code> and this param would have the
   *          ensemble address of the remote cluster. The format to pass is particular.
   *          Pass <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaReader.getRegionCount(conf, table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }
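
  /*
   * Illustrative sketch only (not part of the original class): wiring a reducer
   * that writes Puts back to an HBase table, partitioned by region. The table
   * name "summary" and the MySummaryReducer class are hypothetical placeholders.
   *
   *   Job job = new Job(HBaseConfiguration.create(), "summarize");
   *   job.setJarByClass(MySummaryReducer.class);
   *   // ... initTableMapperJob(...) for the read side ...
   *   TableMapReduceUtil.initTableReducerJob("summary", MySummaryReducer.class,
   *       job, HRegionPartitioner.class);
   *   System.exit(job.waitForCompletion(true) ? 0 : 1);
   */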

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    int regions = MetaReader.getRegionCount(job.getConfiguration(), table);
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(job.getConfiguration(), table));
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   *          iteration.
   */
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * Add HBase and its dependencies (only) to the job configuration.
   * <p>
   * This is intended as a low-level API, facilitating code reuse between this class and its
   * mapred counterpart. It is also of use to external tools that need to build a MapReduce
   * job that interacts with HBase but want fine-grained control over the jars shipped to
   * the cluster.
   * </p>
   * @param conf The Configuration object to extend with dependencies.
   * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
   * @see <a href="https://issues.apache.org/jira/browse/PIG-3285">PIG-3285</a>
   */
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
      // explicitly pull a class from each module
      org.apache.hadoop.hbase.HConstants.class,
      org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class,
      org.apache.hadoop.hbase.client.Put.class,
      org.apache.hadoop.hbase.CompatibilityFactory.class,
      org.apache.hadoop.hbase.mapreduce.TableMapper.class,
      // pull necessary dependencies
      org.apache.zookeeper.ZooKeeper.class,
      org.jboss.netty.channel.ChannelFactory.class,
      com.google.protobuf.Message.class,
      com.google.common.collect.Lists.class,
      org.cloudera.htrace.Trace.class,
      org.cliffc.high_scale_lib.Counter.class);
  }

  /**
   * Returns a classpath string built from the content of the "tmpjars" value in {@code conf}.
   * Also exposed to shell scripts via `bin/hbase mapredcp`.
   */
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }
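
  /*
   * Illustrative sketch only (not part of the original class): one way a launcher
   * might use the two methods above to obtain the HBase dependency jars as a local
   * classpath string, e.g. before passing the jars to "hadoop jar ... -libjars":
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   TableMapReduceUtil.addHBaseDependencyJars(conf);          // populates "tmpjars"
   *   String cp = TableMapReduceUtil.buildDependencyClasspath(conf);
   *   System.out.println(cp);  // path-separator-joined local paths, like `bin/hbase mapredcp`
   */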

  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // when making changes here, consider also mapred.TableMapReduceUtil
          // pull job classes
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   */
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class " + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
  }
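
  /*
   * Illustrative sketch only (not part of the original class): shipping an extra
   * third-party dependency alongside the job. The com.example.parser.LogParser
   * class is a hypothetical placeholder for any class a mapper uses that is not
   * already covered by addDependencyJars(Job).
   *
   *   TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
   *       com.example.parser.LogParser.class);
   */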

  /**
   * If org.apache.hadoop.util.JarFinder is available (0.23+ hadoop), finds
   * the Jar for a class or creates it if it doesn't exist. If the class is in
   * a directory in the classpath, it creates a Jar on the fly with the
   * contents of the directory and returns the path to that Jar. If a Jar is
   * created, it is created in the system temporary directory. Otherwise,
   * returns an existing jar that contains a class of the same name. Maintains
   * a mapping from jar contents to the tmp jar created.
   * @param my_class the class to find.
   * @param fs the FileSystem with which to qualify the returned path.
   * @param packagedClasses a map of class name to path.
   * @return a jar file that contains the class.
   * @throws IOException
   */
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
  throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }

  /**
   * Add entries to <code>packagedClasses</code> corresponding to class files
   * contained in <code>jar</code>.
   * @param jar The jar whose content to list.
   * @param packagedClasses map[class -> jar]
   */
  private static void updateMap(String jar, Map<String, String> packagedClasses)
      throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }

  /**
   * Find a jar that contains a class of the same name, if any. It will return
   * a jar file, even if that is not the first thing on the class path that
   * has a class with the same name. Looks first on the classpath and then in
   * the <code>packagedClasses</code> map.
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    if (loader != null) {
      // first search the classpath
      for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
        URL url = itr.nextElement();
        if ("jar".equals(url.getProtocol())) {
          String toReturn = url.getPath();
          if (toReturn.startsWith("file:")) {
            toReturn = toReturn.substring("file:".length());
          }
          // URLDecoder is a misnamed class, since it actually decodes
          // x-www-form-urlencoded MIME type rather than actual
          // URL encoding (which the file path has). Therefore it would
          // decode +s to ' 's which is incorrect (spaces are actually
          // allowed in paths). Replace +s first, so that they are kept
          // sacred during the decoding process.
          toReturn = toReturn.replaceAll("\\+", "%2B");
          toReturn = URLDecoder.decode(toReturn, "UTF-8");
          return toReturn.replaceAll("!.*$", "");
        }
      }
    }

    // now look in any jars we've packaged using JarFinder; returns null
    // when the class cannot be found there either.
    return packagedClasses.get(class_file);
  }

  /**
   * Invoke 'getJar' on a JarFinder implementation. Useful for some job
   * configuration contexts (HBASE-8140) and also for testing on MRv2. First
   * check if we have HADOOP-9426; lacking that, fall back to the backport.
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   */
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // the method was properly called, but threw its own exception
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // toss all other exceptions, related to reflection failure
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}