/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenSelector;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
import org.cliffc.high_scale_lib.Counter;

import com.google.protobuf.InvalidProtocolBufferException;
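
/**
 * Utility for {@link TableMapper} and {@link TableReducer}.
 *
 * <p>A minimal usage sketch (the job name, table names, and the {@code MyMapper}/{@code MyReducer}
 * classes below are illustrative placeholders supplied by the caller, not part of this API):
 *
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * Job job = new Job(conf, "example-job");
 * job.setJarByClass(MyMapper.class);     // user-supplied TableMapper subclass
 * Scan scan = new Scan();
 * scan.setCaching(500);                  // larger scan batches for MapReduce
 * scan.setCacheBlocks(false);            // don't pollute the region server block cache
 * TableMapReduceUtil.initTableMapperJob("inputTable", scan, MyMapper.class,
 *     ImmutableBytesWritable.class, Put.class, job);
 * TableMapReduceUtil.initTableReducerJob("outputTable", MyReducer.class, job);
 * </pre>
 */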
@SuppressWarnings({ "rawtypes", "unchecked" })
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
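
  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */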
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
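
  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from, as a byte array.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @throws IOException When setting up the details fails.
   */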
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
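
  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The class of the input format to use.
   * @throws IOException When setting up the details fails.
   */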
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true, inputFormatClass);
  }
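
  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize HBase auth credentials for the job.
   * @param inputFormatClass  The class of the input format to use.
   * @throws IOException When setting up the details fails.
   */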
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, boolean initCredentials,
      Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    if (Put.class.equals(outputValueClass)) {
      job.setCombinerClass(PutCombiner.class);
    }
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    if (initCredentials) {
      initCredentials(job);
    }
  }
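
  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job. Same as the String-table overload above, except the table name
   * is passed as a byte array.
   *
   * @throws IOException When setting up the details fails.
   */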
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }
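
  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job, using {@link TableInputFormat} as the input format and optionally
   * shipping dependency jars via the distributed cache. The table name is
   * passed as a byte array.
   *
   * @throws IOException When setting up the details fails.
   */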
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }
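
  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job, using {@link TableInputFormat} as the input format and optionally
   * shipping dependency jars via the distributed cache.
   *
   * @throws IOException When setting up the details fails.
   */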
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }
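
  /**
   * Sets up the job for reading from a table snapshot. It bypasses the HBase
   * servers and reads directly from the snapshot files.
   *
   * @param snapshotName The name of the snapshot (of a table) to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory the snapshot is restored into; the
   *          current user should have write permission to it, and it should not be
   *          a subdirectory of the HBase root dir.
   * @throws IOException When setting up the details fails.
   * @see TableSnapshotInputFormat
   */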
  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Path tmpRestoreDir)
  throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
    initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);

    /*
     * Force an on-heap block cache for these jobs: a BlockCache implementation backed by
     * direct memory is likely to make the map tasks OOM when opening the region from the
     * snapshot files. Doing it here (rather than inside the record reader) still lets an
     * advanced user override these settings in the job configuration.
     */
    job.getConfiguration().setFloat(
        HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
    job.getConfiguration().setFloat("hbase.offheapcache.percentage", 0f);
    job.getConfiguration().setFloat("hbase.bucketcache.size", 0f);

    // Reading snapshot files directly pulls in server-side dependencies such as
    // high-scale-lib's Counter, so make sure that jar ships with the job as well.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Counter.class);
  }
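
  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans  The list of {@link Scan} objects to read from; each scan must
   *          identify its table via the scan attributes.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @throws IOException When setting up the details fails.
   */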
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }
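
  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans  The list of {@link Scan} objects to read from.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */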
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }
  }

  public static void initCredentials(Job job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation-token related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        // init credentials for the remote cluster, if one is configured for output
        String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
        User user = userProvider.getCurrent();
        if (quorumAddress != null) {
          Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
          ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
          obtainAuthTokenForJob(job, peerConf, user);
        }

        obtainAuthTokenForJob(job, job.getConfiguration(), user);
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.currentThread().interrupt();
      }
    }
  }
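
  /**
   * Obtain an authentication token, for the specified cluster, on behalf of the
   * current user and add it to the credentials for the given MapReduce job.
   *
   * The quorumAddress is the key to the ZK ensemble, which contains
   * hbase.zookeeper.quorum, hbase.zookeeper.client.port and zookeeper.znode.parent.
   *
   * @param job The job that requires the permission.
   * @param quorumAddress string that contains the three required configurations.
   * @throws IOException When the authentication token cannot be obtained.
   */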
  public static void initCredentialsForCluster(Job job, String quorumAddress)
      throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
        ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
        obtainAuthTokenForJob(job, peerConf, userProvider.getCurrent());
      } catch (InterruptedException e) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status instead of clearing it
        Thread.currentThread().interrupt();
      }
    }
  }

  private static void obtainAuthTokenForJob(Job job, Configuration conf, User user)
      throws IOException, InterruptedException {
    Token<AuthenticationTokenIdentifier> authToken = getAuthToken(conf, user);
    if (authToken == null) {
      user.obtainAuthTokenForJob(conf, job);
    } else {
      job.getCredentials().addToken(authToken.getService(), authToken);
    }
  }
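
  /**
   * Get the authentication token of the user for the cluster specified in the configuration.
   * @return null if the user does not have the token, otherwise the auth token for the cluster.
   */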
  private static Token<AuthenticationTokenIdentifier> getAuthToken(Configuration conf, User user)
      throws IOException, InterruptedException {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null);
    try {
      String clusterId = ZKClusterId.readClusterIdZNode(zkw);
      return new AuthenticationTokenSelector().selectToken(new Text(clusterId),
          user.getUGI().getTokens());
    } catch (KeeperException e) {
      throw new IOException(e);
    } finally {
      zkw.close();
    }
  }
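
  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */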
  static String convertScanToString(Scan scan) throws IOException {
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    return Base64.encodeBytes(proto.toByteArray());
  }
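
  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */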
  static Scan convertStringToScan(String base64) throws IOException {
    byte [] decoded = Base64.decode(base64);
    ClientProtos.Scan scan;
    try {
      scan = ClientProtos.Scan.parseFrom(decoded);
    } catch (InvalidProtocolBufferException ipbe) {
      throw new IOException(ipbe);
    }

    return ProtobufUtil.toScan(scan);
  }
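
  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */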
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }
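
  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use the
   *          default partitioner.
   * @throws IOException When determining the region count fails.
   */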
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }
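
  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use the
   *          default partitioner.
   * @param quorumAddress Cluster key of a remote cluster to write to, or
   *          <code>null</code> for the cluster designated in <code>hbase-site.xml</code>.
   * @param serverClass redefined hbase.regionserver.class.
   * @param serverImpl redefined hbase.regionserver.impl.
   * @throws IOException When determining the region count fails.
   */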
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }
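
  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use the
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for output
   *          to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote
   *          cluster when you would have the reduce write to a cluster other than
   *          the default; e.g. copying tables between clusters. The format to pass
   *          is particular: <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>,
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class.
   * @param serverImpl redefined hbase.regionserver.impl.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */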
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());

    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format.
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaReader.getRegionCount(conf, table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }
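
  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */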
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    int regions = MetaReader.getRegionCount(job.getConfiguration(), table);
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }
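
  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */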
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(job.getConfiguration(), table));
  }
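
  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   *          iteration.
   */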
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }
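
  /**
   * Add HBase and its dependencies (only) to the job configuration.
   * <p>
   * This is intended as a low-level API, facilitating code reuse between this
   * class and its mapred counterpart. It is also of use to external tools that
   * need to build a MapReduce job that interacts with HBase but want
   * fine-grained control over the jars shipped to the cluster.
   * </p>
   * @param conf The Configuration object to extend with dependencies.
   * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
   */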
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
      // explicitly pull a class from each module
      org.apache.hadoop.hbase.HConstants.class,                      // hbase-common
      org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // hbase-protocol
      org.apache.hadoop.hbase.client.Put.class,                      // hbase-client
      org.apache.hadoop.hbase.CompatibilityFactory.class,            // hbase-hadoop-compat
      org.apache.hadoop.hbase.mapreduce.TableMapper.class,           // hbase-server
      // pull necessary third-party dependencies
      org.apache.zookeeper.ZooKeeper.class,
      org.jboss.netty.channel.ChannelFactory.class,
      com.google.protobuf.Message.class,
      com.google.common.collect.Lists.class,
      org.cloudera.htrace.Trace.class);
  }
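
  /**
   * Returns a classpath string built from the content of the "tmpjars" value
   * in {@code conf}.
   *
   * @throws IllegalArgumentException if conf is null or contains no tmpjars.
   */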
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }
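
  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */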
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // when making changes here, consider also mapred.TableMapReduceUtil
          // pull job classes
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }
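
  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   */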
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
  }
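
  /**
   * Finds the Jar for a class or creates it if it doesn't exist. If the class
   * is in a directory in the classpath, it creates a Jar on the fly with the
   * contents of the directory and returns the path to that Jar. If a Jar is
   * created, it is created in the system temporary directory. Otherwise,
   * returns an existing jar that contains a class of the same name. Maintains
   * a mapping from jar contents to the tmp jar created.
   *
   * @param my_class the class to find.
   * @param fs the FileSystem with which to qualify the returned path.
   * @param packagedClasses a map of class name to containing jar.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */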
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
  throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }
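
  /**
   * Add entries to <code>packagedClasses</code> corresponding to class files
   * contained in <code>jar</code>.
   *
   * @param jar The jar whose content to list.
   * @param packagedClasses map[class -> jar]
   */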
  private static void updateMap(String jar, Map<String, String> packagedClasses) throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }
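
  /**
   * Find a jar that contains a class of the same name, if any. It will return
   * a jar file, even if that is not the first thing on the class path that
   * has a class with the same name. Looks first on the classpath and then in
   * the <code>packagedClasses</code> map.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */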
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    // first search the classpath
    for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
      URL url = itr.nextElement();
      if ("jar".equals(url.getProtocol())) {
        String toReturn = url.getPath();
        if (toReturn.startsWith("file:")) {
          toReturn = toReturn.substring("file:".length());
        }
        // URLDecoder is a misnamed class, since it actually decodes
        // x-www-form-urlencoded MIME type rather than actual
        // URL encoding (which the file path has). Therefore it would
        // decode +s to ' 's which is incorrect (spaces are actually
        // either unencoded or encoded as "%20"). Replace +s first, so
        // that they are kept sacred during the decoding process.
        toReturn = toReturn.replaceAll("\\+", "%2B");
        toReturn = URLDecoder.decode(toReturn, "UTF-8");
        return toReturn.replaceAll("!.*$", "");
      }
    }

    // now look in any jars we've packaged using JarFinder. Returns null
    // when no jar file is found.
    return packagedClasses.get(class_file);
  }
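
  /**
   * Invoke 'getJar' on a JarFinder implementation. Prefers Hadoop's own
   * org.apache.hadoop.util.JarFinder when it is available on the classpath,
   * and falls back to the backported copy otherwise.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   */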
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // function was properly called, but threw its own exception.
      // Unwrap it and pass it on.
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // toss all other exceptions, related to reflection failure
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}