/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenSelector;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
import org.cliffc.high_scale_lib.Counter;

import com.google.protobuf.InvalidProtocolBufferException;

/**
 * Utility for {@link TableMapper} and {@link TableReducer}
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
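
  /*
   * Usage sketch (illustrative only, not part of the original class). A minimal
   * driver wiring a scan over a single table into a mapper; the table name
   * "usertable", family "cf", and MyMapper/MyDriver are hypothetical placeholders.
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   Job job = new Job(conf, "read-usertable");
   *   job.setJarByClass(MyDriver.class);
   *   Scan scan = new Scan();
   *   scan.addFamily(Bytes.toBytes("cf"));
   *   scan.setCaching(500);        // larger scanner caching for MR scans
   *   scan.setCacheBlocks(false);  // don't fill the region server block cache
   *   TableMapReduceUtil.initTableMapperJob("usertable", scan, MyMapper.class,
   *       Text.class, IntWritable.class, job);
   *   // ... reducer / output format configuration as usual ...
   *   job.waitForCompletion(true);
   */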

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The class of the input format to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize HBase auth credentials for the job.
   * @param inputFormatClass  The class of the input format to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, boolean initCredentials,
      Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    if (Put.class.equals(outputValueClass)) {
      job.setCombinerClass(PutCombiner.class);
    }
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    if (initCredentials) {
      initCredentials(job);
    }
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The class of the input format to use.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

  /**
   * Enable a basic on-heap cache for these jobs. Any BlockCache implementation
   * based on direct memory will likely cause the map tasks to OOM when opening
   * the region.
   */
  public static void resetCacheConfig(Configuration conf) {
    conf.setFloat(
        HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
    conf.setFloat("hbase.offheapcache.percentage", 0f);
    conf.setFloat("hbase.bucketcache.size", 0f);
  }

  /**
   * Sets up the job for reading from a table snapshot. It bypasses HBase servers
   * and reads directly from snapshot files.
   *
   * @param snapshotName The name of the snapshot (of a table) to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current
   *          user should have write permissions to this directory, and it should not be a
   *          subdirectory of the HBase root directory. After the job is finished, the
   *          restore directory can be deleted.
   * @throws IOException When setting up the details fails.
   * @see TableSnapshotInputFormat
   */
  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, Job job,
      boolean addDependencyJars, Path tmpRestoreDir)
  throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
    initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
    resetCacheConfig(job.getConfiguration());
  }
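
  /*
   * Usage sketch (illustrative only, not part of the original class). Reading a
   * table snapshot directly from the filesystem, bypassing the region servers.
   * The snapshot name and restore directory below are hypothetical; the restore
   * directory must be writable by the current user and should not live under the
   * HBase root directory.
   *
   *   Job job = new Job(HBaseConfiguration.create(), "scan-snapshot");
   *   Path restoreDir = new Path("/tmp/snapshot-restore");
   *   TableMapReduceUtil.initTableSnapshotMapperJob("usertable-snap", new Scan(),
   *       MyMapper.class, Text.class, IntWritable.class, job, true, restoreDir);
   *   job.waitForCompletion(true);
   */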

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        addDependencyJars, true);
  }

  /**
   * Use this before submitting a Multi TableMap job. It will appropriately set
   * up the job.
   *
   * @param scans The list of {@link Scan} objects to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust. Make sure the passed job is carrying
   *          all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param initCredentials whether to initialize HBase auth credentials for the job.
   * @throws IOException When setting up the details fails.
   */
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars,
      boolean initCredentials) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    if (initCredentials) {
      initCredentials(job);
    }
  }
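
  /*
   * Usage sketch (illustrative only, not part of the original class). Each Scan
   * handed to the multi-table variant names the table it targets via the
   * SCAN_ATTRIBUTES_TABLE_NAME attribute read by MultiTableInputFormat; the table
   * names below are hypothetical.
   *
   *   List<Scan> scans = new ArrayList<Scan>();
   *   for (String tableName : new String[] { "table1", "table2" }) {
   *     Scan scan = new Scan();
   *     scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
   *     scans.add(scan);
   *   }
   *   TableMapReduceUtil.initTableMapperJob(scans, MyMapper.class,
   *       ImmutableBytesWritable.class, Text.class, job);
   */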

  public static void initCredentials(Job job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        // init credentials for the remote cluster, if one is configured for output
        String quorumAddress = job.getConfiguration().get(TableOutputFormat.QUORUM_ADDRESS);
        User user = userProvider.getCurrent();
        if (quorumAddress != null) {
          Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
          ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
          obtainAuthTokenForJob(job, peerConf, user);
        }
        // init credentials for the local cluster
        obtainAuthTokenForJob(job, job.getConfiguration(), user);
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Obtain an authentication token, for the specified cluster, on behalf of the current user
   * and add it to the credentials for the given map reduce job.
   *
   * The quorumAddress is the key to the ZK ensemble, which contains:
   * hbase.zookeeper.quorum, hbase.zookeeper.client.port and zookeeper.znode.parent
   *
   * @param job The job that requires the permission.
   * @param quorumAddress string that contains the 3 required configurations
   * @throws IOException When the authentication token cannot be obtained.
   */
  public static void initCredentialsForCluster(Job job, String quorumAddress)
      throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
        ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
        obtainAuthTokenForJob(job, peerConf, userProvider.getCurrent());
      } catch (InterruptedException e) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status rather than clearing it
        Thread.currentThread().interrupt();
      }
    }
  }
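
  /*
   * Usage sketch (illustrative only, not part of the original class). A job that
   * writes to a second, secure cluster (for example a CopyTable-style job) can
   * obtain a delegation token for that cluster up front; the cluster key below is
   * a hypothetical "quorum:clientPort:znodeParent" string.
   *
   *   String peerClusterKey = "zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase";
   *   TableMapReduceUtil.initCredentialsForCluster(job, peerClusterKey);
   */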

  private static void obtainAuthTokenForJob(Job job, Configuration conf, User user)
      throws IOException, InterruptedException {
    Token<AuthenticationTokenIdentifier> authToken = getAuthToken(conf, user);
    if (authToken == null) {
      user.obtainAuthTokenForJob(conf, job);
    } else {
      job.getCredentials().addToken(authToken.getService(), authToken);
    }
  }

  /**
   * Get the authentication token of the user for the cluster specified in the configuration.
   * @return null if the user does not have the token, otherwise the auth token for the cluster.
   */
  private static Token<AuthenticationTokenIdentifier> getAuthToken(Configuration conf, User user)
      throws IOException, InterruptedException {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null);
    try {
      String clusterId = ZKClusterId.readClusterIdZNode(zkw);
      return new AuthenticationTokenSelector().selectToken(
          new Text(clusterId), user.getUGI().getTokens());
    } catch (KeeperException e) {
      throw new IOException(e);
    } finally {
      zkw.close();
    }
  }

  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */
  static String convertScanToString(Scan scan) throws IOException {
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    return Base64.encodeBytes(proto.toByteArray());
  }

  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */
  static Scan convertStringToScan(String base64) throws IOException {
    byte [] decoded = Base64.decode(base64);
    ClientProtos.Scan scan;
    try {
      scan = ClientProtos.Scan.parseFrom(decoded);
    } catch (InvalidProtocolBufferException ipbe) {
      throw new IOException(ipbe);
    }

    return ProtobufUtil.toScan(scan);
  }
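
  /*
   * Usage sketch (illustrative only, not part of the original class). These two
   * package-private helpers are how a Scan is passed through the job
   * configuration (see TableInputFormat.SCAN); within this package a round trip
   * looks like the following, with "cf" a hypothetical column family:
   *
   *   Scan scan = new Scan();
   *   scan.addFamily(Bytes.toBytes("cf"));
   *   String encoded = TableMapReduceUtil.convertScanToString(scan);
   *   job.getConfiguration().set(TableInputFormat.SCAN, encoded);
   *   Scan decoded = TableMapReduceUtil.convertStringToScan(encoded);
   */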

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }
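
  /*
   * Usage sketch (illustrative only, not part of the original class). A job whose
   * reduce phase writes Puts/Deletes back to an HBase table; "input-table",
   * "output-table", MyMapper and MyTableReducer are hypothetical placeholders.
   *
   *   TableMapReduceUtil.initTableMapperJob("input-table", new Scan(), MyMapper.class,
   *       Text.class, IntWritable.class, job);
   *   TableMapReduceUtil.initTableReducerJob("output-table", MyTableReducer.class, job);
   *   job.waitForCompletion(true);
   */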

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *          output to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote cluster
   *          when you would have the reduce write to a cluster that is other than the
   *          default; e.g. when copying tables between clusters, the source would be
   *          designated by <code>hbase-site.xml</code> and this param would have the
   *          ensemble address of the remote cluster. The format to pass is particular:
   *          <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>,
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *          carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *          output to the cluster that is designated in <code>hbase-site.xml</code>.
   *          Set this String to the zookeeper ensemble of an alternate remote cluster
   *          in the form
   *          <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>,
   *          such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format.
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      int regions = MetaReader.getRegionCount(conf, table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }
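
  /*
   * Usage sketch (illustrative only, not part of the original class). Directing
   * TableOutputFormat at a different cluster by passing its cluster key; the
   * quorum string below is hypothetical and must follow the
   * "hbase.zookeeper.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent" form.
   *
   *   TableMapReduceUtil.initTableReducerJob("output-table", MyTableReducer.class, job,
   *       null, "zk1.remote,zk2.remote,zk3.remote:2181:/hbase", null, null);
   */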

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    int regions = MetaReader.getRegionCount(job.getConfiguration(), table);
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(job.getConfiguration(), table));
  }
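
  /*
   * Usage sketch (illustrative only, not part of the original class). Matching
   * reduce parallelism to the table's region count keeps each reducer writing to
   * roughly one region when HRegionPartitioner is used; "output-table" is a
   * hypothetical name.
   *
   *   TableMapReduceUtil.setNumReduceTasks("output-table", job);
   *   // or, to only cap an explicitly configured value:
   *   TableMapReduceUtil.limitNumReduceTasks("output-table", job);
   */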

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   *          iteration.
   */
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * Add HBase and its dependencies (only) to the job configuration.
   * <p>
   * This is intended as a low-level API, facilitating code reuse between this
   * class and its mapred counterpart. It is also of use to external tools that
   * need to build a MapReduce job that interacts with HBase but want
   * fine-grained control over the jars shipped to the cluster.
   * </p>
   * @param conf The Configuration object to extend with dependencies.
   * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil
   */
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
      // explicitly pull a class from each module
      org.apache.hadoop.hbase.HConstants.class,
      org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class,
      org.apache.hadoop.hbase.client.Put.class,
      org.apache.hadoop.hbase.CompatibilityFactory.class,
      org.apache.hadoop.hbase.mapreduce.TableMapper.class,
      // pull necessary dependencies
      org.apache.zookeeper.ZooKeeper.class,
      org.jboss.netty.channel.ChannelFactory.class,
      com.google.protobuf.Message.class,
      com.google.common.collect.Lists.class,
      org.cloudera.htrace.Trace.class,
      org.cliffc.high_scale_lib.Counter.class);
  }

  /**
   * Returns a classpath string built from the content of the "tmpjars" value in
   * {@code conf}. Also exposed to shell scripts via <code>bin/hbase mapredcp</code>.
   */
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }
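
  /*
   * Usage sketch (illustrative only, not part of the original class). After the
   * dependency jars have been added to "tmpjars", the same set can be rendered as
   * a local classpath string, for example to append to HADOOP_CLASSPATH when
   * launching the driver:
   *
   *   TableMapReduceUtil.addDependencyJars(job);
   *   String classpath = TableMapReduceUtil.buildDependencyClasspath(job.getConfiguration());
   *   System.out.println(classpath);   // e.g. /path/a.jar:/path/b.jar:...
   */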

  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // when making changes here, consider also mapred.TableMapReduceUtil
          // pull job classes
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   */
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
  }
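
  /*
   * Usage sketch (illustrative only, not part of the original class). In addition
   * to the per-job classes picked up automatically, a driver can ship jars for
   * arbitrary extra classes; com.example.util.GeoHash is a hypothetical
   * third-party dependency.
   *
   *   TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
   *       com.example.util.GeoHash.class);
   */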

  /**
   * If org.apache.hadoop.util.JarFinder is available (0.23+ hadoop), finds
   * the Jar for a class or creates it if it doesn't exist. If the class is in
   * a directory in the classpath, it creates a Jar on the fly with the
   * contents of the directory and returns the path to that Jar. If a Jar is
   * created, it is created in the system temporary directory. Otherwise,
   * returns an existing jar that contains a class of the same name. Maintains
   * a mapping from jar contents to the tmp jar created.
   *
   * @param my_class the class to find.
   * @param fs the FileSystem with which to qualify the returned path.
   * @param packagedClasses a map of class name to path.
   * @return a jar file that contains the class.
   * @throws IOException
   */
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
  throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }

  /**
   * Add entries to <code>packagedClasses</code> corresponding to class files
   * contained in <code>jar</code>.
   *
   * @param jar The jar whose content to list.
   * @param packagedClasses map[class -> jar]
   */
  private static void updateMap(String jar, Map<String, String> packagedClasses)
      throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }

  /**
   * Find a jar that contains a class of the same name, if any. It will return
   * a jar file, even if that is not the first thing on the class path that
   * has a class with the same name. Looks first on the classpath and then in
   * the <code>packagedClasses</code> map.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   */
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    // first search the classpath
    for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
      URL url = itr.nextElement();
      if ("jar".equals(url.getProtocol())) {
        String toReturn = url.getPath();
        if (toReturn.startsWith("file:")) {
          toReturn = toReturn.substring("file:".length());
        }
        // URLDecoder is a misnamed class, since it actually decodes
        // x-www-form-urlencoded MIME type rather than actual
        // URL encoding (which the file path has). Therefore it would
        // decode +s to ' 's which is incorrect (spaces are actually
        // either unencoded or encoded as "%20"). Replace +s first, so
        // that they are kept sacred during the decoding process.
        toReturn = toReturn.replaceAll("\\+", "%2B");
        toReturn = URLDecoder.decode(toReturn, "UTF-8");
        return toReturn.replaceAll("!.*$", "");
      }
    }

    // now look in any jars we've packaged using JarFinder. Returns null
    // when no jar is found.
    return packagedClasses.get(class_file);
  }

  /**
   * Invoke 'getJar' on a JarFinder implementation. Prefers the Hadoop-provided
   * org.apache.hadoop.util.JarFinder when it is available on the classpath and
   * falls back to the backported copy otherwise.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   */
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // the method was properly called but threw its own exception;
      // unwrap it and rethrow the cause.
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // toss all other exceptions, related to reflection failure
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}