package org.apache.hadoop.hbase.mapreduce;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
import org.cliffc.high_scale_lib.Counter;

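/**
 * Utility for configuring {@link TableMapper} and {@link TableReducer} MapReduce jobs
 * that read from or write to HBase tables: it wires up the input/output formats,
 * serializes the {@link Scan}s into the job configuration, obtains security credentials
 * and ships the required dependency jars with the job.
 *
 * <p>A minimal sketch of a read job (the table, mapper and output classes below are
 * placeholders, not part of this API):
 *
 * <pre>
 * Job job = new Job(HBaseConfiguration.create(), "exampleJob");
 * Scan scan = new Scan();
 * TableMapReduceUtil.initTableMapperJob("exampleTable", scan, ExampleMapper.class,
 *     ImmutableBytesWritable.class, Result.class, job);
 * </pre>
 */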
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);

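  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */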
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

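  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */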
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

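  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars  Upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format class to use for reading the table.
   * @throws IOException When setting up the details fails.
   */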
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }

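  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars  Upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format class to use for reading the table.
   * @throws IOException When setting up the details fails.
   */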
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }

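  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job, using {@link TableInputFormat} as the input format.
   *
   * @param table  Binary representation of the table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars  Upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */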
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

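  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job, using {@link TableInputFormat} as the input format.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars  Upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */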
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

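  /**
   * Use this before submitting a multi-table TableMap job. It will appropriately
   * set up the job, using {@link MultiTableInputFormat}.
   *
   * @param scans  The list of {@link Scan} objects to read from.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */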
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }

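  /**
   * Use this before submitting a multi-table TableMap job. It will appropriately
   * set up the job, using {@link MultiTableInputFormat}.
   *
   * @param scans  The list of {@link Scan} objects to read from.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars  Upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */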
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }

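  /**
   * Sets up the job for reading from a table snapshot via
   * {@link TableSnapshotInputFormat}, which reads the snapshot's store files
   * directly from the filesystem instead of going through the region servers.
   *
   * @param snapshotName  The name of the snapshot (of a table) to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars  Upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @param tableRootDir  The directory handed to {@link TableSnapshotInputFormat}
   *   from which the snapshot is read.
   * @throws IOException When setting up the details fails.
   */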
  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
      Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
      Job job, boolean addDependencyJars, Path tableRootDir) throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tableRootDir);

    Configuration conf = job.getConfiguration();

    job.setInputFormatClass(TableSnapshotInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));

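    // Force the default on-heap block cache size and disable the off-heap cache for the
    // tasks of this job: map tasks open the snapshot's regions directly, and a direct-memory
    // cache configured for region servers could exhaust the task's memory.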
    job.getConfiguration().setFloat(
        HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
    job.getConfiguration().setFloat("hbase.offheapcache.percentage", 0f);

    if (addDependencyJars) {
      TableMapReduceUtil.addDependencyJars(job);
    }

    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Counter.class);
  }

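  /**
   * Sets up the security credentials for the job: when Hadoop security is enabled, the
   * token file named by the HADOOP_TOKEN_FILE_LOCATION environment variable (if any) is
   * passed on to the job; when HBase security is enabled, an HBase authentication token is
   * obtained for the current user, and for the peer cluster named by
   * {@link TableOutputFormat#QUORUM_ADDRESS} if that is configured.
   *
   * @param job  The job that requires the permission.
   * @throws IOException When setting up the credentials fails.
   */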
  public static void initCredentials(Job job) throws IOException {
    UserProvider provider = UserProvider.instantiate(job.getConfiguration());

    if (provider.isHadoopSecurityEnabled()) {
      // propagate the location of any delegation token file from the launching
      // environment to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (provider.isHBaseSecurityEnabled()) {
      try {
        // obtain a token for the remote (peer) cluster first, if one is configured
        String quorumAddress = job.getConfiguration().get(
            TableOutputFormat.QUORUM_ADDRESS);
        User user = provider.getCurrent();
        if (quorumAddress != null) {
          String[] parts = ZKUtil.transformClusterKey(quorumAddress);
          Configuration peerConf = HBaseConfiguration.create(job
              .getConfiguration());
          peerConf.set(HConstants.ZOOKEEPER_QUORUM, parts[0]);
          peerConf.set("hbase.zookeeper.client.port", parts[1]);
          peerConf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parts[2]);
          obtainAuthTokenForJob(job, peerConf, user);
        }

        obtainAuthTokenForJob(job, job.getConfiguration(), user);
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status rather than swallowing it
        Thread.currentThread().interrupt();
      }
    }
  }

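  /**
   * Obtain an authentication token, for the specified cluster, on behalf of the current
   * user and add it to the credentials for the given MapReduce job. The cluster is
   * identified by its ZooKeeper cluster key, which is applied to a copy of the job
   * configuration via {@link ZKUtil#applyClusterKeyToConf(Configuration, String)}.
   *
   * @param job  The job that requires the permission.
   * @param quorumAddress  The ZooKeeper cluster key of the cluster to obtain the token for.
   * @throws IOException When the authentication token cannot be obtained.
   */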
  public static void initCredentialsForCluster(Job job, String quorumAddress)
      throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
        ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
        obtainAuthTokenForJob(job, peerConf, userProvider.getCurrent());
      } catch (InterruptedException e) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status rather than swallowing it
        Thread.currentThread().interrupt();
      }
    }
  }

  private static void obtainAuthTokenForJob(Job job, Configuration conf, User user)
      throws IOException, InterruptedException {
    Token<?> authToken = getAuthToken(conf, user);
    if (authToken == null) {
      user.obtainAuthTokenForJob(conf, job);
    } else {
      job.getCredentials().addToken(authToken.getService(), authToken);
    }
  }

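  /**
   * Get the authentication token of the user for the cluster specified in the configuration.
   *
   * @return the authentication token, or null if the user does not hold one for the
   *   cluster's id.
   */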
  private static Token<?> getAuthToken(Configuration conf, User user)
      throws IOException, InterruptedException {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null);
    try {
      String clusterId = ClusterId.readClusterIdZNode(zkw);
      return user.getToken("HBASE_AUTH_TOKEN", clusterId);
    } catch (KeeperException e) {
      throw new IOException(e);
    } finally {
      zkw.close();
    }
  }

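  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */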
  static String convertScanToString(Scan scan) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(out);
    scan.write(dos);
    return Base64.encodeBytes(out.toByteArray());
  }

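  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */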
  static Scan convertStringToScan(String base64) throws IOException {
    ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decode(base64));
    DataInputStream dis = new DataInputStream(bis);
    Scan scan = new Scan();
    scan.readFields(dis);
    return scan;
  }

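  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */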
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }

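  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use the
   *   default partitioner.
   * @throws IOException When determining the region count fails.
   */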
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }

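  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the job.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use the
   *   default partitioner.
   * @param quorumAddress  ZooKeeper cluster key (quorum hosts, client port and znode
   *   parent, separated by colons) of an alternate cluster to write to, or
   *   <code>null</code> to write to the cluster configured in the job.
   * @param serverClass  Replacement region server interface class, stored in
   *   {@link TableOutputFormat#REGION_SERVER_CLASS}, or <code>null</code> for the default.
   * @param serverImpl  Replacement region server implementation class, stored in
   *   {@link TableOutputFormat#REGION_SERVER_IMPL}, or <code>null</code> for the default.
   * @throws IOException When determining the region count fails.
   */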
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }

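  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the job. See
   * {@link #initTableReducerJob(String, Class, Job, Class, String, String, String)}
   * for the meaning of the other parameters.
   *
   * @param addDependencyJars  Upload HBase jars and jars for any of the configured
   *   job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */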
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);

    if (quorumAddress != null) {
      // Calling this will validate the format of the cluster key.
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      HTable outputTable = new HTable(conf, table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }

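  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */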
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

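  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */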
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }

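  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job  The current job to adjust.
   * @param batchSize  The number of rows to return in batch with each scanner
   *   iteration.
   */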
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }

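  /**
   * Adds the HBase jar and its minimal runtime dependencies (ZooKeeper, Protobuf
   * and Guava) to the "tmpjars" entry of the given configuration so they are
   * shipped to the cluster with the job.
   *
   * @param conf  The configuration to update.
   * @throws IOException When locating one of the jars fails.
   */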
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
        org.apache.zookeeper.ZooKeeper.class,
        com.google.protobuf.Message.class,
        com.google.common.base.Function.class,
        com.google.common.collect.ImmutableSet.class,
        org.apache.hadoop.hbase.util.Bytes.class);
  }

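  /**
   * Returns a classpath string built from the content of the "tmpjars" value in
   * <code>conf</code>. The scheme prefix of each entry is stripped and the entries
   * are joined with the platform's path separator, making the result suitable for
   * use as a local <code>CLASSPATH</code> fragment.
   *
   * @param conf  The configuration containing the "tmpjars" entries.
   * @throws IllegalArgumentException if <code>conf</code> is <code>null</code> or
   *   contains no "tmpjars" entries.
   */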
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'; strip the scheme.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }

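  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   *
   * @param job  The job to configure.
   * @throws IOException When adding one of the jars fails.
   */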
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // pull in the jars for the job's configured key/value, format,
          // partitioner and combiner classes
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

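  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to
   * the DistributedCache.
   *
   * @param conf  The configuration holding the current "tmpjars" value.
   * @param classes  The classes whose containing jars should be shipped.
   * @throws IOException When locating or creating one of the jars fails.
   */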
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable.
    jars.addAll(conf.getStringCollection("tmpjars"));

    // Map of class file name -> jar. Classes packaged into an on-the-fly jar by
    // findOrCreateJar are recorded here so later classes can reuse that jar.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes.
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
  }

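  /**
   * Finds the jar that contains the given class. If the class is served from a
   * directory on the classpath rather than from a jar (for example, when running
   * from a build tree), a jar is created on the fly via a JarFinder implementation.
   *
   * @param my_class  The class to find.
   * @param fs  The FileSystem used to qualify the returned path.
   * @param packagedClasses  A map of class name to containing jar, updated with the
   *   contents of any newly created jar.
   * @return a path to a jar file that contains the class, or null.
   * @throws IOException When reading a jar fails.
   */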
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
      throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      // no jar found; create one on the fly and remember its contents.
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }

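  /**
   * Adds an entry to <code>packagedClasses</code> for every class file contained
   * in the given jar.
   *
   * @param jar  The jar whose content to list.
   * @param packagedClasses  map[class file -> jar]
   * @throws IOException When reading the jar fails.
   */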
  private static void updateMap(String jar, Map<String, String> packagedClasses)
      throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }

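  /**
   * Finds a jar that contains a class of the same name, if any. It will return a
   * jar file, even if that is not the first thing on the class path that has a
   * class with the same name. The classpath is searched first; if nothing is found
   * there, the <code>packagedClasses</code> map (built by
   * {@link #updateMap(String, Map)}) is consulted.
   *
   * @param my_class  The class to find.
   * @param packagedClasses  Map of class files to the jars that package them.
   * @return a jar file that contains the class, or null.
   * @throws IOException When reading the classpath resources fails.
   */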
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    // first search the classpath
    for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
      URL url = itr.nextElement();
      if ("jar".equals(url.getProtocol())) {
        String toReturn = url.getPath();
        if (toReturn.startsWith("file:")) {
          toReturn = toReturn.substring("file:".length());
        }
        // URLDecoder decodes the x-www-form-urlencoded MIME type rather than real
        // URL encoding, so it would turn '+' characters in the path into spaces.
        // Escape them to "%2B" first so literal plus signs survive the decode.
        toReturn = toReturn.replaceAll("\\+", "%2B");
        toReturn = URLDecoder.decode(toReturn, "UTF-8");
        return toReturn.replaceAll("!.*$", "");
      }
    }

    // now look in any jars we've packaged using JarFinder; returns null when
    // no jar is found.
    return packagedClasses.get(class_file);
  }

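  /**
   * Invokes "getJar" on a JarFinder implementation: Hadoop's own
   * <code>org.apache.hadoop.util.JarFinder</code> when it is available on the
   * classpath, otherwise the backported {@link JarFinder} bundled with HBase.
   *
   * @param my_class  The class to find.
   * @return a jar file that contains the class, or null.
   */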
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // getJar was properly invoked but threw its own exception; unwrap and rethrow it.
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // toss all other exceptions, related to reflection failure
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}