/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
import org.cliffc.high_scale_lib.Counter;

/**
 * Utility methods that set up a Hadoop MapReduce {@link Job} to read from and
 * write to HBase tables via {@link TableMapper} and {@link TableReducer}.
 */
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);

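  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job, using {@link TableInputFormat} and shipping the HBase dependency
   * jars to the cluster.
   *
   * @param table The table name to read from.
   * @param scan The scan instance with the columns, time range, etc.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust.
   * @throws IOException When setting up the details fails.
   */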
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job)
      throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

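  /**
   * Same as {@link #initTableMapperJob(String, Scan, Class, Class, Class, Job)},
   * but takes the table name as a byte array.
   *
   * @throws IOException When setting up the details fails.
   */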
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job)
      throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }

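  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job with the given input format.
   *
   * @param table The table name to read from.
   * @param scan The scan instance with the columns, time range, etc.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust.
   * @param addDependencyJars Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @param inputFormatClass The input format class to use.
   * @throws IOException When setting up the details fails.
   */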
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
      throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    // Pull in hbase-default.xml and hbase-site.xml settings that are not already
    // set on the job configuration.
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }

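  /**
   * Same as
   * {@link #initTableMapperJob(String, Scan, Class, Class, Class, Job, boolean, Class)},
   * but takes the table name as a byte array.
   *
   * @throws IOException When setting up the details fails.
   */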
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
      throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }

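  /**
   * Use this before submitting a TableMap job for a table named by a byte
   * array, optionally shipping dependency jars, using {@link TableInputFormat}.
   *
   * @throws IOException When setting up the details fails.
   */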
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars)
      throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

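  /**
   * Use this before submitting a TableMap job, optionally shipping dependency
   * jars, using {@link TableInputFormat}.
   *
   * @throws IOException When setting up the details fails.
   */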
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars)
      throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }

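  /**
   * Use this before submitting a multi-table TableMap job. It will
   * appropriately set up the job to read from all of the tables referenced by
   * the given list of scans.
   *
   * @param scans The list of scan instances to read from.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust.
   * @throws IOException When setting up the details fails.
   */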
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }

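  /**
   * Use this before submitting a multi-table TableMap job, optionally
   * shipping dependency jars to the cluster.
   *
   * @throws IOException When setting up the details fails.
   */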
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }
  }

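  /**
   * Use this before submitting a TableMap job that reads directly from a
   * table snapshot via {@link TableSnapshotInputFormat} rather than through
   * the region servers.
   *
   * @param snapshotName The name of the snapshot to read from.
   * @param scan The scan instance with the columns, time range, etc.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job to adjust.
   * @param addDependencyJars Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @param tableRootDir The root directory handed to
   *          {@link TableSnapshotInputFormat#setInput} for reading the snapshot.
   * @throws IOException When setting up the details fails.
   */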
  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
      Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
      Job job, boolean addDependencyJars, Path tableRootDir) throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tableRootDir);

    Configuration conf = job.getConfiguration();

    job.setInputFormatClass(TableSnapshotInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));

    // Prefer the default on-heap block cache and disable off-heap caching for these
    // tasks; an off-heap cache can exhaust the map task's memory when the store files
    // are opened directly in the task.
    job.getConfiguration().setFloat(
        HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
    job.getConfiguration().setFloat("hbase.offheapcache.percentage", 0f);

    if (addDependencyJars) {
      TableMapReduceUtil.addDependencyJars(job);
    }

    // Reading the snapshot opens regions inside the map task, which pulls in the
    // high-scale-lib Counter class, so ship its jar as well.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Counter.class);
  }

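  /**
   * Obtains authentication tokens for the job's current user (both Hadoop and
   * HBase, as applicable) and adds them to the job's credentials so that
   * tasks can authenticate against the cluster.
   *
   * @param job The current job to adjust.
   * @throws IOException When obtaining the tokens fails.
   */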
  public static void initCredentials(Job job) throws IOException {
    UserProvider provider = UserProvider.instantiate(job.getConfiguration());

    if (provider.isHadoopSecurityEnabled()) {
      // Propagate delegation-token related properties from the launcher to the MR job.
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (provider.isHBaseSecurityEnabled()) {
      try {
        // Obtain a token for the peer cluster if one is configured for the output
        // format, then one for the local cluster.
        String quorumAddress = job.getConfiguration().get(
            TableOutputFormat.QUORUM_ADDRESS);
        User user = provider.getCurrent();
        if (quorumAddress != null) {
          String[] parts = ZKUtil.transformClusterKey(quorumAddress);
          Configuration peerConf = HBaseConfiguration.create(job
              .getConfiguration());
          peerConf.set(HConstants.ZOOKEEPER_QUORUM, parts[0]);
          peerConf.set("hbase.zookeeper.client.port", parts[1]);
          peerConf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parts[2]);
          obtainAuthTokenForJob(job, peerConf, user);
        }

        obtainAuthTokenForJob(job, job.getConfiguration(), user);
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.interrupted();
      }
    }
  }

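  /**
   * Obtains an authentication token, for the cluster identified by the given
   * ZooKeeper cluster key, on behalf of the current user and adds it to the
   * credentials of the given MapReduce job.
   *
   * @param job The job that requires the permission.
   * @param quorumAddress The ZooKeeper cluster key of the target cluster.
   * @throws IOException When the authentication token cannot be obtained.
   */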
  public static void initCredentialsForCluster(Job job, String quorumAddress)
      throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job.getConfiguration());
    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        Configuration peerConf = HBaseConfiguration.create(job.getConfiguration());
        ZKUtil.applyClusterKeyToConf(peerConf, quorumAddress);
        obtainAuthTokenForJob(job, peerConf, userProvider.getCurrent());
      } catch (InterruptedException e) {
        LOG.info("Interrupted obtaining user authentication token");
        Thread.interrupted();
      }
    }
  }

  private static void obtainAuthTokenForJob(Job job, Configuration conf, User user)
      throws IOException, InterruptedException {
    Token<?> authToken = getAuthToken(conf, user);
    if (authToken == null) {
      user.obtainAuthTokenForJob(conf, job);
    } else {
      job.getCredentials().addToken(authToken.getService(), authToken);
    }
  }

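  /**
   * Gets the existing HBase authentication token of the given user for the
   * cluster specified in the configuration.
   *
   * @return null if the user does not hold a token for that cluster,
   *         otherwise the token.
   */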
  private static Token<?> getAuthToken(Configuration conf, User user)
      throws IOException, InterruptedException {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null);
    try {
      String clusterId = ClusterId.readClusterIdZNode(zkw);
      return user.getToken("HBASE_AUTH_TOKEN", clusterId);
    } catch (KeeperException e) {
      throw new IOException(e);
    } finally {
      zkw.close();
    }
  }

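  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */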
  static String convertScanToString(Scan scan) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(out);
    scan.write(dos);
    return Base64.encodeBytes(out.toByteArray());
  }

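  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64 The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */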
  static Scan convertStringToScan(String base64) throws IOException {
    ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decode(base64));
    DataInputStream dis = new DataInputStream(bis);
    Scan scan = new Scan();
    scan.readFields(dis);
    return scan;
  }

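  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the job, writing to the given table via {@link TableOutputFormat}.
   *
   * @param table The output table.
   * @param reducer The reducer class to use.
   * @param job The current job to adjust.
   * @throws IOException When determining the region count fails.
   */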
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
      throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }

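  /**
   * Use this before submitting a TableReduce job, optionally setting a
   * partitioner such as {@link HRegionPartitioner}.
   *
   * @param table The output table.
   * @param reducer The reducer class to use.
   * @param job The current job to adjust.
   * @param partitioner Partitioner to use. Pass <code>null</code> to use the
   *          default partitioner.
   * @throws IOException When determining the region count fails.
   */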
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }

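  /**
   * Use this before submitting a TableReduce job that writes to another HBase
   * cluster. The <code>quorumAddress</code> is the ZooKeeper cluster key of
   * the output cluster, a string of the form
   * <code>quorum:clientPort:znodeParent</code> such as
   * <code>server1,server2,server3:2181:/hbase</code>; pass <code>null</code>
   * to write to the local cluster.
   *
   * @param table The output table.
   * @param reducer The reducer class to use.
   * @param job The current job to adjust.
   * @param partitioner Partitioner to use. Pass <code>null</code> to use the
   *          default partitioner.
   * @param quorumAddress Cluster key of the output cluster, or <code>null</code>.
   * @param serverClass Optional override of the region server class for the
   *          output cluster, or <code>null</code>.
   * @param serverImpl Optional override of the region server implementation
   *          for the output cluster, or <code>null</code>.
   * @throws IOException When determining the region count fails.
   */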
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }

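  /**
   * Same as
   * {@link #initTableReducerJob(String, Class, Job, Class, String, String, String)},
   * with an additional switch that controls whether the HBase dependency jars
   * are shipped with the job.
   *
   * @param addDependencyJars Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */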
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);

    if (quorumAddress != null) {
      // Calling this validates the format of the cluster key.
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      // Do not run more reduce tasks than there are regions in the output table.
      HTable outputTable = new HTable(conf, table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(outputTable.getRegionsInfo().size());
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }

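  /**
   * Ensures that the number of reduce tasks for the given job does not exceed
   * the number of regions of the given output table.
   *
   * @param table The table to get the region count for.
   * @param job The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */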
  public static void limitNumReduceTasks(String table, Job job)
      throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

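  /**
   * Sets the number of reduce tasks for the given job to the current number
   * of regions of the given table.
   *
   * @param table The table to get the region count for.
   * @param job The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */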
  public static void setNumReduceTasks(String table, Job job)
      throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }

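  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   *          iteration.
   */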
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }

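  /**
   * Adds the jars containing HBase and its minimal runtime dependencies
   * (ZooKeeper, protobuf, Guava) to the configuration's <code>tmpjars</code>
   * so they are shipped to the cluster with the job.
   *
   * @param conf The configuration to update.
   * @throws IOException When locating one of the jars fails.
   */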
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
        org.apache.zookeeper.ZooKeeper.class,
        com.google.protobuf.Message.class,
        com.google.common.base.Function.class,
        com.google.common.collect.ImmutableSet.class,
        org.apache.hadoop.hbase.util.Bytes.class);
  }

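  /**
   * Builds a local classpath string from the entries of the configuration's
   * <code>tmpjars</code> value, joined with the platform path separator.
   *
   * @param conf The configuration holding the <code>tmpjars</code> entries.
   * @return A classpath string suitable for use on the command line.
   */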
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // Entries take the form 'file:/path/to/file.jar'; strip the scheme prefix.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }

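  /**
   * Adds the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that they are shipped to the
   * cluster via the distributed cache.
   *
   * @param job The current job to adjust.
   * @throws IOException When locating one of the jars fails.
   */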
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // Ship the jars that contain the classes the job has been configured with.
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

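  /**
   * Adds the jars containing the given classes to the configuration's
   * <code>tmpjars</code> so that they are shipped to the cluster and added to
   * the task classpath.
   *
   * @param conf The configuration to update.
   * @param classes The classes whose containing jars should be shipped.
   * @throws IOException When locating or creating one of the jars fails.
   */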
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable.
    jars.addAll(conf.getStringCollection("tmpjars"));

    // Map of class file name to containing jar, so that classes that have already
    // been packaged are not jarred up a second time.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes.
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
  }

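  /**
   * Finds the jar containing the given class, relying on a JarFinder
   * implementation to create one on the fly when the class only exists as a
   * directory on the classpath. Keeps <code>packagedClasses</code> up to date
   * with the classes contained in any newly created jar.
   *
   * @param my_class The class to find.
   * @param fs The FileSystem with which to qualify the returned path.
   * @param packagedClasses A map from class file name to containing jar.
   * @return A qualified path to a jar containing the class, or null.
   * @throws IOException When searching for or creating the jar fails.
   */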
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
      throws IOException {
    // Attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }

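  /**
   * Adds an entry to <code>packagedClasses</code> for every class file found
   * in the given jar.
   *
   * @param jar The jar whose contents to list.
   * @param packagedClasses A map from class file name to containing jar.
   * @throws IOException When reading the jar fails.
   */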
  private static void updateMap(String jar, Map<String, String> packagedClasses) throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        if (entry.getName().endsWith("class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }

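  /**
   * Finds a jar on the classpath that contains a class of the given name, if
   * any. Falls back to the <code>packagedClasses</code> map when the class is
   * not found on the classpath.
   *
   * @param my_class The class to find.
   * @param packagedClasses A map from class file name to containing jar.
   * @return The path of a jar containing the class, or null.
   * @throws IOException When scanning the classpath fails.
   */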
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    // First search the classpath for a jar that contains the class.
    for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
      URL url = itr.nextElement();
      if ("jar".equals(url.getProtocol())) {
        String toReturn = url.getPath();
        if (toReturn.startsWith("file:")) {
          toReturn = toReturn.substring("file:".length());
        }
        // URLDecoder is a misnamed class: it decodes x-www-form-urlencoded MIME
        // content rather than real URL encoding, so it would turn '+' into a space.
        // Escape '+' before decoding so plus signs in the path survive.
        toReturn = toReturn.replaceAll("\\+", "%2B");
        toReturn = URLDecoder.decode(toReturn, "UTF-8");
        return toReturn.replaceAll("!.*$", "");
      }
    }

    // The class was not found in a jar on the classpath; fall back to the map of
    // classes packaged into jars created earlier in this process.
    return packagedClasses.get(class_file);
  }

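  /**
   * Invokes <code>getJar</code> on Hadoop's
   * <code>org.apache.hadoop.util.JarFinder</code> via reflection when it is
   * available, and falls back to the bundled backported {@link JarFinder}
   * otherwise.
   *
   * @param my_class The class to find.
   * @return The path of a jar containing the class, or null.
   */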
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // The method was found and called but threw an exception of its own;
      // rethrow its cause.
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // Toss all other exceptions, typically reflection-related failures.
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}