package org.apache.hadoop.hbase.mapreduce;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.JarFinder;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
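/**
 * Utility for {@link TableMapper} and {@link TableReducer}.
 *
 * <p>A minimal sketch of a typical table-in, table-out job setup (the table
 * names and {@code MyMapper}, assumed to be a
 * {@code TableMapper<ImmutableBytesWritable, Put>}, are placeholders):
 *
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * Job job = new Job(conf, "copy-example");
 * Scan scan = new Scan();
 * TableMapReduceUtil.initTableMapperJob("sourceTable", scan, MyMapper.class,
 *     ImmutableBytesWritable.class, Put.class, job);
 * TableMapReduceUtil.initTableReducerJob("targetTable",
 *     IdentityTableReducer.class, job);
 * job.waitForCompletion(true);
 * </pre>
 */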
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
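  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job, including adding the HBase dependency jars.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @throws IOException When setting up the details fails.
   */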
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
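  /**
   * Use this before submitting a TableMap job. Same as
   * {@link #initTableMapperJob(String, Scan, Class, Class, Class, Job)},
   * but takes the table name as a byte array.
   */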
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, true);
  }
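  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param addDependencyJars  Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format class to use.
   * @throws IOException When setting up the details fails.
   */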
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }
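  /**
   * Use this before submitting a TableMap job. Same as
   * {@link #initTableMapperJob(String, Scan, Class, Class, Class, Job, boolean, Class)},
   * but takes the table name as a byte array.
   */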
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }
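  /**
   * Use this before submitting a TableMap job reading with the default
   * {@link TableInputFormat}. Takes the table name as a byte array.
   */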
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }
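  /**
   * Use this before submitting a TableMap job reading with the default
   * {@link TableInputFormat}.
   *
   * @param addDependencyJars  Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   */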
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }
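  /**
   * Use this before submitting a multi-table Map job. It will appropriately
   * set up the job, including adding the HBase dependency jars.
   *
   * @param scans  The list of {@link Scan} objects to read from.
   */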
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }
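  /**
   * Use this before submitting a multi-table Map job. Sets
   * {@link MultiTableInputFormat} as the input format and serializes the
   * given scans into the job configuration.
   *
   * @param addDependencyJars  Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   */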
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }
  }
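  /**
   * Sets up the security credentials the job needs: propagates the Hadoop
   * delegation token file location when Hadoop security is enabled, and
   * obtains HBase authentication tokens (for the local cluster and, if
   * {@link TableOutputFormat#QUORUM_ADDRESS} is configured, for the remote
   * output cluster) when HBase security is enabled.
   */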
  public static void initCredentials(Job job) throws IOException {
    UserProvider provider = UserProvider.instantiate(job.getConfiguration());

    if (provider.isHadoopSecurityEnabled()) {
      // propagate delegation related props from launcher job to MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (provider.isHBaseSecurityEnabled()) {
      try {
        // init credentials for remote cluster, if one was configured
        String quorumAddress = job.getConfiguration().get(
            TableOutputFormat.QUORUM_ADDRESS);
        User user = provider.getCurrent();
        if (quorumAddress != null) {
          String[] parts = ZKUtil.transformClusterKey(quorumAddress);
          Configuration peerConf = HBaseConfiguration.create(job
              .getConfiguration());
          peerConf.set(HConstants.ZOOKEEPER_QUORUM, parts[0]);
          peerConf.set("hbase.zookeeper.client.port", parts[1]);
          peerConf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parts[2]);
          obtainAuthTokenForJob(job, peerConf, user);
        }

        // obtain an auth token for the cluster this job runs against
        obtainAuthTokenForJob(job, job.getConfiguration(), user);
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status rather than clearing it
        Thread.currentThread().interrupt();
      }
    }
  }
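  /**
   * Adds an HBase auth token for the cluster described by {@code conf} to the
   * job's credentials, reusing the user's existing token when one is present
   * and obtaining a new one otherwise.
   */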
  private static void obtainAuthTokenForJob(Job job, Configuration conf, User user)
      throws IOException, InterruptedException {
    Token<?> authToken = getAuthToken(conf, user);
    if (authToken == null) {
      user.obtainAuthTokenForJob(conf, job);
    } else {
      job.getCredentials().addToken(authToken.getService(), authToken);
    }
  }
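  /**
   * Get the authentication token of the user for the cluster specified in
   * the configuration.
   *
   * @return null if the user does not have the token, otherwise the auth
   *         token for the cluster.
   */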
  private static Token<?> getAuthToken(Configuration conf, User user)
      throws IOException, InterruptedException {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null);
    try {
      String clusterId = ClusterId.readClusterIdZNode(zkw);
      return user.getToken("HBASE_AUTH_TOKEN", clusterId);
    } catch (KeeperException e) {
      throw new IOException(e);
    } finally {
      zkw.close();
    }
  }
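  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */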
  static String convertScanToString(Scan scan) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(out);
    scan.write(dos);
    return Base64.encodeBytes(out.toByteArray());
  }
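  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */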
  static Scan convertStringToScan(String base64) throws IOException {
    ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decode(base64));
    DataInputStream dis = new DataInputStream(bis);
    Scan scan = new Scan();
    scan.readFields(dis);
    return scan;
  }
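  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the job configuration to write to the given table.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */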
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }
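  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the job configuration to write to the given table.
   *
   * @param partitioner  Partitioner to use. Pass {@code null} to use the
   *          default partitioner.
   */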
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }
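  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the job configuration to write to the given table.
   *
   * @param quorumAddress  Cluster to write to; pass {@code null} to write to
   *          the cluster designated in the job's {@code hbase-site.xml}. To
   *          write to a remote cluster (for example when copying tables
   *          between clusters), pass its key in the form
   *          {@code <quorum servers>:<client port>:<znode parent>}, e.g.
   *          {@code server1,server2,server3:2181:/hbase}.
   * @param serverClass  redefined hbase.regionserver.class
   * @param serverImpl  redefined hbase.regionserver.impl
   */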
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }
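  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the job configuration to write to the given table.
   *
   * @param addDependencyJars  Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */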
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);

    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format.
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      HTable outputTable = new HTable(conf, table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }
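  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */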
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  }
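  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */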
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }
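  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job  The current job to adjust.
   * @param batchSize  The number of rows to return in batch with each scanner
   *          iteration.
   */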
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }
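  /**
   * Adds HBase and its direct dependencies (ZooKeeper, protobuf, Guava and
   * the HBase jar itself) to the configuration's {@code tmpjars} so they are
   * shipped to the cluster with the job.
   */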
  public static void addHBaseDependencyJars(Configuration conf) throws IOException {
    addDependencyJars(conf,
        org.apache.zookeeper.ZooKeeper.class,
        com.google.protobuf.Message.class,
        com.google.common.base.Function.class,
        com.google.common.collect.ImmutableSet.class,
        org.apache.hadoop.hbase.util.Bytes.class);
  }
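  /**
   * Returns a classpath string built from the content of the "tmpjars" value
   * in {@code conf}.
   */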
  public static String buildDependencyClasspath(Configuration conf) {
    if (conf == null) {
      throw new IllegalArgumentException("Must provide a configuration object.");
    }
    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
    if (paths.size() == 0) {
      throw new IllegalArgumentException("Configuration contains no tmpjars.");
    }
    StringBuilder sb = new StringBuilder();
    for (String s : paths) {
      // entries can take the form 'file:/path/to/file.jar'; strip the scheme.
      int idx = s.indexOf(":");
      if (idx != -1) s = s.substring(idx + 1);
      if (sb.length() > 0) sb.append(File.pathSeparator);
      sb.append(s);
    }
    return sb.toString();
  }
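  /**
   * Add the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   */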
  public static void addDependencyJars(Job job) throws IOException {
    addHBaseDependencyJars(job.getConfiguration());
    try {
      addDependencyJars(job.getConfiguration(),
          // pull the job's own key/value, input/output format, partitioner
          // and combiner classes as well
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }
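  /**
   * Add the jars containing the given classes to the job's configuration
   * such that JobClient will ship them to the cluster and add them to the
   * DistributedCache.
   */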
  public static void addDependencyJars(Configuration conf,
      Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // Map from class file name to containing jar, so we avoid creating new
    // jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
      if (clazz == null) continue;

      Path path = findOrCreateJar(clazz, localFs, packagedClasses);
      if (path == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
  }
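  /**
   * Finds the jar containing the given class or, if the class comes from a
   * classpath directory, packages that directory into a jar via JarFinder.
   * Updates {@code packagedClasses} with the classes found in the jar.
   *
   * @param my_class the class to find.
   * @param fs the FileSystem with which to qualify the returned path.
   * @param packagedClasses a map of class file name to containing jar.
   * @return a qualified path to a jar containing the class, or null if none
   *         could be found or created.
   */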
  private static Path findOrCreateJar(Class<?> my_class, FileSystem fs,
      Map<String, String> packagedClasses)
  throws IOException {
    // attempt to locate an existing jar for the class.
    String jar = findContainingJar(my_class, packagedClasses);
    if (null == jar || jar.isEmpty()) {
      jar = getJar(my_class);
      updateMap(jar, packagedClasses);
    }

    if (null == jar || jar.isEmpty()) {
      return null;
    }

    LOG.debug(String.format("For class %s, using jar %s", my_class.getName(), jar));
    return new Path(jar).makeQualified(fs);
  }
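  /**
   * Adds an entry to {@code packagedClasses} for every class file found in
   * the given jar.
   *
   * @param jar The jar whose content to list.
   * @param packagedClasses map[class -> jar]
   */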
  private static void updateMap(String jar, Map<String, String> packagedClasses) throws IOException {
    if (null == jar || jar.isEmpty()) {
      return;
    }
    ZipFile zip = null;
    try {
      zip = new ZipFile(jar);
      for (Enumeration<? extends ZipEntry> iter = zip.entries(); iter.hasMoreElements();) {
        ZipEntry entry = iter.nextElement();
        // record every class file the jar contains
        if (entry.getName().endsWith(".class")) {
          packagedClasses.put(entry.getName(), jar);
        }
      }
    } finally {
      if (null != zip) zip.close();
    }
  }
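  /**
   * Finds a jar on the classpath that contains the given class, falling back
   * to the {@code packagedClasses} map when the class was packaged by this
   * utility rather than loaded from a jar.
   *
   * @param my_class the class to find.
   * @return the path of a jar that contains the class, or null.
   */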
  private static String findContainingJar(Class<?> my_class, Map<String, String> packagedClasses)
      throws IOException {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";

    // first search the classpath
    for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
      URL url = itr.nextElement();
      if ("jar".equals(url.getProtocol())) {
        String toReturn = url.getPath();
        if (toReturn.startsWith("file:")) {
          toReturn = toReturn.substring("file:".length());
        }
        // URLDecoder is a misnamed class, since it actually decodes
        // x-www-form-urlencoded MIME type rather than actual
        // URL encoding (which the file path has). Therefore it would
        // decode +s to ' 's which is incorrect (spaces are actually
        // allowed in paths). Replace +s first, so that they are kept
        // sacred during the decoding process.
        toReturn = toReturn.replaceAll("\\+", "%2B");
        toReturn = URLDecoder.decode(toReturn, "UTF-8");
        return toReturn.replaceAll("!.*$", "");
      }
    }

    // nothing on the classpath; fall back to the classes we packaged
    // ourselves. Returns null when the class is not found there either.
    return packagedClasses.get(class_file);
  }
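  /**
   * Invokes 'getJar' on a JarFinder implementation: Hadoop's
   * {@code org.apache.hadoop.util.JarFinder} when it is present on the
   * classpath, otherwise the backported {@link JarFinder} bundled with HBase.
   *
   * @param my_class the class to find.
   * @return a jar file that contains the class, or null.
   */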
  private static String getJar(Class<?> my_class) {
    String ret = null;
    String hadoopJarFinder = "org.apache.hadoop.util.JarFinder";
    Class<?> jarFinder = null;
    try {
      LOG.debug("Looking for " + hadoopJarFinder + ".");
      jarFinder = Class.forName(hadoopJarFinder);
      LOG.debug(hadoopJarFinder + " found.");
      Method getJar = jarFinder.getMethod("getJar", Class.class);
      ret = (String) getJar.invoke(null, my_class);
    } catch (ClassNotFoundException e) {
      LOG.debug("Using backported JarFinder.");
      ret = JarFinder.getJar(my_class);
    } catch (InvocationTargetException e) {
      // function was properly called, but threw its own exception.
      // Unwrap it and pass it on.
      throw new RuntimeException(e.getCause());
    } catch (Exception e) {
      // toss all other exceptions, related to reflection failure
      throw new RuntimeException("getJar invocation failed.", e);
    }

    return ret;
  }
}