/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.StringUtils;
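/**
 * Utility for {@link TableMapper} and {@link TableReducer}.
 *
 * <p>A minimal driver sketch (the mapper class <code>MyMapper</code> and the
 * table name <code>"mytable"</code> below are hypothetical placeholders, not
 * part of this API):
 *
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * Job job = new Job(conf, "scan-mytable");
 * Scan scan = new Scan();
 * TableMapReduceUtil.initTableMapperJob("mytable", scan, MyMapper.class,
 *     ImmutableBytesWritable.class, Put.class, job);
 * TableMapReduceUtil.initTableReducerJob("mytable",
 *     IdentityTableReducer.class, job);
 * job.waitForCompletion(true);
 * </pre>
 */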
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {
  private static final Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
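  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */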
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
        job, true);
  }
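  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from, as a byte array.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */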
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, true);
  }
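  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format class to use.
   * @throws IOException When setting up the details fails.
   */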
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableInputFormat.INPUT_TABLE, table);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }
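  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job.
   *
   * @param table  The table name to read from, as a byte array.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @param inputFormatClass  The input format class to use.
   * @throws IOException When setting up the details fails.
   */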
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, inputFormatClass);
  }
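  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job, using {@link TableInputFormat} as the input format.
   *
   * @param table  The table name to read from, as a byte array.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */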
  public static void initTableMapperJob(byte[] table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(Bytes.toString(table), scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }
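  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the job, using {@link TableInputFormat} as the input format.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */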
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars)
  throws IOException {
    initTableMapperJob(table, scan, mapper, outputKeyClass,
        outputValueClass, job, addDependencyJars, TableInputFormat.class);
  }
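  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job.
   *
   * @param scans  The list of {@link Scan} objects to read from.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @throws IOException When setting up the details fails.
   */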
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
        true);
  }
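  /**
   * Use this before submitting a Multi TableMap job. It will appropriately
   * set up the job, using {@link MultiTableInputFormat} as the input format.
   *
   * @param scans  The list of {@link Scan} objects to read from.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @throws IOException When setting up the details fails.
   */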
  public static void initTableMapperJob(List<Scan> scans,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job,
      boolean addDependencyJars) throws IOException {
    job.setInputFormatClass(MultiTableInputFormat.class);
    if (outputValueClass != null) {
      job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
      job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    List<String> scanStrings = new ArrayList<String>();

    for (Scan scan : scans) {
      scanStrings.add(convertScanToString(scan));
    }
    job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
        scanStrings.toArray(new String[scanStrings.size()]));

    if (addDependencyJars) {
      addDependencyJars(job);
    }
  }

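  /**
   * Obtains an authentication token for the job on behalf of the current
   * user, if HBase security is enabled. When the job is configured to write
   * to a remote cluster (see {@link TableOutputFormat#QUORUM_ADDRESS}), a
   * token for that cluster is obtained as well.
   *
   * @param job  The current job to adjust.
   * @throws IOException When obtaining the authentication token fails.
   */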
  public static void initCredentials(Job job) throws IOException {
    if (User.isHBaseSecurityEnabled(job.getConfiguration())) {
      try {
        // init credentials for the remote cluster, if one is configured
        String quorumAddress = job.getConfiguration().get(
            TableOutputFormat.QUORUM_ADDRESS);
        if (quorumAddress != null) {
          String[] parts = ZKUtil.transformClusterKey(quorumAddress);
          Configuration peerConf = HBaseConfiguration.create(job
              .getConfiguration());
          peerConf.set(HConstants.ZOOKEEPER_QUORUM, parts[0]);
          peerConf.set(HConstants.ZOOKEEPER_CLIENT_PORT, parts[1]);
          peerConf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parts[2]);
          User.getCurrent().obtainAuthTokenForJob(peerConf, job);
        }

        User.getCurrent().obtainAuthTokenForJob(job.getConfiguration(), job);
      } catch (InterruptedException ie) {
        LOG.info("Interrupted obtaining user authentication token");
        // restore the interrupt status rather than clearing it
        Thread.currentThread().interrupt();
      }
    }
  }
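  /**
   * Writes the given scan into a Base64 encoded string.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */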
  static String convertScanToString(Scan scan) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(out);
    scan.write(dos);
    return Base64.encodeBytes(out.toByteArray());
  }
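  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */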
  static Scan convertStringToScan(String base64) throws IOException {
    ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decode(base64));
    DataInputStream dis = new DataInputStream(bis);
    Scan scan = new Scan();
    scan.readFields(dis);
    return scan;
  }
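  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */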
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
  throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }
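  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @throws IOException When determining the region count fails.
   */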
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }
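  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *   output to the cluster that is designated in <code>hbase-site.xml</code>.
   *   Set this String to the zookeeper ensemble of an alternate remote
   *   cluster when you would have the reduce write to a cluster that is
   *   other than the default; e.g. copying tables between clusters, the
   *   source would be designated by <code>hbase-site.xml</code> and this
   *   param would have the ensemble address of the remote cluster. The
   *   format to pass is particular. Pass
   *   <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
   *   such as <code>server,server2,server3:2181:/hbase</code>.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @throws IOException When determining the region count fails.
   */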
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
        serverClass, serverImpl, true);
  }
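  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.  Make sure the passed job is
   *   carrying all necessary HBase configuration.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @param quorumAddress Distant cluster to write to; default is null for
   *   output to the cluster that is designated in <code>hbase-site.xml</code>.
   *   See the overload above for the expected
   *   <code>quorum:port:znode</code> format.
   * @param serverClass redefined hbase.regionserver.class
   * @param serverImpl redefined hbase.regionserver.impl
   * @param addDependencyJars upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */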
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);

    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
      // Calling this will validate the format
      ZKUtil.transformClusterKey(quorumAddress);
      conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      HTable outputTable = new HTable(conf, table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
      addDependencyJars(job);
    }

    initCredentials(job);
  }
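  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */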
  public static void limitNumReduceTasks(String table, Job job)
  throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  }
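  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */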
  public static void setNumReduceTasks(String table, Job job)
  throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }
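  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense
   * of requiring more heap to contain the cached rows.
   *
   * @param job  The current job to adjust.
   * @param batchSize  The number of rows to return in batch with each
   *   scanner iteration.
   */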
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }
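  /**
   * Adds the HBase dependency jars as well as jars for any of the configured
   * job classes to the job configuration, so that JobClient will ship them
   * to the cluster and add them to the DistributedCache.
   *
   * @param job  The job to adjust.
   * @throws IOException When one of the configured job classes cannot be
   *   resolved.
   */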
  public static void addDependencyJars(Job job) throws IOException {
    try {
      // Ship the zookeeper, protobuf and guava jars along with the jars
      // for the job's own key/value, input/output and partitioner classes.
      addDependencyJars(job.getConfiguration(),
          org.apache.zookeeper.ZooKeeper.class,
          com.google.protobuf.Message.class,
          com.google.common.collect.ImmutableSet.class,
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getInputFormatClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }
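  /**
   * Adds the jars containing the given classes to the job's configuration
   * (via the <code>tmpjars</code> property), such that JobClient will ship
   * them to the cluster and add them to the DistributedCache.
   *
   * @param conf  The configuration to amend.
   * @param classes  The classes whose containing jars should be shipped.
   * @throws IOException When checking a jar on the local filesystem fails.
   */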
  public static void addDependencyJars(Configuration conf,
      Class... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);

    Set<String> jars = new HashSet<String>();

    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // Add jars containing the specified classes
    for (Class clazz : classes) {
      if (clazz == null) continue;

      String pathStr = findOrCreateJar(clazz);
      if (pathStr == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      Path path = new Path(pathStr);
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.makeQualified(localFs).toString());
    }
    if (jars.isEmpty()) return;

    conf.set("tmpjars",
        StringUtils.arrayToString(jars.toArray(new String[0])));
  }
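  /**
   * If org.apache.hadoop.util.JarFinder is available (hadoop 0.23+), finds
   * the jar for a class or creates it if it doesn't exist. If the class is
   * in a directory on the classpath, it creates a jar on the fly with the
   * contents of the directory and returns the path to that jar. Such a jar
   * is created in the system temporary directory.
   *
   * Otherwise, returns an existing jar that contains a class of the same
   * name.
   *
   * @param my_class  the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException When the jar lookup fails.
   */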
  private static String findOrCreateJar(Class my_class)
  throws IOException {
    try {
      Class<?> jarFinder = Class.forName("org.apache.hadoop.util.JarFinder");
      // hadoop-0.23 has a JarFinder class that will create the jar
      // if it doesn't exist. Note that this is needed to run the mapreduce
      // job from within an MRv2 container, since the class files are
      // unpacked into a directory rather than shipped as a jar.
      //
      // Use reflection instead of referencing the class directly, so that
      // this still compiles against older hadoop versions.
      Method m = jarFinder.getMethod("getJar", Class.class);
      return (String) m.invoke(null, my_class);
    } catch (InvocationTargetException ite) {
      // the function was properly called, but threw its own exception
      throw new IOException(ite.getCause());
    } catch (Exception e) {
      // ignore all other exceptions: JarFinder is not available, so fall
      // through to the old behavior below
    }

    LOG.debug("New JarFinder: org.apache.hadoop.util.JarFinder.getJar " +
        "not available.  Using old findContainingJar");
    return findContainingJar(my_class);
  }
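  /**
   * Finds a jar that contains a class of the same name, if any. It will
   * return a jar file, even if that is not the first thing on the class
   * path that has a class with the same name. (Adapted from Hadoop's
   * JobConf.)
   *
   * @param my_class  the class to find.
   * @return a jar file that contains the class, or null.
   */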
  private static String findContainingJar(Class my_class) {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
    try {
      for (Enumeration itr = loader.getResources(class_file);
          itr.hasMoreElements();) {
        URL url = (URL) itr.nextElement();
        if ("jar".equals(url.getProtocol())) {
          String toReturn = url.getPath();
          if (toReturn.startsWith("file:")) {
            toReturn = toReturn.substring("file:".length());
          }
          // URLDecoder is a misnamed class, since it actually decodes
          // the x-www-form-urlencoded MIME type rather than actual
          // URL encoding (which the file path has). Therefore it would
          // decode +s to ' 's, which is incorrect (spaces are actually
          // either unencoded or encoded as "%20"). Replace +s first, so
          // that they are kept sacred during the decoding process.
          toReturn = toReturn.replaceAll("\\+", "%2B");
          toReturn = URLDecoder.decode(toReturn, "UTF-8");
          return toReturn.replaceAll("!.*$", "");
        }
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    return null;
  }

}