package org.apache.hadoop.hbase.mapreduce;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.ZooKeeper;

import com.google.common.base.Function;
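
/**
 * Utility methods for setting up {@link TableMapper} and {@link TableReducer}
 * MapReduce jobs that read from and write to HBase tables.
 */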
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {
  static Log LOG = LogFactory.getLog(TableMapReduceUtil.class);
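
  /**
   * Use this before submitting a TableMap job. It sets the input format,
   * mapper, and map output key/value classes, and serializes the scan into
   * the job configuration.
   *
   * @param table  The table name to read from.
   * @param scan  The scan instance with the columns, time range etc.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job to adjust.
   * @throws IOException When setting up the details fails.
   */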
  public static void initTableMapperJob(String table, Scan scan,
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job) throws IOException {
    job.setInputFormatClass(TableInputFormat.class);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
    job.getConfiguration().set(TableInputFormat.SCAN,
        convertScanToString(scan));
  }
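
  /**
   * Writes the given scan into a Base64 encoded string so it can be stored
   * in the job configuration.
   *
   * @param scan  The scan to write out.
   * @return The scan saved in a Base64 encoded string.
   * @throws IOException When writing the scan fails.
   */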
  static String convertScanToString(Scan scan) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(out);
    scan.write(dos);
    return Base64.encodeBytes(out.toByteArray());
  }
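
  /**
   * Converts the given Base64 string back into a Scan instance.
   *
   * @param base64  The scan details.
   * @return The newly created Scan instance.
   * @throws IOException When reading the scan instance fails.
   */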
  static Scan convertStringToScan(String base64) throws IOException {
    ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decode(base64));
    DataInputStream dis = new DataInputStream(bis);
    Scan scan = new Scan();
    scan.readFields(dis);
    return scan;
  }
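
  /**
   * Use this before submitting a TableReduce job. It sets up the output
   * format and reducer, using the default partitioner.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @throws IOException When determining the region count fails.
   */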
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job)
      throws IOException {
    initTableReducerJob(table, reducer, job, null);
  }
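
  /**
   * Use this before submitting a TableReduce job. It sets up the output
   * format, reducer, and partitioner.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use the
   *   default partitioner.
   * @throws IOException When determining the region count fails.
   */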
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner) throws IOException {
    initTableReducerJob(table, reducer, job, partitioner, null, null, null);
  }
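
  /**
   * Use this before submitting a TableReduce job. It sets up the output
   * format, reducer, partitioner, and optionally a peer cluster to write to.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use the
   *   default partitioner.
   * @param quorumAddress  Peer cluster to write to, or <code>null</code> to
   *   write to the cluster designated in <code>hbase-site.xml</code>. The
   *   expected format is
   *   <code>&lt;hbase.zookeeper.quorum&gt;:&lt;zookeeper.znode.parent&gt;</code>.
   * @param serverClass  Alternate region server interface class passed on to
   *   {@link TableOutputFormat}, or <code>null</code>.
   * @param serverImpl  Alternate region server implementation class passed on
   *   to {@link TableOutputFormat}, or <code>null</code>.
   * @throws IOException When determining the region count fails.
   */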
  public static void initTableReducerJob(String table,
      Class<? extends TableReducer> reducer, Job job,
      Class partitioner, String quorumAddress, String serverClass,
      String serverImpl) throws IOException {

    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(TableOutputFormat.class);
    if (reducer != null) job.setReducerClass(reducer);
    conf.set(TableOutputFormat.OUTPUT_TABLE, table);
    if (quorumAddress != null) {
      if (quorumAddress.split(":").length == 2) {
        conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
      } else {
        throw new IOException("Please specify the peer cluster as " +
            HConstants.ZOOKEEPER_QUORUM + ":" + HConstants.ZOOKEEPER_ZNODE_PARENT);
      }
    }
    if (serverClass != null && serverImpl != null) {
      conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
      conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    if (partitioner == HRegionPartitioner.class) {
      HBaseConfiguration.addHbaseResources(conf);
      job.setPartitionerClass(HRegionPartitioner.class);
      HTable outputTable = new HTable(conf, table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
  }
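
  /**
   * Ensures that the number of reduce tasks for the given job does not
   * exceed the number of regions of the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */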
  public static void limitNumReduceTasks(String table, Job job)
      throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  }
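
  /**
   * Sets the number of reduce tasks for the given job to the number of
   * regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job to adjust.
   * @throws IOException When retrieving the table details fails.
   */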
  public static void setNumReduceTasks(String table, Job job)
      throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }
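
  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job  The current job to adjust.
   * @param batchSize  The number of rows to return in batch with each scanner
   *   iteration.
   */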
  public static void setScannerCaching(Job job, int batchSize) {
    job.getConfiguration().setInt("hbase.client.scanner.caching", batchSize);
  }
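
  /**
   * Adds the HBase dependency jars (ZooKeeper, Guava) as well as the jars for
   * the job's configured key, value, output format, partitioner, and combiner
   * classes to the job configuration, so that the JobClient ships them to the
   * cluster and makes them available on the task classpath.
   *
   * @param job  The job to adjust.
   * @throws IOException When adding the jars fails.
   */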
  public static void addDependencyJars(Job job) throws IOException {
    try {
      addDependencyJars(job.getConfiguration(),
          ZooKeeper.class,
          Function.class,
          job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(),
          job.getOutputKeyClass(),
          job.getOutputValueClass(),
          job.getOutputFormatClass(),
          job.getPartitionerClass(),
          job.getCombinerClass());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }
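
  /**
   * Adds the jars containing the given classes to the job's "tmpjars"
   * configuration such that the JobClient will ship them to the cluster and
   * add them to the DistributedCache. Classes whose jar cannot be found or
   * validated on the local file system are skipped with a warning.
   *
   * @param conf  The job configuration to adjust.
   * @param classes  The classes whose containing jars should be shipped.
   * @throws IOException When the local file system cannot be accessed.
   */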
  public static void addDependencyJars(Configuration conf,
      Class... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);

    Set<String> jars = new HashSet<String>();
    for (Class clazz : classes) {
      if (clazz == null) continue;

      String pathStr = findContainingJar(clazz);
      if (pathStr == null) {
        LOG.warn("Could not find jar for class " + clazz +
            " in order to ship it to the cluster.");
        continue;
      }
      Path path = new Path(pathStr);
      if (!localFs.exists(path)) {
        LOG.warn("Could not validate jar file " + path + " for class "
            + clazz);
        continue;
      }
      jars.add(path.makeQualified(localFs).toString());
    }
    if (jars.isEmpty()) return;

    String tmpJars = conf.get("tmpjars");
    if (tmpJars == null) {
      tmpJars = StringUtils.arrayToString(jars.toArray(new String[0]));
    } else {
      tmpJars += "," + StringUtils.arrayToString(jars.toArray(new String[0]));
    }
    conf.set("tmpjars", tmpJars);
  }
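
  /**
   * Finds a jar on the classpath that contains the given class, if any. It
   * returns the first classpath entry that is a jar, skipping entries that
   * are plain directories even if they appear earlier on the classpath.
   *
   * @param my_class  The class to look for.
   * @return The path of a jar file that contains the class, or null if the
   *   class was not found inside a jar.
   */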
  private static String findContainingJar(Class my_class) {
    ClassLoader loader = my_class.getClassLoader();
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
    try {
      for (Enumeration itr = loader.getResources(class_file);
          itr.hasMoreElements();) {
        URL url = (URL) itr.nextElement();
        if ("jar".equals(url.getProtocol())) {
          String toReturn = url.getPath();
          if (toReturn.startsWith("file:")) {
            toReturn = toReturn.substring("file:".length());
          }
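          // URLDecoder decodes application/x-www-form-urlencoded data, so it
          // would turn '+' into a space; escape '+' as "%2B" first so the
          // decode below leaves literal plus signs in the path intact.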
          toReturn = toReturn.replaceAll("\\+", "%2B");
          toReturn = URLDecoder.decode(toReturn, "UTF-8");
          return toReturn.replaceAll("!.*$", "");
        }
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    return null;
  }

}