/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.zookeeper.ClusterId;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.security.token.Token;
import org.apache.zookeeper.KeeperException;

/**
 * Utility for {@link TableMap} and {@link TableReduce}.
 */
@Deprecated
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   */
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job) {
    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   * @param addDependencyJars  Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   */
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job, boolean addDependencyJars) {

    job.setInputFormat(TableInputFormat.class);
    job.setMapOutputValueClass(outputValueClass);
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    FileInputFormat.addInputPaths(job, table);
    job.set(TableInputFormat.COLUMN_LIST, columns);
    if (addDependencyJars) {
      try {
        addDependencyJars(job);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    try {
      initCredentials(job);
    } catch (IOException ioe) {
      // can't obtain credentials - continue as same user
      ioe.printStackTrace();
    }
  }
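
  // A minimal driver sketch for the map-side setup above; "mytable", the
  // column list, MyDriver and MyTableMap are hypothetical placeholders. A
  // TableMap such as IdentityTableMap emits ImmutableBytesWritable keys and
  // Result values, so those are used as the map output classes here.
  //
  //   JobConf job = new JobConf(HBaseConfiguration.create(), MyDriver.class);
  //   TableMapReduceUtil.initTableMapJob("mytable", "info:name info:age",
  //       MyTableMap.class, ImmutableBytesWritable.class, Result.class, job);
  //   JobClient.runJob(job);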

  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job)
  throws IOException {
    initTableReduceJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
  throws IOException {
    initTableReduceJob(table, reducer, job, partitioner, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will appropriately set
   * up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          default partitioner.
   * @param addDependencyJars  Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
    boolean addDependencyJars) throws IOException {
    job.setOutputFormat(TableOutputFormat.class);
    job.setReducerClass(reducer);
    job.set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }
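
  // A minimal driver sketch for the reduce-side setup above; MyDriver and
  // MyTableReduce are hypothetical placeholders for a TableReduce that emits
  // Put objects. Passing HRegionPartitioner spreads the output across the
  // table's regions and caps the reducer count at the region count.
  //
  //   JobConf job = new JobConf(HBaseConfiguration.create(), MyDriver.class);
  //   TableMapReduceUtil.initTableReduceJob("mytable", MyTableReduce.class,
  //       job, HRegionPartitioner.class);
  //   JobClient.runJob(job);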

  public static void initCredentials(JobConf job) throws IOException {
    UserProvider provider = UserProvider.instantiate(job);

    if (provider.isHadoopSecurityEnabled()) {
      // propagate delegation related props from launcher job to MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (provider.isHBaseSecurityEnabled()) {
      try {
        // obtain (or reuse) an HBase authentication token for the current user
        User user = provider.getCurrent();
        Token<?> authToken = getAuthToken(job, user);
        if (authToken == null) {
          user.obtainAuthTokenForJob(job);
        } else {
          job.getCredentials().addToken(authToken.getService(), authToken);
        }
      } catch (InterruptedException ie) {
        ie.printStackTrace();
        Thread.currentThread().interrupt(); // preserve the interrupt status for callers
      }
    }
  }
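
  // Note: the initTableMapJob/initTableReduceJob helpers above already invoke
  // initCredentials. A driver that wires its JobConf up by hand on a secure
  // cluster would call it directly before submission, e.g.:
  //
  //   TableMapReduceUtil.initCredentials(job);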

  /**
   * Get the authentication token of the user for the cluster specified in
   * the configuration.
   * @return null if the user does not have the token, otherwise the auth
   *         token for the cluster.
   */
  private static Token<?> getAuthToken(Configuration conf, User user)
      throws IOException, InterruptedException {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null);
    try {
      String clusterId = ClusterId.readClusterIdZNode(zkw);
      return user.getToken("HBASE_AUTH_TOKEN", clusterId);
    } catch (KeeperException e) {
      throw new IOException(e);
    } finally {
      zkw.close();
    }
  }

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  }
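
  // Example: a driver that supplies its own partitioner instead of
  // HRegionPartitioner can still cap its reducer count at the table's region
  // count before submitting ("mytable" is an illustrative name):
  //
  //   TableMapReduceUtil.limitNumReduceTasks("mytable", job);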

  /**
   * Ensures that the given number of map tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumMapTasks() > regions) {
      job.setNumMapTasks(regions);
    }
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of map tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumMapTasks(regions);
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job  The current job configuration to adjust.
   * @param batchSize  The number of rows to return in batch with each scanner
   *          iteration.
   */
  public static void setScannerCaching(JobConf job, int batchSize) {
    job.setInt("hbase.client.scanner.caching", batchSize);
  }
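
  // Example: a scan-heavy job can raise the per-RPC row count, trading client
  // heap for fewer round trips to the regionservers (500 is an arbitrary
  // illustrative value):
  //
  //   TableMapReduceUtil.setScannerCaching(job, 500);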

  /**
   * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
   */
  public static void addDependencyJars(JobConf job) throws IOException {
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
      job,
      // when making changes here, consider also mapreduce.TableMapReduceUtil
      job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(),
      job.getOutputKeyClass(),
      job.getOutputValueClass(),
      job.getPartitionerClass(),
      job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
      job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
      job.getCombinerClass());
  }
}