/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenSelector;
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.security.token.Token;
import org.apache.zookeeper.KeeperException;
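
/**
 * Utility for {@link TableMap} and {@link TableReduce} jobs that read from or write to an
 * HBase table using the old-style {@code org.apache.hadoop.mapred} API.
 *
 * A minimal driver sketch; {@code MyMapper} (a {@link TableMap} emitting {@link Put} values),
 * {@code "sourceTable"} and {@code "targetTable"} are illustrative names, not part of this class:
 *
 * <pre>
 * JobConf job = new JobConf(HBaseConfiguration.create());
 * TableMapReduceUtil.initTableMapJob("sourceTable", "info:col1 info:col2",
 *     MyMapper.class, ImmutableBytesWritable.class, Put.class, job);
 * TableMapReduceUtil.initTableReduceJob("targetTable", IdentityTableReduce.class, job);
 * JobClient.runJob(job);
 * </pre>
 */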
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
@SuppressWarnings({ "rawtypes", "unchecked" })
public class TableMapReduceUtil {
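
  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   */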
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, JobConf job) {
    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
      true, TableInputFormat.class);
  }
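
  /**
   * Use this before submitting a TableMap job. Same as
   * {@link #initTableMapJob(String, String, Class, Class, Class, JobConf)}, but lets the
   * caller decide whether dependency jars are shipped with the job.
   *
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   */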
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, JobConf job, boolean addDependencyJars) {
    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableInputFormat.class);
  }
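
  /**
   * Use this before submitting a TableMap job. It will appropriately set up
   * the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param inputFormat  The input format to use.
   */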
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, JobConf job, boolean addDependencyJars,
    Class<? extends InputFormat> inputFormat) {

    job.setInputFormat(inputFormat);
    job.setMapOutputValueClass(outputValueClass);
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    // Register HBase's Mutation/Result serializations alongside whatever is already configured.
    job.setStrings("io.serializations", job.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    FileInputFormat.addInputPaths(job, table);
    job.set(TableInputFormat.COLUMN_LIST, columns);
    if (addDependencyJars) {
      try {
        addDependencyJars(job);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    try {
      initCredentials(job);
    } catch (IOException ioe) {
      // report and continue; the job will fail later if credentials were actually required
      ioe.printStackTrace();
    }
  }
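
  /**
   * Sets up the job for reading from a table snapshot instead of a live table. The snapshot
   * files are read directly from the filesystem, bypassing the region servers.
   *
   * @param snapshotName The name of the snapshot (of a table) to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @param tmpRestoreDir a temporary directory to restore the snapshot into. The current user
   *          should have write permissions to this directory, and it should not be a
   *          subdirectory of the HBase root dir. The directory can be deleted once the job
   *          is finished.
   * @throws IOException When setting up the details fails.
   * @see TableSnapshotInputFormat
   */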
  public static void initTableSnapshotMapJob(String snapshotName, String columns,
      Class<? extends TableMap> mapper,
      Class<?> outputKeyClass,
      Class<?> outputValueClass, JobConf job,
      boolean addDependencyJars, Path tmpRestoreDir)
  throws IOException {
    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
    initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars, TableSnapshotInputFormat.class);
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
  }
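
  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @throws IOException When determining the region count fails.
   */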
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job)
  throws IOException {
    initTableReduceJob(table, reducer, job, null);
  }
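
  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          the default partitioner.
   * @throws IOException When determining the region count fails.
   */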
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
  throws IOException {
    initTableReduceJob(table, reducer, job, partitioner, true);
  }
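
  /**
   * Use this before submitting a TableReduce job. It will appropriately set up
   * the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *          the default partitioner.
   * @param addDependencyJars upload HBase jars and jars for any of the configured
   *          job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */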
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
    boolean addDependencyJars) throws IOException {
    job.setOutputFormat(TableOutputFormat.class);
    job.setReducerClass(reducer);
    job.set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    job.setStrings("io.serializations", job.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName());
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      // When partitioning by region there is no point in having more reducers than regions.
      int regions = MetaReader.getRegionCount(HBaseConfiguration.create(job), table);
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }

  public static void initCredentials(JobConf job) throws IOException {
    UserProvider userProvider = UserProvider.instantiate(job);
    if (userProvider.isHadoopSecurityEnabled()) {
      // propagate delegation-related props from the launcher job to the MR job
      if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
      }
    }

    if (userProvider.isHBaseSecurityEnabled()) {
      try {
        // add the HBase authentication token of the current user to the job credentials,
        // obtaining a new one if none is present yet
        User user = userProvider.getCurrent();
        Token<AuthenticationTokenIdentifier> authToken = getAuthToken(job, user);
        if (authToken == null) {
          user.obtainAuthTokenForJob(job);
        } else {
          job.getCredentials().addToken(authToken.getService(), authToken);
        }
      } catch (InterruptedException ie) {
        ie.printStackTrace();
        Thread.currentThread().interrupt();
      }
    }
  }
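
  /**
   * Get the authentication token of the user for the cluster specified in the configuration.
   * @return null if the user does not have the token, otherwise the auth token for the cluster.
   */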
  private static Token<AuthenticationTokenIdentifier> getAuthToken(Configuration conf, User user)
      throws IOException, InterruptedException {
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null);
    try {
      String clusterId = ZKClusterId.readClusterIdZNode(zkw);
      return new AuthenticationTokenSelector().selectToken(new Text(clusterId),
          user.getUGI().getTokens());
    } catch (KeeperException e) {
      throw new IOException(e);
    } finally {
      zkw.close();
    }
  }
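
  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */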
  public static void limitNumReduceTasks(String table, JobConf job)
  throws IOException {
    int regions = MetaReader.getRegionCount(HBaseConfiguration.create(job), table);
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }
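
  /**
   * Ensures that the given number of map tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */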
  public static void limitNumMapTasks(String table, JobConf job)
  throws IOException {
    int regions = MetaReader.getRegionCount(HBaseConfiguration.create(job), table);
    if (job.getNumMapTasks() > regions)
      job.setNumMapTasks(regions);
  }
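
  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */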
  public static void setNumReduceTasks(String table, JobConf job)
  throws IOException {
    job.setNumReduceTasks(MetaReader.getRegionCount(HBaseConfiguration.create(job), table));
  }
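
  /**
   * Sets the number of map tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */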
  public static void setNumMapTasks(String table, JobConf job)
  throws IOException {
    job.setNumMapTasks(MetaReader.getRegionCount(HBaseConfiguration.create(job), table));
  }
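
  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job configuration to adjust.
   * @param batchSize The number of rows to return in batch with each scanner
   *          iteration.
   */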
  public static void setScannerCaching(JobConf job, int batchSize) {
    job.setInt("hbase.client.scanner.caching", batchSize);
  }
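
  /**
   * Adds the HBase dependency jars as well as jars for the job's configured classes to the
   * job's distributed cache.
   * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
   */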
  public static void addDependencyJars(JobConf job) throws IOException {
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
      job,
      // when making changes here, consider also mapreduce.TableMapReduceUtil
      // pull job classes
      job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(),
      job.getOutputKeyClass(),
      job.getOutputValueClass(),
      job.getPartitionerClass(),
      job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
      job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
      job.getCombinerClass());
  }
}