/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Utility for {@link TableMap} and {@link TableReduce}.
 */
@Deprecated
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {

  /**
   * Use this before submitting a TableMap job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   */
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job) {
    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass,
      job, true);
  }
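
  // Usage sketch (illustrative, not part of the original class): wiring a
  // table scan into an old-style mapred job. The driver class MyDriver, the
  // mapper MyTableMap, the table "mytable", and the column "info:name" are
  // hypothetical placeholders.
  //
  //   JobConf job = new JobConf(HBaseConfiguration.create(), MyDriver.class);
  //   TableMapReduceUtil.initTableMapJob("mytable", "info:name",
  //       MyTableMap.class, ImmutableBytesWritable.class, Result.class, job);
  //   JobClient.runJob(job);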

  /**
   * Use this before submitting a TableMap job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   * @param addDependencyJars  Upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   */
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job,
    boolean addDependencyJars) {

    job.setInputFormat(TableInputFormat.class);
    job.setMapOutputValueClass(outputValueClass);
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    // TableInputFormat expects the table name as the job's input "path"
    FileInputFormat.addInputPaths(job, table);
    job.set(TableInputFormat.COLUMN_LIST, columns);
    if (addDependencyJars) {
      try {
        addDependencyJars(job);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    try {
      initCredentials(job);
    } catch (IOException ioe) {
      // can't connect to zookeeper, or kerberos authentication failed
      ioe.printStackTrace();
    }
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job)
  throws IOException {
    initTableReduceJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
  throws IOException {
    initTableReduceJob(table, reducer, job, partitioner, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   default partitioner.
   * @param addDependencyJars  Upload HBase jars and jars for any of the
   *   configured job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
    boolean addDependencyJars) throws IOException {
    job.setOutputFormat(TableOutputFormat.class);
    job.setReducerClass(reducer);
    job.set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }
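
  // Usage sketch (illustrative): the reduce-side counterpart to the map-side
  // example above. It writes Puts into the hypothetical "mytable" and uses
  // HRegionPartitioner so each reduce task serves a single region;
  // MyTableReduce is a hypothetical placeholder.
  //
  //   TableMapReduceUtil.initTableReduceJob("mytable", MyTableReduce.class,
  //       job, HRegionPartitioner.class);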

  /**
   * Obtains an authentication token for the current user if HBase security
   * is enabled, and stores it in the given job configuration.
   */
  public static void initCredentials(JobConf job) throws IOException {
    if (User.isHBaseSecurityEnabled(job)) {
      try {
        User.getCurrent().obtainAuthTokenForJob(job);
      } catch (InterruptedException ie) {
        ie.printStackTrace();
        // restore the interrupt status rather than clearing it
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  }

  /**
   * Ensures that the given number of map tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumMapTasks() > regions) {
      job.setNumMapTasks(regions);
    }
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of map tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumMapTasks(regions);
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cache, and at some point the cost of
   * transferring all of the data to the map task will decrease throughput.
   *
   * @param job  The current job configuration to adjust.
   * @param batchSize  The number of rows to return in batch with each scanner
   *   iteration.
   */
  public static void setScannerCaching(JobConf job, int batchSize) {
    job.setInt("hbase.client.scanner.caching", batchSize);
  }
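
  // Usage sketch (illustrative; 100 is only a common starting point, larger
  // values trade client heap for fewer scanner round trips):
  //
  //   TableMapReduceUtil.setScannerCaching(job, 100);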

  /**
   * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
   */
  public static void addDependencyJars(JobConf job) throws IOException {
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
      job,
      org.apache.zookeeper.ZooKeeper.class,
      com.google.common.base.Function.class,
      com.google.protobuf.Message.class,
      job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(),
      job.getOutputKeyClass(),
      job.getOutputValueClass(),
      job.getPartitionerClass(),
      job.getClass("mapred.input.format.class", TextInputFormat.class,
        InputFormat.class),
      job.getClass("mapred.output.format.class", TextOutputFormat.class,
        OutputFormat.class),
      job.getCombinerClass());
  }
}