/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

/**
 * Utility for {@link TableMap} and {@link TableReduce}.
 */
@Deprecated
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {

  /**
   * Use this before submitting a TableMap job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   */
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job) {

    job.setInputFormat(TableInputFormat.class);
    job.setMapOutputValueClass(outputValueClass);
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    FileInputFormat.addInputPaths(job, table);
    job.set(TableInputFormat.COLUMN_LIST, columns);
  }
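
  /*
   * Example (not part of the original class): a minimal sketch of a driver
   * wiring up a map-only scan job with initTableMapJob. The table name
   * "mytable", the column "info:", and the MyDriver/MyTableMap classes are
   * hypothetical; MyTableMap is assumed to implement TableMap and emit
   * ImmutableBytesWritable keys and org.apache.hadoop.hbase.client.Result
   * values.
   *
   *   JobConf job = new JobConf(new HBaseConfiguration(), MyDriver.class);
   *   job.setJobName("scan mytable");
   *   TableMapReduceUtil.initTableMapJob("mytable", "info:",
   *     MyTableMap.class, ImmutableBytesWritable.class, Result.class, job);
   *   job.setNumReduceTasks(0);  // map-only: no reduce phase needed for a scan
   *   JobClient.runJob(job);
   */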

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job)
  throws IOException {
    initTableReduceJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   the default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
  throws IOException {
    job.setOutputFormat(TableOutputFormat.class);
    job.setReducerClass(reducer);
    job.set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      HTable outputTable = new HTable(new HBaseConfiguration(job), table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
  }
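
  /*
   * Example (not part of the original class): a sketch of pairing
   * initTableMapJob with initTableReduceJob so that reducer output lands in
   * an HBase table, partitioned by region via HRegionPartitioner. The table
   * names and the MyDriver/MyTableMap/MyTableReduce classes are hypothetical;
   * the mapper is assumed to emit ImmutableBytesWritable keys and Put values.
   * Passing HRegionPartitioner.class also caps the reduce task count at the
   * region count of "target_table", as implemented above.
   *
   *   JobConf job = new JobConf(new HBaseConfiguration(), MyDriver.class);
   *   TableMapReduceUtil.initTableMapJob("source_table", "info:",
   *     MyTableMap.class, ImmutableBytesWritable.class, Put.class, job);
   *   TableMapReduceUtil.initTableReduceJob("target_table",
   *     MyTableReduce.class, job, HRegionPartitioner.class);
   *   JobClient.runJob(job);
   */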

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Ensures that the given number of map tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumMapTasks() > regions)
      job.setNumMapTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of map tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumMapTasks(regions);
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job  The current job configuration to adjust.
   * @param batchSize  The number of rows to return in batch with each scanner
   *   iteration.
   */
  public static void setScannerCaching(JobConf job, int batchSize) {
    job.setInt("hbase.client.scanner.caching", batchSize);
  }
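
  /*
   * Example (not part of the original class): a sketch of raising the scanner
   * caching for a scan-heavy job before submitting it. The value 500 is an
   * arbitrary illustration; larger values mean fewer round trips to the
   * region servers per map task at the cost of more client-side heap.
   *
   *   JobConf job = new JobConf(new HBaseConfiguration(), MyDriver.class);
   *   TableMapReduceUtil.setScannerCaching(job, 500);
   */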
}