1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import java.io.IOException;
22 import java.io.InterruptedIOException;
23 import java.net.InetAddress;
24 import java.net.InetSocketAddress;
25 import java.util.ArrayList;
26 import java.util.HashMap;
27 import java.util.List;
28
29 import javax.naming.NamingException;
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.hadoop.classification.InterfaceAudience;
34 import org.apache.hadoop.classification.InterfaceStability;
35 import org.apache.hadoop.hbase.HConstants;
36 import org.apache.hadoop.hbase.HRegionLocation;
37 import org.apache.hadoop.hbase.client.HTable;
38 import org.apache.hadoop.hbase.client.Result;
39 import org.apache.hadoop.hbase.client.Scan;
40 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
41 import org.apache.hadoop.hbase.util.Addressing;
42 import org.apache.hadoop.hbase.util.Bytes;
43 import org.apache.hadoop.hbase.util.Pair;
44 import org.apache.hadoop.hbase.util.Strings;
45 import org.apache.hadoop.mapreduce.InputFormat;
46 import org.apache.hadoop.mapreduce.InputSplit;
47 import org.apache.hadoop.mapreduce.JobContext;
48 import org.apache.hadoop.mapreduce.RecordReader;
49 import org.apache.hadoop.mapreduce.TaskAttemptContext;
50 import org.apache.hadoop.net.DNS;
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80 @InterfaceAudience.Public
81 @InterfaceStability.Stable
82 public abstract class TableInputFormatBase
83 extends InputFormat<ImmutableBytesWritable, Result> {
84
85 final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
86
87
88 private Scan scan = null;
89
90 private HTable table = null;
91
92 private TableRecordReader tableRecordReader = null;
93
94
95
96 private HashMap<InetAddress, String> reverseDNSCacheMap =
97 new HashMap<InetAddress, String>();
98
99
100 private String nameServer = null;
101
102
103
104
105
106
107
108
109
110
111
112
113
114 @Override
115 public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
116 InputSplit split, TaskAttemptContext context)
117 throws IOException {
118 if (table == null) {
119 throw new IOException("Cannot create a record reader because of a" +
120 " previous error. Please look at the previous logs lines from" +
121 " the task's full log for more details.");
122 }
123 TableSplit tSplit = (TableSplit) split;
124 TableRecordReader trr = this.tableRecordReader;
125
126 if (trr == null) {
127 trr = new TableRecordReader();
128 }
129 Scan sc = new Scan(this.scan);
130 sc.setStartRow(tSplit.getStartRow());
131 sc.setStopRow(tSplit.getEndRow());
132 trr.setScan(sc);
133 trr.setHTable(table);
134 return trr;
135 }
136
137
138
139
140
141
142
143
144
145
146
147 @Override
148 public List<InputSplit> getSplits(JobContext context) throws IOException {
149 if (table == null) {
150 throw new IOException("No table was provided.");
151 }
152
153 this.nameServer =
154 context.getConfiguration().get("hbase.nameserver.address", null);
155
156 Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
157 if (keys == null || keys.getFirst() == null ||
158 keys.getFirst().length == 0) {
159 HRegionLocation regLoc = table.getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false);
160 if (null == regLoc) {
161 throw new IOException("Expecting at least one region.");
162 }
163 List<InputSplit> splits = new ArrayList<InputSplit>(1);
164 InputSplit split = new TableSplit(table.getName(),
165 HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, regLoc
166 .getHostnamePort().split(Addressing.HOSTNAME_PORT_SEPARATOR)[0]);
167 splits.add(split);
168 return splits;
169 }
170 List<InputSplit> splits = new ArrayList<InputSplit>(keys.getFirst().length);
171 for (int i = 0; i < keys.getFirst().length; i++) {
172 if ( !includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
173 continue;
174 }
175 HRegionLocation location = table.getRegionLocation(keys.getFirst()[i], false);
176
177 InetSocketAddress isa = new InetSocketAddress(location.getHostname(), location.getPort());
178 if (isa.isUnresolved()) {
179 LOG.warn("Failed resolve " + isa);
180 }
181 InetAddress regionAddress = isa.getAddress();
182 String regionLocation;
183 try {
184 regionLocation = reverseDNS(regionAddress);
185 } catch (NamingException e) {
186 LOG.error("Cannot resolve the host name for " + regionAddress + " because of " + e);
187 regionLocation = location.getHostname();
188 }
189
190 byte[] startRow = scan.getStartRow();
191 byte[] stopRow = scan.getStopRow();
192
193 if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
194 Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
195 (stopRow.length == 0 ||
196 Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
197 byte[] splitStart = startRow.length == 0 ||
198 Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
199 keys.getFirst()[i] : startRow;
200 byte[] splitStop = (stopRow.length == 0 ||
201 Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
202 keys.getSecond()[i].length > 0 ?
203 keys.getSecond()[i] : stopRow;
204 InputSplit split = new TableSplit(table.getName(),
205 splitStart, splitStop, regionLocation);
206 splits.add(split);
207 if (LOG.isDebugEnabled()) {
208 LOG.debug("getSplits: split -> " + i + " -> " + split);
209 }
210 }
211 }
212 return splits;
213 }
214
215 private String reverseDNS(InetAddress ipAddress) throws NamingException {
216 String hostName = this.reverseDNSCacheMap.get(ipAddress);
217 if (hostName == null) {
218 hostName = Strings.domainNamePointerToHostName(
219 DNS.reverseDns(ipAddress, this.nameServer));
220 this.reverseDNSCacheMap.put(ipAddress, hostName);
221 }
222 return hostName;
223 }
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247 protected boolean includeRegionInSplit(final byte[] startKey, final byte [] endKey) {
248 return true;
249 }
250
251
252
253
254 protected HTable getHTable() {
255 return this.table;
256 }
257
258
259
260
261
262
263 protected void setHTable(HTable table) {
264 this.table = table;
265 }
266
267
268
269
270
271
272 public Scan getScan() {
273 if (this.scan == null) this.scan = new Scan();
274 return scan;
275 }
276
277
278
279
280
281
282 public void setScan(Scan scan) {
283 this.scan = scan;
284 }
285
286
287
288
289
290
291
292 protected void setTableRecordReader(TableRecordReader tableRecordReader) {
293 this.tableRecordReader = tableRecordReader;
294 }
295 }