1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapreduce;
21
22 import java.io.IOException;
23 import java.io.InterruptedIOException;
24 import java.net.InetAddress;
25 import java.util.ArrayList;
26 import java.util.HashMap;
27 import java.util.List;
28
29 import javax.naming.NamingException;
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.hadoop.hbase.HConstants;
34 import org.apache.hadoop.hbase.HRegionLocation;
35 import org.apache.hadoop.hbase.HServerAddress;
36 import org.apache.hadoop.hbase.client.HTable;
37 import org.apache.hadoop.hbase.client.Result;
38 import org.apache.hadoop.hbase.client.Scan;
39 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
40 import org.apache.hadoop.hbase.util.Addressing;
41 import org.apache.hadoop.hbase.util.Bytes;
42 import org.apache.hadoop.hbase.util.Pair;
43 import org.apache.hadoop.hbase.util.Strings;
44 import org.apache.hadoop.mapreduce.InputFormat;
45 import org.apache.hadoop.mapreduce.InputSplit;
46 import org.apache.hadoop.mapreduce.JobContext;
47 import org.apache.hadoop.mapreduce.RecordReader;
48 import org.apache.hadoop.mapreduce.TaskAttemptContext;
49 import org.apache.hadoop.net.DNS;
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79 public abstract class TableInputFormatBase
80 extends InputFormat<ImmutableBytesWritable, Result> {
81
82 final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
83
84
85 private Scan scan = null;
86
87 private HTable table = null;
88
89 private TableRecordReader tableRecordReader = null;
90
91
92
93 private HashMap<InetAddress, String> reverseDNSCacheMap =
94 new HashMap<InetAddress, String>();
95
96
97 private String nameServer = null;
98
99
100
101
102
103
104
105
106
107
108
109
110
111 @Override
112 public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
113 InputSplit split, TaskAttemptContext context)
114 throws IOException {
115 if (table == null) {
116 throw new IOException("Cannot create a record reader because of a" +
117 " previous error. Please look at the previous logs lines from" +
118 " the task's full log for more details.");
119 }
120 TableSplit tSplit = (TableSplit) split;
121 TableRecordReader trr = this.tableRecordReader;
122
123 if (trr == null) {
124 trr = new TableRecordReader();
125 }
126 Scan sc = new Scan(this.scan);
127 sc.setStartRow(tSplit.getStartRow());
128 sc.setStopRow(tSplit.getEndRow());
129 trr.setScan(sc);
130 trr.setHTable(table);
131 try {
132 trr.initialize(tSplit, context);
133 } catch (InterruptedException e) {
134 throw new InterruptedIOException(e.getMessage());
135 }
136 return trr;
137 }
138
139
140
141
142
143
144
145
146
147
148
149 @Override
150 public List<InputSplit> getSplits(JobContext context) throws IOException {
151 if (table == null) {
152 throw new IOException("No table was provided.");
153 }
154
155 this.nameServer =
156 context.getConfiguration().get("hbase.nameserver.address", null);
157
158 Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
159 if (keys == null || keys.getFirst() == null ||
160 keys.getFirst().length == 0) {
161 HRegionLocation regLoc = table.getRegionLocation(
162 HConstants.EMPTY_BYTE_ARRAY, false);
163 if (null == regLoc) {
164 throw new IOException("Expecting at least one region.");
165 }
166 List<InputSplit> splits = new ArrayList<InputSplit>(1);
167 InputSplit split = new TableSplit(table.getTableName(),
168 HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, regLoc
169 .getHostnamePort().split(Addressing.HOSTNAME_PORT_SEPARATOR)[0]);
170 splits.add(split);
171 return splits;
172 }
173 List<InputSplit> splits = new ArrayList<InputSplit>(keys.getFirst().length);
174 for (int i = 0; i < keys.getFirst().length; i++) {
175 if ( !includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
176 continue;
177 }
178 HServerAddress regionServerAddress =
179 table.getRegionLocation(keys.getFirst()[i]).getServerAddress();
180 InetAddress regionAddress =
181 regionServerAddress.getInetSocketAddress().getAddress();
182 String regionLocation;
183 try {
184 regionLocation = reverseDNS(regionAddress);
185 } catch (NamingException e) {
186 LOG.error("Cannot resolve the host name for " + regionAddress +
187 " because of " + e);
188 regionLocation = regionServerAddress.getHostname();
189 }
190
191 byte[] startRow = scan.getStartRow();
192 byte[] stopRow = scan.getStopRow();
193
194 if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
195 Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
196 (stopRow.length == 0 ||
197 Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
198 byte[] splitStart = startRow.length == 0 ||
199 Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
200 keys.getFirst()[i] : startRow;
201 byte[] splitStop = (stopRow.length == 0 ||
202 Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
203 keys.getSecond()[i].length > 0 ?
204 keys.getSecond()[i] : stopRow;
205 InputSplit split = new TableSplit(table.getTableName(),
206 splitStart, splitStop, regionLocation);
207 splits.add(split);
208 if (LOG.isDebugEnabled()) {
209 LOG.debug("getSplits: split -> " + i + " -> " + split);
210 }
211 }
212 }
213 return splits;
214 }
215
216 private String reverseDNS(InetAddress ipAddress) throws NamingException {
217 String hostName = this.reverseDNSCacheMap.get(ipAddress);
218 if (hostName == null) {
219 hostName = Strings.domainNamePointerToHostName(DNS.reverseDns(ipAddress, this.nameServer));
220 this.reverseDNSCacheMap.put(ipAddress, hostName);
221 }
222 return hostName;
223 }
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247 protected boolean includeRegionInSplit(final byte[] startKey, final byte [] endKey) {
248 return true;
249 }
250
251
252
253
254 protected HTable getHTable() {
255 return this.table;
256 }
257
258
259
260
261
262
263 protected void setHTable(HTable table) {
264 this.table = table;
265 }
266
267
268
269
270
271
272 public Scan getScan() {
273 if (this.scan == null) this.scan = new Scan();
274 return scan;
275 }
276
277
278
279
280
281
282 public void setScan(Scan scan) {
283 this.scan = scan;
284 }
285
286
287
288
289
290
291
292 protected void setTableRecordReader(TableRecordReader tableRecordReader) {
293 this.tableRecordReader = tableRecordReader;
294 }
295 }