1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import java.io.Closeable;
22 import java.io.IOException;
23 import java.net.InetAddress;
24 import java.net.InetSocketAddress;
25 import java.net.UnknownHostException;
26 import java.util.ArrayList;
27 import java.util.HashMap;
28 import java.util.List;
29
30 import javax.naming.NamingException;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.hadoop.hbase.classification.InterfaceAudience;
35 import org.apache.hadoop.hbase.classification.InterfaceStability;
36 import org.apache.hadoop.hbase.HConstants;
37 import org.apache.hadoop.hbase.HRegionLocation;
38 import org.apache.hadoop.hbase.TableName;
39 import org.apache.hadoop.hbase.client.Admin;
40 import org.apache.hadoop.hbase.client.Connection;
41 import org.apache.hadoop.hbase.client.ConnectionFactory;
42 import org.apache.hadoop.hbase.client.HTable;
43 import org.apache.hadoop.hbase.client.NeedUnmanagedConnectionException;
44 import org.apache.hadoop.hbase.client.RegionLocator;
45 import org.apache.hadoop.hbase.client.Result;
46 import org.apache.hadoop.hbase.client.Scan;
47 import org.apache.hadoop.hbase.client.Table;
48 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
49 import org.apache.hadoop.hbase.util.Addressing;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.apache.hadoop.hbase.util.Pair;
52 import org.apache.hadoop.hbase.util.RegionSizeCalculator;
53 import org.apache.hadoop.hbase.util.Strings;
54 import org.apache.hadoop.mapreduce.InputFormat;
55 import org.apache.hadoop.mapreduce.InputSplit;
56 import org.apache.hadoop.mapreduce.JobContext;
57 import org.apache.hadoop.mapreduce.RecordReader;
58 import org.apache.hadoop.mapreduce.TaskAttemptContext;
59 import org.apache.hadoop.net.DNS;
60 import org.apache.hadoop.util.StringUtils;
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102 @InterfaceAudience.Public
103 @InterfaceStability.Stable
104 public abstract class TableInputFormatBase
105 extends InputFormat<ImmutableBytesWritable, Result> {
106
107 final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
108
109 private static final String NOT_INITIALIZED = "The input format instance has not been properly " +
110 "initialized. Ensure you call initializeTable either in your constructor or initialize " +
111 "method";
112 private static final String INITIALIZATION_ERROR = "Cannot create a record reader because of a" +
113 " previous error. Please look at the previous logs lines from" +
114 " the task's full log for more details.";
115
116
117
118
119 private Scan scan = null;
120
121 private Admin admin;
122
123 private Table table;
124
125 private RegionLocator regionLocator;
126
127 private TableRecordReader tableRecordReader = null;
128
129 private Connection connection;
130
131
132
133 private HashMap<InetAddress, String> reverseDNSCacheMap =
134 new HashMap<InetAddress, String>();
135
136
137
138
139
140
141
142
143
144
145
146
147
148 @Override
149 public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
150 InputSplit split, TaskAttemptContext context)
151 throws IOException {
152
153 if (table == null) {
154 initialize(context);
155 }
156
157 try {
158 if (getTable() == null) {
159
160 throw new IOException(INITIALIZATION_ERROR);
161 }
162 } catch (IllegalStateException exception) {
163 throw new IOException(INITIALIZATION_ERROR, exception);
164 }
165 TableSplit tSplit = (TableSplit) split;
166 LOG.info("Input split length: " + StringUtils.humanReadableInt(tSplit.getLength()) + " bytes.");
167 final TableRecordReader trr =
168 this.tableRecordReader != null ? this.tableRecordReader : new TableRecordReader();
169 Scan sc = new Scan(this.scan);
170 sc.setStartRow(tSplit.getStartRow());
171 sc.setStopRow(tSplit.getEndRow());
172 trr.setScan(sc);
173 trr.setTable(getTable());
174 return new RecordReader<ImmutableBytesWritable, Result>() {
175
176 @Override
177 public void close() throws IOException {
178 trr.close();
179 closeTable();
180 }
181
182 @Override
183 public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
184 return trr.getCurrentKey();
185 }
186
187 @Override
188 public Result getCurrentValue() throws IOException, InterruptedException {
189 return trr.getCurrentValue();
190 }
191
192 @Override
193 public float getProgress() throws IOException, InterruptedException {
194 return trr.getProgress();
195 }
196
197 @Override
198 public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException,
199 InterruptedException {
200 trr.initialize(inputsplit, context);
201 }
202
203 @Override
204 public boolean nextKeyValue() throws IOException, InterruptedException {
205 return trr.nextKeyValue();
206 }
207 };
208 }
209
210 protected Pair<byte[][],byte[][]> getStartEndKeys() throws IOException {
211 return getRegionLocator().getStartEndKeys();
212 }
213
214
215
216
217
218
219
220
221
222
223
/**
 * Computes the input splits for the job: one split per region that is accepted by
 * {@link #includeRegionInSplit(byte[], byte[])} and overlaps the row range of the scan.
 * When the region locator reports no start keys, a single split spanning the whole table
 * is returned instead.
 *
 * <p>If no table has been set yet, {@link #initialize(JobContext)} is invoked and the
 * resources it set up are released again before this method returns.
 *
 * @param context the current job context, forwarded to {@link #initialize(JobContext)}
 * @return the list of input splits
 * @throws IOException if no table is available, no region can be located, or a lookup fails
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  boolean closeOnFinish = false;

  // No table yet: let the subclass hook set one up. Since this call triggered the setup,
  // this call is also responsible for tearing it down.
  if (table == null) {
    initialize(context);
    closeOnFinish = true;
  }

  try {
    // getTable() throws when nothing is set; the explicit null test covers overrides that
    // return null instead of throwing. Done inside the try so that a failure still releases
    // whatever initialize() managed to set up.
    try {
      if (getTable() == null) {
        throw new IOException(INITIALIZATION_ERROR);
      }
    } catch (IllegalStateException exception) {
      throw new IOException(INITIALIZATION_ERROR, exception);
    }

    RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, admin);

    Pair<byte[][], byte[][]> keys = getStartEndKeys();
    if (keys == null || keys.getFirst() == null ||
        keys.getFirst().length == 0) {
      // No region boundaries reported: emit one split covering the whole key space.
      HRegionLocation regLoc = regionLocator.getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false);
      if (null == regLoc) {
        throw new IOException("Expecting at least one region.");
      }
      List<InputSplit> splits = new ArrayList<InputSplit>(1);
      long regionSize = sizeCalculator.getRegionSize(regLoc.getRegionInfo().getRegionName());
      TableSplit split = new TableSplit(table.getName(),
          HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, regLoc
              .getHostnamePort().split(Addressing.HOSTNAME_PORT_SEPARATOR)[0], regionSize);
      splits.add(split);
      return splits;
    }

    byte[][] regionStartKeys = keys.getFirst();
    byte[][] regionEndKeys = keys.getSecond();
    List<InputSplit> splits = new ArrayList<InputSplit>(regionStartKeys.length);
    for (int i = 0; i < regionStartKeys.length; i++) {
      byte[] regionStart = regionStartKeys[i];
      byte[] regionEnd = regionEndKeys[i];
      if (!includeRegionInSplit(regionStart, regionEnd)) {
        continue;
      }

      byte[] startRow = scan.getStartRow();
      byte[] stopRow = scan.getStopRow();

      // An empty key means "unbounded" on that side. The region is relevant only if the scan
      // starts before the region ends and stops after the region starts.
      boolean scanStartsBeforeRegionEnd = startRow.length == 0 || regionEnd.length == 0 ||
          Bytes.compareTo(startRow, regionEnd) < 0;
      boolean scanStopsAfterRegionStart = stopRow.length == 0 ||
          Bytes.compareTo(stopRow, regionStart) > 0;
      if (!(scanStartsBeforeRegionEnd && scanStopsAfterRegionStart)) {
        continue;
      }

      // Clip the split to the intersection of the region and the scan range.
      byte[] splitStart = startRow.length == 0 ||
          Bytes.compareTo(regionStart, startRow) >= 0 ?
              regionStart : startRow;
      byte[] splitStop = (stopRow.length == 0 ||
          Bytes.compareTo(regionEnd, stopRow) <= 0) &&
          regionEnd.length > 0 ?
              regionEnd : stopRow;

      // Location and DNS lookups are only paid for regions that actually produce a split.
      HRegionLocation location = regionLocator.getRegionLocation(regionStart, false);
      String regionLocation = resolveRegionHost(location);

      byte[] regionName = location.getRegionInfo().getRegionName();
      long regionSize = sizeCalculator.getRegionSize(regionName);
      TableSplit split = new TableSplit(table.getName(),
          splitStart, splitStop, regionLocation, regionSize);
      splits.add(split);
      if (LOG.isDebugEnabled()) {
        LOG.debug("getSplits: split -> " + i + " -> " + split);
      }
    }
    return splits;
  } finally {
    if (closeOnFinish) {
      closeTable();
    }
  }
}

/**
 * Determines the host name to record in a split for the server hosting a region. Falls back
 * to the host name carried by the region location when the address cannot be resolved or
 * the reverse lookup reports a naming failure.
 */
private String resolveRegionHost(HRegionLocation location) throws UnknownHostException {
  // Constructing the socket address attempts to resolve the host name.
  InetSocketAddress isa = new InetSocketAddress(location.getHostname(), location.getPort());
  if (isa.isUnresolved()) {
    LOG.warn("Failed resolve " + isa);
    // getAddress() is null for an unresolved address, so a reverse lookup is impossible.
    return location.getHostname();
  }
  InetAddress regionAddress = isa.getAddress();
  try {
    return reverseDNS(regionAddress);
  } catch (NamingException e) {
    LOG.warn("Cannot resolve the host name for " + regionAddress + " because of " + e);
    return location.getHostname();
  }
}
314
315
316
317
318 @Deprecated
319 public String reverseDNS(InetAddress ipAddress) throws NamingException, UnknownHostException {
320 String hostName = this.reverseDNSCacheMap.get(ipAddress);
321 if (hostName == null) {
322 String ipAddressString = null;
323 try {
324 ipAddressString = DNS.reverseDns(ipAddress, null);
325 } catch (Exception e) {
326
327
328
329 ipAddressString = InetAddress.getByName(ipAddress.getHostAddress()).getHostName();
330 }
331 if (ipAddressString == null) throw new UnknownHostException("No host found for " + ipAddress);
332 hostName = Strings.domainNamePointerToHostName(ipAddressString);
333 this.reverseDNSCacheMap.put(ipAddress, hostName);
334 }
335 return hostName;
336 }
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360 protected boolean includeRegionInSplit(final byte[] startKey, final byte [] endKey) {
361 return true;
362 }
363
364
365
366
367
368
369 @Deprecated
370 protected HTable getHTable() {
371 return (HTable) this.getTable();
372 }
373
374
375
376
377 protected RegionLocator getRegionLocator() {
378 if (regionLocator == null) {
379 throw new IllegalStateException(NOT_INITIALIZED);
380 }
381 return regionLocator;
382 }
383
384
385
386
387 protected Table getTable() {
388 if (table == null) {
389 throw new IllegalStateException(NOT_INITIALIZED);
390 }
391 return table;
392 }
393
394
395
396
397 protected Admin getAdmin() {
398 if (admin == null) {
399 throw new IllegalStateException(NOT_INITIALIZED);
400 }
401 return admin;
402 }
403
404
405
406
407
408
409
410
411
412
413
414 @Deprecated
415 protected void setHTable(HTable table) throws IOException {
416 this.table = table;
417 this.connection = table.getConnection();
418 try {
419 this.regionLocator = table;
420 this.admin = this.connection.getAdmin();
421 } catch (NeedUnmanagedConnectionException exception) {
422 LOG.warn("You are using an HTable instance that relies on an HBase-managed Connection. " +
423 "This is usually due to directly creating an HTable, which is deprecated. Instead, you " +
424 "should create a Connection object and then request a Table instance from it. If you " +
425 "don't need the Table instance for your own use, you should instead use the " +
426 "TableInputFormatBase.initalizeTable method directly.");
427 LOG.info("Creating an additional unmanaged connection because user provided one can't be " +
428 "used for administrative actions. We'll close it when we close out the table.");
429 LOG.debug("Details about our failure to request an administrative interface.", exception);
430
431
432 this.connection = ConnectionFactory.createConnection(this.connection.getConfiguration());
433 this.regionLocator = this.connection.getRegionLocator(table.getName());
434 this.admin = this.connection.getAdmin();
435 }
436 }
437
438
439
440
441
442
443
444
445 protected void initializeTable(Connection connection, TableName tableName) throws IOException {
446 if (table != null || connection != null) {
447 LOG.warn("initializeTable called multiple times. Overwriting connection and table " +
448 "reference; TableInputFormatBase will not close these old references when done.");
449 }
450 this.table = connection.getTable(tableName);
451 this.regionLocator = connection.getRegionLocator(tableName);
452 this.admin = connection.getAdmin();
453 this.connection = connection;
454 }
455
456
457
458
459
460
461 public Scan getScan() {
462 if (this.scan == null) this.scan = new Scan();
463 return scan;
464 }
465
466
467
468
469
470
471 public void setScan(Scan scan) {
472 this.scan = scan;
473 }
474
475
476
477
478
479
480
481 protected void setTableRecordReader(TableRecordReader tableRecordReader) {
482 this.tableRecordReader = tableRecordReader;
483 }
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500 protected void initialize(JobContext context) throws IOException {
501 }
502
503
504
505
506
507
508
509 protected void closeTable() throws IOException {
510 close(admin, table, regionLocator, connection);
511 admin = null;
512 table = null;
513 regionLocator = null;
514 connection = null;
515 }
516
517 private void close(Closeable... closables) throws IOException {
518 for (Closeable c : closables) {
519 if(c != null) { c.close(); }
520 }
521 }
522
523 }