1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapreduce;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.List;
25
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.hadoop.hbase.client.HTable;
29 import org.apache.hadoop.hbase.client.Result;
30 import org.apache.hadoop.hbase.client.ResultScanner;
31 import org.apache.hadoop.hbase.client.Scan;
32 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
33 import org.apache.hadoop.hbase.util.Bytes;
34 import org.apache.hadoop.hbase.util.Pair;
35 import org.apache.hadoop.mapreduce.InputFormat;
36 import org.apache.hadoop.mapreduce.InputSplit;
37 import org.apache.hadoop.mapreduce.JobContext;
38 import org.apache.hadoop.mapreduce.RecordReader;
39 import org.apache.hadoop.mapreduce.TaskAttemptContext;
40 import org.apache.hadoop.util.StringUtils;
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70 public abstract class TableInputFormatBase
71 extends InputFormat<ImmutableBytesWritable, Result> {
72
73 final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
74
75
76 private Scan scan = null;
77
78 private HTable table = null;
79
80 private TableRecordReader tableRecordReader = null;
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95 @Override
96 public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
97 InputSplit split, TaskAttemptContext context)
98 throws IOException {
99 TableSplit tSplit = (TableSplit) split;
100 TableRecordReader trr = this.tableRecordReader;
101
102 if (trr == null) {
103 trr = new TableRecordReader();
104 }
105 Scan sc = new Scan(this.scan);
106 sc.setStartRow(tSplit.getStartRow());
107 sc.setStopRow(tSplit.getEndRow());
108 trr.setScan(sc);
109 trr.setHTable(table);
110 trr.init();
111 return trr;
112 }
113
114
115
116
117
118
119
120
121
122
123
124 @Override
125 public List<InputSplit> getSplits(JobContext context) throws IOException {
126 if (table == null) {
127 throw new IOException("No table was provided.");
128 }
129 Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
130 if (keys == null || keys.getFirst() == null ||
131 keys.getFirst().length == 0) {
132 throw new IOException("Expecting at least one region.");
133 }
134 int count = 0;
135 List<InputSplit> splits = new ArrayList<InputSplit>(keys.getFirst().length);
136 for (int i = 0; i < keys.getFirst().length; i++) {
137 if ( !includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
138 continue;
139 }
140 String regionLocation = table.getRegionLocation(keys.getFirst()[i]).
141 getServerAddress().getHostname();
142 byte[] startRow = scan.getStartRow();
143 byte[] stopRow = scan.getStopRow();
144
145 if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
146 Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
147 (stopRow.length == 0 ||
148 Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
149 byte[] splitStart = startRow.length == 0 ||
150 Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
151 keys.getFirst()[i] : startRow;
152 byte[] splitStop = (stopRow.length == 0 ||
153 Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
154 keys.getSecond()[i].length > 0 ?
155 keys.getSecond()[i] : stopRow;
156 InputSplit split = new TableSplit(table.getTableName(),
157 splitStart, splitStop, regionLocation);
158 splits.add(split);
159 if (LOG.isDebugEnabled())
160 LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
161 }
162 }
163 return splits;
164 }
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188 protected boolean includeRegionInSplit(final byte[] startKey, final byte [] endKey) {
189 return true;
190 }
191
192
193
194
195
196
197 protected HTable getHTable() {
198 return this.table;
199 }
200
201
202
203
204
205
206 protected void setHTable(HTable table) {
207 this.table = table;
208 }
209
210
211
212
213
214
215 public Scan getScan() {
216 if (this.scan == null) this.scan = new Scan();
217 return scan;
218 }
219
220
221
222
223
224
225 public void setScan(Scan scan) {
226 this.scan = scan;
227 }
228
229
230
231
232
233
234
235 protected void setTableRecordReader(TableRecordReader tableRecordReader) {
236 this.tableRecordReader = tableRecordReader;
237 }
238
239 }