1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.List;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.classification.InterfaceAudience;
27 import org.apache.hadoop.classification.InterfaceStability;
28 import org.apache.hadoop.hbase.client.HTable;
29 import org.apache.hadoop.hbase.client.Result;
30 import org.apache.hadoop.hbase.client.Scan;
31 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
32 import org.apache.hadoop.hbase.util.Bytes;
33 import org.apache.hadoop.hbase.util.Pair;
34 import org.apache.hadoop.mapreduce.InputFormat;
35 import org.apache.hadoop.mapreduce.InputSplit;
36 import org.apache.hadoop.mapreduce.JobContext;
37 import org.apache.hadoop.mapreduce.RecordReader;
38 import org.apache.hadoop.mapreduce.TaskAttemptContext;
39
40
41
42
43
44
45 @InterfaceAudience.Public
46 @InterfaceStability.Evolving
47 public abstract class MultiTableInputFormatBase extends
48 InputFormat<ImmutableBytesWritable, Result> {
49
50 final Log LOG = LogFactory.getLog(MultiTableInputFormatBase.class);
51
52
53 private List<Scan> scans;
54
55
56 private TableRecordReader tableRecordReader = null;
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71 @Override
72 public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
73 InputSplit split, TaskAttemptContext context)
74 throws IOException, InterruptedException {
75 TableSplit tSplit = (TableSplit) split;
76
77 if (tSplit.getTableName() == null) {
78 throw new IOException("Cannot create a record reader because of a"
79 + " previous error. Please look at the previous logs lines from"
80 + " the task's full log for more details.");
81 }
82 HTable table =
83 new HTable(context.getConfiguration(), tSplit.getTableName());
84
85 TableRecordReader trr = this.tableRecordReader;
86
87 try {
88
89 if (trr == null) {
90 trr = new TableRecordReader();
91 }
92 Scan sc = tSplit.getScan();
93 sc.setStartRow(tSplit.getStartRow());
94 sc.setStopRow(tSplit.getEndRow());
95 trr.setScan(sc);
96 trr.setHTable(table);
97 } catch (IOException ioe) {
98
99
100 table.close();
101 trr.close();
102 throw ioe;
103 }
104 return trr;
105 }
106
107
108
109
110
111
112
113
114
115
116 @Override
117 public List<InputSplit> getSplits(JobContext context) throws IOException {
118 if (scans.isEmpty()) {
119 throw new IOException("No scans were provided.");
120 }
121 List<InputSplit> splits = new ArrayList<InputSplit>();
122
123 for (Scan scan : scans) {
124 byte[] tableName = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
125 if (tableName == null)
126 throw new IOException("A scan object did not have a table name");
127
128 HTable table = null;
129 try {
130 table = new HTable(context.getConfiguration(), tableName);
131 Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
132 if (keys == null || keys.getFirst() == null ||
133 keys.getFirst().length == 0) {
134 throw new IOException("Expecting at least one region for table : "
135 + Bytes.toString(tableName));
136 }
137 int count = 0;
138
139 byte[] startRow = scan.getStartRow();
140 byte[] stopRow = scan.getStopRow();
141
142 for (int i = 0; i < keys.getFirst().length; i++) {
143 if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
144 continue;
145 }
146 String regionLocation =
147 table.getRegionLocation(keys.getFirst()[i], false).getHostname();
148
149
150 if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
151 Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
152 (stopRow.length == 0 ||
153 Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
154 byte[] splitStart =
155 startRow.length == 0 ||
156 Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ? keys
157 .getFirst()[i] : startRow;
158 byte[] splitStop =
159 (stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i],
160 stopRow) <= 0) && keys.getSecond()[i].length > 0 ? keys
161 .getSecond()[i] : stopRow;
162 InputSplit split =
163 new TableSplit(table.getName(),
164 scan, splitStart, splitStop, regionLocation);
165 splits.add(split);
166 if (LOG.isDebugEnabled())
167 LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
168 }
169 }
170 } finally {
171 if (null != table) table.close();
172 }
173 }
174 return splits;
175 }
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199 protected boolean includeRegionInSplit(final byte[] startKey,
200 final byte[] endKey) {
201 return true;
202 }
203
204
205
206
207 protected List<Scan> getScans() {
208 return this.scans;
209 }
210
211
212
213
214
215
216 protected void setScans(List<Scan> scans) {
217 this.scans = scans;
218 }
219
220
221
222
223
224
225
226 protected void setTableRecordReader(TableRecordReader tableRecordReader) {
227 this.tableRecordReader = tableRecordReader;
228 }
229 }