1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.List;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.classification.InterfaceAudience;
27 import org.apache.hadoop.classification.InterfaceStability;
28 import org.apache.hadoop.hbase.client.HTable;
29 import org.apache.hadoop.hbase.client.Result;
30 import org.apache.hadoop.hbase.client.Scan;
31 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
32 import org.apache.hadoop.hbase.util.Bytes;
33 import org.apache.hadoop.hbase.util.Pair;
34 import org.apache.hadoop.mapreduce.InputFormat;
35 import org.apache.hadoop.mapreduce.InputSplit;
36 import org.apache.hadoop.mapreduce.JobContext;
37 import org.apache.hadoop.mapreduce.RecordReader;
38 import org.apache.hadoop.mapreduce.TaskAttemptContext;
39
40
41
42
43
44
45 @InterfaceAudience.Public
46 @InterfaceStability.Evolving
47 public abstract class MultiTableInputFormatBase extends
48 InputFormat<ImmutableBytesWritable, Result> {
49
50 final Log LOG = LogFactory.getLog(MultiTableInputFormatBase.class);
51
52
53 private List<Scan> scans;
54
55
56 private TableRecordReader tableRecordReader = null;
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71 @Override
72 public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
73 InputSplit split, TaskAttemptContext context)
74 throws IOException, InterruptedException {
75 TableSplit tSplit = (TableSplit) split;
76
77 if (tSplit.getTableName() == null) {
78 throw new IOException("Cannot create a record reader because of a"
79 + " previous error. Please look at the previous logs lines from"
80 + " the task's full log for more details.");
81 }
82 HTable table =
83 new HTable(context.getConfiguration(), tSplit.getTableName());
84
85 TableRecordReader trr = this.tableRecordReader;
86
87 if (trr == null) {
88 trr = new TableRecordReader();
89 }
90 Scan sc = tSplit.getScan();
91 sc.setStartRow(tSplit.getStartRow());
92 sc.setStopRow(tSplit.getEndRow());
93 trr.setScan(sc);
94 trr.setHTable(table);
95 return trr;
96 }
97
98
99
100
101
102
103
104
105
106
107 @Override
108 public List<InputSplit> getSplits(JobContext context) throws IOException {
109 if (scans.isEmpty()) {
110 throw new IOException("No scans were provided.");
111 }
112 List<InputSplit> splits = new ArrayList<InputSplit>();
113
114 for (Scan scan : scans) {
115 byte[] tableName = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
116 if (tableName == null)
117 throw new IOException("A scan object did not have a table name");
118
119 HTable table = null;
120 try {
121 table = new HTable(context.getConfiguration(), tableName);
122 Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
123 if (keys == null || keys.getFirst() == null ||
124 keys.getFirst().length == 0) {
125 throw new IOException("Expecting at least one region for table : "
126 + Bytes.toString(tableName));
127 }
128 int count = 0;
129
130 byte[] startRow = scan.getStartRow();
131 byte[] stopRow = scan.getStopRow();
132
133 for (int i = 0; i < keys.getFirst().length; i++) {
134 if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
135 continue;
136 }
137 String regionLocation =
138 table.getRegionLocation(keys.getFirst()[i], false).getHostname();
139
140
141 if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
142 Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
143 (stopRow.length == 0 ||
144 Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
145 byte[] splitStart =
146 startRow.length == 0 ||
147 Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ? keys
148 .getFirst()[i] : startRow;
149 byte[] splitStop =
150 (stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i],
151 stopRow) <= 0) && keys.getSecond()[i].length > 0 ? keys
152 .getSecond()[i] : stopRow;
153 InputSplit split =
154 new TableSplit(table.getName(),
155 scan, splitStart, splitStop, regionLocation);
156 splits.add(split);
157 if (LOG.isDebugEnabled())
158 LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
159 }
160 }
161 } finally {
162 if (null != table) table.close();
163 }
164 }
165 return splits;
166 }
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190 protected boolean includeRegionInSplit(final byte[] startKey,
191 final byte[] endKey) {
192 return true;
193 }
194
195
196
197
198 protected List<Scan> getScans() {
199 return this.scans;
200 }
201
202
203
204
205
206
207 protected void setScans(List<Scan> scans) {
208 this.scans = scans;
209 }
210
211
212
213
214
215
216
217 protected void setTableRecordReader(TableRecordReader tableRecordReader) {
218 this.tableRecordReader = tableRecordReader;
219 }
220 }