1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.text.MessageFormat;
22 import java.util.ArrayList;
23 import java.util.List;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.hadoop.hbase.classification.InterfaceAudience;
28 import org.apache.hadoop.hbase.classification.InterfaceStability;
29 import org.apache.hadoop.hbase.HRegionInfo;
30 import org.apache.hadoop.hbase.HRegionLocation;
31 import org.apache.hadoop.hbase.TableName;
32 import org.apache.hadoop.hbase.TableName;
33 import org.apache.hadoop.hbase.client.Connection;
34 import org.apache.hadoop.hbase.client.ConnectionFactory;
35 import org.apache.hadoop.hbase.client.RegionLocator;
36 import org.apache.hadoop.hbase.client.Result;
37 import org.apache.hadoop.hbase.client.Scan;
38 import org.apache.hadoop.hbase.client.Table;
39 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
40 import org.apache.hadoop.hbase.util.Bytes;
41 import org.apache.hadoop.hbase.util.Pair;
42 import org.apache.hadoop.hbase.util.RegionSizeCalculator;
43 import org.apache.hadoop.mapreduce.InputFormat;
44 import org.apache.hadoop.mapreduce.InputSplit;
45 import org.apache.hadoop.mapreduce.JobContext;
46 import org.apache.hadoop.mapreduce.RecordReader;
47 import org.apache.hadoop.mapreduce.TaskAttemptContext;
48
49
50
51
52
53
54 @InterfaceAudience.Public
55 @InterfaceStability.Evolving
56 public abstract class MultiTableInputFormatBase extends
57 InputFormat<ImmutableBytesWritable, Result> {
58
59 final Log LOG = LogFactory.getLog(MultiTableInputFormatBase.class);
60
61
62 private List<Scan> scans;
63
64
65 private TableRecordReader tableRecordReader = null;
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80 @Override
81 public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
82 InputSplit split, TaskAttemptContext context)
83 throws IOException, InterruptedException {
84 TableSplit tSplit = (TableSplit) split;
85 LOG.info(MessageFormat.format("Input split length: {0} bytes.", tSplit.getLength()));
86
87 if (tSplit.getTable() == null) {
88 throw new IOException("Cannot create a record reader because of a"
89 + " previous error. Please look at the previous logs lines from"
90 + " the task's full log for more details.");
91 }
92 Connection connection = ConnectionFactory.createConnection(context.getConfiguration());
93 Table table = connection.getTable(tSplit.getTable());
94
95 TableRecordReader trr = this.tableRecordReader;
96
97 try {
98
99 if (trr == null) {
100 trr = new TableRecordReader();
101 }
102 Scan sc = tSplit.getScan();
103 sc.setStartRow(tSplit.getStartRow());
104 sc.setStopRow(tSplit.getEndRow());
105 trr.setScan(sc);
106 trr.setTable(table);
107 } catch (IOException ioe) {
108
109
110 connection.close();
111 table.close();
112 trr.close();
113 throw ioe;
114 }
115 return trr;
116 }
117
118
119
120
121
122
123
124
125
126
127 @Override
128 public List<InputSplit> getSplits(JobContext context) throws IOException {
129 if (scans.isEmpty()) {
130 throw new IOException("No scans were provided.");
131 }
132 List<InputSplit> splits = new ArrayList<InputSplit>();
133
134 for (Scan scan : scans) {
135 byte[] tableNameBytes = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
136 if (tableNameBytes == null)
137 throw new IOException("A scan object did not have a table name");
138
139 TableName tableName = TableName.valueOf(tableNameBytes);
140 Table table = null;
141 RegionLocator regionLocator = null;
142 Connection conn = null;
143 try {
144 conn = ConnectionFactory.createConnection(context.getConfiguration());
145 table = conn.getTable(tableName);
146 regionLocator = conn.getRegionLocator(tableName);
147 regionLocator = (RegionLocator) table;
148 Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
149 if (keys == null || keys.getFirst() == null ||
150 keys.getFirst().length == 0) {
151 throw new IOException("Expecting at least one region for table : "
152 + tableName.getNameAsString());
153 }
154 int count = 0;
155
156 byte[] startRow = scan.getStartRow();
157 byte[] stopRow = scan.getStopRow();
158
159 RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(
160 regionLocator, conn.getAdmin());
161
162 for (int i = 0; i < keys.getFirst().length; i++) {
163 if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
164 continue;
165 }
166 HRegionLocation hregionLocation = regionLocator.getRegionLocation(
167 keys.getFirst()[i], false);
168 String regionHostname = hregionLocation.getHostname();
169 HRegionInfo regionInfo = hregionLocation.getRegionInfo();
170
171
172 if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
173 Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
174 (stopRow.length == 0 ||
175 Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
176 byte[] splitStart =
177 startRow.length == 0 ||
178 Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ? keys
179 .getFirst()[i] : startRow;
180 byte[] splitStop =
181 (stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i],
182 stopRow) <= 0) && keys.getSecond()[i].length > 0 ? keys
183 .getSecond()[i] : stopRow;
184
185 long regionSize = sizeCalculator.getRegionSize(regionInfo.getRegionName());
186 TableSplit split =
187 new TableSplit(regionLocator.getName(),
188 scan, splitStart, splitStop, regionHostname, regionSize);
189
190 splits.add(split);
191 if (LOG.isDebugEnabled())
192 LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
193 }
194 }
195 } finally {
196 if (null != table) table.close();
197 if (null != regionLocator) regionLocator.close();
198 if (null != conn) conn.close();
199 }
200 }
201 return splits;
202 }
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226 protected boolean includeRegionInSplit(final byte[] startKey,
227 final byte[] endKey) {
228 return true;
229 }
230
231
232
233
234 protected List<Scan> getScans() {
235 return this.scans;
236 }
237
238
239
240
241
242
243 protected void setScans(List<Scan> scans) {
244 this.scans = scans;
245 }
246
247
248
249
250
251
252
253 protected void setTableRecordReader(TableRecordReader tableRecordReader) {
254 this.tableRecordReader = tableRecordReader;
255 }
256 }