1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.lang.reflect.Method;
22 import java.util.Map;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.classification.InterfaceAudience;
27 import org.apache.hadoop.classification.InterfaceStability;
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.hbase.client.HTable;
30 import org.apache.hadoop.hbase.client.Result;
31 import org.apache.hadoop.hbase.client.ResultScanner;
32 import org.apache.hadoop.hbase.client.Scan;
33 import org.apache.hadoop.hbase.client.ScannerCallable;
34 import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
35 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
36 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
37 import org.apache.hadoop.hbase.util.Bytes;
38 import org.apache.hadoop.mapreduce.Counter;
39 import org.apache.hadoop.mapreduce.InputSplit;
40 import org.apache.hadoop.mapreduce.TaskAttemptContext;
41 import org.apache.hadoop.util.StringUtils;
42
43
44
45
46
47 @InterfaceAudience.Public
48 @InterfaceStability.Stable
49 public class TableRecordReaderImpl {
50 public static final String LOG_PER_ROW_COUNT
51 = "hbase.mapreduce.log.scanner.rowcount";
52
53 static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
54
55
56 private static final String HBASE_COUNTER_GROUP_NAME =
57 "HBase Counters";
58 private ResultScanner scanner = null;
59 private Scan scan = null;
60 private Scan currentScan = null;
61 private HTable htable = null;
62 private byte[] lastSuccessfulRow = null;
63 private ImmutableBytesWritable key = null;
64 private Result value = null;
65 private TaskAttemptContext context = null;
66 private Method getCounter = null;
67 private long numRestarts = 0;
68 private long timestamp;
69 private int rowcount;
70 private boolean logScannerActivity = false;
71 private int logPerRowCount = 100;
72
73
74
75
76
77
78
79 public void restart(byte[] firstRow) throws IOException {
80 currentScan = new Scan(scan);
81 currentScan.setStartRow(firstRow);
82 currentScan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE,
83 Bytes.toBytes(Boolean.TRUE));
84 this.scanner = this.htable.getScanner(currentScan);
85 if (logScannerActivity) {
86 LOG.info("Current scan=" + currentScan.toString());
87 timestamp = System.currentTimeMillis();
88 rowcount = 0;
89 }
90 }
91
92
93
94
95
96
97
98 protected static Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
99 throws IOException {
100 Method m = null;
101 try {
102 m = context.getClass().getMethod("getCounter",
103 new Class [] {String.class, String.class});
104 } catch (SecurityException e) {
105 throw new IOException("Failed test for getCounter", e);
106 } catch (NoSuchMethodException e) {
107
108 }
109 return m;
110 }
111
112
113
114
115
116
117 public void setHTable(HTable htable) {
118 Configuration conf = htable.getConfiguration();
119 logScannerActivity = conf.getBoolean(
120 ScannerCallable.LOG_SCANNER_ACTIVITY, false);
121 logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
122 this.htable = htable;
123 }
124
125
126
127
128
129
130 public void setScan(Scan scan) {
131 this.scan = scan;
132 }
133
134
135
136
137
138
139 public void initialize(InputSplit inputsplit,
140 TaskAttemptContext context) throws IOException,
141 InterruptedException {
142 if (context != null) {
143 this.context = context;
144 getCounter = retrieveGetCounterWithStringsParams(context);
145 }
146 restart(scan.getStartRow());
147 }
148
149
150
151
152
153
154 public void close() {
155 this.scanner.close();
156 }
157
158
159
160
161
162
163
164
165 public ImmutableBytesWritable getCurrentKey() throws IOException,
166 InterruptedException {
167 return key;
168 }
169
170
171
172
173
174
175
176
177 public Result getCurrentValue() throws IOException, InterruptedException {
178 return value;
179 }
180
181
182
183
184
185
186
187
188
189 public boolean nextKeyValue() throws IOException, InterruptedException {
190 if (key == null) key = new ImmutableBytesWritable();
191 if (value == null) value = new Result();
192 try {
193 try {
194 value = this.scanner.next();
195 if (logScannerActivity) {
196 rowcount ++;
197 if (rowcount >= logPerRowCount) {
198 long now = System.currentTimeMillis();
199 LOG.info("Mapper took " + (now-timestamp)
200 + "ms to process " + rowcount + " rows");
201 timestamp = now;
202 rowcount = 0;
203 }
204 }
205 } catch (IOException e) {
206
207
208 LOG.info("recovered from " + StringUtils.stringifyException(e));
209 if (lastSuccessfulRow == null) {
210 LOG.warn("We are restarting the first next() invocation," +
211 " if your mapper has restarted a few other times like this" +
212 " then you should consider killing this job and investigate" +
213 " why it's taking so long.");
214 }
215 if (lastSuccessfulRow == null) {
216 restart(scan.getStartRow());
217 } else {
218 restart(lastSuccessfulRow);
219 scanner.next();
220 }
221 value = scanner.next();
222 numRestarts++;
223 }
224 if (value != null && value.size() > 0) {
225 key.set(value.getRow());
226 lastSuccessfulRow = key.get();
227 return true;
228 }
229
230 updateCounters();
231 return false;
232 } catch (IOException ioe) {
233 if (logScannerActivity) {
234 long now = System.currentTimeMillis();
235 LOG.info("Mapper took " + (now-timestamp)
236 + "ms to process " + rowcount + " rows");
237 LOG.info(ioe);
238 String lastRow = lastSuccessfulRow == null ?
239 "null" : Bytes.toStringBinary(lastSuccessfulRow);
240 LOG.info("lastSuccessfulRow=" + lastRow);
241 }
242 throw ioe;
243 }
244 }
245
246
247
248
249
250
251
252
253 private void updateCounters() throws IOException {
254 byte[] serializedMetrics = currentScan.getAttribute(
255 Scan.SCAN_ATTRIBUTES_METRICS_DATA);
256 if (serializedMetrics == null || serializedMetrics.length == 0 ) {
257 return;
258 }
259
260 ScanMetrics scanMetrics = ProtobufUtil.toScanMetrics(serializedMetrics);
261
262 updateCounters(scanMetrics, numRestarts, getCounter, context);
263 }
264
265 protected static void updateCounters(ScanMetrics scanMetrics, long numScannerRestarts,
266 Method getCounter, TaskAttemptContext context) {
267
268 if (getCounter == null) {
269 return;
270 }
271
272 try {
273 for (Map.Entry<String, Long> entry:scanMetrics.getMetricsMap().entrySet()) {
274 Counter ct = (Counter)getCounter.invoke(context,
275 HBASE_COUNTER_GROUP_NAME, entry.getKey());
276
277 ct.increment(entry.getValue());
278 }
279 ((Counter) getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
280 "NUM_SCANNER_RESTARTS")).increment(numScannerRestarts);
281 } catch (Exception e) {
282 LOG.debug("can't update counter." + StringUtils.stringifyException(e));
283 }
284 }
285
286
287
288
289
290
291 public float getProgress() {
292
293 return 0;
294 }
295
296 }