1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.lang.reflect.Method;
22 import java.util.Map;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.hbase.classification.InterfaceAudience;
27 import org.apache.hadoop.hbase.classification.InterfaceStability;
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.hbase.client.HTable;
30 import org.apache.hadoop.hbase.client.Result;
31 import org.apache.hadoop.hbase.client.ResultScanner;
32 import org.apache.hadoop.hbase.client.Scan;
33 import org.apache.hadoop.hbase.client.ScannerCallable;
34 import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
35 import org.apache.hadoop.hbase.DoNotRetryIOException;
36 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
37 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
38 import org.apache.hadoop.hbase.util.Bytes;
39 import org.apache.hadoop.mapreduce.Counter;
40 import org.apache.hadoop.mapreduce.InputSplit;
41 import org.apache.hadoop.mapreduce.TaskAttemptContext;
42 import org.apache.hadoop.util.StringUtils;
43
44
45
46
47
48 @InterfaceAudience.Public
49 @InterfaceStability.Stable
50 public class TableRecordReaderImpl {
51 public static final String LOG_PER_ROW_COUNT
52 = "hbase.mapreduce.log.scanner.rowcount";
53
54 static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
55
56
57 private static final String HBASE_COUNTER_GROUP_NAME =
58 "HBase Counters";
59 private ResultScanner scanner = null;
60 private Scan scan = null;
61 private Scan currentScan = null;
62 private HTable htable = null;
63 private byte[] lastSuccessfulRow = null;
64 private ImmutableBytesWritable key = null;
65 private Result value = null;
66 private TaskAttemptContext context = null;
67 private Method getCounter = null;
68 private long numRestarts = 0;
69 private long timestamp;
70 private int rowcount;
71 private boolean logScannerActivity = false;
72 private int logPerRowCount = 100;
73
74
75
76
77
78
79
80 public void restart(byte[] firstRow) throws IOException {
81 currentScan = new Scan(scan);
82 currentScan.setStartRow(firstRow);
83 currentScan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE,
84 Bytes.toBytes(Boolean.TRUE));
85 if (this.scanner != null) {
86 if (logScannerActivity) {
87 LOG.info("Closing the previously opened scanner object.");
88 }
89 this.scanner.close();
90 }
91 this.scanner = this.htable.getScanner(currentScan);
92 if (logScannerActivity) {
93 LOG.info("Current scan=" + currentScan.toString());
94 timestamp = System.currentTimeMillis();
95 rowcount = 0;
96 }
97 }
98
99
100
101
102
103
104
105 protected static Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
106 throws IOException {
107 Method m = null;
108 try {
109 m = context.getClass().getMethod("getCounter",
110 new Class [] {String.class, String.class});
111 } catch (SecurityException e) {
112 throw new IOException("Failed test for getCounter", e);
113 } catch (NoSuchMethodException e) {
114
115 }
116 return m;
117 }
118
119
120
121
122
123
124 public void setHTable(HTable htable) {
125 Configuration conf = htable.getConfiguration();
126 logScannerActivity = conf.getBoolean(
127 ScannerCallable.LOG_SCANNER_ACTIVITY, false);
128 logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
129 this.htable = htable;
130 }
131
132
133
134
135
136
137 public void setScan(Scan scan) {
138 this.scan = scan;
139 }
140
141
142
143
144
145
146 public void initialize(InputSplit inputsplit,
147 TaskAttemptContext context) throws IOException,
148 InterruptedException {
149 if (context != null) {
150 this.context = context;
151 getCounter = retrieveGetCounterWithStringsParams(context);
152 }
153 restart(scan.getStartRow());
154 }
155
156
157
158
159
160
161 public void close() {
162 this.scanner.close();
163 try {
164 this.htable.close();
165 } catch (IOException ioe) {
166 LOG.warn("Error closing table", ioe);
167 }
168 }
169
170
171
172
173
174
175
176
177 public ImmutableBytesWritable getCurrentKey() throws IOException,
178 InterruptedException {
179 return key;
180 }
181
182
183
184
185
186
187
188
189 public Result getCurrentValue() throws IOException, InterruptedException {
190 return value;
191 }
192
193
194
195
196
197
198
199
200
201 public boolean nextKeyValue() throws IOException, InterruptedException {
202 if (key == null) key = new ImmutableBytesWritable();
203 if (value == null) value = new Result();
204 try {
205 try {
206 value = this.scanner.next();
207 if (logScannerActivity) {
208 rowcount ++;
209 if (rowcount >= logPerRowCount) {
210 long now = System.currentTimeMillis();
211 LOG.info("Mapper took " + (now-timestamp)
212 + "ms to process " + rowcount + " rows");
213 timestamp = now;
214 rowcount = 0;
215 }
216 }
217 } catch (IOException e) {
218
219 if (e instanceof DoNotRetryIOException) {
220 throw e;
221 }
222
223
224 LOG.info("recovered from " + StringUtils.stringifyException(e));
225 if (lastSuccessfulRow == null) {
226 LOG.warn("We are restarting the first next() invocation," +
227 " if your mapper has restarted a few other times like this" +
228 " then you should consider killing this job and investigate" +
229 " why it's taking so long.");
230 }
231 if (lastSuccessfulRow == null) {
232 restart(scan.getStartRow());
233 } else {
234 restart(lastSuccessfulRow);
235 scanner.next();
236 }
237 value = scanner.next();
238 numRestarts++;
239 }
240 if (value != null && value.size() > 0) {
241 key.set(value.getRow());
242 lastSuccessfulRow = key.get();
243 return true;
244 }
245
246 updateCounters();
247 return false;
248 } catch (IOException ioe) {
249 if (logScannerActivity) {
250 long now = System.currentTimeMillis();
251 LOG.info("Mapper took " + (now-timestamp)
252 + "ms to process " + rowcount + " rows");
253 LOG.info(ioe);
254 String lastRow = lastSuccessfulRow == null ?
255 "null" : Bytes.toStringBinary(lastSuccessfulRow);
256 LOG.info("lastSuccessfulRow=" + lastRow);
257 }
258 throw ioe;
259 }
260 }
261
262
263
264
265
266
267
268
269 private void updateCounters() throws IOException {
270 byte[] serializedMetrics = currentScan.getAttribute(
271 Scan.SCAN_ATTRIBUTES_METRICS_DATA);
272 if (serializedMetrics == null || serializedMetrics.length == 0 ) {
273 return;
274 }
275
276 ScanMetrics scanMetrics = ProtobufUtil.toScanMetrics(serializedMetrics);
277
278 updateCounters(scanMetrics, numRestarts, getCounter, context);
279 }
280
281 protected static void updateCounters(ScanMetrics scanMetrics, long numScannerRestarts,
282 Method getCounter, TaskAttemptContext context) {
283
284 if (getCounter == null) {
285 return;
286 }
287
288 try {
289 for (Map.Entry<String, Long> entry:scanMetrics.getMetricsMap().entrySet()) {
290 Counter ct = (Counter)getCounter.invoke(context,
291 HBASE_COUNTER_GROUP_NAME, entry.getKey());
292
293 ct.increment(entry.getValue());
294 }
295 ((Counter) getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
296 "NUM_SCANNER_RESTARTS")).increment(numScannerRestarts);
297 } catch (Exception e) {
298 LOG.debug("can't update counter." + StringUtils.stringifyException(e));
299 }
300 }
301
302
303
304
305
306
307 public float getProgress() {
308
309 return 0;
310 }
311
312 }