1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.lang.reflect.Method;
22 import java.util.Map;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.classification.InterfaceAudience;
27 import org.apache.hadoop.classification.InterfaceStability;
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.hbase.client.HTable;
30 import org.apache.hadoop.hbase.client.Result;
31 import org.apache.hadoop.hbase.client.ResultScanner;
32 import org.apache.hadoop.hbase.client.Scan;
33 import org.apache.hadoop.hbase.client.ScannerCallable;
34 import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
35 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
36 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
37 import org.apache.hadoop.hbase.util.Bytes;
38 import org.apache.hadoop.io.DataInputBuffer;
39 import org.apache.hadoop.mapreduce.Counter;
40 import org.apache.hadoop.mapreduce.InputSplit;
41 import org.apache.hadoop.mapreduce.TaskAttemptContext;
42 import org.apache.hadoop.metrics.util.MetricsTimeVaryingLong;
43 import org.apache.hadoop.util.StringUtils;
44
45
46
47
48
49 @InterfaceAudience.Public
50 @InterfaceStability.Stable
51 public class TableRecordReaderImpl {
52 public static final String LOG_PER_ROW_COUNT
53 = "hbase.mapreduce.log.scanner.rowcount";
54
55 static final Log LOG = LogFactory.getLog(TableRecordReader.class);
56
57
58 private static final String HBASE_COUNTER_GROUP_NAME =
59 "HBase Counters";
60 private ResultScanner scanner = null;
61 private Scan scan = null;
62 private Scan currentScan = null;
63 private HTable htable = null;
64 private byte[] lastSuccessfulRow = null;
65 private ImmutableBytesWritable key = null;
66 private Result value = null;
67 private TaskAttemptContext context = null;
68 private Method getCounter = null;
69 private long numRestarts = 0;
70 private long timestamp;
71 private int rowcount;
72 private boolean logScannerActivity = false;
73 private int logPerRowCount = 100;
74
75
76
77
78
79
80
81 public void restart(byte[] firstRow) throws IOException {
82 currentScan = new Scan(scan);
83 currentScan.setStartRow(firstRow);
84 currentScan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE,
85 Bytes.toBytes(Boolean.TRUE));
86 this.scanner = this.htable.getScanner(currentScan);
87 if (logScannerActivity) {
88 LOG.info("Current scan=" + currentScan.toString());
89 timestamp = System.currentTimeMillis();
90 rowcount = 0;
91 }
92 }
93
94
95
96
97
98
99
100 private Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
101 throws IOException {
102 Method m = null;
103 try {
104 m = context.getClass().getMethod("getCounter",
105 new Class [] {String.class, String.class});
106 } catch (SecurityException e) {
107 throw new IOException("Failed test for getCounter", e);
108 } catch (NoSuchMethodException e) {
109
110 }
111 return m;
112 }
113
114
115
116
117
118
119 public void setHTable(HTable htable) {
120 Configuration conf = htable.getConfiguration();
121 logScannerActivity = conf.getBoolean(
122 ScannerCallable.LOG_SCANNER_ACTIVITY, false);
123 logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
124 this.htable = htable;
125 }
126
127
128
129
130
131
132 public void setScan(Scan scan) {
133 this.scan = scan;
134 }
135
136
137
138
139
140
141 public void initialize(InputSplit inputsplit,
142 TaskAttemptContext context) throws IOException,
143 InterruptedException {
144 if (context != null) {
145 this.context = context;
146 getCounter = retrieveGetCounterWithStringsParams(context);
147 }
148 restart(scan.getStartRow());
149 }
150
151
152
153
154
155
156 public void close() {
157 this.scanner.close();
158 }
159
160
161
162
163
164
165
166
167 public ImmutableBytesWritable getCurrentKey() throws IOException,
168 InterruptedException {
169 return key;
170 }
171
172
173
174
175
176
177
178
179 public Result getCurrentValue() throws IOException, InterruptedException {
180 return value;
181 }
182
183
184
185
186
187
188
189
190
191 public boolean nextKeyValue() throws IOException, InterruptedException {
192 if (key == null) key = new ImmutableBytesWritable();
193 if (value == null) value = new Result();
194 try {
195 try {
196 value = this.scanner.next();
197 if (logScannerActivity) {
198 rowcount ++;
199 if (rowcount >= logPerRowCount) {
200 long now = System.currentTimeMillis();
201 LOG.info("Mapper took " + (now-timestamp)
202 + "ms to process " + rowcount + " rows");
203 timestamp = now;
204 rowcount = 0;
205 }
206 }
207 } catch (IOException e) {
208
209
210 LOG.info("recovered from " + StringUtils.stringifyException(e));
211 if (lastSuccessfulRow == null) {
212 LOG.warn("We are restarting the first next() invocation," +
213 " if your mapper has restarted a few other times like this" +
214 " then you should consider killing this job and investigate" +
215 " why it's taking so long.");
216 }
217 if (lastSuccessfulRow == null) {
218 restart(scan.getStartRow());
219 } else {
220 restart(lastSuccessfulRow);
221 scanner.next();
222 }
223 value = scanner.next();
224 numRestarts++;
225 }
226 if (value != null && value.size() > 0) {
227 key.set(value.getRow());
228 lastSuccessfulRow = key.get();
229 return true;
230 }
231
232 updateCounters();
233 return false;
234 } catch (IOException ioe) {
235 if (logScannerActivity) {
236 long now = System.currentTimeMillis();
237 LOG.info("Mapper took " + (now-timestamp)
238 + "ms to process " + rowcount + " rows");
239 LOG.info(ioe);
240 String lastRow = lastSuccessfulRow == null ?
241 "null" : Bytes.toStringBinary(lastSuccessfulRow);
242 LOG.info("lastSuccessfulRow=" + lastRow);
243 }
244 throw ioe;
245 }
246 }
247
248
249
250
251
252
253
254
255 private void updateCounters() throws IOException {
256
257 if (this.getCounter == null) {
258 return;
259 }
260
261 byte[] serializedMetrics = currentScan.getAttribute(
262 Scan.SCAN_ATTRIBUTES_METRICS_DATA);
263 if (serializedMetrics == null || serializedMetrics.length == 0 ) {
264 return;
265 }
266
267 ScanMetrics scanMetrics = ProtobufUtil.toScanMetrics(serializedMetrics);
268
269 try {
270 for (Map.Entry<String, Long> entry:scanMetrics.getMetricsMap().entrySet()) {
271 Counter ct = (Counter)this.getCounter.invoke(context,
272 HBASE_COUNTER_GROUP_NAME, entry.getKey());
273
274 ct.increment(entry.getValue());
275 }
276
277 ((Counter) this.getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
278 "NUM_SCANNER_RESTARTS")).increment(numRestarts);
279 } catch (Exception e) {
280 LOG.debug("can't update counter." + StringUtils.stringifyException(e));
281 }
282 }
283
284
285
286
287
288
289 public float getProgress() {
290
291 return 0;
292 }
293
294 }