1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.lang.reflect.Method;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.conf.Configuration;
26 import org.apache.hadoop.hbase.client.HTable;
27 import org.apache.hadoop.hbase.client.Result;
28 import org.apache.hadoop.hbase.client.ResultScanner;
29 import org.apache.hadoop.hbase.client.Scan;
30 import org.apache.hadoop.hbase.client.ScannerCallable;
31 import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
32 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
33 import org.apache.hadoop.hbase.util.Bytes;
34 import org.apache.hadoop.io.DataInputBuffer;
35 import org.apache.hadoop.mapreduce.Counter;
36 import org.apache.hadoop.mapreduce.InputSplit;
37 import org.apache.hadoop.mapreduce.TaskAttemptContext;
38 import org.apache.hadoop.metrics.util.MetricsTimeVaryingLong;
39 import org.apache.hadoop.util.StringUtils;
40
41
42
43
44
45 public class TableRecordReaderImpl {
46 public static final String LOG_PER_ROW_COUNT
47 = "hbase.mapreduce.log.scanner.rowcount";
48
49 static final Log LOG = LogFactory.getLog(TableRecordReader.class);
50
51
52 private static final String HBASE_COUNTER_GROUP_NAME =
53 "HBase Counters";
54 private ResultScanner scanner = null;
55 private Scan scan = null;
56 private Scan currentScan = null;
57 private HTable htable = null;
58 private byte[] lastSuccessfulRow = null;
59 private ImmutableBytesWritable key = null;
60 private Result value = null;
61 private TaskAttemptContext context = null;
62 private Method getCounter = null;
63 private long numRestarts = 0;
64 private long timestamp;
65 private int rowcount;
66 private boolean logScannerActivity = false;
67 private int logPerRowCount = 100;
68
69
70
71
72
73
74
75 public void restart(byte[] firstRow) throws IOException {
76 currentScan = new Scan(scan);
77 currentScan.setStartRow(firstRow);
78 currentScan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE,
79 Bytes.toBytes(Boolean.TRUE));
80 this.scanner = this.htable.getScanner(currentScan);
81 if (logScannerActivity) {
82 LOG.info("Current scan=" + currentScan.toString());
83 timestamp = System.currentTimeMillis();
84 rowcount = 0;
85 }
86 }
87
88
89
90
91
92
93
94 private Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
95 throws IOException {
96 Method m = null;
97 try {
98 m = context.getClass().getMethod("getCounter",
99 new Class [] {String.class, String.class});
100 } catch (SecurityException e) {
101 throw new IOException("Failed test for getCounter", e);
102 } catch (NoSuchMethodException e) {
103
104 }
105 return m;
106 }
107
108
109
110
111
112
113 public void setHTable(HTable htable) {
114 Configuration conf = htable.getConfiguration();
115 logScannerActivity = conf.getBoolean(
116 ScannerCallable.LOG_SCANNER_ACTIVITY, false);
117 logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
118 this.htable = htable;
119 }
120
121
122
123
124
125
126 public void setScan(Scan scan) {
127 this.scan = scan;
128 }
129
130
131
132
133
134
135 public void initialize(InputSplit inputsplit,
136 TaskAttemptContext context) throws IOException,
137 InterruptedException {
138 if (context != null) {
139 this.context = context;
140 getCounter = retrieveGetCounterWithStringsParams(context);
141 }
142 restart(scan.getStartRow());
143 }
144
145
146
147
148
149
150 public void close() {
151 this.scanner.close();
152 }
153
154
155
156
157
158
159
160
161 public ImmutableBytesWritable getCurrentKey() throws IOException,
162 InterruptedException {
163 return key;
164 }
165
166
167
168
169
170
171
172
173 public Result getCurrentValue() throws IOException, InterruptedException {
174 return value;
175 }
176
177
178
179
180
181
182
183
184
185 public boolean nextKeyValue() throws IOException, InterruptedException {
186 if (key == null) key = new ImmutableBytesWritable();
187 if (value == null) value = new Result();
188 try {
189 try {
190 value = this.scanner.next();
191 if (logScannerActivity) {
192 rowcount ++;
193 if (rowcount >= logPerRowCount) {
194 long now = System.currentTimeMillis();
195 LOG.info("Mapper took " + (now-timestamp)
196 + "ms to process " + rowcount + " rows");
197 timestamp = now;
198 rowcount = 0;
199 }
200 }
201 } catch (IOException e) {
202
203
204 LOG.info("recovered from " + StringUtils.stringifyException(e));
205 if (lastSuccessfulRow == null) {
206 LOG.warn("We are restarting the first next() invocation," +
207 " if your mapper has restarted a few other times like this" +
208 " then you should consider killing this job and investigate" +
209 " why it's taking so long.");
210 }
211 if (lastSuccessfulRow == null) {
212 restart(scan.getStartRow());
213 } else {
214 restart(lastSuccessfulRow);
215 scanner.next();
216 }
217 value = scanner.next();
218 numRestarts++;
219 }
220 if (value != null && value.size() > 0) {
221 key.set(value.getRow());
222 lastSuccessfulRow = key.get();
223 return true;
224 }
225
226 updateCounters();
227 return false;
228 } catch (IOException ioe) {
229 if (logScannerActivity) {
230 long now = System.currentTimeMillis();
231 LOG.info("Mapper took " + (now-timestamp)
232 + "ms to process " + rowcount + " rows");
233 LOG.info(ioe);
234 String lastRow = lastSuccessfulRow == null ?
235 "null" : Bytes.toStringBinary(lastSuccessfulRow);
236 LOG.info("lastSuccessfulRow=" + lastRow);
237 }
238 throw ioe;
239 }
240 }
241
242
243
244
245
246
247
248
249 private void updateCounters() throws IOException {
250
251 if (this.getCounter == null) {
252 return;
253 }
254
255 byte[] serializedMetrics = currentScan.getAttribute(
256 Scan.SCAN_ATTRIBUTES_METRICS_DATA);
257 if (serializedMetrics == null || serializedMetrics.length == 0 ) {
258 return;
259 }
260
261 DataInputBuffer in = new DataInputBuffer();
262 in.reset(serializedMetrics, 0, serializedMetrics.length);
263 ScanMetrics scanMetrics = new ScanMetrics();
264 scanMetrics.readFields(in);
265 MetricsTimeVaryingLong[] mlvs =
266 scanMetrics.getMetricsTimeVaryingLongArray();
267
268 try {
269 for (MetricsTimeVaryingLong mlv : mlvs) {
270 Counter ct = (Counter)this.getCounter.invoke(context,
271 HBASE_COUNTER_GROUP_NAME, mlv.getName());
272 ct.increment(mlv.getCurrentIntervalValue());
273 }
274 ((Counter) this.getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
275 "NUM_SCANNER_RESTARTS")).increment(numRestarts);
276 } catch (Exception e) {
277 LOG.debug("can't update counter." + StringUtils.stringifyException(e));
278 }
279 }
280
281
282
283
284
285
286 public float getProgress() {
287
288 return 0;
289 }
290
291 }