1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.lang.reflect.Method;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.conf.Configuration;
26 import org.apache.hadoop.hbase.client.HTable;
27 import org.apache.hadoop.hbase.client.Result;
28 import org.apache.hadoop.hbase.client.ResultScanner;
29 import org.apache.hadoop.hbase.client.Scan;
30 import org.apache.hadoop.hbase.client.ScannerCallable;
31 import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
32 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
33 import org.apache.hadoop.hbase.util.Bytes;
34 import org.apache.hadoop.io.DataInputBuffer;
35 import org.apache.hadoop.mapreduce.Counter;
36 import org.apache.hadoop.mapreduce.InputSplit;
37 import org.apache.hadoop.mapreduce.TaskAttemptContext;
38 import org.apache.hadoop.metrics.util.MetricsTimeVaryingLong;
39 import org.apache.hadoop.util.StringUtils;
40
41
42
43
44
45 public class TableRecordReaderImpl {
46 public static final String LOG_PER_ROW_COUNT
47 = "hbase.mapreduce.log.scanner.rowcount";
48
49 static final Log LOG = LogFactory.getLog(TableRecordReaderImpl.class);
50
51
52 private static final String HBASE_COUNTER_GROUP_NAME =
53 "HBase Counters";
54 private ResultScanner scanner = null;
55 private Scan scan = null;
56 private Scan currentScan = null;
57 private HTable htable = null;
58 private byte[] lastSuccessfulRow = null;
59 private ImmutableBytesWritable key = null;
60 private Result value = null;
61 private TaskAttemptContext context = null;
62 private Method getCounter = null;
63 private long numRestarts = 0;
64 private long timestamp;
65 private int rowcount;
66 private boolean logScannerActivity = false;
67 private int logPerRowCount = 100;
68
69
70
71
72
73
74
75 public void restart(byte[] firstRow) throws IOException {
76 currentScan = new Scan(scan);
77 currentScan.setStartRow(firstRow);
78 currentScan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE,
79 Bytes.toBytes(Boolean.TRUE));
80 if (this.scanner != null) {
81 if (logScannerActivity) {
82 LOG.info("Closing the previously opened scanner object.");
83 }
84 this.scanner.close();
85 }
86 this.scanner = this.htable.getScanner(currentScan);
87 if (logScannerActivity) {
88 LOG.info("Current scan=" + currentScan.toString());
89 timestamp = System.currentTimeMillis();
90 rowcount = 0;
91 }
92 }
93
94
95
96
97
98
99
100 private Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
101 throws IOException {
102 Method m = null;
103 try {
104 m = context.getClass().getMethod("getCounter",
105 new Class [] {String.class, String.class});
106 } catch (SecurityException e) {
107 throw new IOException("Failed test for getCounter", e);
108 } catch (NoSuchMethodException e) {
109
110 }
111 return m;
112 }
113
114
115
116
117
118
119 public void setHTable(HTable htable) {
120 Configuration conf = htable.getConfiguration();
121 logScannerActivity = conf.getBoolean(
122 ScannerCallable.LOG_SCANNER_ACTIVITY, false);
123 logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
124 this.htable = htable;
125 }
126
127
128
129
130
131
132 public void setScan(Scan scan) {
133 this.scan = scan;
134 }
135
136
137
138
139
140
141 public void initialize(InputSplit inputsplit,
142 TaskAttemptContext context) throws IOException,
143 InterruptedException {
144 if (context != null) {
145 this.context = context;
146 getCounter = retrieveGetCounterWithStringsParams(context);
147 }
148 restart(scan.getStartRow());
149 }
150
151
152
153
154
155
156 public void close() {
157 this.scanner.close();
158 }
159
160
161
162
163
164
165
166
167 public ImmutableBytesWritable getCurrentKey() throws IOException,
168 InterruptedException {
169 return key;
170 }
171
172
173
174
175
176
177
178
179 public Result getCurrentValue() throws IOException, InterruptedException {
180 return value;
181 }
182
183
184
185
186
187
188
189
190
191 public boolean nextKeyValue() throws IOException, InterruptedException {
192 if (key == null) key = new ImmutableBytesWritable();
193 if (value == null) value = new Result();
194 try {
195 try {
196 value = this.scanner.next();
197 if (logScannerActivity) {
198 rowcount ++;
199 if (rowcount >= logPerRowCount) {
200 long now = System.currentTimeMillis();
201 LOG.info("Mapper took " + (now-timestamp)
202 + "ms to process " + rowcount + " rows");
203 timestamp = now;
204 rowcount = 0;
205 }
206 }
207 } catch (IOException e) {
208
209
210 LOG.info("recovered from " + StringUtils.stringifyException(e));
211 if (lastSuccessfulRow == null) {
212 LOG.warn("We are restarting the first next() invocation," +
213 " if your mapper has restarted a few other times like this" +
214 " then you should consider killing this job and investigate" +
215 " why it's taking so long.");
216 }
217 if (lastSuccessfulRow == null) {
218 restart(scan.getStartRow());
219 } else {
220 restart(lastSuccessfulRow);
221 scanner.next();
222 }
223 value = scanner.next();
224 numRestarts++;
225 }
226 if (value != null && value.size() > 0) {
227 key.set(value.getRow());
228 lastSuccessfulRow = key.get();
229 return true;
230 }
231
232 updateCounters();
233 return false;
234 } catch (IOException ioe) {
235 if (logScannerActivity) {
236 long now = System.currentTimeMillis();
237 LOG.info("Mapper took " + (now-timestamp)
238 + "ms to process " + rowcount + " rows");
239 LOG.info(ioe);
240 String lastRow = lastSuccessfulRow == null ?
241 "null" : Bytes.toStringBinary(lastSuccessfulRow);
242 LOG.info("lastSuccessfulRow=" + lastRow);
243 }
244 throw ioe;
245 }
246 }
247
248
249
250
251
252
253
254
255 private void updateCounters() throws IOException {
256
257 if (this.getCounter == null) {
258 return;
259 }
260
261 byte[] serializedMetrics = currentScan.getAttribute(
262 Scan.SCAN_ATTRIBUTES_METRICS_DATA);
263 if (serializedMetrics == null || serializedMetrics.length == 0 ) {
264 return;
265 }
266
267 DataInputBuffer in = new DataInputBuffer();
268 in.reset(serializedMetrics, 0, serializedMetrics.length);
269 ScanMetrics scanMetrics = new ScanMetrics();
270 scanMetrics.readFields(in);
271 MetricsTimeVaryingLong[] mlvs =
272 scanMetrics.getMetricsTimeVaryingLongArray();
273
274 try {
275 for (MetricsTimeVaryingLong mlv : mlvs) {
276 Counter ct = (Counter)this.getCounter.invoke(context,
277 HBASE_COUNTER_GROUP_NAME, mlv.getName());
278 ct.increment(mlv.getCurrentIntervalValue());
279 }
280 ((Counter) this.getCounter.invoke(context, HBASE_COUNTER_GROUP_NAME,
281 "NUM_SCANNER_RESTARTS")).increment(numRestarts);
282 } catch (Exception e) {
283 LOG.debug("can't update counter." + StringUtils.stringifyException(e));
284 }
285 }
286
287
288
289
290
291
292 public float getProgress() {
293
294 return 0;
295 }
296
297 }