1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapred;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertTrue;
24 import static org.mockito.Matchers.anyObject;
25 import static org.mockito.Mockito.doAnswer;
26 import static org.mockito.Mockito.doReturn;
27 import static org.mockito.Mockito.doThrow;
28 import static org.mockito.Mockito.mock;
29 import static org.mockito.Mockito.spy;
30
31 import java.io.IOException;
32 import java.util.Arrays;
33 import java.util.Map;
34
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.hbase.*;
38 import org.apache.hadoop.hbase.Cell;
39 import org.apache.hadoop.hbase.client.Connection;
40 import org.apache.hadoop.hbase.client.ConnectionFactory;
41 import org.apache.hadoop.hbase.client.HTable;
42 import org.apache.hadoop.hbase.client.Put;
43 import org.apache.hadoop.hbase.client.Result;
44 import org.apache.hadoop.hbase.client.ResultScanner;
45 import org.apache.hadoop.hbase.client.Scan;
46 import org.apache.hadoop.hbase.client.Table;
47 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
48 import org.apache.hadoop.hbase.filter.Filter;
49 import org.apache.hadoop.hbase.filter.RegexStringComparator;
50 import org.apache.hadoop.hbase.filter.RowFilter;
51 import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim;
52 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
53 import org.apache.hadoop.hbase.testclassification.LargeTests;
54 import org.apache.hadoop.hbase.util.Bytes;
55 import org.apache.hadoop.io.NullWritable;
56 import org.apache.hadoop.mapred.InputFormat;
57 import org.apache.hadoop.mapred.JobClient;
58 import org.apache.hadoop.mapred.JobConf;
59 import org.apache.hadoop.mapred.JobConfigurable;
60 import org.apache.hadoop.mapred.MiniMRCluster;
61 import org.apache.hadoop.mapred.OutputCollector;
62 import org.apache.hadoop.mapred.Reporter;
63 import org.apache.hadoop.mapred.RunningJob;
64 import org.apache.hadoop.mapred.lib.NullOutputFormat;
65 import org.junit.AfterClass;
66 import org.junit.Before;
67 import org.junit.BeforeClass;
68 import org.junit.Test;
69 import org.junit.experimental.categories.Category;
70 import org.mockito.invocation.InvocationOnMock;
71 import org.mockito.stubbing.Answer;
72
73
74
75
76
77 @Category(LargeTests.class)
78 public class TestTableInputFormat {
79
80 private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
81
82 private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
83 private static MiniMRCluster mrCluster;
84 static final byte[] FAMILY = Bytes.toBytes("family");
85
86 private static final byte[][] columns = new byte[][] { FAMILY };
87
88 @BeforeClass
89 public static void beforeClass() throws Exception {
90 UTIL.startMiniCluster();
91 mrCluster = UTIL.startMiniMapReduceCluster();
92 }
93
94 @AfterClass
95 public static void afterClass() throws Exception {
96 UTIL.shutdownMiniMapReduceCluster();
97 UTIL.shutdownMiniCluster();
98 }
99
100 @Before
101 public void before() throws IOException {
102 LOG.info("before");
103 UTIL.ensureSomeRegionServersAvailable(1);
104 LOG.info("before done");
105 }
106
107
108
109
110
111
112
113
114 public static Table createTable(byte[] tableName) throws IOException {
115 return createTable(tableName, new byte[][] { FAMILY });
116 }
117
118
119
120
121
122
123
124
125 public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
126 Table table = UTIL.createTable(TableName.valueOf(tableName), families);
127 Put p = new Put("aaa".getBytes());
128 for (byte[] family : families) {
129 p.add(family, null, "value aaa".getBytes());
130 }
131 table.put(p);
132 p = new Put("bbb".getBytes());
133 for (byte[] family : families) {
134 p.add(family, null, "value bbb".getBytes());
135 }
136 table.put(p);
137 return table;
138 }
139
140
141
142
143
144
145
146
147
148
149 static boolean checkResult(Result r, ImmutableBytesWritable key,
150 byte[] expectedKey, byte[] expectedValue) {
151 assertEquals(0, key.compareTo(expectedKey));
152 Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
153 byte[] value = vals.values().iterator().next();
154 assertTrue(Arrays.equals(value, expectedValue));
155 return true;
156 }
157
158
159
160
161
162
163
164
165 static void runTestMapred(Table table) throws IOException {
166 org.apache.hadoop.hbase.mapred.TableRecordReader trr =
167 new org.apache.hadoop.hbase.mapred.TableRecordReader();
168 trr.setStartRow("aaa".getBytes());
169 trr.setEndRow("zzz".getBytes());
170 trr.setHTable(table);
171 trr.setInputColumns(columns);
172
173 trr.init();
174 Result r = new Result();
175 ImmutableBytesWritable key = new ImmutableBytesWritable();
176
177 boolean more = trr.next(key, r);
178 assertTrue(more);
179 checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
180
181 more = trr.next(key, r);
182 assertTrue(more);
183 checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
184
185
186 more = trr.next(key, r);
187 assertFalse(more);
188 }
189
190
191
192
193
194
195 static Table createIOEScannerTable(byte[] name, final int failCnt)
196 throws IOException {
197
198 Answer<ResultScanner> a = new Answer<ResultScanner>() {
199 int cnt = 0;
200
201 @Override
202 public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
203
204 if (cnt++ < failCnt) {
205
206 Scan scan = mock(Scan.class);
207 doReturn("bogus".getBytes()).when(scan).getStartRow();
208 ResultScanner scanner = mock(ResultScanner.class);
209
210 doThrow(new IOException("Injected exception")).when(scanner).next();
211 return scanner;
212 }
213
214
215 return (ResultScanner) invocation.callRealMethod();
216 }
217 };
218
219 Table htable = spy(createTable(name));
220 doAnswer(a).when(htable).getScanner((Scan) anyObject());
221 return htable;
222 }
223
224
225
226
227
228
229
230 static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
231 throws IOException {
232
233 Answer<ResultScanner> a = new Answer<ResultScanner>() {
234 int cnt = 0;
235
236 @Override
237 public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
238
239 if (cnt++ < failCnt) {
240
241 Scan scan = mock(Scan.class);
242 doReturn("bogus".getBytes()).when(scan).getStartRow();
243 ResultScanner scanner = mock(ResultScanner.class);
244
245 invocation.callRealMethod();
246 doThrow(
247 new UnknownScannerException("Injected simulated TimeoutException"))
248 .when(scanner).next();
249 return scanner;
250 }
251
252
253 return (ResultScanner) invocation.callRealMethod();
254 }
255 };
256
257 Table htable = spy(createTable(name));
258 doAnswer(a).when(htable).getScanner((Scan) anyObject());
259 return htable;
260 }
261
262
263
264
265
266
267 @Test
268 public void testTableRecordReader() throws IOException {
269 Table table = createTable("table1".getBytes());
270 runTestMapred(table);
271 }
272
273
274
275
276
277
278 @Test
279 public void testTableRecordReaderScannerFail() throws IOException {
280 Table htable = createIOEScannerTable("table2".getBytes(), 1);
281 runTestMapred(htable);
282 }
283
284
285
286
287
288
289 @Test(expected = IOException.class)
290 public void testTableRecordReaderScannerFailTwice() throws IOException {
291 Table htable = createIOEScannerTable("table3".getBytes(), 2);
292 runTestMapred(htable);
293 }
294
295
296
297
298
299
300
301 @Test
302 public void testTableRecordReaderScannerTimeout() throws IOException {
303 Table htable = createDNRIOEScannerTable("table4".getBytes(), 1);
304 runTestMapred(htable);
305 }
306
307
308
309
310
311
312
313 @Test(expected = org.apache.hadoop.hbase.DoNotRetryIOException.class)
314 public void testTableRecordReaderScannerTimeoutTwice() throws IOException {
315 Table htable = createDNRIOEScannerTable("table5".getBytes(), 2);
316 runTestMapred(htable);
317 }
318
319
320
321
322 @Test
323 public void testExtensionOfTableInputFormatBase() throws IOException {
324 LOG.info("testing use of an InputFormat taht extends InputFormatBase");
325 final Table table = createTable(Bytes.toBytes("exampleTable"),
326 new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
327 testInputFormat(ExampleTIF.class);
328 }
329
330 @Test
331 public void testDeprecatedExtensionOfTableInputFormatBase() throws IOException {
332 LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
333 + "as it was given in 0.98.");
334 final Table table = createTable(Bytes.toBytes("exampleDeprecatedTable"),
335 new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
336 testInputFormat(ExampleDeprecatedTIF.class);
337 }
338
339 @Test
340 public void testJobConfigurableExtensionOfTableInputFormatBase() throws IOException {
341 LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
342 + "using JobConfigurable.");
343 final Table table = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
344 new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
345 testInputFormat(ExampleJobConfigurableTIF.class);
346 }
347
348 void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
349 final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
350 job.setInputFormat(clazz);
351 job.setOutputFormat(NullOutputFormat.class);
352 job.setMapperClass(ExampleVerifier.class);
353 job.setNumReduceTasks(0);
354 LOG.debug("submitting job.");
355 final RunningJob run = JobClient.runJob(job);
356 assertTrue("job failed!", run.isSuccessful());
357 assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
358 .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
359 assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
360 .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
361 assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
362 .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
363 assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
364 .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
365 assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
366 .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
367 assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
368 .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
369 }
370
371 public static class ExampleVerifier implements TableMap<NullWritable, NullWritable> {
372
373 @Override
374 public void configure(JobConf conf) {
375 }
376
377 @Override
378 public void map(ImmutableBytesWritable key, Result value,
379 OutputCollector<NullWritable,NullWritable> output,
380 Reporter reporter) throws IOException {
381 for (Cell cell : value.listCells()) {
382 reporter.getCounter(TestTableInputFormat.class.getName() + ":row",
383 Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
384 .increment(1l);
385 reporter.getCounter(TestTableInputFormat.class.getName() + ":family",
386 Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
387 .increment(1l);
388 reporter.getCounter(TestTableInputFormat.class.getName() + ":value",
389 Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
390 .increment(1l);
391 }
392 }
393
394 @Override
395 public void close() {
396 }
397
398 }
399
400 public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
401
402 @Override
403 public void configure(JobConf job) {
404 try {
405 HTable exampleTable = new HTable(HBaseConfiguration.create(job),
406 Bytes.toBytes("exampleDeprecatedTable"));
407
408 setHTable(exampleTable);
409 byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
410 Bytes.toBytes("columnB") };
411
412 setInputColumns(inputColumns);
413 Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
414
415 setRowFilter(exampleFilter);
416 } catch (IOException exception) {
417 throw new RuntimeException("Failed to configure for job.", exception);
418 }
419 }
420
421 }
422
423 public static class ExampleJobConfigurableTIF extends ExampleTIF implements JobConfigurable {
424
425 @Override
426 public void configure(JobConf job) {
427 try {
428 initialize(job);
429 } catch (IOException exception) {
430 throw new RuntimeException("Failed to initialize.", exception);
431 }
432 }
433
434 @Override
435 protected void initialize(JobConf job) throws IOException {
436 initialize(job, "exampleJobConfigurableTable");
437 }
438 }
439
440
441 public static class ExampleTIF extends TableInputFormatBase {
442
443 @Override
444 protected void initialize(JobConf job) throws IOException {
445 initialize(job, "exampleTable");
446 }
447
448 protected void initialize(JobConf job, String table) throws IOException {
449 Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
450 TableName tableName = TableName.valueOf(table);
451
452 initializeTable(connection, tableName);
453 byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
454 Bytes.toBytes("columnB") };
455
456 setInputColumns(inputColumns);
457 Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
458
459 setRowFilter(exampleFilter);
460 }
461
462 }
463
464 }
465