1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertTrue;
24 import static org.mockito.Matchers.anyObject;
25 import static org.mockito.Mockito.doAnswer;
26 import static org.mockito.Mockito.doReturn;
27 import static org.mockito.Mockito.doThrow;
28 import static org.mockito.Mockito.mock;
29 import static org.mockito.Mockito.spy;
30
31 import java.io.IOException;
32 import java.util.Arrays;
33 import java.util.Map;
34
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.hbase.*;
38 import org.apache.hadoop.hbase.client.HTable;
39 import org.apache.hadoop.hbase.client.Put;
40 import org.apache.hadoop.hbase.client.Result;
41 import org.apache.hadoop.hbase.client.ResultScanner;
42 import org.apache.hadoop.hbase.client.Scan;
43 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
44 import org.apache.hadoop.hbase.filter.Filter;
45 import org.apache.hadoop.hbase.filter.RegexStringComparator;
46 import org.apache.hadoop.hbase.filter.RowFilter;
47 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
48 import org.apache.hadoop.hbase.testclassification.LargeTests;
49 import org.apache.hadoop.hbase.util.Bytes;
50 import org.apache.hadoop.io.NullWritable;
51 import org.apache.hadoop.mapred.JobConf;
52 import org.apache.hadoop.mapred.JobConfigurable;
53 import org.apache.hadoop.mapred.MiniMRCluster;
54 import org.apache.hadoop.mapreduce.Job;
55 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
56 import org.junit.AfterClass;
57 import org.junit.Before;
58 import org.junit.BeforeClass;
59 import org.junit.Test;
60 import org.junit.experimental.categories.Category;
61 import org.mockito.invocation.InvocationOnMock;
62 import org.mockito.stubbing.Answer;
63
64
65
66
67
68 @Category(LargeTests.class)
69 public class TestTableInputFormat {
70
71 private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
72
73 private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
74 private static MiniMRCluster mrCluster;
75 static final byte[] FAMILY = Bytes.toBytes("family");
76
77 private static final byte[][] columns = new byte[][] { FAMILY };
78
79 @BeforeClass
80 public static void beforeClass() throws Exception {
81 UTIL.startMiniCluster();
82 mrCluster = UTIL.startMiniMapReduceCluster();
83 }
84
85 @AfterClass
86 public static void afterClass() throws Exception {
87 UTIL.shutdownMiniMapReduceCluster();
88 UTIL.shutdownMiniCluster();
89 }
90
91 @Before
92 public void before() throws IOException {
93 LOG.info("before");
94 UTIL.ensureSomeRegionServersAvailable(1);
95 LOG.info("before done");
96 }
97
98
99
100
101
102
103
104
105 public static HTable createTable(byte[] tableName) throws IOException {
106 return createTable(tableName, new byte[][] { FAMILY });
107 }
108
109
110
111
112
113
114
115
116 public static HTable createTable(byte[] tableName, byte[][] families) throws IOException {
117 HTable table = UTIL.createTable(tableName, families);
118 Put p = new Put("aaa".getBytes());
119 for (byte[] family : families) {
120 p.add(family, null, "value aaa".getBytes());
121 }
122 table.put(p);
123 p = new Put("bbb".getBytes());
124 for (byte[] family : families) {
125 p.add(family, null, "value bbb".getBytes());
126 }
127 table.put(p);
128 return table;
129 }
130
131
132
133
134
135
136
137
138
139
140 static boolean checkResult(Result r, ImmutableBytesWritable key,
141 byte[] expectedKey, byte[] expectedValue) {
142 assertEquals(0, key.compareTo(expectedKey));
143 Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
144 byte[] value = vals.values().iterator().next();
145 assertTrue(Arrays.equals(value, expectedValue));
146 return true;
147 }
148
149
150
151
152
153
154
155
156
157 static void runTestMapreduce(HTable table) throws IOException,
158 InterruptedException {
159 org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr =
160 new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
161 Scan s = new Scan();
162 s.setStartRow("aaa".getBytes());
163 s.setStopRow("zzz".getBytes());
164 s.addFamily(FAMILY);
165 trr.setScan(s);
166 trr.setHTable(table);
167
168 trr.initialize(null, null);
169 Result r = new Result();
170 ImmutableBytesWritable key = new ImmutableBytesWritable();
171
172 boolean more = trr.nextKeyValue();
173 assertTrue(more);
174 key = trr.getCurrentKey();
175 r = trr.getCurrentValue();
176 checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
177
178 more = trr.nextKeyValue();
179 assertTrue(more);
180 key = trr.getCurrentKey();
181 r = trr.getCurrentValue();
182 checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
183
184
185 more = trr.nextKeyValue();
186 assertFalse(more);
187 }
188
189
190
191
192
193
194 static HTable createIOEScannerTable(byte[] name, final int failCnt)
195 throws IOException {
196
197 Answer<ResultScanner> a = new Answer<ResultScanner>() {
198 int cnt = 0;
199
200 @Override
201 public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
202
203 if (cnt++ < failCnt) {
204
205 Scan scan = mock(Scan.class);
206 doReturn("bogus".getBytes()).when(scan).getStartRow();
207 ResultScanner scanner = mock(ResultScanner.class);
208
209 doThrow(new IOException("Injected exception")).when(scanner).next();
210 return scanner;
211 }
212
213
214 return (ResultScanner) invocation.callRealMethod();
215 }
216 };
217
218 HTable htable = spy(createTable(name));
219 doAnswer(a).when(htable).getScanner((Scan) anyObject());
220 return htable;
221 }
222
223
224
225
226
227
228
229 static HTable createDNRIOEScannerTable(byte[] name, final int failCnt)
230 throws IOException {
231
232 Answer<ResultScanner> a = new Answer<ResultScanner>() {
233 int cnt = 0;
234
235 @Override
236 public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
237
238 if (cnt++ < failCnt) {
239
240 Scan scan = mock(Scan.class);
241 doReturn("bogus".getBytes()).when(scan).getStartRow();
242 ResultScanner scanner = mock(ResultScanner.class);
243
244 invocation.callRealMethod();
245 doThrow(
246 new NotServingRegionException("Injected simulated TimeoutException"))
247 .when(scanner).next();
248 return scanner;
249 }
250
251
252 return (ResultScanner) invocation.callRealMethod();
253 }
254 };
255
256 HTable htable = spy(createTable(name));
257 doAnswer(a).when(htable).getScanner((Scan) anyObject());
258 return htable;
259 }
260
261
262
263
264
265
266
267 @Test
268 public void testTableRecordReaderMapreduce() throws IOException,
269 InterruptedException {
270 HTable table = createTable("table1-mr".getBytes());
271 runTestMapreduce(table);
272 }
273
274
275
276
277
278
279
280 @Test
281 public void testTableRecordReaderScannerFailMapreduce() throws IOException,
282 InterruptedException {
283 HTable htable = createIOEScannerTable("table2-mr".getBytes(), 1);
284 runTestMapreduce(htable);
285 }
286
287
288
289
290
291
292
293 @Test(expected = IOException.class)
294 public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException,
295 InterruptedException {
296 HTable htable = createIOEScannerTable("table3-mr".getBytes(), 2);
297 runTestMapreduce(htable);
298 }
299
300
301
302
303
304
305
306 @Test
307 public void testTableRecordReaderScannerTimeoutMapreduce()
308 throws IOException, InterruptedException {
309 HTable htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1);
310 runTestMapreduce(htable);
311 }
312
313
314
315
316
317
318
319 @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
320 public void testTableRecordReaderScannerTimeoutMapreduceTwice()
321 throws IOException, InterruptedException {
322 HTable htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2);
323 runTestMapreduce(htable);
324 }
325
326
327
328
329 @Test
330 public void testExtensionOfTableInputFormatBase()
331 throws IOException, InterruptedException, ClassNotFoundException {
332 LOG.info("testing use of an InputFormat taht extends InputFormatBase");
333 final HTable htable = createTable(Bytes.toBytes("exampleTable"),
334 new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
335
336 final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
337 job.setInputFormatClass(ExampleTIF.class);
338 job.setOutputFormatClass(NullOutputFormat.class);
339 job.setMapperClass(ExampleVerifier.class);
340 job.setNumReduceTasks(0);
341
342 LOG.debug("submitting job.");
343 assertTrue("job failed!", job.waitForCompletion(true));
344 assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
345 .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
346 assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
347 .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
348 assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
349 .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
350 assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
351 .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
352 assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
353 .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
354 assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
355 .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
356 }
357
358 public static class ExampleVerifier extends TableMapper<NullWritable, NullWritable> {
359
360 @Override
361 public void map(ImmutableBytesWritable key, Result value, Context context)
362 throws IOException {
363 for (Cell cell : value.listCells()) {
364 context.getCounter(TestTableInputFormat.class.getName() + ":row",
365 Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
366 .increment(1l);
367 context.getCounter(TestTableInputFormat.class.getName() + ":family",
368 Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
369 .increment(1l);
370 context.getCounter(TestTableInputFormat.class.getName() + ":value",
371 Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
372 .increment(1l);
373 }
374 }
375
376 }
377
378 public static class ExampleTIF extends TableInputFormatBase implements JobConfigurable {
379
380 @Override
381 public void configure(JobConf job) {
382 try {
383 HTable exampleTable = new HTable(HBaseConfiguration.create(job),
384 Bytes.toBytes("exampleTable"));
385
386 setHTable(exampleTable);
387 byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
388 Bytes.toBytes("columnB") };
389
390 Scan scan = new Scan();
391 for (byte[] family : inputColumns) {
392 scan.addFamily(family);
393 }
394 Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
395 scan.setFilter(exampleFilter);
396 setScan(scan);
397 } catch (IOException exception) {
398 throw new RuntimeException("Failed to configure for job.", exception);
399 }
400 }
401
402 }
403
404 }
405