1   /**
2    * Copyright 2011 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapred;
21  
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertFalse;
24  import static org.junit.Assert.assertTrue;
25  import static org.mockito.Matchers.anyObject;
26  import static org.mockito.Mockito.doAnswer;
27  import static org.mockito.Mockito.doReturn;
28  import static org.mockito.Mockito.doThrow;
29  import static org.mockito.Mockito.mock;
30  import static org.mockito.Mockito.spy;
31  
32  import java.io.IOException;
33  import java.util.Arrays;
34  import java.util.Map;
35  
36  import org.apache.commons.logging.Log;
37  import org.apache.commons.logging.LogFactory;
38  import org.apache.hadoop.hbase.*;
39  import org.apache.hadoop.hbase.client.HTable;
40  import org.apache.hadoop.hbase.client.Put;
41  import org.apache.hadoop.hbase.client.Result;
42  import org.apache.hadoop.hbase.client.ResultScanner;
43  import org.apache.hadoop.hbase.client.Scan;
44  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.junit.AfterClass;
47  import org.junit.Before;
48  import org.junit.BeforeClass;
49  import org.junit.Test;
50  import org.junit.experimental.categories.Category;
51  import org.mockito.invocation.InvocationOnMock;
52  import org.mockito.stubbing.Answer;
53  
54  /**
55   * This tests the TableInputFormat and its recovery semantics
56   * 
57   */
58  @Category(LargeTests.class)
59  public class TestTableInputFormat {
60  
61    private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
62  
63    private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
64    static final byte[] FAMILY = Bytes.toBytes("family");
65  
66    private static final byte[][] columns = new byte[][] { FAMILY };
67  
68    @BeforeClass
69    public static void beforeClass() throws Exception {
70      UTIL.startMiniCluster();
71    }
72  
73    @AfterClass
74    public static void afterClass() throws Exception {
75      UTIL.shutdownMiniCluster();
76    }
77  
78    @Before
79    public void before() throws IOException {
80      LOG.info("before");
81      UTIL.ensureSomeRegionServersAvailable(1);
82      LOG.info("before done");
83    }
84  
85    /**
86     * Setup a table with two rows and values.
87     * 
88     * @param tableName
89     * @return
90     * @throws IOException
91     */
92    public static HTable createTable(byte[] tableName) throws IOException {
93      HTable table = UTIL.createTable(tableName, FAMILY);
94      Put p = new Put("aaa".getBytes());
95      p.add(FAMILY, null, "value aaa".getBytes());
96      table.put(p);
97      p = new Put("bbb".getBytes());
98      p.add(FAMILY, null, "value bbb".getBytes());
99      table.put(p);
100     return table;
101   }
102 
103   /**
104    * Verify that the result and key have expected values.
105    * 
106    * @param r
107    * @param key
108    * @param expectedKey
109    * @param expectedValue
110    * @return
111    */
112   static boolean checkResult(Result r, ImmutableBytesWritable key,
113       byte[] expectedKey, byte[] expectedValue) {
114     assertEquals(0, key.compareTo(expectedKey));
115     Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
116     byte[] value = vals.values().iterator().next();
117     assertTrue(Arrays.equals(value, expectedValue));
118     return true; // if succeed
119   }
120 
121   /**
122    * Create table data and run tests on specified htable using the
123    * o.a.h.hbase.mapred API.
124    * 
125    * @param table
126    * @throws IOException
127    */
128   static void runTestMapred(HTable table) throws IOException {
129     org.apache.hadoop.hbase.mapred.TableRecordReader trr = 
130         new org.apache.hadoop.hbase.mapred.TableRecordReader();
131     trr.setStartRow("aaa".getBytes());
132     trr.setEndRow("zzz".getBytes());
133     trr.setHTable(table);
134     trr.setInputColumns(columns);
135 
136     trr.init();
137     Result r = new Result();
138     ImmutableBytesWritable key = new ImmutableBytesWritable();
139 
140     boolean more = trr.next(key, r);
141     assertTrue(more);
142     checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
143 
144     more = trr.next(key, r);
145     assertTrue(more);
146     checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
147 
148     // no more data
149     more = trr.next(key, r);
150     assertFalse(more);
151   }
152 
153   /**
154    * Create table data and run tests on specified htable using the
155    * o.a.h.hbase.mapreduce API.
156    * 
157    * @param table
158    * @throws IOException
159    * @throws InterruptedException
160    */
161   static void runTestMapreduce(HTable table) throws IOException,
162       InterruptedException {
163     org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr = 
164         new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
165     Scan s = new Scan();
166     s.setStartRow("aaa".getBytes());
167     s.setStopRow("zzz".getBytes());
168     s.addFamily(FAMILY);
169     trr.setScan(s);
170     trr.setHTable(table);
171 
172     trr.initialize(null, null);
173     Result r = new Result();
174     ImmutableBytesWritable key = new ImmutableBytesWritable();
175 
176     boolean more = trr.nextKeyValue();
177     assertTrue(more);
178     key = trr.getCurrentKey();
179     r = trr.getCurrentValue();
180     checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
181 
182     more = trr.nextKeyValue();
183     assertTrue(more);
184     key = trr.getCurrentKey();
185     r = trr.getCurrentValue();
186     checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
187 
188     // no more data
189     more = trr.nextKeyValue();
190     assertFalse(more);
191   }
192 
193   /**
194    * Create a table that IOE's on first scanner next call
195    * 
196    * @throws IOException
197    */
198   static HTable createIOEScannerTable(byte[] name, final int failCnt)
199       throws IOException {
200     // build up a mock scanner stuff to fail the first time
201     Answer<ResultScanner> a = new Answer<ResultScanner>() {
202       int cnt = 0;
203 
204       @Override
205       public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
206         // first invocation return the busted mock scanner
207         if (cnt++ < failCnt) {
208           // create mock ResultScanner that always fails.
209           Scan scan = mock(Scan.class);
210           doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
211           ResultScanner scanner = mock(ResultScanner.class);
212           // simulate TimeoutException / IOException
213           doThrow(new IOException("Injected exception")).when(scanner).next();
214           return scanner;
215         }
216 
217         // otherwise return the real scanner.
218         return (ResultScanner) invocation.callRealMethod();
219       }
220     };
221 
222     HTable htable = spy(createTable(name));
223     doAnswer(a).when(htable).getScanner((Scan) anyObject());
224     return htable;
225   }
226 
227   /**
228    * Create a table that throws a DoNoRetryIOException on first scanner next
229    * call
230    * 
231    * @throws IOException
232    */
233   static HTable createDNRIOEScannerTable(byte[] name, final int failCnt)
234       throws IOException {
235     // build up a mock scanner stuff to fail the first time
236     Answer<ResultScanner> a = new Answer<ResultScanner>() {
237       int cnt = 0;
238 
239       @Override
240       public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
241         // first invocation return the busted mock scanner
242         if (cnt++ < failCnt) {
243           // create mock ResultScanner that always fails.
244           Scan scan = mock(Scan.class);
245           doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
246           ResultScanner scanner = mock(ResultScanner.class);
247 
248           invocation.callRealMethod(); // simulate UnknownScannerException
249           doThrow(
250               new UnknownScannerException("Injected simulated TimeoutException"))
251               .when(scanner).next();
252           return scanner;
253         }
254 
255         // otherwise return the real scanner.
256         return (ResultScanner) invocation.callRealMethod();
257       }
258     };
259 
260     HTable htable = spy(createTable(name));
261     doAnswer(a).when(htable).getScanner((Scan) anyObject());
262     return htable;
263   }
264 
265   /**
266    * Run test assuming no errors using mapred api.
267    * 
268    * @throws IOException
269    */
270   @Test
271   public void testTableRecordReader() throws IOException {
272     HTable table = createTable("table1".getBytes());
273     runTestMapred(table);
274   }
275 
276   /**
277    * Run test assuming Scanner IOException failure using mapred api,
278    * 
279    * @throws IOException
280    */
281   @Test
282   public void testTableRecordReaderScannerFail() throws IOException {
283     HTable htable = createIOEScannerTable("table2".getBytes(), 1);
284     runTestMapred(htable);
285   }
286 
287   /**
288    * Run test assuming Scanner IOException failure using mapred api,
289    * 
290    * @throws IOException
291    */
292   @Test(expected = IOException.class)
293   public void testTableRecordReaderScannerFailTwice() throws IOException {
294     HTable htable = createIOEScannerTable("table3".getBytes(), 2);
295     runTestMapred(htable);
296   }
297 
298   /**
299    * Run test assuming UnknownScannerException (which is a type of
300    * DoNotRetryIOException) using mapred api.
301    * 
302    * @throws DoNotRetryIOException
303    */
304   @Test
305   public void testTableRecordReaderScannerTimeout() throws IOException {
306     HTable htable = createDNRIOEScannerTable("table4".getBytes(), 1);
307     runTestMapred(htable);
308   }
309 
310   /**
311    * Run test assuming UnknownScannerException (which is a type of
312    * DoNotRetryIOException) using mapred api.
313    * 
314    * @throws DoNotRetryIOException
315    */
316   @Test(expected = DoNotRetryIOException.class)
317   public void testTableRecordReaderScannerTimeoutTwice() throws IOException {
318     HTable htable = createDNRIOEScannerTable("table5".getBytes(), 2);
319     runTestMapred(htable);
320   }
321 
322   /**
323    * Run test assuming no errors using newer mapreduce api
324    * 
325    * @throws IOException
326    * @throws InterruptedException
327    */
328   @Test
329   public void testTableRecordReaderMapreduce() throws IOException,
330       InterruptedException {
331     HTable table = createTable("table1-mr".getBytes());
332     runTestMapreduce(table);
333   }
334 
335   /**
336    * Run test assuming Scanner IOException failure using newer mapreduce api
337    * 
338    * @throws IOException
339    * @throws InterruptedException
340    */
341   @Test
342   public void testTableRecordReaderScannerFailMapreduce() throws IOException,
343       InterruptedException {
344     HTable htable = createIOEScannerTable("table2-mr".getBytes(), 1);
345     runTestMapreduce(htable);
346   }
347 
348   /**
349    * Run test assuming Scanner IOException failure using newer mapreduce api
350    * 
351    * @throws IOException
352    * @throws InterruptedException
353    */
354   @Test(expected = IOException.class)
355   public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException,
356       InterruptedException {
357     HTable htable = createIOEScannerTable("table3-mr".getBytes(), 2);
358     runTestMapreduce(htable);
359   }
360 
361   /**
362    * Run test assuming UnknownScannerException (which is a type of
363    * DoNotRetryIOException) using newer mapreduce api
364    * 
365    * @throws InterruptedException
366    * @throws DoNotRetryIOException
367    */
368   @Test
369   public void testTableRecordReaderScannerTimeoutMapreduce()
370       throws IOException, InterruptedException {
371     HTable htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1);
372     runTestMapreduce(htable);
373   }
374 
375   /**
376    * Run test assuming UnknownScannerException (which is a type of
377    * DoNotRetryIOException) using newer mapreduce api
378    * 
379    * @throws InterruptedException
380    * @throws DoNotRetryIOException
381    */
382   @Test(expected = DoNotRetryIOException.class)
383   public void testTableRecordReaderScannerTimeoutMapreduceTwice()
384       throws IOException, InterruptedException {
385     HTable htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2);
386     runTestMapreduce(htable);
387   }
388 
389   @org.junit.Rule
390   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
391     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
392 }
393