View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;
45  
46  /**
47   * <p>
48   * Tests various scan start and stop row scenarios. This is set in a scan and
49   * tested in a MapReduce job to see if that is handed over and done properly
50   * too.
51   * </p>
52   * <p>
53   * This test is broken into two parts in order to side-step the test timeout
54   * period of 900, as documented in HBASE-8326.
55   * </p>
56   */
57  public abstract class TestTableInputFormatScanBase {
58  
59    static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
60    static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
61  
62    static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
63    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
64    static final String KEY_STARTROW = "startRow";
65    static final String KEY_LASTROW = "stpRow";
66  
67    private static HTable table = null;
68  
69    @BeforeClass
70    public static void setUpBeforeClass() throws Exception {
71      // test intermittently fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on.
72      // this turns it off for this test.  TODO: Figure out why scr breaks recovery. 
73      System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
74  
75      // switch TIF to log at DEBUG level
76      TEST_UTIL.enableDebug(TableInputFormat.class);
77      TEST_UTIL.enableDebug(TableInputFormatBase.class);
78      // start mini hbase cluster
79      TEST_UTIL.startMiniCluster(3);
80      // create and fill table
81      table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME), INPUT_FAMILY);
82      TEST_UTIL.createMultiRegions(table, INPUT_FAMILY);
83      TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
84      // start MR cluster
85      TEST_UTIL.startMiniMapReduceCluster();
86    }
87  
88    @AfterClass
89    public static void tearDownAfterClass() throws Exception {
90      TEST_UTIL.shutdownMiniMapReduceCluster();
91      TEST_UTIL.shutdownMiniCluster();
92    }
93  
94    /**
95     * Pass the key and value to reduce.
96     */
97    public static class ScanMapper
98    extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
99  
100     /**
101      * Pass the key and value to reduce.
102      *
103      * @param key  The key, here "aaa", "aab" etc.
104      * @param value  The value is the same as the key.
105      * @param context  The task context.
106      * @throws IOException When reading the rows fails.
107      */
108     @Override
109     public void map(ImmutableBytesWritable key, Result value,
110       Context context)
111     throws IOException, InterruptedException {
112       if (value.size() != 1) {
113         throw new IOException("There should only be one input column");
114       }
115       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
116         cf = value.getMap();
117       if(!cf.containsKey(INPUT_FAMILY)) {
118         throw new IOException("Wrong input columns. Missing: '" +
119           Bytes.toString(INPUT_FAMILY) + "'.");
120       }
121       String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
122       LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
123         ", value -> " + val);
124       context.write(key, key);
125     }
126 
127   }
128 
129   /**
130    * Checks the last and first key seen against the scanner boundaries.
131    */
132   public static class ScanReducer
133   extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
134                   NullWritable, NullWritable> {
135 
136     private String first = null;
137     private String last = null;
138 
139     protected void reduce(ImmutableBytesWritable key,
140         Iterable<ImmutableBytesWritable> values, Context context)
141     throws IOException ,InterruptedException {
142       int count = 0;
143       for (ImmutableBytesWritable value : values) {
144         String val = Bytes.toStringBinary(value.get());
145         LOG.info("reduce: key[" + count + "] -> " +
146           Bytes.toStringBinary(key.get()) + ", value -> " + val);
147         if (first == null) first = val;
148         last = val;
149         count++;
150       }
151     }
152 
153     protected void cleanup(Context context)
154     throws IOException, InterruptedException {
155       Configuration c = context.getConfiguration();
156       String startRow = c.get(KEY_STARTROW);
157       String lastRow = c.get(KEY_LASTROW);
158       LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
159       LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
160       if (startRow != null && startRow.length() > 0) {
161         assertEquals(startRow, first);
162       }
163       if (lastRow != null && lastRow.length() > 0) {
164         assertEquals(lastRow, last);
165       }
166     }
167 
168   }
169 
170   /**
171    * Tests an MR Scan initialized from properties set in the Configuration.
172    * 
173    * @throws IOException
174    * @throws ClassNotFoundException
175    * @throws InterruptedException
176    */
177   protected void testScanFromConfiguration(String start, String stop, String last)
178   throws IOException, InterruptedException, ClassNotFoundException {
179     String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
180       "To" + (stop != null ? stop.toUpperCase() : "Empty");
181     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
182     c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
183     c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
184     c.set(KEY_STARTROW, start != null ? start : "");
185     c.set(KEY_LASTROW, last != null ? last : "");
186 
187     if (start != null) {
188       c.set(TableInputFormat.SCAN_ROW_START, start);
189     }
190 
191     if (stop != null) {
192       c.set(TableInputFormat.SCAN_ROW_STOP, stop);
193     }
194 
195     Job job = new Job(c, jobName);
196     job.setMapperClass(ScanMapper.class);
197     job.setReducerClass(ScanReducer.class);
198     job.setMapOutputKeyClass(ImmutableBytesWritable.class);
199     job.setMapOutputValueClass(ImmutableBytesWritable.class);
200     job.setInputFormatClass(TableInputFormat.class);
201     job.setNumReduceTasks(1);
202     FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
203     TableMapReduceUtil.addDependencyJars(job);
204     assertTrue(job.waitForCompletion(true));
205   }
206 
207   /**
208    * Tests a MR scan using specific start and stop rows.
209    *
210    * @throws IOException
211    * @throws ClassNotFoundException
212    * @throws InterruptedException
213    */
214   protected void testScan(String start, String stop, String last)
215   throws IOException, InterruptedException, ClassNotFoundException {
216     String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
217       "To" + (stop != null ? stop.toUpperCase() : "Empty");
218     LOG.info("Before map/reduce startup - job " + jobName);
219     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
220     Scan scan = new Scan();
221     scan.addFamily(INPUT_FAMILY);
222     if (start != null) {
223       scan.setStartRow(Bytes.toBytes(start));
224     }
225     c.set(KEY_STARTROW, start != null ? start : "");
226     if (stop != null) {
227       scan.setStopRow(Bytes.toBytes(stop));
228     }
229     c.set(KEY_LASTROW, last != null ? last : "");
230     LOG.info("scan before: " + scan);
231     Job job = new Job(c, jobName);
232     TableMapReduceUtil.initTableMapperJob(
233       Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
234       ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
235     job.setReducerClass(ScanReducer.class);
236     job.setNumReduceTasks(1); // one to get final "first" and "last" key
237     FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
238     LOG.info("Started " + job.getJobName());
239     assertTrue(job.waitForCompletion(true));
240     LOG.info("After map/reduce completion - job " + jobName);
241   }
242 
243 }
244