1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertTrue;
23  
24  import java.io.IOException;
25  import java.util.Map;
26  import java.util.NavigableMap;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.HBaseTestingUtility;
33  import org.apache.hadoop.hbase.client.HTable;
34  import org.apache.hadoop.hbase.client.Result;
35  import org.apache.hadoop.hbase.client.Scan;
36  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
37  import org.apache.hadoop.hbase.util.Bytes;
38  import org.apache.hadoop.io.NullWritable;
39  import org.apache.hadoop.mapreduce.Job;
40  import org.apache.hadoop.mapreduce.Reducer;
41  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
42  import org.junit.AfterClass;
43  import org.junit.BeforeClass;
44  
45  /**
46   * <p>
47   * Tests various scan start and stop row scenarios. This is set in a scan and
48   * tested in a MapReduce job to see if that is handed over and done properly
49   * too.
50   * </p>
51   * <p>
52   * This test is broken into two parts in order to side-step the test timeout
53   * period of 900, as documented in HBASE-8326.
54   * </p>
55   */
56  public abstract class TestTableInputFormatScanBase {
57  
58    static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
59    static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
60  
61    static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
62    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
63    static final String KEY_STARTROW = "startRow";
64    static final String KEY_LASTROW = "stpRow";
65  
66    private static HTable table = null;
67  
68    @BeforeClass
69    public static void setUpBeforeClass() throws Exception {
70      // switch TIF to log at DEBUG level
71      TEST_UTIL.enableDebug(TableInputFormat.class);
72      TEST_UTIL.enableDebug(TableInputFormatBase.class);
73      // start mini hbase cluster
74      TEST_UTIL.startMiniCluster(3);
75      // create and fill table
76      table = TEST_UTIL.createTable(TABLE_NAME, INPUT_FAMILY);
77      TEST_UTIL.createMultiRegions(table, INPUT_FAMILY);
78      TEST_UTIL.loadTable(table, INPUT_FAMILY);
79      // start MR cluster
80      TEST_UTIL.startMiniMapReduceCluster();
81    }
82  
83    @AfterClass
84    public static void tearDownAfterClass() throws Exception {
85      TEST_UTIL.shutdownMiniMapReduceCluster();
86      TEST_UTIL.shutdownMiniCluster();
87    }
88  
89    /**
90     * Pass the key and value to reduce.
91     */
92    public static class ScanMapper
93    extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
94  
95      /**
96       * Pass the key and value to reduce.
97       *
98       * @param key  The key, here "aaa", "aab" etc.
99       * @param value  The value is the same as the key.
100      * @param context  The task context.
101      * @throws IOException When reading the rows fails.
102      */
103     @Override
104     public void map(ImmutableBytesWritable key, Result value,
105       Context context)
106     throws IOException, InterruptedException {
107       if (value.size() != 1) {
108         throw new IOException("There should only be one input column");
109       }
110       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
111         cf = value.getMap();
112       if(!cf.containsKey(INPUT_FAMILY)) {
113         throw new IOException("Wrong input columns. Missing: '" +
114           Bytes.toString(INPUT_FAMILY) + "'.");
115       }
116       String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
117       LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
118         ", value -> " + val);
119       context.write(key, key);
120     }
121 
122   }
123 
124   /**
125    * Checks the last and first key seen against the scanner boundaries.
126    */
127   public static class ScanReducer
128   extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
129                   NullWritable, NullWritable> {
130 
131     private String first = null;
132     private String last = null;
133 
134     protected void reduce(ImmutableBytesWritable key,
135         Iterable<ImmutableBytesWritable> values, Context context)
136     throws IOException ,InterruptedException {
137       int count = 0;
138       for (ImmutableBytesWritable value : values) {
139         String val = Bytes.toStringBinary(value.get());
140         LOG.info("reduce: key[" + count + "] -> " +
141           Bytes.toStringBinary(key.get()) + ", value -> " + val);
142         if (first == null) first = val;
143         last = val;
144         count++;
145       }
146     }
147 
148     protected void cleanup(Context context)
149     throws IOException, InterruptedException {
150       Configuration c = context.getConfiguration();
151       String startRow = c.get(KEY_STARTROW);
152       String lastRow = c.get(KEY_LASTROW);
153       LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
154       LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
155       if (startRow != null && startRow.length() > 0) {
156         assertEquals(startRow, first);
157       }
158       if (lastRow != null && lastRow.length() > 0) {
159         assertEquals(lastRow, last);
160       }
161     }
162 
163   }
164 
165   /**
166    * Tests an MR Scan initialized from properties set in the Configuration.
167    * 
168    * @throws IOException
169    * @throws ClassNotFoundException
170    * @throws InterruptedException
171    */
172   protected void testScanFromConfiguration(String start, String stop, String last)
173   throws IOException, InterruptedException, ClassNotFoundException {
174     String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
175       "To" + (stop != null ? stop.toUpperCase() : "Empty");
176     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
177     c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
178     c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
179     c.set(KEY_STARTROW, start != null ? start : "");
180     c.set(KEY_LASTROW, last != null ? last : "");
181 
182     if (start != null) {
183       c.set(TableInputFormat.SCAN_ROW_START, start);
184     }
185 
186     if (stop != null) {
187       c.set(TableInputFormat.SCAN_ROW_STOP, stop);
188     }
189 
190     Job job = new Job(c, jobName);
191     job.setMapperClass(ScanMapper.class);
192     job.setReducerClass(ScanReducer.class);
193     job.setMapOutputKeyClass(ImmutableBytesWritable.class);
194     job.setMapOutputValueClass(ImmutableBytesWritable.class);
195     job.setInputFormatClass(TableInputFormat.class);
196     job.setNumReduceTasks(1);
197     FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
198     TableMapReduceUtil.addDependencyJars(job);
199     assertTrue(job.waitForCompletion(true));
200   }
201 
202   /**
203    * Tests a MR scan using specific start and stop rows.
204    *
205    * @throws IOException
206    * @throws ClassNotFoundException
207    * @throws InterruptedException
208    */
209   protected void testScan(String start, String stop, String last)
210   throws IOException, InterruptedException, ClassNotFoundException {
211     String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
212       "To" + (stop != null ? stop.toUpperCase() : "Empty");
213     LOG.info("Before map/reduce startup - job " + jobName);
214     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
215     Scan scan = new Scan();
216     scan.addFamily(INPUT_FAMILY);
217     if (start != null) {
218       scan.setStartRow(Bytes.toBytes(start));
219     }
220     c.set(KEY_STARTROW, start != null ? start : "");
221     if (stop != null) {
222       scan.setStopRow(Bytes.toBytes(stop));
223     }
224     c.set(KEY_LASTROW, last != null ? last : "");
225     LOG.info("scan before: " + scan);
226     Job job = new Job(c, jobName);
227     TableMapReduceUtil.initTableMapperJob(
228       Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
229       ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
230     job.setReducerClass(ScanReducer.class);
231     job.setNumReduceTasks(1); // one to get final "first" and "last" key
232     FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
233     LOG.info("Started " + job.getJobName());
234     assertTrue(job.waitForCompletion(true));
235     LOG.info("After map/reduce completion - job " + jobName);
236   }
237 
238 }
239