/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;

/**
 * <p>
 * Tests various scan start and stop row scenarios. The start and stop rows are
 * set on a {@link Scan}, which is then run through a MapReduce job to verify
 * that they are handed over and honored correctly.
 * </p>
 * <p>
 * This test is broken into two parts in order to side-step the test timeout
 * period of 900 seconds, as documented in HBASE-8326.
 * </p>
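 * <p>
 * A concrete subclass typically just invokes the protected helpers below from
 * its own test methods. The sketch is illustrative only; the class name, test
 * name and row keys are hypothetical and follow the aaa..zzz rows loaded in
 * {@link #setUpBeforeClass()}:
 * </p>
 * <pre>
 * public class TestTableInputFormatScanFoo extends TestTableInputFormatScanBase {
 *   &#64;Test
 *   public void testScanEmptyToAPP() throws Exception {
 *     // scan from the start of the table up to (exclusive) row "app";
 *     // "apo" should then be the last row seen by the reducer
 *     testScan(null, "app", "apo");
 *   }
 * }
 * </pre>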
 */
public abstract class TestTableInputFormatScanBase {

  static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
  static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final String KEY_STARTROW = "startRow";
  static final String KEY_LASTROW = "stpRow";

  private static HTable table = null;

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // test intermittently fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on.
    // this turns it off for this test.  TODO: Figure out why scr breaks recovery.
    System.setProperty("hbase.tests.use.shortcircuit.reads", "false");

    // switch TIF to log at DEBUG level
    TEST_UTIL.enableDebug(TableInputFormat.class);
    TEST_UTIL.enableDebug(TableInputFormatBase.class);
    // start mini hbase cluster
    TEST_UTIL.startMiniCluster(3);
    // create and fill table
    table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME), INPUT_FAMILY);
    TEST_UTIL.createMultiRegions(table, INPUT_FAMILY);
    TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
    // start MR cluster
    TEST_UTIL.startMiniMapReduceCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniMapReduceCluster();
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Pass the key and value to reduce.
   */
  public static class ScanMapper
      extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {

    /**
     * Pass the key and value to reduce.
     *
     * @param key  The key, here "aaa", "aab" etc.
     * @param value  The value is the same as the key.
     * @param context  The task context.
     * @throws IOException When reading the rows fails.
     */
    @Override
    public void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      if (value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf = value.getMap();
      if (!cf.containsKey(INPUT_FAMILY)) {
        throw new IOException("Wrong input columns. Missing: '" +
          Bytes.toString(INPUT_FAMILY) + "'.");
      }
      String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
      LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
        ", value -> " + val);
      context.write(key, key);
    }

  }

  /**
   * Checks the last and first key seen against the scanner boundaries.
   */
  public static class ScanReducer
      extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
          NullWritable, NullWritable> {

    private String first = null;
    private String last = null;

    @Override
    protected void reduce(ImmutableBytesWritable key,
        Iterable<ImmutableBytesWritable> values, Context context)
        throws IOException, InterruptedException {
      int count = 0;
      for (ImmutableBytesWritable value : values) {
        String val = Bytes.toStringBinary(value.get());
        LOG.info("reduce: key[" + count + "] -> " +
          Bytes.toStringBinary(key.get()) + ", value -> " + val);
        if (first == null) first = val;
        last = val;
        count++;
      }
    }

    @Override
    protected void cleanup(Context context)
        throws IOException, InterruptedException {
      Configuration c = context.getConfiguration();
      String startRow = c.get(KEY_STARTROW);
      String lastRow = c.get(KEY_LASTROW);
      LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
      LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
      if (startRow != null && startRow.length() > 0) {
        assertEquals(startRow, first);
      }
      if (lastRow != null && lastRow.length() > 0) {
        assertEquals(lastRow, last);
      }
    }

  }

  /**
   * Tests an MR Scan initialized from properties set in the Configuration.
   *
   * @throws IOException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   */
  protected void testScanFromConfiguration(String start, String stop, String last)
      throws IOException, InterruptedException, ClassNotFoundException {
    String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
      "To" + (stop != null ? stop.toUpperCase() : "Empty");
    Configuration c = new Configuration(TEST_UTIL.getConfiguration());
    c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
    c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
    c.set(KEY_STARTROW, start != null ? start : "");
    c.set(KEY_LASTROW, last != null ? last : "");

    if (start != null) {
      c.set(TableInputFormat.SCAN_ROW_START, start);
    }

    if (stop != null) {
      c.set(TableInputFormat.SCAN_ROW_STOP, stop);
    }

    Job job = new Job(c, jobName);
    job.setMapperClass(ScanMapper.class);
    job.setReducerClass(ScanReducer.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(ImmutableBytesWritable.class);
    job.setInputFormatClass(TableInputFormat.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
    TableMapReduceUtil.addDependencyJars(job);
    assertTrue(job.waitForCompletion(true));
  }
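
  // Illustrative only -- a concrete subclass would drive the helper above from its
  // own test method; the row keys below are hypothetical and simply follow the
  // aaa..zzz rows loaded in setUpBeforeClass():
  //
  //   testScanFromConfiguration("bba", "bbd", "bbc");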

  /**
   * Tests an MR scan using specific start and stop rows.
   *
   * @throws IOException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   */
  protected void testScan(String start, String stop, String last)
      throws IOException, InterruptedException, ClassNotFoundException {
    String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
      "To" + (stop != null ? stop.toUpperCase() : "Empty");
    LOG.info("Before map/reduce startup - job " + jobName);
    Configuration c = new Configuration(TEST_UTIL.getConfiguration());
    Scan scan = new Scan();
    scan.addFamily(INPUT_FAMILY);
    if (start != null) {
      scan.setStartRow(Bytes.toBytes(start));
    }
    c.set(KEY_STARTROW, start != null ? start : "");
    if (stop != null) {
      scan.setStopRow(Bytes.toBytes(stop));
    }
    c.set(KEY_LASTROW, last != null ? last : "");
    LOG.info("scan before: " + scan);
    Job job = new Job(c, jobName);
    TableMapReduceUtil.initTableMapperJob(
      Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
    job.setReducerClass(ScanReducer.class);
    job.setNumReduceTasks(1); // one to get final "first" and "last" key
    FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
    LOG.info("Started " + job.getJobName());
    assertTrue(job.waitForCompletion(true));
    LOG.info("After map/reduce completion - job " + jobName);
  }

  /**
   * Tests an MR scan using the data-skew auto-balance feature.
   *
   * @throws IOException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   */
  public void testNumOfSplits(String ratio, int expectedNumOfSplits)
      throws IOException, InterruptedException, ClassNotFoundException {
    String jobName = "TestJobForNumOfSplits";
    LOG.info("Before map/reduce startup - job " + jobName);
    Configuration c = new Configuration(TEST_UTIL.getConfiguration());
    Scan scan = new Scan();
    scan.addFamily(INPUT_FAMILY);
    c.set("hbase.mapreduce.input.autobalance", "true");
    c.set("hbase.mapreduce.input.autobalance.maxskewratio", ratio);
    c.set(KEY_STARTROW, "");
    c.set(KEY_LASTROW, "");
    Job job = new Job(c, jobName);
    TableMapReduceUtil.initTableMapperJob(Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
    TableInputFormat tif = new TableInputFormat();
    tif.setConf(job.getConfiguration());
    Assert.assertEquals(Bytes.toString(TABLE_NAME), Bytes.toString(table.getTableName()));
    List<InputSplit> splits = tif.getSplits(job);
    Assert.assertEquals(expectedNumOfSplits, splits.size());
  }
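
  // Illustrative only -- a subclass would call the helper above with a concrete skew
  // ratio and expected split count, e.g. testNumOfSplits("3", expectedSplits). The
  // expected count depends on the region layout created by createMultiRegions() in
  // setUpBeforeClass(), so no concrete number is suggested here.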

  /**
   * Tests the getSplitKey() method in TableInputFormatBase.
   */
  public void testGetSplitKey(byte[] startKey, byte[] endKey, byte[] splitKey, boolean isText) {
    byte[] result = TableInputFormatBase.getSplitKey(startKey, endKey, isText);
    Assert.assertArrayEquals(splitKey, result);
  }
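
  // Illustrative only -- a subclass would pass a pair of region boundary keys together
  // with the split key it expects back, e.g.
  //   testGetSplitKey(startKey, endKey, expectedSplitKey, isText);
  // where expectedSplitKey must match whatever TableInputFormatBase.getSplitKey()
  // actually returns for those inputs.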
}