/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce;

import com.google.common.collect.Lists;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NavigableMap;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

53  /**
54   * Base set of tests and setup for input formats touching multiple tables.
55   */
56  public abstract class MultiTableInputFormatTestBase {
57    static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
58    public static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
59    static final String TABLE_NAME = "scantest";
60    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
61    static final String KEY_STARTROW = "startRow";
62    static final String KEY_LASTROW = "stpRow";
63  
64    static List<String> TABLES = Lists.newArrayList();
65  
66    static {
67      for (int i = 0; i < 3; i++) {
68        TABLES.add(TABLE_NAME + String.valueOf(i));
69      }
70    }
71  
72    @BeforeClass
73    public static void setUpBeforeClass() throws Exception {
74      // switch TIF to log at DEBUG level
75      TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
76      // start mini hbase cluster
77      TEST_UTIL.startMiniCluster(3);
78      // create and fill table
79      for (String tableName : TABLES) {
80        HTable table =
81            TEST_UTIL.createMultiRegionTable(TableName.valueOf(tableName),
82              INPUT_FAMILY, 4);
83        try {
84          TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
85        } finally {
86          table.close();
87        }
88      }
89      // start MR cluster
90      TEST_UTIL.startMiniMapReduceCluster();
91    }
92  
93    @AfterClass
94    public static void tearDownAfterClass() throws Exception {
95      TEST_UTIL.shutdownMiniMapReduceCluster();
96      TEST_UTIL.shutdownMiniCluster();
97    }
98  
99    @After
100   public void tearDown() throws Exception {
101     Configuration c = TEST_UTIL.getConfiguration();
102     FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
103   }
104 
105   /**
106    * Pass the key and value to reducer.
107    */
108   public static class ScanMapper extends
109       TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
110     /**
111      * Pass the key and value to reduce.
112      *
113      * @param key The key, here "aaa", "aab" etc.
114      * @param value The value is the same as the key.
115      * @param context The task context.
116      * @throws IOException When reading the rows fails.
117      */
118     @Override
119     public void map(ImmutableBytesWritable key, Result value, Context context)
120         throws IOException, InterruptedException {
121       makeAssertions(key, value);
122       context.write(key, key);
123     }
124 
125     public void makeAssertions(ImmutableBytesWritable key, Result value) throws IOException {
126       if (value.size() != 1) {
127         throw new IOException("There should only be one input column");
128       }
129       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
130           value.getMap();
131       if (!cf.containsKey(INPUT_FAMILY)) {
132         throw new IOException("Wrong input columns. Missing: '" +
133             Bytes.toString(INPUT_FAMILY) + "'.");
134       }
135       String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
136       LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
137           ", value -> " + val);
138     }
139   }
140 
141   /**
142    * Checks the last and first keys seen against the scanner boundaries.
143    */
144   public static class ScanReducer
145       extends
146       Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
147           NullWritable, NullWritable> {
148     private String first = null;
149     private String last = null;
150 
151     @Override
152     protected void reduce(ImmutableBytesWritable key,
153         Iterable<ImmutableBytesWritable> values, Context context)
154         throws IOException, InterruptedException {
155       makeAssertions(key, values);
156     }
157 
158     protected void makeAssertions(ImmutableBytesWritable key,
159         Iterable<ImmutableBytesWritable> values) {
160       int count = 0;
161       for (ImmutableBytesWritable value : values) {
162         String val = Bytes.toStringBinary(value.get());
163         LOG.debug("reduce: key[" + count + "] -> " +
164             Bytes.toStringBinary(key.get()) + ", value -> " + val);
165         if (first == null) first = val;
166         last = val;
167         count++;
168       }
169       assertEquals(3, count);
170     }
171 
172     @Override
173     protected void cleanup(Context context) throws IOException,
174         InterruptedException {
175       Configuration c = context.getConfiguration();
176       cleanup(c);
177     }
178 
179     protected void cleanup(Configuration c) {
180       String startRow = c.get(KEY_STARTROW);
181       String lastRow = c.get(KEY_LASTROW);
182       LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
183           startRow + "\"");
184       LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
185           "\"");
186       if (startRow != null && startRow.length() > 0) {
187         assertEquals(startRow, first);
188       }
189       if (lastRow != null && lastRow.length() > 0) {
190         assertEquals(lastRow, last);
191       }
192     }
193   }
194 
195   @Test
196   public void testScanEmptyToEmpty() throws IOException, InterruptedException,
197       ClassNotFoundException {
198     testScan(null, null, null);
199   }
200 
201   @Test
202   public void testScanEmptyToAPP() throws IOException, InterruptedException,
203       ClassNotFoundException {
204     testScan(null, "app", "apo");
205   }
206 
207   @Test
208   public void testScanOBBToOPP() throws IOException, InterruptedException,
209       ClassNotFoundException {
210     testScan("obb", "opp", "opo");
211   }
212 
213   @Test
214   public void testScanYZYToEmpty() throws IOException, InterruptedException,
215       ClassNotFoundException {
216     testScan("yzy", null, "zzz");
217   }
218 
219   /**
220    * Tests a MR scan using specific start and stop rows.
221    *
222    * @throws IOException
223    * @throws ClassNotFoundException
224    * @throws InterruptedException
225    */
226   private void testScan(String start, String stop, String last)
227       throws IOException, InterruptedException, ClassNotFoundException {
228     String jobName =
229         "Scan" + (start != null ? start.toUpperCase() : "Empty") + "To" +
230             (stop != null ? stop.toUpperCase() : "Empty");
231     LOG.info("Before map/reduce startup - job " + jobName);
232     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
233 
234     c.set(KEY_STARTROW, start != null ? start : "");
235     c.set(KEY_LASTROW, last != null ? last : "");
236 
237     List<Scan> scans = new ArrayList<Scan>();
238 
239     for (String tableName : TABLES) {
240       Scan scan = new Scan();
241 
242       scan.addFamily(INPUT_FAMILY);
243       scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
244 
245       if (start != null) {
246         scan.setStartRow(Bytes.toBytes(start));
247       }
248       if (stop != null) {
249         scan.setStopRow(Bytes.toBytes(stop));
250       }
251 
252       scans.add(scan);
253 
254       LOG.info("scan before: " + scan);
255     }
256 
257     runJob(jobName, c, scans);
258   }
259 
260   protected void runJob(String jobName, Configuration c, List<Scan> scans)
261       throws IOException, InterruptedException, ClassNotFoundException {
262     Job job = new Job(c, jobName);
263 
264     initJob(scans, job);
265     job.setReducerClass(ScanReducer.class);
266     job.setNumReduceTasks(1); // one to get final "first" and "last" key
267     FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
268     LOG.info("Started " + job.getJobName());
269     job.waitForCompletion(true);
270     assertTrue(job.isSuccessful());
271     LOG.info("After map/reduce completion - job " + jobName);
272   }
273 
274   protected abstract void initJob(List<Scan> scans, Job job) throws IOException;
275 
276 
277 }