View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.File;
22  import java.io.IOException;
23  import java.util.Iterator;
24  import java.util.Map;
25  import java.util.NavigableMap;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileUtil;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.HBaseTestingUtility;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.LargeTests;
36  import org.apache.hadoop.hbase.client.HTable;
37  import org.apache.hadoop.hbase.client.Put;
38  import org.apache.hadoop.hbase.client.Result;
39  import org.apache.hadoop.hbase.client.ResultScanner;
40  import org.apache.hadoop.hbase.client.Scan;
41  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
42  import org.apache.hadoop.hbase.util.Bytes;
43  import org.apache.hadoop.mapreduce.Job;
44  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
45  import org.junit.AfterClass;
46  import org.junit.BeforeClass;
47  import org.junit.Test;
48  import org.junit.experimental.categories.Category;
49  
50  import static org.junit.Assert.fail;
51  import static org.junit.Assert.assertTrue;
52  import static org.junit.Assert.assertFalse;
53  
54  /**
55   * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
56   * on our tables is simple - take every row in the table, reverse the value of
57   * a particular cell, and write it back to the table.
58   */
59  @Category(LargeTests.class)
60  public class TestTableMapReduce {
61    private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
62    private static final HBaseTestingUtility UTIL =
63      new HBaseTestingUtility();
64    static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
65    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
66    static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
67  
68    @BeforeClass
69    public static void beforeClass() throws Exception {
70      UTIL.startMiniCluster();
71      HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME, new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
72      UTIL.createMultiRegions(table, INPUT_FAMILY);
73      UTIL.loadTable(table, INPUT_FAMILY);
74      UTIL.startMiniMapReduceCluster();
75    }
76  
77    @AfterClass
78    public static void afterClass() throws Exception {
79      UTIL.shutdownMiniMapReduceCluster();
80      UTIL.shutdownMiniCluster();
81    }
82  
83    /**
84     * Pass the given key and processed record reduce
85     */
86    public static class ProcessContentsMapper
87    extends TableMapper<ImmutableBytesWritable, Put> {
88  
89      /**
90       * Pass the key, and reversed value to reduce
91       *
92       * @param key
93       * @param value
94       * @param context
95       * @throws IOException
96       */
97      public void map(ImmutableBytesWritable key, Result value,
98        Context context)
99      throws IOException, InterruptedException {
100       if (value.size() != 1) {
101         throw new IOException("There should only be one input column");
102       }
103       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
104         cf = value.getMap();
105       if(!cf.containsKey(INPUT_FAMILY)) {
106         throw new IOException("Wrong input columns. Missing: '" +
107           Bytes.toString(INPUT_FAMILY) + "'.");
108       }
109 
110       // Get the original value and reverse it
111       String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, null));
112       StringBuilder newValue = new StringBuilder(originalValue);
113       newValue.reverse();
114       // Now set the value to be collected
115       Put outval = new Put(key.get());
116       outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
117       context.write(key, outval);
118     }
119   }
120 
121   /**
122    * Test a map/reduce against a multi-region table
123    * @throws IOException
124    * @throws ClassNotFoundException
125    * @throws InterruptedException
126    */
127   @Test
128   public void testMultiRegionTable()
129   throws IOException, InterruptedException, ClassNotFoundException {
130     runTestOnTable(new HTable(new Configuration(UTIL.getConfiguration()),
131       MULTI_REGION_TABLE_NAME));
132   }
133 
134   @Test
135   public void testCombiner()
136       throws IOException, InterruptedException, ClassNotFoundException {
137     Configuration conf = new Configuration(UTIL.getConfiguration());
138     // force use of combiner for testing purposes
139     conf.setInt("min.num.spills.for.combine", 1);
140     runTestOnTable(new HTable(conf, MULTI_REGION_TABLE_NAME));
141   }
142 
143   private void runTestOnTable(HTable table)
144   throws IOException, InterruptedException, ClassNotFoundException {
145     Job job = null;
146     try {
147       LOG.info("Before map/reduce startup");
148       job = new Job(table.getConfiguration(), "process column contents");
149       job.setNumReduceTasks(1);
150       Scan scan = new Scan();
151       scan.addFamily(INPUT_FAMILY);
152       TableMapReduceUtil.initTableMapperJob(
153         Bytes.toString(table.getTableName()), scan,
154         ProcessContentsMapper.class, ImmutableBytesWritable.class,
155         Put.class, job);
156       TableMapReduceUtil.initTableReducerJob(
157         Bytes.toString(table.getTableName()),
158         IdentityTableReducer.class, job);
159       FileOutputFormat.setOutputPath(job, new Path("test"));
160       LOG.info("Started " + Bytes.toString(table.getTableName()));
161       assertTrue(job.waitForCompletion(true));
162       LOG.info("After map/reduce completion");
163 
164       // verify map-reduce results
165       verify(Bytes.toString(table.getTableName()));
166     } finally {
167       table.close();
168       if (job != null) {
169         FileUtil.fullyDelete(
170           new File(job.getConfiguration().get("hadoop.tmp.dir")));
171       }
172     }
173   }
174 
175   private void verify(String tableName) throws IOException {
176     HTable table = new HTable(new Configuration(UTIL.getConfiguration()), tableName);
177     boolean verified = false;
178     long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
179     int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
180     for (int i = 0; i < numRetries; i++) {
181       try {
182         LOG.info("Verification attempt #" + i);
183         verifyAttempt(table);
184         verified = true;
185         break;
186       } catch (NullPointerException e) {
187         // If here, a cell was empty.  Presume its because updates came in
188         // after the scanner had been opened.  Wait a while and retry.
189         LOG.debug("Verification attempt failed: " + e.getMessage());
190       }
191       try {
192         Thread.sleep(pause);
193       } catch (InterruptedException e) {
194         // continue
195       }
196     }
197     assertTrue(verified);
198     table.close();
199   }
200 
201   /**
202    * Looks at every value of the mapreduce output and verifies that indeed
203    * the values have been reversed.
204    *
205    * @param table Table to scan.
206    * @throws IOException
207    * @throws NullPointerException if we failed to find a cell value
208    */
209   private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
210     Scan scan = new Scan();
211     scan.addFamily(INPUT_FAMILY);
212     scan.addFamily(OUTPUT_FAMILY);
213     ResultScanner scanner = table.getScanner(scan);
214     try {
215       Iterator<Result> itr = scanner.iterator();
216       assertTrue(itr.hasNext());
217       while(itr.hasNext()) {
218         Result r = itr.next();
219         if (LOG.isDebugEnabled()) {
220           if (r.size() > 2 ) {
221             throw new IOException("Too many results, expected 2 got " +
222               r.size());
223           }
224         }
225         byte[] firstValue = null;
226         byte[] secondValue = null;
227         int count = 0;
228         for(KeyValue kv : r.list()) {
229           if (count == 0) {
230             firstValue = kv.getValue();
231           }
232           if (count == 1) {
233             secondValue = kv.getValue();
234           }
235           count++;
236           if (count == 2) {
237             break;
238           }
239         }
240 
241         String first = "";
242         if (firstValue == null) {
243           throw new NullPointerException(Bytes.toString(r.getRow()) +
244             ": first value is null");
245         }
246         first = Bytes.toString(firstValue);
247 
248         String second = "";
249         if (secondValue == null) {
250           throw new NullPointerException(Bytes.toString(r.getRow()) +
251             ": second value is null");
252         }
253         byte[] secondReversed = new byte[secondValue.length];
254         for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
255           secondReversed[i] = secondValue[j];
256         }
257         second = Bytes.toString(secondReversed);
258 
259         if (first.compareTo(second) != 0) {
260           if (LOG.isDebugEnabled()) {
261             LOG.debug("second key is not the reverse of first. row=" +
262                 Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
263                 ", second value=" + second);
264           }
265           fail();
266         }
267       }
268     } finally {
269       scanner.close();
270     }
271   }
272 
273   /**
274    * Test that we add tmpjars correctly including the ZK jar.
275    */
276   public void testAddDependencyJars() throws Exception {
277     Job job = new Job();
278     TableMapReduceUtil.addDependencyJars(job);
279     String tmpjars = job.getConfiguration().get("tmpjars");
280 
281     System.err.println("tmpjars: " + tmpjars);
282     assertTrue(tmpjars.contains("zookeeper"));
283     assertFalse(tmpjars.contains("guava"));
284 
285     System.err.println("appending guava jar");
286     TableMapReduceUtil.addDependencyJars(job.getConfiguration(), 
287         com.google.common.base.Function.class);
288     tmpjars = job.getConfiguration().get("tmpjars");
289     assertTrue(tmpjars.contains("guava"));
290   }
291 
292 }
293