View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapred;
20  
21  import java.io.File;
22  import java.io.IOException;
23  import java.util.Iterator;
24  import java.util.Map;
25  import java.util.NavigableMap;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.fs.FileUtil;
30  import org.apache.hadoop.hbase.*;
31  import org.apache.hadoop.hbase.client.HTable;
32  import org.apache.hadoop.hbase.client.Put;
33  import org.apache.hadoop.hbase.client.Result;
34  import org.apache.hadoop.hbase.client.ResultScanner;
35  import org.apache.hadoop.hbase.client.Scan;
36  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
37  import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
38  import org.apache.hadoop.hbase.util.Bytes;
39  import org.apache.hadoop.mapred.JobClient;
40  import org.apache.hadoop.mapred.JobConf;
41  import org.apache.hadoop.mapred.MapReduceBase;
42  import org.apache.hadoop.mapred.OutputCollector;
43  import org.apache.hadoop.mapred.Reporter;
44  import org.apache.hadoop.mapred.RunningJob;
45  import org.junit.AfterClass;
46  import org.junit.BeforeClass;
47  import org.junit.Test;
48  import org.junit.experimental.categories.Category;
49  
50  import static org.junit.Assert.fail;
51  import static org.junit.Assert.assertTrue;
52  
53  /**
54   * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
55   * on our tables is simple - take every row in the table, reverse the value of
56   * a particular cell, and write it back to the table.
57   */
58  @Category(LargeTests.class)
59  public class TestTableMapReduce {
60    private static final Log LOG =
61      LogFactory.getLog(TestTableMapReduce.class.getName());
62    private static final HBaseTestingUtility UTIL =
63      new HBaseTestingUtility();
64    static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
65    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
66    static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
67  
68    private static final byte [][] columns = new byte [][] {
69      INPUT_FAMILY,
70      OUTPUT_FAMILY
71    };
72  
73    @BeforeClass
74    public static void beforeClass() throws Exception {
75      UTIL.startMiniCluster();
76      HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME, new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
77      UTIL.createMultiRegions(table, INPUT_FAMILY);
78      UTIL.loadTable(table, INPUT_FAMILY);
79      UTIL.startMiniMapReduceCluster();
80    }
81  
82    @AfterClass
83    public static void afterClass() throws Exception {
84      UTIL.shutdownMiniMapReduceCluster();
85      UTIL.shutdownMiniCluster();
86    }
87  
88    /**
89     * Pass the given key and processed record reduce
90     */
91    public static class ProcessContentsMapper
92    extends MapReduceBase
93    implements TableMap<ImmutableBytesWritable, Put> {
94      /**
95       * Pass the key, and reversed value to reduce
96       * @param key
97       * @param value
98       * @param output
99       * @param reporter
100      * @throws IOException
101      */
102     public void map(ImmutableBytesWritable key, Result value,
103       OutputCollector<ImmutableBytesWritable, Put> output,
104       Reporter reporter)
105     throws IOException {
106       if (value.size() != 1) {
107         throw new IOException("There should only be one input column");
108       }
109       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
110         cf = value.getMap();
111       if(!cf.containsKey(INPUT_FAMILY)) {
112         throw new IOException("Wrong input columns. Missing: '" +
113           Bytes.toString(INPUT_FAMILY) + "'.");
114       }
115 
116       // Get the original value and reverse it
117 
118       String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, null));
119       StringBuilder newValue = new StringBuilder(originalValue);
120       newValue.reverse();
121 
122       // Now set the value to be collected
123 
124       Put outval = new Put(key.get());
125       outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
126       output.collect(key, outval);
127     }
128   }
129 
130   /**
131    * Test a map/reduce against a multi-region table
132    * @throws IOException
133    */
134   @Test
135   public void testMultiRegionTable() throws IOException {
136     runTestOnTable(new HTable(UTIL.getConfiguration(), MULTI_REGION_TABLE_NAME));
137   }
138 
139   private void runTestOnTable(HTable table) throws IOException {
140     JobConf jobConf = null;
141     try {
142       LOG.info("Before map/reduce startup");
143       jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
144       jobConf.setJobName("process column contents");
145       jobConf.setNumReduceTasks(1);
146       TableMapReduceUtil.initTableMapJob(Bytes.toString(table.getTableName()),
147         Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
148         ImmutableBytesWritable.class, Put.class, jobConf);
149       TableMapReduceUtil.initTableReduceJob(Bytes.toString(table.getTableName()),
150         IdentityTableReduce.class, jobConf);
151 
152       LOG.info("Started " + Bytes.toString(table.getTableName()));
153       RunningJob job = JobClient.runJob(jobConf);
154       assertTrue(job.isSuccessful());
155       LOG.info("After map/reduce completion");
156 
157       // verify map-reduce results
158       verify(Bytes.toString(table.getTableName()));
159     } finally {
160       if (jobConf != null) {
161         FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
162       }
163     }
164   }
165 
166   private void verify(String tableName) throws IOException {
167     HTable table = new HTable(UTIL.getConfiguration(), tableName);
168     boolean verified = false;
169     long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
170     int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
171     for (int i = 0; i < numRetries; i++) {
172       try {
173         LOG.info("Verification attempt #" + i);
174         verifyAttempt(table);
175         verified = true;
176         break;
177       } catch (NullPointerException e) {
178         // If here, a cell was empty.  Presume its because updates came in
179         // after the scanner had been opened.  Wait a while and retry.
180         LOG.debug("Verification attempt failed: " + e.getMessage());
181       }
182       try {
183         Thread.sleep(pause);
184       } catch (InterruptedException e) {
185         // continue
186       }
187     }
188     assertTrue(verified);
189   }
190 
191   /**
192    * Looks at every value of the mapreduce output and verifies that indeed
193    * the values have been reversed.
194    * @param table Table to scan.
195    * @throws IOException
196    * @throws NullPointerException if we failed to find a cell value
197    */
198   private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
199     Scan scan = new Scan();
200     TableInputFormat.addColumns(scan, columns);
201     ResultScanner scanner = table.getScanner(scan);
202     try {
203       Iterator<Result> itr = scanner.iterator();
204       assertTrue(itr.hasNext());
205       while(itr.hasNext()) {
206         Result r = itr.next();
207         if (LOG.isDebugEnabled()) {
208           if (r.size() > 2 ) {
209             throw new IOException("Too many results, expected 2 got " +
210               r.size());
211           }
212         }
213         byte[] firstValue = null;
214         byte[] secondValue = null;
215         int count = 0;
216          for(KeyValue kv : r.list()) {
217           if (count == 0) {
218             firstValue = kv.getValue();
219           }
220           if (count == 1) {
221             secondValue = kv.getValue();
222           }
223           count++;
224           if (count == 2) {
225             break;
226           }
227         }
228 
229 
230         String first = "";
231         if (firstValue == null) {
232           throw new NullPointerException(Bytes.toString(r.getRow()) +
233             ": first value is null");
234         }
235         first = Bytes.toString(firstValue);
236 
237         String second = "";
238         if (secondValue == null) {
239           throw new NullPointerException(Bytes.toString(r.getRow()) +
240             ": second value is null");
241         }
242         byte[] secondReversed = new byte[secondValue.length];
243         for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
244           secondReversed[i] = secondValue[j];
245         }
246         second = Bytes.toString(secondReversed);
247 
248         if (first.compareTo(second) != 0) {
249           if (LOG.isDebugEnabled()) {
250             LOG.debug("second key is not the reverse of first. row=" +
251                 r.getRow() + ", first value=" + first + ", second value=" +
252                 second);
253           }
254           fail();
255         }
256       }
257     } finally {
258       scanner.close();
259     }
260   }
261 
262 }
263