/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import static org.junit.Assert.fail;
import static org.junit.Assert.assertTrue;

/**
 * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
 * on our tables is simple: take every row in the table, reverse the value of
 * a particular cell, and write it back to the table.
 */
@Category(LargeTests.class)
public class TestTableMapReduce {
  private static final Log LOG =
    LogFactory.getLog(TestTableMapReduce.class.getName());
  private static final HBaseTestingUtility UTIL =
    new HBaseTestingUtility();
  static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");

  private static final byte[][] columns = new byte[][] {
    INPUT_FAMILY,
    OUTPUT_FAMILY
  };

  @BeforeClass
  public static void beforeClass() throws Exception {
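    // Spin up a mini HBase cluster with a table spanning several regions,
    // pre-load the input family with data, then start a mini MapReduce
    // cluster to run the job against.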
    UTIL.startMiniCluster();
    HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME,
      new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
    UTIL.createMultiRegions(table, INPUT_FAMILY);
    UTIL.loadTable(table, INPUT_FAMILY);
    UTIL.startMiniMapReduceCluster();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    UTIL.shutdownMiniMapReduceCluster();
    UTIL.shutdownMiniCluster();
  }

  /**
   * Pass the given key and processed record to reduce.
   */
  public static class ProcessContentsMapper
  extends MapReduceBase
  implements TableMap<ImmutableBytesWritable, Put> {
    /**
     * Pass the key, and reversed value to reduce.
     * @param key row key
     * @param value result for the row; expected to hold a single input cell
     * @param output collector for the emitted puts
     * @param reporter progress reporter
     * @throws IOException if the input row is malformed
     */
    public void map(ImmutableBytesWritable key, Result value,
      OutputCollector<ImmutableBytesWritable, Put> output,
      Reporter reporter)
    throws IOException {
      if (value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
        cf = value.getMap();
      if (!cf.containsKey(INPUT_FAMILY)) {
        throw new IOException("Wrong input columns. Missing: '" +
          Bytes.toString(INPUT_FAMILY) + "'.");
      }

      // Get the original value and reverse it
      String originalValue = new String(value.getValue(INPUT_FAMILY, null),
        HConstants.UTF8_ENCODING);
      StringBuilder newValue = new StringBuilder(originalValue);
      newValue.reverse();

      // Now set the value to be collected
      Put outval = new Put(key.get());
      outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
      output.collect(key, outval);
    }
  }

  /**
   * Test a map/reduce against a multi-region table
   * @throws IOException
   */
  @Test
  public void testMultiRegionTable() throws IOException {
    runTestOnTable(new HTable(UTIL.getConfiguration(), MULTI_REGION_TABLE_NAME));
  }

  private void runTestOnTable(HTable table) throws IOException {
    JobConf jobConf = null;
    try {
      LOG.info("Before map/reduce startup");
      jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
      jobConf.setJobName("process column contents");
      jobConf.setNumReduceTasks(1);
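      // Wire the table in as both source and sink: initTableMapJob configures
      // the job to scan the input family of the table, and initTableReduceJob
      // directs the identity reducer's output back into the same table.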
      TableMapReduceUtil.initTableMapJob(Bytes.toString(table.getTableName()),
        Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
        ImmutableBytesWritable.class, Put.class, jobConf);
      TableMapReduceUtil.initTableReduceJob(Bytes.toString(table.getTableName()),
        IdentityTableReduce.class, jobConf);

      LOG.info("Started " + Bytes.toString(table.getTableName()));
      RunningJob job = JobClient.runJob(jobConf);
      assertTrue(job.isSuccessful());
      LOG.info("After map/reduce completion");

      // verify map-reduce results
      verify(Bytes.toString(table.getTableName()));
    } finally {
      if (jobConf != null) {
        FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
      }
    }
  }

  private void verify(String tableName) throws IOException {
    HTable table = new HTable(UTIL.getConfiguration(), tableName);
    boolean verified = false;
    long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
    int numRetries = UTIL.getConfiguration().getInt("hbase.client.retries.number", 5);
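    // The job's writes may not be visible to a scanner immediately, so retry
    // verification a few times, reusing the client's pause/retry settings.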
    for (int i = 0; i < numRetries; i++) {
      try {
        LOG.info("Verification attempt #" + i);
        verifyAttempt(table);
        verified = true;
        break;
      } catch (NullPointerException e) {
        // If here, a cell was empty. Presume it's because updates came in
        // after the scanner had been opened. Wait a while and retry.
        LOG.debug("Verification attempt failed: " + e.getMessage());
      }
      try {
        Thread.sleep(pause);
      } catch (InterruptedException e) {
        // continue
      }
    }
    assertTrue(verified);
  }

  /**
   * Looks at every value of the map/reduce output and verifies that the
   * values have indeed been reversed.
   * @param table Table to scan.
   * @throws IOException
   * @throws NullPointerException if we failed to find a cell value
   */
  private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
    Scan scan = new Scan();
    TableInputFormat.addColumns(scan, columns);
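    // The scan is restricted to the input and output families, so a fully
    // processed row should hold exactly two cells: the original value and
    // its reversal.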
    ResultScanner scanner = table.getScanner(scan);
    try {
      Iterator<Result> itr = scanner.iterator();
      assertTrue(itr.hasNext());
      while (itr.hasNext()) {
        Result r = itr.next();
        if (r.size() > 2) {
          throw new IOException("Too many results, expected 2, got " +
            r.size());
        }
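        // Cells in a Result come back sorted by column family; "contents"
        // sorts before "text", so the input value is expected first.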
        byte[] firstValue = null;
        byte[] secondValue = null;
        int count = 0;
        for (KeyValue kv : r.list()) {
          if (count == 0) {
            firstValue = kv.getValue();
          }
          if (count == 1) {
            secondValue = kv.getValue();
          }
          count++;
          if (count == 2) {
            break;
          }
        }

        if (firstValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) +
            ": first value is null");
        }
        String first = new String(firstValue, HConstants.UTF8_ENCODING);

        if (secondValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) +
            ": second value is null");
        }
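        // The job stored the reversed input in OUTPUT_FAMILY, so reversing
        // the second value again should reproduce the first.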
        byte[] secondReversed = new byte[secondValue.length];
        for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
          secondReversed[i] = secondValue[j];
        }
        String second = new String(secondReversed, HConstants.UTF8_ENCODING);

        if (first.compareTo(second) != 0) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("second value is not the reverse of first. row=" +
                Bytes.toString(r.getRow()) + ", first value=" + first +
                ", second value=" + second);
          }
          fail();
        }
      }
    } finally {
      scanner.close();
    }
  }

  @Rule
  public ResourceCheckerJUnitRule cu =
    new ResourceCheckerJUnitRule();
}