/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

/**
 * Test a map/reduce job over HBase tables via {@link MultithreadedTableMapper}.
 * The job under test is simple: take every row in the table, reverse the value
 * of a particular cell, and write it back to the table.
 */
@Category(LargeTests.class)
public class TestMultithreadedTableMapper {
  private static final Log LOG = LogFactory.getLog(TestMultithreadedTableMapper.class);
  private static final HBaseTestingUtility UTIL =
      new HBaseTestingUtility();
  static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
  static final int NUMBER_OF_THREADS = 10;

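  // The fixture below pre-splits the test table into multiple regions so the
  // job gets several input splits (one map task per region). Within each map
  // task, MultithreadedTableMapper fans records out to NUMBER_OF_THREADS
  // concurrent copies of the delegate mapper.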
  @BeforeClass
  public static void beforeClass() throws Exception {
    UTIL.startMiniCluster();
    HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME,
        new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
    UTIL.createMultiRegions(table, INPUT_FAMILY);
    UTIL.loadTable(table, INPUT_FAMILY);
    UTIL.startMiniMapReduceCluster();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    UTIL.shutdownMiniMapReduceCluster();
    UTIL.shutdownMiniCluster();
  }

  /**
   * Pass the given key and the processed record on to reduce.
   */
  public static class ProcessContentsMapper
  extends TableMapper<ImmutableBytesWritable, Put> {

    /**
     * Pass the key, and the reversed value, to reduce.
     *
     * @param key
     * @param value
     * @param context
     * @throws IOException
     */
    @Override
    public void map(ImmutableBytesWritable key, Result value,
        Context context)
            throws IOException, InterruptedException {
      if (value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
          value.getMap();
      if (!cf.containsKey(INPUT_FAMILY)) {
        throw new IOException("Wrong input columns. Missing: '" +
            Bytes.toString(INPUT_FAMILY) + "'.");
      }
      // Get the original value and reverse it
      String originalValue = new String(value.getValue(INPUT_FAMILY, null),
          HConstants.UTF8_ENCODING);
      StringBuilder newValue = new StringBuilder(originalValue);
      newValue.reverse();
      // Now set the value to be collected
      Put outval = new Put(key.get());
      outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
      context.write(key, outval);
    }
  }
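
  // Example of the transform under test: a row whose 'contents' cell holds
  // "abc" yields a Put writing "cba" into the same row's 'text' family;
  // IdentityTableReducer then applies that Put back to the table.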

  /**
   * Test MultithreadedTableMapper map/reduce against a multi-region table
   * @throws IOException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   */
  @Test
  public void testMultithreadedTableMapper()
      throws IOException, InterruptedException, ClassNotFoundException {
    runTestOnTable(new HTable(new Configuration(UTIL.getConfiguration()),
        MULTI_REGION_TABLE_NAME));
  }

  private void runTestOnTable(HTable table)
      throws IOException, InterruptedException, ClassNotFoundException {
    Job job = null;
    try {
      LOG.info("Before map/reduce startup");
      job = new Job(table.getConfiguration(), "process column contents");
      job.setNumReduceTasks(1);
      Scan scan = new Scan();
      scan.addFamily(INPUT_FAMILY);
      TableMapReduceUtil.initTableMapperJob(
          Bytes.toString(table.getTableName()), scan,
          MultithreadedTableMapper.class, ImmutableBytesWritable.class,
          Put.class, job);
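      // MultithreadedTableMapper is the mapper class the job actually runs;
      // the delegate ProcessContentsMapper and the thread count are recorded
      // in the job configuration for the wrapper to read back at runtime.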
      MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
      MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);
      TableMapReduceUtil.initTableReducerJob(
          Bytes.toString(table.getTableName()),
          IdentityTableReducer.class, job);
      FileOutputFormat.setOutputPath(job, new Path("test"));
      LOG.info("Started " + Bytes.toString(table.getTableName()));
      assertTrue(job.waitForCompletion(true));
      LOG.info("After map/reduce completion");
      // verify map-reduce results
      verify(Bytes.toString(table.getTableName()));
    } finally {
      table.close();
      if (job != null) {
        FileUtil.fullyDelete(
            new File(job.getConfiguration().get("hadoop.tmp.dir")));
      }
    }
  }

  private void verify(String tableName) throws IOException {
    HTable table = new HTable(new Configuration(UTIL.getConfiguration()), tableName);
    boolean verified = false;
    long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
    int numRetries = UTIL.getConfiguration().getInt("hbase.client.retries.number", 5);
    for (int i = 0; i < numRetries; i++) {
      try {
        LOG.info("Verification attempt #" + i);
        verifyAttempt(table);
        verified = true;
        break;
      } catch (NullPointerException e) {
        // If here, a cell was empty. Presume it's because updates came in
        // after the scanner had been opened. Wait a while and retry.
        LOG.debug("Verification attempt failed: " + e.getMessage());
      }
      try {
        Thread.sleep(pause);
      } catch (InterruptedException e) {
        // continue
      }
    }
    assertTrue(verified);
    table.close();
  }

  /**
   * Looks at every value of the map/reduce output and verifies that indeed
   * the values have been reversed.
   *
   * @param table Table to scan.
   * @throws IOException
   * @throws NullPointerException if we failed to find a cell value
   */
  private void verifyAttempt(final HTable table)
      throws IOException, NullPointerException {
    Scan scan = new Scan();
    scan.addFamily(INPUT_FAMILY);
    scan.addFamily(OUTPUT_FAMILY);
    ResultScanner scanner = table.getScanner(scan);
    try {
      Iterator<Result> itr = scanner.iterator();
      assertTrue(itr.hasNext());
      while (itr.hasNext()) {
        Result r = itr.next();
        // Sanity-check the result size unconditionally (the original gated
        // this on LOG.isDebugEnabled(), which silently skipped the check).
        if (r.size() > 2) {
          throw new IOException("Too many results, expected 2 got " +
              r.size());
        }
        byte[] firstValue = null;
        byte[] secondValue = null;
        int count = 0;
        for (KeyValue kv : r.list()) {
          if (count == 0) {
            firstValue = kv.getValue();
          } else if (count == 1) {
            secondValue = kv.getValue();
          } else if (count == 2) {
            break;
          }
          count++;
        }
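        // Cells in a Result are sorted, and 'contents' sorts before 'text',
        // so firstValue holds the original cell and secondValue holds the
        // job's reversed copy; reversing secondValue should give back first.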
        if (firstValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) +
              ": first value is null");
        }
        String first = new String(firstValue, HConstants.UTF8_ENCODING);
        if (secondValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) +
              ": second value is null");
        }
        byte[] secondReversed = new byte[secondValue.length];
        for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
          secondReversed[i] = secondValue[j];
        }
        String second = new String(secondReversed, HConstants.UTF8_ENCODING);
        if (first.compareTo(second) != 0) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("second value is not the reverse of first. row=" +
                Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
                ", second value=" + second);
          }
          fail();
        }
      }
    } finally {
      scanner.close();
    }
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
      new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}