1   /**
2    * Copyright 2007 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapreduce;
21  
22  import java.io.File;
23  import java.io.IOException;
24  import java.util.Iterator;
25  import java.util.Map;
26  import java.util.NavigableMap;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileUtil;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.*;
34  import org.apache.hadoop.hbase.client.HTable;
35  import org.apache.hadoop.hbase.client.Put;
36  import org.apache.hadoop.hbase.client.Result;
37  import org.apache.hadoop.hbase.client.ResultScanner;
38  import org.apache.hadoop.hbase.client.Scan;
39  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
40  import org.apache.hadoop.hbase.util.Bytes;
41  import org.apache.hadoop.mapreduce.Job;
42  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
43  import org.junit.AfterClass;
44  import org.junit.BeforeClass;
45  import org.junit.Test;
46  import org.junit.experimental.categories.Category;
47  
48  import static org.junit.Assert.fail;
49  import static org.junit.Assert.assertTrue;
50  import static org.junit.Assert.assertFalse;
51  
52  /**
53   * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
54   * on our tables is simple - take every row in the table, reverse the value of
55   * a particular cell, and write it back to the table.
56   */
57  @Category(LargeTests.class)
58  public class TestTableMapReduce {
59    private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
60    private static final HBaseTestingUtility UTIL =
61      new HBaseTestingUtility();
62    static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
63    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
64    static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
65  
66    @BeforeClass
67    public static void beforeClass() throws Exception {
68      UTIL.startMiniCluster();
69      HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME, new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
70      UTIL.createMultiRegions(table, INPUT_FAMILY);
71      UTIL.loadTable(table, INPUT_FAMILY);
72      UTIL.startMiniMapReduceCluster();
73    }
74  
75    @AfterClass
76    public static void afterClass() throws Exception {
77      UTIL.shutdownMiniMapReduceCluster();
78      UTIL.shutdownMiniCluster();
79    }
80  
81    /**
82     * Pass the given key and processed record reduce
83     */
84    public static class ProcessContentsMapper
85    extends TableMapper<ImmutableBytesWritable, Put> {
86  
87      /**
88       * Pass the key, and reversed value to reduce
89       *
90       * @param key
91       * @param value
92       * @param context
93       * @throws IOException
94       */
95      public void map(ImmutableBytesWritable key, Result value,
96        Context context)
97      throws IOException, InterruptedException {
98        if (value.size() != 1) {
99          throw new IOException("There should only be one input column");
100       }
101       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
102         cf = value.getMap();
103       if(!cf.containsKey(INPUT_FAMILY)) {
104         throw new IOException("Wrong input columns. Missing: '" +
105           Bytes.toString(INPUT_FAMILY) + "'.");
106       }
107 
108       // Get the original value and reverse it
109       String originalValue = new String(value.getValue(INPUT_FAMILY, null),
110         HConstants.UTF8_ENCODING);
111       StringBuilder newValue = new StringBuilder(originalValue);
112       newValue.reverse();
113       // Now set the value to be collected
114       Put outval = new Put(key.get());
115       outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
116       context.write(key, outval);
117     }
118   }
119 
120   /**
121    * Test a map/reduce against a multi-region table
122    * @throws IOException
123    * @throws ClassNotFoundException
124    * @throws InterruptedException
125    */
126   @Test
127   public void testMultiRegionTable()
128   throws IOException, InterruptedException, ClassNotFoundException {
129     runTestOnTable(new HTable(new Configuration(UTIL.getConfiguration()),
130       MULTI_REGION_TABLE_NAME));
131   }
132 
133   private void runTestOnTable(HTable table)
134   throws IOException, InterruptedException, ClassNotFoundException {
135     Job job = null;
136     try {
137       LOG.info("Before map/reduce startup");
138       job = new Job(table.getConfiguration(), "process column contents");
139       job.setNumReduceTasks(1);
140       Scan scan = new Scan();
141       scan.addFamily(INPUT_FAMILY);
142       TableMapReduceUtil.initTableMapperJob(
143         Bytes.toString(table.getTableName()), scan,
144         ProcessContentsMapper.class, ImmutableBytesWritable.class,
145         Put.class, job);
146       TableMapReduceUtil.initTableReducerJob(
147         Bytes.toString(table.getTableName()),
148         IdentityTableReducer.class, job);
149       FileOutputFormat.setOutputPath(job, new Path("test"));
150       LOG.info("Started " + Bytes.toString(table.getTableName()));
151       assertTrue(job.waitForCompletion(true));
152       LOG.info("After map/reduce completion");
153 
154       // verify map-reduce results
155       verify(Bytes.toString(table.getTableName()));
156     } finally {
157       table.close();
158       if (job != null) {
159         FileUtil.fullyDelete(
160           new File(job.getConfiguration().get("hadoop.tmp.dir")));
161       }
162     }
163   }
164 
165   private void verify(String tableName) throws IOException {
166     HTable table = new HTable(new Configuration(UTIL.getConfiguration()), tableName);
167     boolean verified = false;
168     long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
169     int numRetries = UTIL.getConfiguration().getInt("hbase.client.retries.number", 5);
170     for (int i = 0; i < numRetries; i++) {
171       try {
172         LOG.info("Verification attempt #" + i);
173         verifyAttempt(table);
174         verified = true;
175         break;
176       } catch (NullPointerException e) {
177         // If here, a cell was empty.  Presume its because updates came in
178         // after the scanner had been opened.  Wait a while and retry.
179         LOG.debug("Verification attempt failed: " + e.getMessage());
180       }
181       try {
182         Thread.sleep(pause);
183       } catch (InterruptedException e) {
184         // continue
185       }
186     }
187     assertTrue(verified);
188     table.close();
189   }
190 
191   /**
192    * Looks at every value of the mapreduce output and verifies that indeed
193    * the values have been reversed.
194    *
195    * @param table Table to scan.
196    * @throws IOException
197    * @throws NullPointerException if we failed to find a cell value
198    */
199   private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
200     Scan scan = new Scan();
201     scan.addFamily(INPUT_FAMILY);
202     scan.addFamily(OUTPUT_FAMILY);
203     ResultScanner scanner = table.getScanner(scan);
204     try {
205       Iterator<Result> itr = scanner.iterator();
206       assertTrue(itr.hasNext());
207       while(itr.hasNext()) {
208         Result r = itr.next();
209         if (LOG.isDebugEnabled()) {
210           if (r.size() > 2 ) {
211             throw new IOException("Too many results, expected 2 got " +
212               r.size());
213           }
214         }
215         byte[] firstValue = null;
216         byte[] secondValue = null;
217         int count = 0;
218         for(KeyValue kv : r.list()) {
219           if (count == 0) {
220             firstValue = kv.getValue();
221           }
222           if (count == 1) {
223             secondValue = kv.getValue();
224           }
225           count++;
226           if (count == 2) {
227             break;
228           }
229         }
230 
231         String first = "";
232         if (firstValue == null) {
233           throw new NullPointerException(Bytes.toString(r.getRow()) +
234             ": first value is null");
235         }
236         first = new String(firstValue, HConstants.UTF8_ENCODING);
237 
238         String second = "";
239         if (secondValue == null) {
240           throw new NullPointerException(Bytes.toString(r.getRow()) +
241             ": second value is null");
242         }
243         byte[] secondReversed = new byte[secondValue.length];
244         for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
245           secondReversed[i] = secondValue[j];
246         }
247         second = new String(secondReversed, HConstants.UTF8_ENCODING);
248 
249         if (first.compareTo(second) != 0) {
250           if (LOG.isDebugEnabled()) {
251             LOG.debug("second key is not the reverse of first. row=" +
252                 Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
253                 ", second value=" + second);
254           }
255           fail();
256         }
257       }
258     } finally {
259       scanner.close();
260     }
261   }
262 
263   /**
264    * Test that we add tmpjars correctly including the ZK jar.
265    */
266   public void testAddDependencyJars() throws Exception {
267     Job job = new Job();
268     TableMapReduceUtil.addDependencyJars(job);
269     String tmpjars = job.getConfiguration().get("tmpjars");
270 
271     System.err.println("tmpjars: " + tmpjars);
272     assertTrue(tmpjars.contains("zookeeper"));
273     assertFalse(tmpjars.contains("guava"));
274 
275     System.err.println("appending guava jar");
276     TableMapReduceUtil.addDependencyJars(job.getConfiguration(), 
277         com.google.common.base.Function.class);
278     tmpjars = job.getConfiguration().get("tmpjars");
279     assertTrue(tmpjars.contains("guava"));
280   }
281 
282   @org.junit.Rule
283   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
284     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
285 }
286