1   /**
2    * Copyright 2007 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapreduce;
21  
22  import java.io.File;
23  import java.io.IOException;
24  import java.util.Map;
25  import java.util.NavigableMap;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.fs.FileUtil;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.HColumnDescriptor;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HTableDescriptor;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.MultiRegionTable;
36  import org.apache.hadoop.hbase.client.HTable;
37  import org.apache.hadoop.hbase.client.Put;
38  import org.apache.hadoop.hbase.client.Result;
39  import org.apache.hadoop.hbase.client.ResultScanner;
40  import org.apache.hadoop.hbase.client.Scan;
41  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
42  import org.apache.hadoop.hbase.util.Bytes;
43  import org.apache.hadoop.mapred.MiniMRCluster;
44  import org.apache.hadoop.mapreduce.Job;
45  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
46  
47  /**
48   * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
49   * on our tables is simple - take every row in the table, reverse the value of
50   * a particular cell, and write it back to the table.
51   */
52  public class TestTableMapReduce extends MultiRegionTable {
53  
54    private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
55  
56    static final String MULTI_REGION_TABLE_NAME = "mrtest";
57    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
58    static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
59  
60    /** constructor */
61    public TestTableMapReduce() {
62      super(Bytes.toString(INPUT_FAMILY));
63      desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
64      desc.addFamily(new HColumnDescriptor(INPUT_FAMILY));
65      desc.addFamily(new HColumnDescriptor(OUTPUT_FAMILY));
66    }
67  
68    /**
69     * Pass the given key and processed record reduce
70     */
71    public static class ProcessContentsMapper
72    extends TableMapper<ImmutableBytesWritable, Put> {
73  
74      /**
75       * Pass the key, and reversed value to reduce
76       *
77       * @param key
78       * @param value
79       * @param context
80       * @throws IOException
81       */
82      public void map(ImmutableBytesWritable key, Result value,
83        Context context)
84      throws IOException, InterruptedException {
85        if (value.size() != 1) {
86          throw new IOException("There should only be one input column");
87        }
88        Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
89          cf = value.getMap();
90        if(!cf.containsKey(INPUT_FAMILY)) {
91          throw new IOException("Wrong input columns. Missing: '" +
92            Bytes.toString(INPUT_FAMILY) + "'.");
93        }
94  
95        // Get the original value and reverse it
96        String originalValue = new String(value.getValue(INPUT_FAMILY, null),
97          HConstants.UTF8_ENCODING);
98        StringBuilder newValue = new StringBuilder(originalValue);
99        newValue.reverse();
100       // Now set the value to be collected
101       Put outval = new Put(key.get());
102       outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
103       context.write(key, outval);
104     }
105   }
106 
107   /**
108    * Test a map/reduce against a multi-region table
109    * @throws IOException
110    * @throws ClassNotFoundException
111    * @throws InterruptedException
112    */
113   public void testMultiRegionTable()
114   throws IOException, InterruptedException, ClassNotFoundException {
115     runTestOnTable(new HTable(conf, MULTI_REGION_TABLE_NAME));
116   }
117 
118   private void runTestOnTable(HTable table)
119   throws IOException, InterruptedException, ClassNotFoundException {
120     MiniMRCluster mrCluster = new MiniMRCluster(2, fs.getUri().toString(), 1);
121 
122     Job job = null;
123     try {
124       LOG.info("Before map/reduce startup");
125       job = new Job(conf, "process column contents");
126       job.setNumReduceTasks(1);
127       Scan scan = new Scan();
128       scan.addFamily(INPUT_FAMILY);
129       TableMapReduceUtil.initTableMapperJob(
130         Bytes.toString(table.getTableName()), scan,
131         ProcessContentsMapper.class, ImmutableBytesWritable.class,
132         Put.class, job);
133       TableMapReduceUtil.initTableReducerJob(
134         Bytes.toString(table.getTableName()),
135         IdentityTableReducer.class, job);
136       FileOutputFormat.setOutputPath(job, new Path("test"));
137       LOG.info("Started " + Bytes.toString(table.getTableName()));
138       job.waitForCompletion(true);
139       LOG.info("After map/reduce completion");
140 
141       // verify map-reduce results
142       verify(Bytes.toString(table.getTableName()));
143     } finally {
144       mrCluster.shutdown();
145       if (job != null) {
146         FileUtil.fullyDelete(
147           new File(job.getConfiguration().get("hadoop.tmp.dir")));
148       }
149     }
150   }
151 
152   private void verify(String tableName) throws IOException {
153     HTable table = new HTable(conf, tableName);
154     boolean verified = false;
155     long pause = conf.getLong("hbase.client.pause", 5 * 1000);
156     int numRetries = conf.getInt("hbase.client.retries.number", 5);
157     for (int i = 0; i < numRetries; i++) {
158       try {
159         LOG.info("Verification attempt #" + i);
160         verifyAttempt(table);
161         verified = true;
162         break;
163       } catch (NullPointerException e) {
164         // If here, a cell was empty.  Presume its because updates came in
165         // after the scanner had been opened.  Wait a while and retry.
166         LOG.debug("Verification attempt failed: " + e.getMessage());
167       }
168       try {
169         Thread.sleep(pause);
170       } catch (InterruptedException e) {
171         // continue
172       }
173     }
174     assertTrue(verified);
175   }
176 
177   /**
178    * Looks at every value of the mapreduce output and verifies that indeed
179    * the values have been reversed.
180    *
181    * @param table Table to scan.
182    * @throws IOException
183    * @throws NullPointerException if we failed to find a cell value
184    */
185   private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
186     Scan scan = new Scan();
187     scan.addFamily(INPUT_FAMILY);
188     scan.addFamily(OUTPUT_FAMILY);
189     ResultScanner scanner = table.getScanner(scan);
190     try {
191       for (Result r : scanner) {
192         if (LOG.isDebugEnabled()) {
193           if (r.size() > 2 ) {
194             throw new IOException("Too many results, expected 2 got " +
195               r.size());
196           }
197         }
198         byte[] firstValue = null;
199         byte[] secondValue = null;
200         int count = 0;
201         for(KeyValue kv : r.list()) {
202           if (count == 0) {
203             firstValue = kv.getValue();
204           }
205           if (count == 1) {
206             secondValue = kv.getValue();
207           }
208           count++;
209           if (count == 2) {
210             break;
211           }
212         }
213 
214         String first = "";
215         if (firstValue == null) {
216           throw new NullPointerException(Bytes.toString(r.getRow()) +
217             ": first value is null");
218         }
219         first = new String(firstValue, HConstants.UTF8_ENCODING);
220 
221         String second = "";
222         if (secondValue == null) {
223           throw new NullPointerException(Bytes.toString(r.getRow()) +
224             ": second value is null");
225         }
226         byte[] secondReversed = new byte[secondValue.length];
227         for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
228           secondReversed[i] = secondValue[j];
229         }
230         second = new String(secondReversed, HConstants.UTF8_ENCODING);
231 
232         if (first.compareTo(second) != 0) {
233           if (LOG.isDebugEnabled()) {
234             LOG.debug("second key is not the reverse of first. row=" +
235                 Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
236                 ", second value=" + second);
237           }
238           fail();
239         }
240       }
241     } finally {
242       scanner.close();
243     }
244   }
245 
246   /**
247    * Test that we add tmpjars correctly including the ZK jar.
248    */
249   public void testAddDependencyJars() throws Exception {
250     Job job = new Job();
251     TableMapReduceUtil.addDependencyJars(job);
252     String tmpjars = job.getConfiguration().get("tmpjars");
253 
254     System.err.println("tmpjars: " + tmpjars);
255     assertTrue(tmpjars.contains("zookeeper"));
256     assertTrue(tmpjars.contains("guava"));
257   }
258 }