1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapred;
20
21 import java.io.File;
22 import java.io.IOException;
23 import java.util.Iterator;
24 import java.util.Map;
25 import java.util.NavigableMap;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.fs.FileUtil;
30 import org.apache.hadoop.hbase.*;
31 import org.apache.hadoop.hbase.client.HTable;
32 import org.apache.hadoop.hbase.client.Put;
33 import org.apache.hadoop.hbase.client.Result;
34 import org.apache.hadoop.hbase.client.ResultScanner;
35 import org.apache.hadoop.hbase.client.Scan;
36 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
37 import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
38 import org.apache.hadoop.hbase.util.Bytes;
39 import org.apache.hadoop.mapred.JobClient;
40 import org.apache.hadoop.mapred.JobConf;
41 import org.apache.hadoop.mapred.MapReduceBase;
42 import org.apache.hadoop.mapred.OutputCollector;
43 import org.apache.hadoop.mapred.Reporter;
44 import org.apache.hadoop.mapred.RunningJob;
45 import org.junit.AfterClass;
46 import org.junit.BeforeClass;
47 import org.junit.Test;
48 import org.junit.experimental.categories.Category;
49
50 import static org.junit.Assert.fail;
51 import static org.junit.Assert.assertTrue;
52
53
54
55
56
57
58 @Category(LargeTests.class)
59 public class TestTableMapReduce {
60 private static final Log LOG =
61 LogFactory.getLog(TestTableMapReduce.class.getName());
62 private static final HBaseTestingUtility UTIL =
63 new HBaseTestingUtility();
64 static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
65 static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
66 static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
67
68 private static final byte [][] columns = new byte [][] {
69 INPUT_FAMILY,
70 OUTPUT_FAMILY
71 };
72
73 @BeforeClass
74 public static void beforeClass() throws Exception {
75 UTIL.startMiniCluster();
76 HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME, new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
77 UTIL.createMultiRegions(table, INPUT_FAMILY);
78 UTIL.loadTable(table, INPUT_FAMILY);
79 UTIL.startMiniMapReduceCluster();
80 }
81
82 @AfterClass
83 public static void afterClass() throws Exception {
84 UTIL.shutdownMiniMapReduceCluster();
85 UTIL.shutdownMiniCluster();
86 }
87
88
89
90
91 public static class ProcessContentsMapper
92 extends MapReduceBase
93 implements TableMap<ImmutableBytesWritable, Put> {
94
95
96
97
98
99
100
101
102 public void map(ImmutableBytesWritable key, Result value,
103 OutputCollector<ImmutableBytesWritable, Put> output,
104 Reporter reporter)
105 throws IOException {
106 if (value.size() != 1) {
107 throw new IOException("There should only be one input column");
108 }
109 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
110 cf = value.getMap();
111 if(!cf.containsKey(INPUT_FAMILY)) {
112 throw new IOException("Wrong input columns. Missing: '" +
113 Bytes.toString(INPUT_FAMILY) + "'.");
114 }
115
116
117
118 String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, null));
119 StringBuilder newValue = new StringBuilder(originalValue);
120 newValue.reverse();
121
122
123
124 Put outval = new Put(key.get());
125 outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
126 output.collect(key, outval);
127 }
128 }
129
130
131
132
133
134 @Test
135 public void testMultiRegionTable() throws IOException {
136 runTestOnTable(new HTable(UTIL.getConfiguration(), MULTI_REGION_TABLE_NAME));
137 }
138
139 private void runTestOnTable(HTable table) throws IOException {
140 JobConf jobConf = null;
141 try {
142 LOG.info("Before map/reduce startup");
143 jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
144 jobConf.setJobName("process column contents");
145 jobConf.setNumReduceTasks(1);
146 TableMapReduceUtil.initTableMapJob(Bytes.toString(table.getTableName()),
147 Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
148 ImmutableBytesWritable.class, Put.class, jobConf);
149 TableMapReduceUtil.initTableReduceJob(Bytes.toString(table.getTableName()),
150 IdentityTableReduce.class, jobConf);
151
152 LOG.info("Started " + Bytes.toString(table.getTableName()));
153 RunningJob job = JobClient.runJob(jobConf);
154 assertTrue(job.isSuccessful());
155 LOG.info("After map/reduce completion");
156
157
158 verify(Bytes.toString(table.getTableName()));
159 } finally {
160 if (jobConf != null) {
161 FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
162 }
163 }
164 }
165
166 private void verify(String tableName) throws IOException {
167 HTable table = new HTable(UTIL.getConfiguration(), tableName);
168 boolean verified = false;
169 long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
170 int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
171 for (int i = 0; i < numRetries; i++) {
172 try {
173 LOG.info("Verification attempt #" + i);
174 verifyAttempt(table);
175 verified = true;
176 break;
177 } catch (NullPointerException e) {
178
179
180 LOG.debug("Verification attempt failed: " + e.getMessage());
181 }
182 try {
183 Thread.sleep(pause);
184 } catch (InterruptedException e) {
185
186 }
187 }
188 assertTrue(verified);
189 }
190
191
192
193
194
195
196
197
198 private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
199 Scan scan = new Scan();
200 TableInputFormat.addColumns(scan, columns);
201 ResultScanner scanner = table.getScanner(scan);
202 try {
203 Iterator<Result> itr = scanner.iterator();
204 assertTrue(itr.hasNext());
205 while(itr.hasNext()) {
206 Result r = itr.next();
207 if (LOG.isDebugEnabled()) {
208 if (r.size() > 2 ) {
209 throw new IOException("Too many results, expected 2 got " +
210 r.size());
211 }
212 }
213 byte[] firstValue = null;
214 byte[] secondValue = null;
215 int count = 0;
216 for(KeyValue kv : r.list()) {
217 if (count == 0) {
218 firstValue = kv.getValue();
219 }
220 if (count == 1) {
221 secondValue = kv.getValue();
222 }
223 count++;
224 if (count == 2) {
225 break;
226 }
227 }
228
229
230 String first = "";
231 if (firstValue == null) {
232 throw new NullPointerException(Bytes.toString(r.getRow()) +
233 ": first value is null");
234 }
235 first = Bytes.toString(firstValue);
236
237 String second = "";
238 if (secondValue == null) {
239 throw new NullPointerException(Bytes.toString(r.getRow()) +
240 ": second value is null");
241 }
242 byte[] secondReversed = new byte[secondValue.length];
243 for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
244 secondReversed[i] = secondValue[j];
245 }
246 second = Bytes.toString(secondReversed);
247
248 if (first.compareTo(second) != 0) {
249 if (LOG.isDebugEnabled()) {
250 LOG.debug("second key is not the reverse of first. row=" +
251 r.getRow() + ", first value=" + first + ", second value=" +
252 second);
253 }
254 fail();
255 }
256 }
257 } finally {
258 scanner.close();
259 }
260 }
261
262 }
263