package org.apache.hadoop.hbase.mapred;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import static org.junit.Assert.fail;
import static org.junit.Assert.assertTrue;
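
/**
 * Test a map/reduce job over HBase tables. The job takes every row of the
 * input table, reverses the value of the INPUT_FAMILY cell, and writes the
 * reversed value back to the OUTPUT_FAMILY of the same row.
 */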
@Category(LargeTests.class)
public class TestTableMapReduce {
  private static final Log LOG =
    LogFactory.getLog(TestTableMapReduce.class.getName());
  private static final HBaseTestingUtility UTIL =
    new HBaseTestingUtility();
  static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");

  private static final byte[][] columns = new byte[][] {
    INPUT_FAMILY,
    OUTPUT_FAMILY
  };
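
  /**
   * Start the mini HBase and map/reduce clusters, create the multi-region
   * test table, and load it with data.
   */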
  @BeforeClass
  public static void beforeClass() throws Exception {
    UTIL.startMiniCluster();
    HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME,
      new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
    UTIL.createMultiRegions(table, INPUT_FAMILY);
    UTIL.loadTable(table, INPUT_FAMILY);
    UTIL.startMiniMapReduceCluster();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    UTIL.shutdownMiniMapReduceCluster();
    UTIL.shutdownMiniCluster();
  }
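
  /**
   * Mapper that reverses the INPUT_FAMILY cell value of each row and emits
   * it as a Put against the same row key.
   */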
  public static class ProcessContentsMapper
      extends MapReduceBase
      implements TableMap<ImmutableBytesWritable, Put> {
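    /**
     * Reverse the row's INPUT_FAMILY value and pass a Put for the
     * OUTPUT_FAMILY on to the reducer.
     */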
    public void map(ImmutableBytesWritable key, Result value,
        OutputCollector<ImmutableBytesWritable, Put> output,
        Reporter reporter) throws IOException {
      if (value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
        value.getMap();
      if (!cf.containsKey(INPUT_FAMILY)) {
        throw new IOException("Wrong input columns. Missing: '" +
          Bytes.toString(INPUT_FAMILY) + "'.");
      }
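
      // Get the original value and reverse it.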
      String originalValue = new String(value.getValue(INPUT_FAMILY, null),
        HConstants.UTF8_ENCODING);
      StringBuilder newValue = new StringBuilder(originalValue);
      newValue.reverse();
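
      // Collect the reversed value as a Put against the original row key.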
      Put outval = new Put(key.get());
      outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
      output.collect(key, outval);
    }
  }
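
  /**
   * Run the map/reduce job against the multi-region test table.
   */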
  @Test
  public void testMultiRegionTable() throws IOException {
    runTestOnTable(new HTable(UTIL.getConfiguration(), MULTI_REGION_TABLE_NAME));
  }

  private void runTestOnTable(HTable table) throws IOException {
    JobConf jobConf = null;
    try {
      LOG.info("Before map/reduce startup");
      jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
      jobConf.setJobName("process column contents");
      jobConf.setNumReduceTasks(1);
      TableMapReduceUtil.initTableMapJob(Bytes.toString(table.getTableName()),
        Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
        ImmutableBytesWritable.class, Put.class, jobConf);
      TableMapReduceUtil.initTableReduceJob(Bytes.toString(table.getTableName()),
        IdentityTableReduce.class, jobConf);

      LOG.info("Started " + Bytes.toString(table.getTableName()));
      RunningJob job = JobClient.runJob(jobConf);
      assertTrue(job.isSuccessful());
      LOG.info("After map/reduce completion");
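
      // Re-scan the table and verify that every value was reversed.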
      verify(Bytes.toString(table.getTableName()));
    } finally {
      if (jobConf != null) {
        FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
      }
    }
  }

  private void verify(String tableName) throws IOException {
    HTable table = new HTable(UTIL.getConfiguration(), tableName);
    boolean verified = false;
    long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
    int numRetries = UTIL.getConfiguration().getInt("hbase.client.retries.number", 5);
    for (int i = 0; i < numRetries; i++) {
      try {
        LOG.info("Verification attempt #" + i);
        verifyAttempt(table);
        verified = true;
        break;
      } catch (NullPointerException e) {
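        // A cell was empty; presume the job's edits have not reached the
        // scanner yet. Sleep below and retry.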
        LOG.debug("Verification attempt failed: " + e.getMessage());
      }
      try {
        Thread.sleep(pause);
      } catch (InterruptedException e) {
        // Ignore the interruption and proceed to the next attempt.
      }
    }
    assertTrue(verified);
  }
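
  /**
   * Looks at every row of the map/reduce output and verifies that the
   * OUTPUT_FAMILY value is the byte-reverse of the INPUT_FAMILY value.
   * @param table table to scan
   * @throws IOException if the scan fails or a row carries too many cells
   * @throws NullPointerException if either cell value is missing, signalling
   *   the caller to wait and retry
   */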
  private void verifyAttempt(final HTable table)
      throws IOException, NullPointerException {
    Scan scan = new Scan();
    TableInputFormat.addColumns(scan, columns);
    ResultScanner scanner = table.getScanner(scan);
    try {
      Iterator<Result> itr = scanner.iterator();
      assertTrue(itr.hasNext());
      while (itr.hasNext()) {
        Result r = itr.next();
        // Each row should carry at most the input cell and the output cell.
        // Run this check unconditionally; gating it on LOG.isDebugEnabled()
        // would silently skip the assertion when debug logging is off.
        if (r.size() > 2) {
          throw new IOException("Too many results, expected 2 got " +
            r.size());
        }
        byte[] firstValue = null;
        byte[] secondValue = null;
        int count = 0;
        for (KeyValue kv : r.list()) {
          if (count == 0) {
            firstValue = kv.getValue();
          }
          if (count == 1) {
            secondValue = kv.getValue();
          }
          count++;
          if (count == 2) {
            break;
          }
        }
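
        // The first cell is the original INPUT_FAMILY value; the second is
        // the OUTPUT_FAMILY value written by the job and must be its reverse.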
        if (firstValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) +
            ": first value is null");
        }
        String first = new String(firstValue, HConstants.UTF8_ENCODING);

        if (secondValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) +
            ": second value is null");
        }
        byte[] secondReversed = new byte[secondValue.length];
        for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
          secondReversed[i] = secondValue[j];
        }
        String second = new String(secondReversed, HConstants.UTF8_ENCODING);

        if (first.compareTo(second) != 0) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("second value is not the reverse of first. row=" +
              Bytes.toString(r.getRow()) + ", first value=" + first +
              ", second value=" + second);
          }
          fail();
        }
      }
    } finally {
      scanner.close();
    }
  }

  @org.junit.Rule
  public ResourceCheckerJUnitRule resourceChecker =
    new ResourceCheckerJUnitRule();
}