package org.apache.hadoop.hbase.mapreduce;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

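/**
 * Test a map/reduce job over HBase tables. The job reads every row of the
 * table, reverses the value of a single input cell, and writes the result
 * back to the same table; the test then verifies the reversed values.
 */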
@Category(LargeTests.class)
public class TestTableMapReduce {
  private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");

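  // Bring up the HBase and MapReduce mini clusters and seed a multi-region
  // table before any test runs.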
  @BeforeClass
  public static void beforeClass() throws Exception {
    UTIL.startMiniCluster();
    HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME,
      new byte[][] { INPUT_FAMILY, OUTPUT_FAMILY });
    UTIL.createMultiRegions(table, INPUT_FAMILY);
    UTIL.loadTable(table, INPUT_FAMILY);
    UTIL.startMiniMapReduceCluster();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    UTIL.shutdownMiniMapReduceCluster();
    UTIL.shutdownMiniCluster();
  }

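  /**
   * Mapper that reads a row, reverses the value of the single input column,
   * and emits a Put for the output column.
   */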
  public static class ProcessContentsMapper
  extends TableMapper<ImmutableBytesWritable, Put> {

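    /**
     * Pass the key, and the reversed value, on to the reducer.
     * @param key row key of the current row
     * @param value the row's cells; exactly one input column is expected
     * @param context task context the output Put is written to
     * @throws IOException if the row does not carry exactly one input column
     */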
    @Override
    public void map(ImmutableBytesWritable key, Result value,
      Context context)
    throws IOException, InterruptedException {
      if (value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
        value.getMap();
      if (!cf.containsKey(INPUT_FAMILY)) {
        throw new IOException("Wrong input columns. Missing: '" +
          Bytes.toString(INPUT_FAMILY) + "'.");
      }

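      // Get the original value and reverse it.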
      String originalValue = new String(value.getValue(INPUT_FAMILY, null),
        HConstants.UTF8_ENCODING);
      StringBuilder newValue = new StringBuilder(originalValue);
      newValue.reverse();

      Put outval = new Put(key.get());
      outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
      context.write(key, outval);
    }
  }
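
  /**
   * Test a map/reduce job against a multi-region table.
   * @throws IOException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   */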
  @Test
  public void testMultiRegionTable()
  throws IOException, InterruptedException, ClassNotFoundException {
    runTestOnTable(new HTable(new Configuration(UTIL.getConfiguration()),
      MULTI_REGION_TABLE_NAME));
  }

  private void runTestOnTable(HTable table)
  throws IOException, InterruptedException, ClassNotFoundException {
    Job job = null;
    try {
      LOG.info("Before map/reduce startup");
      job = new Job(table.getConfiguration(), "process column contents");
      job.setNumReduceTasks(1);
      Scan scan = new Scan();
      scan.addFamily(INPUT_FAMILY);
      TableMapReduceUtil.initTableMapperJob(
        Bytes.toString(table.getTableName()), scan,
        ProcessContentsMapper.class, ImmutableBytesWritable.class,
        Put.class, job);
      TableMapReduceUtil.initTableReducerJob(
        Bytes.toString(table.getTableName()),
        IdentityTableReducer.class, job);
      FileOutputFormat.setOutputPath(job, new Path("test"));
      LOG.info("Started " + Bytes.toString(table.getTableName()));
      assertTrue(job.waitForCompletion(true));
      LOG.info("After map/reduce completion");

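      // The job completed successfully; now check what it wrote.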
      verify(Bytes.toString(table.getTableName()));
    } finally {
      table.close();
      if (job != null) {
        FileUtil.fullyDelete(
          new File(job.getConfiguration().get("hadoop.tmp.dir")));
      }
    }
  }

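  /**
   * Scans the output table and checks every row, retrying a few times so
   * the job's writes have a chance to become visible to the scanner.
   * @param tableName name of the table to verify
   * @throws IOException if the scan fails
   */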
  private void verify(String tableName) throws IOException {
    HTable table = new HTable(new Configuration(UTIL.getConfiguration()),
      tableName);
    boolean verified = false;
    long pause = UTIL.getConfiguration().getLong("hbase.client.pause",
      5 * 1000);
    int numRetries = UTIL.getConfiguration().getInt(
      "hbase.client.retries.number", 5);
    for (int i = 0; i < numRetries; i++) {
      try {
        LOG.info("Verification attempt #" + i);
        verifyAttempt(table);
        verified = true;
        break;
      } catch (NullPointerException e) {
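        // If here, a cell was empty. Presume the job's writes were not yet
        // visible to the scanner; wait out the pause below and retry.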
        LOG.debug("Verification attempt failed: " + e.getMessage());
      }
      try {
        Thread.sleep(pause);
      } catch (InterruptedException e) {
        // ignore; the retry counter bounds the total wait
      }
    }
    assertTrue(verified);
    table.close();
  }

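  /**
   * Looks at every value of the map/reduce output and verifies that indeed
   * the values have been reversed.
   * @param table table to scan
   * @throws IOException if a row has too many cells or the scan fails
   * @throws NullPointerException if we failed to find a cell value
   */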
  private void verifyAttempt(final HTable table)
  throws IOException, NullPointerException {
    Scan scan = new Scan();
    scan.addFamily(INPUT_FAMILY);
    scan.addFamily(OUTPUT_FAMILY);
    ResultScanner scanner = table.getScanner(scan);
    try {
      Iterator<Result> itr = scanner.iterator();
      assertTrue(itr.hasNext());
      while (itr.hasNext()) {
        Result r = itr.next();
        if (r.size() > 2) {
          throw new IOException("Too many results, expected 2 got " +
            r.size());
        }
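        // Cells sort by family, so the first cell holds the original value
        // ("contents") and the second the job's output ("text").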
        byte[] firstValue = null;
        byte[] secondValue = null;
        int count = 0;
        for (KeyValue kv : r.list()) {
          if (count == 0) {
            firstValue = kv.getValue();
          }
          if (count == 1) {
            secondValue = kv.getValue();
          }
          count++;
          if (count == 2) {
            break;
          }
        }

        if (firstValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) +
            ": first value is null");
        }
        String first = new String(firstValue, HConstants.UTF8_ENCODING);

        if (secondValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) +
            ": second value is null");
        }
        byte[] secondReversed = new byte[secondValue.length];
        for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
          secondReversed[i] = secondValue[j];
        }
        String second = new String(secondReversed, HConstants.UTF8_ENCODING);

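        // Un-reversing the output must give back the original input value.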
        if (!first.equals(second)) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("second value is not the reverse of first. row=" +
              Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
              ", second value=" + second);
          }
          fail();
        }
      }
    } finally {
      scanner.close();
    }
  }
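
  /**
   * Verify that addDependencyJars ships the expected dependency jars
   * (zookeeper, protobuf, guava) with the job configuration.
   */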
  @Test
  public void testAddDependencyJars() throws Exception {
    Job job = new Job();
    TableMapReduceUtil.addDependencyJars(job);
    String tmpjars = job.getConfiguration().get("tmpjars");

    assertTrue(tmpjars.contains("zookeeper"));
    assertTrue(tmpjars.contains("protobuf"));
    assertTrue(tmpjars.contains("guava"));
  }
276
277 @org.junit.Rule
278 public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
279 new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
280 }
281