1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapreduce;
21
22 import java.io.File;
23 import java.io.IOException;
24 import java.util.Map;
25 import java.util.NavigableMap;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.fs.FileUtil;
30 import org.apache.hadoop.fs.Path;
31 import org.apache.hadoop.hbase.HColumnDescriptor;
32 import org.apache.hadoop.hbase.HConstants;
33 import org.apache.hadoop.hbase.HTableDescriptor;
34 import org.apache.hadoop.hbase.KeyValue;
35 import org.apache.hadoop.hbase.MultiRegionTable;
36 import org.apache.hadoop.hbase.client.HTable;
37 import org.apache.hadoop.hbase.client.Put;
38 import org.apache.hadoop.hbase.client.Result;
39 import org.apache.hadoop.hbase.client.ResultScanner;
40 import org.apache.hadoop.hbase.client.Scan;
41 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
42 import org.apache.hadoop.hbase.util.Bytes;
43 import org.apache.hadoop.mapred.MiniMRCluster;
44 import org.apache.hadoop.mapreduce.Job;
45 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
46
47
48
49
50
51
52 public class TestTableMapReduce extends MultiRegionTable {
53
/** Logger shared by the tests and helpers of this class. */
private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);

/** Name of the table the map/reduce job reads from and writes back to. */
static final String MULTI_REGION_TABLE_NAME = "mrtest";

/** Column family the mapper reads its input value from. */
static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");

/** Column family the mapper writes the reversed value to. */
static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");

/**
 * Creates the test case. The input family name is handed to the parent
 * constructor, and {@code desc} is set to a descriptor for
 * {@link #MULTI_REGION_TABLE_NAME} that carries both column families.
 */
public TestTableMapReduce() {
  super(Bytes.toString(INPUT_FAMILY));
  desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
  // Register the input family first, then the output family.
  for (byte[] family : new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY}) {
    desc.addFamily(new HColumnDescriptor(family));
  }
}
67
68
69
70
71 public static class ProcessContentsMapper
72 extends TableMapper<ImmutableBytesWritable, Put> {
73
74
75
76
77
78
79
80
81
82 public void map(ImmutableBytesWritable key, Result value,
83 Context context)
84 throws IOException, InterruptedException {
85 if (value.size() != 1) {
86 throw new IOException("There should only be one input column");
87 }
88 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
89 cf = value.getMap();
90 if(!cf.containsKey(INPUT_FAMILY)) {
91 throw new IOException("Wrong input columns. Missing: '" +
92 Bytes.toString(INPUT_FAMILY) + "'.");
93 }
94
95
96 String originalValue = new String(value.getValue(INPUT_FAMILY, null),
97 HConstants.UTF8_ENCODING);
98 StringBuilder newValue = new StringBuilder(originalValue);
99 newValue.reverse();
100
101 Put outval = new Put(key.get());
102 outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
103 context.write(key, outval);
104 }
105 }
106
107
108
109
110
111
112
113 public void testMultiRegionTable()
114 throws IOException, InterruptedException, ClassNotFoundException {
115 runTestOnTable(new HTable(conf, MULTI_REGION_TABLE_NAME));
116 }
117
118 private void runTestOnTable(HTable table)
119 throws IOException, InterruptedException, ClassNotFoundException {
120 MiniMRCluster mrCluster = new MiniMRCluster(2, fs.getUri().toString(), 1);
121
122 Job job = null;
123 try {
124 LOG.info("Before map/reduce startup");
125 job = new Job(conf, "process column contents");
126 job.setNumReduceTasks(1);
127 Scan scan = new Scan();
128 scan.addFamily(INPUT_FAMILY);
129 TableMapReduceUtil.initTableMapperJob(
130 Bytes.toString(table.getTableName()), scan,
131 ProcessContentsMapper.class, ImmutableBytesWritable.class,
132 Put.class, job);
133 TableMapReduceUtil.initTableReducerJob(
134 Bytes.toString(table.getTableName()),
135 IdentityTableReducer.class, job);
136 FileOutputFormat.setOutputPath(job, new Path("test"));
137 LOG.info("Started " + Bytes.toString(table.getTableName()));
138 job.waitForCompletion(true);
139 LOG.info("After map/reduce completion");
140
141
142 verify(Bytes.toString(table.getTableName()));
143 } finally {
144 mrCluster.shutdown();
145 if (job != null) {
146 FileUtil.fullyDelete(
147 new File(job.getConfiguration().get("hadoop.tmp.dir")));
148 }
149 }
150 }
151
152 private void verify(String tableName) throws IOException {
153 HTable table = new HTable(conf, tableName);
154 boolean verified = false;
155 long pause = conf.getLong("hbase.client.pause", 5 * 1000);
156 int numRetries = conf.getInt("hbase.client.retries.number", 5);
157 for (int i = 0; i < numRetries; i++) {
158 try {
159 LOG.info("Verification attempt #" + i);
160 verifyAttempt(table);
161 verified = true;
162 break;
163 } catch (NullPointerException e) {
164
165
166 LOG.debug("Verification attempt failed: " + e.getMessage());
167 }
168 try {
169 Thread.sleep(pause);
170 } catch (InterruptedException e) {
171
172 }
173 }
174 assertTrue(verified);
175 }
176
177
178
179
180
181
182
183
184
185 private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
186 Scan scan = new Scan();
187 scan.addFamily(INPUT_FAMILY);
188 scan.addFamily(OUTPUT_FAMILY);
189 ResultScanner scanner = table.getScanner(scan);
190 try {
191 for (Result r : scanner) {
192 if (LOG.isDebugEnabled()) {
193 if (r.size() > 2 ) {
194 throw new IOException("Too many results, expected 2 got " +
195 r.size());
196 }
197 }
198 byte[] firstValue = null;
199 byte[] secondValue = null;
200 int count = 0;
201 for(KeyValue kv : r.list()) {
202 if (count == 0) {
203 firstValue = kv.getValue();
204 }
205 if (count == 1) {
206 secondValue = kv.getValue();
207 }
208 count++;
209 if (count == 2) {
210 break;
211 }
212 }
213
214 String first = "";
215 if (firstValue == null) {
216 throw new NullPointerException(Bytes.toString(r.getRow()) +
217 ": first value is null");
218 }
219 first = new String(firstValue, HConstants.UTF8_ENCODING);
220
221 String second = "";
222 if (secondValue == null) {
223 throw new NullPointerException(Bytes.toString(r.getRow()) +
224 ": second value is null");
225 }
226 byte[] secondReversed = new byte[secondValue.length];
227 for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
228 secondReversed[i] = secondValue[j];
229 }
230 second = new String(secondReversed, HConstants.UTF8_ENCODING);
231
232 if (first.compareTo(second) != 0) {
233 if (LOG.isDebugEnabled()) {
234 LOG.debug("second key is not the reverse of first. row=" +
235 Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
236 ", second value=" + second);
237 }
238 fail();
239 }
240 }
241 } finally {
242 scanner.close();
243 }
244 }
245
246
247
248
249 public void testAddDependencyJars() throws Exception {
250 Job job = new Job();
251 TableMapReduceUtil.addDependencyJars(job);
252 String tmpjars = job.getConfiguration().get("tmpjars");
253
254 System.err.println("tmpjars: " + tmpjars);
255 assertTrue(tmpjars.contains("zookeeper"));
256 assertTrue(tmpjars.contains("guava"));
257 }
258 }