1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import com.google.common.collect.Lists;
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24 import org.apache.hadoop.conf.Configuration;
25 import org.apache.hadoop.fs.FileUtil;
26 import org.apache.hadoop.fs.Path;
27 import org.apache.hadoop.hbase.HBaseTestingUtility;
28 import org.apache.hadoop.hbase.TableName;
29 import org.apache.hadoop.hbase.client.HTable;
30 import org.apache.hadoop.hbase.client.Result;
31 import org.apache.hadoop.hbase.client.Scan;
32 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
33 import org.apache.hadoop.hbase.util.Bytes;
34 import org.apache.hadoop.io.NullWritable;
35 import org.apache.hadoop.mapreduce.Job;
36 import org.apache.hadoop.mapreduce.Reducer;
37 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
38 import org.junit.After;
39 import org.junit.AfterClass;
40 import org.junit.BeforeClass;
41 import org.junit.Test;
42
43 import java.io.File;
44 import java.io.IOException;
45 import java.util.ArrayList;
46 import java.util.List;
47 import java.util.Map;
48 import java.util.NavigableMap;
49
50 import static org.junit.Assert.assertEquals;
51 import static org.junit.Assert.assertTrue;
52
53
54
55
56 public abstract class MultiTableInputFormatTestBase {
/** Logger used by this base class and by the nested mapper and reducer. */
static final Log LOG = LogFactory.getLog(MultiTableInputFormatTestBase.class);
/** Testing utility that owns the mini clusters started for this test class. */
public static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
/** Common prefix of the generated table names. */
static final String TABLE_NAME = "scantest";
/** Column family that is loaded into every table and requested by every scan. */
static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
/** Job configuration key under which the expected first row is stored. */
static final String KEY_STARTROW = "startRow";
/** Job configuration key under which the expected last row is stored. */
static final String KEY_LASTROW = "stpRow";

/** Names of the test tables: {@link #TABLE_NAME} followed by 0, 1 and 2. */
static List<String> TABLES = Lists.newArrayList();

static {
  for (int i = 0; i < 3; i++) {
    TABLES.add(TABLE_NAME + String.valueOf(i));
  }
}
71
72 @BeforeClass
73 public static void setUpBeforeClass() throws Exception {
74
75 TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
76
77 TEST_UTIL.startMiniCluster(3);
78
79 for (String tableName : TABLES) {
80 HTable table =
81 TEST_UTIL.createMultiRegionTable(TableName.valueOf(tableName),
82 INPUT_FAMILY, 4);
83 try {
84 TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
85 } finally {
86 table.close();
87 }
88 }
89
90 TEST_UTIL.startMiniMapReduceCluster();
91 }
92
93 @AfterClass
94 public static void tearDownAfterClass() throws Exception {
95 TEST_UTIL.shutdownMiniMapReduceCluster();
96 TEST_UTIL.shutdownMiniCluster();
97 }
98
99 @After
100 public void tearDown() throws Exception {
101 Configuration c = TEST_UTIL.getConfiguration();
102 FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
103 }
104
105
106
107
108 public static class ScanMapper extends
109 TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
110
111
112
113
114
115
116
117
118 @Override
119 public void map(ImmutableBytesWritable key, Result value, Context context)
120 throws IOException, InterruptedException {
121 makeAssertions(key, value);
122 context.write(key, key);
123 }
124
125 public void makeAssertions(ImmutableBytesWritable key, Result value) throws IOException {
126 if (value.size() != 1) {
127 throw new IOException("There should only be one input column");
128 }
129 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
130 value.getMap();
131 if (!cf.containsKey(INPUT_FAMILY)) {
132 throw new IOException("Wrong input columns. Missing: '" +
133 Bytes.toString(INPUT_FAMILY) + "'.");
134 }
135 String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
136 LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
137 ", value -> " + val);
138 }
139 }
140
141
142
143
144 public static class ScanReducer
145 extends
146 Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
147 NullWritable, NullWritable> {
148 private String first = null;
149 private String last = null;
150
151 @Override
152 protected void reduce(ImmutableBytesWritable key,
153 Iterable<ImmutableBytesWritable> values, Context context)
154 throws IOException, InterruptedException {
155 makeAssertions(key, values);
156 }
157
158 protected void makeAssertions(ImmutableBytesWritable key,
159 Iterable<ImmutableBytesWritable> values) {
160 int count = 0;
161 for (ImmutableBytesWritable value : values) {
162 String val = Bytes.toStringBinary(value.get());
163 LOG.debug("reduce: key[" + count + "] -> " +
164 Bytes.toStringBinary(key.get()) + ", value -> " + val);
165 if (first == null) first = val;
166 last = val;
167 count++;
168 }
169 assertEquals(3, count);
170 }
171
172 @Override
173 protected void cleanup(Context context) throws IOException,
174 InterruptedException {
175 Configuration c = context.getConfiguration();
176 cleanup(c);
177 }
178
179 protected void cleanup(Configuration c) {
180 String startRow = c.get(KEY_STARTROW);
181 String lastRow = c.get(KEY_LASTROW);
182 LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
183 startRow + "\"");
184 LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
185 "\"");
186 if (startRow != null && startRow.length() > 0) {
187 assertEquals(startRow, first);
188 }
189 if (lastRow != null && lastRow.length() > 0) {
190 assertEquals(lastRow, last);
191 }
192 }
193 }
194
195 @Test
196 public void testScanEmptyToEmpty() throws IOException, InterruptedException,
197 ClassNotFoundException {
198 testScan(null, null, null);
199 }
200
201 @Test
202 public void testScanEmptyToAPP() throws IOException, InterruptedException,
203 ClassNotFoundException {
204 testScan(null, "app", "apo");
205 }
206
207 @Test
208 public void testScanOBBToOPP() throws IOException, InterruptedException,
209 ClassNotFoundException {
210 testScan("obb", "opp", "opo");
211 }
212
213 @Test
214 public void testScanYZYToEmpty() throws IOException, InterruptedException,
215 ClassNotFoundException {
216 testScan("yzy", null, "zzz");
217 }
218
219
220
221
222
223
224
225
226 private void testScan(String start, String stop, String last)
227 throws IOException, InterruptedException, ClassNotFoundException {
228 String jobName =
229 "Scan" + (start != null ? start.toUpperCase() : "Empty") + "To" +
230 (stop != null ? stop.toUpperCase() : "Empty");
231 LOG.info("Before map/reduce startup - job " + jobName);
232 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
233
234 c.set(KEY_STARTROW, start != null ? start : "");
235 c.set(KEY_LASTROW, last != null ? last : "");
236
237 List<Scan> scans = new ArrayList<Scan>();
238
239 for (String tableName : TABLES) {
240 Scan scan = new Scan();
241
242 scan.addFamily(INPUT_FAMILY);
243 scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
244
245 if (start != null) {
246 scan.setStartRow(Bytes.toBytes(start));
247 }
248 if (stop != null) {
249 scan.setStopRow(Bytes.toBytes(stop));
250 }
251
252 scans.add(scan);
253
254 LOG.info("scan before: " + scan);
255 }
256
257 runJob(jobName, c, scans);
258 }
259
260 protected void runJob(String jobName, Configuration c, List<Scan> scans)
261 throws IOException, InterruptedException, ClassNotFoundException {
262 Job job = new Job(c, jobName);
263
264 initJob(scans, job);
265 job.setReducerClass(ScanReducer.class);
266 job.setNumReduceTasks(1);
267 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
268 LOG.info("Started " + job.getJobName());
269 job.waitForCompletion(true);
270 assertTrue(job.isSuccessful());
271 LOG.info("After map/reduce completion - job " + jobName);
272 }
273
274 protected abstract void initJob(List<Scan> scans, Job job) throws IOException;
275
276
277 }