1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import static org.junit.Assert.assertEquals;
21 import static org.junit.Assert.assertTrue;
22
23 import java.io.File;
24 import java.io.IOException;
25 import java.util.ArrayList;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.NavigableMap;
29
30 import org.apache.commons.logging.Log;
31 import org.apache.commons.logging.LogFactory;
32 import org.apache.hadoop.conf.Configuration;
33 import org.apache.hadoop.fs.FileUtil;
34 import org.apache.hadoop.fs.Path;
35 import org.apache.hadoop.hbase.HBaseTestingUtility;
36 import org.apache.hadoop.hbase.LargeTests;
37 import org.apache.hadoop.hbase.client.HTable;
38 import org.apache.hadoop.hbase.client.Result;
39 import org.apache.hadoop.hbase.client.Scan;
40 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
41 import org.apache.hadoop.hbase.util.Bytes;
42 import org.apache.hadoop.io.NullWritable;
43 import org.apache.hadoop.mapreduce.Job;
44 import org.apache.hadoop.mapreduce.Reducer;
45 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
46 import org.junit.After;
47 import org.junit.AfterClass;
48 import org.junit.BeforeClass;
49 import org.junit.Test;
50 import org.junit.experimental.categories.Category;
51
52
53
54
55
56
57 @Category(LargeTests.class)
58 public class TestMultiTableInputFormat {
59
60 static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
61 static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
62
63 static final String TABLE_NAME = "scantest";
64 static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
65 static final String KEY_STARTROW = "startRow";
66 static final String KEY_LASTROW = "stpRow";
67
68 @BeforeClass
69 public static void setUpBeforeClass() throws Exception {
70
71 TEST_UTIL.enableDebug(MultiTableInputFormat.class);
72 TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
73
74 TEST_UTIL.startMiniCluster(3);
75
76 for (int i = 0; i < 3; i++) {
77 HTable table =
78 TEST_UTIL.createTable(Bytes.toBytes(TABLE_NAME + String.valueOf(i)),
79 INPUT_FAMILY);
80 TEST_UTIL.createMultiRegions(table, INPUT_FAMILY);
81 TEST_UTIL.loadTable(table, INPUT_FAMILY);
82 }
83
84 TEST_UTIL.startMiniMapReduceCluster();
85 }
86
87 @AfterClass
88 public static void tearDownAfterClass() throws Exception {
89 TEST_UTIL.shutdownMiniMapReduceCluster();
90 TEST_UTIL.shutdownMiniCluster();
91 }
92
93 @After
94 public void tearDown() throws Exception {
95 Configuration c = TEST_UTIL.getConfiguration();
96 FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
97 }
98
99
100
101
102 public static class ScanMapper extends
103 TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
104
105
106
107
108
109
110
111
112 @Override
113 public void map(ImmutableBytesWritable key, Result value, Context context)
114 throws IOException, InterruptedException {
115 if (value.size() != 1) {
116 throw new IOException("There should only be one input column");
117 }
118 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
119 value.getMap();
120 if (!cf.containsKey(INPUT_FAMILY)) {
121 throw new IOException("Wrong input columns. Missing: '" +
122 Bytes.toString(INPUT_FAMILY) + "'.");
123 }
124 String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
125 LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
126 ", value -> " + val);
127 context.write(key, key);
128 }
129 }
130
131
132
133
134 public static class ScanReducer
135 extends
136 Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
137 NullWritable, NullWritable> {
138 private String first = null;
139 private String last = null;
140
141 protected void reduce(ImmutableBytesWritable key,
142 Iterable<ImmutableBytesWritable> values, Context context)
143 throws IOException, InterruptedException {
144 int count = 0;
145 for (ImmutableBytesWritable value : values) {
146 String val = Bytes.toStringBinary(value.get());
147 LOG.debug("reduce: key[" + count + "] -> " +
148 Bytes.toStringBinary(key.get()) + ", value -> " + val);
149 if (first == null) first = val;
150 last = val;
151 count++;
152 }
153 assertEquals(3, count);
154 }
155
156 protected void cleanup(Context context) throws IOException,
157 InterruptedException {
158 Configuration c = context.getConfiguration();
159 String startRow = c.get(KEY_STARTROW);
160 String lastRow = c.get(KEY_LASTROW);
161 LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
162 startRow + "\"");
163 LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
164 "\"");
165 if (startRow != null && startRow.length() > 0) {
166 assertEquals(startRow, first);
167 }
168 if (lastRow != null && lastRow.length() > 0) {
169 assertEquals(lastRow, last);
170 }
171 }
172 }
173
174 @Test
175 public void testScanEmptyToEmpty() throws IOException, InterruptedException,
176 ClassNotFoundException {
177 testScan(null, null, null);
178 }
179
180 @Test
181 public void testScanEmptyToAPP() throws IOException, InterruptedException,
182 ClassNotFoundException {
183 testScan(null, "app", "apo");
184 }
185
186 @Test
187 public void testScanOBBToOPP() throws IOException, InterruptedException,
188 ClassNotFoundException {
189 testScan("obb", "opp", "opo");
190 }
191
192 @Test
193 public void testScanOPPToEmpty() throws IOException, InterruptedException,
194 ClassNotFoundException {
195 testScan("opp", null, "zzz");
196 }
197
198 @Test
199 public void testScanYZYToEmpty() throws IOException, InterruptedException,
200 ClassNotFoundException {
201 testScan("yzy", null, "zzz");
202 }
203
204
205
206
207
208
209
210
211 private void testScan(String start, String stop, String last)
212 throws IOException, InterruptedException, ClassNotFoundException {
213 String jobName =
214 "Scan" + (start != null ? start.toUpperCase() : "Empty") + "To" +
215 (stop != null ? stop.toUpperCase() : "Empty");
216 LOG.info("Before map/reduce startup - job " + jobName);
217 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
218
219 c.set(KEY_STARTROW, start != null ? start : "");
220 c.set(KEY_LASTROW, last != null ? last : "");
221
222 List<Scan> scans = new ArrayList<Scan>();
223
224 for(int i=0; i<3; i++){
225 Scan scan = new Scan();
226
227 scan.addFamily(INPUT_FAMILY);
228 scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(TABLE_NAME + i));
229
230 if (start != null) {
231 scan.setStartRow(Bytes.toBytes(start));
232 }
233 if (stop != null) {
234 scan.setStopRow(Bytes.toBytes(stop));
235 }
236
237 scans.add(scan);
238
239 LOG.info("scan before: " + scan);
240 }
241
242 Job job = new Job(c, jobName);
243
244 TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
245 ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
246 job.setReducerClass(ScanReducer.class);
247 job.setNumReduceTasks(1);
248 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
249 LOG.info("Started " + job.getJobName());
250 job.waitForCompletion(true);
251 assertTrue(job.isSuccessful());
252 LOG.info("After map/reduce completion - job " + jobName);
253 }
254 }