1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapreduce;
21
22 import java.io.File;
23 import java.io.IOException;
24 import java.util.Map;
25 import java.util.NavigableMap;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.fs.FileUtil;
31 import org.apache.hadoop.fs.Path;
32 import org.apache.hadoop.hbase.HBaseTestingUtility;
33 import org.apache.hadoop.hbase.LargeTests;
34 import org.apache.hadoop.hbase.client.HTable;
35 import org.apache.hadoop.hbase.client.Result;
36 import org.apache.hadoop.hbase.client.Scan;
37 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
38 import org.apache.hadoop.hbase.util.Bytes;
39 import org.apache.hadoop.io.NullWritable;
40 import org.apache.hadoop.mapreduce.Job;
41 import org.apache.hadoop.mapreduce.Reducer;
42 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
43 import org.junit.After;
44 import org.junit.AfterClass;
45 import org.junit.Before;
46 import org.junit.BeforeClass;
47 import org.junit.Test;
48 import org.junit.experimental.categories.Category;
49
50 import static org.junit.Assert.assertEquals;
51 import static org.junit.Assert.assertTrue;
52
53
54
55
56
57
58 @Category(LargeTests.class)
59 public class TestTableInputFormatScan {
60
61 static final Log LOG = LogFactory.getLog(TestTableInputFormatScan.class);
62 static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
63
64 static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
65 static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
66 static final String KEY_STARTROW = "startRow";
67 static final String KEY_LASTROW = "stpRow";
68
69 private static HTable table = null;
70
71 @BeforeClass
72 public static void setUpBeforeClass() throws Exception {
73
74 TEST_UTIL.enableDebug(TableInputFormat.class);
75 TEST_UTIL.enableDebug(TableInputFormatBase.class);
76
77 TEST_UTIL.startMiniCluster(3);
78
79 table = TEST_UTIL.createTable(TABLE_NAME, INPUT_FAMILY);
80 TEST_UTIL.createMultiRegions(table, INPUT_FAMILY);
81 TEST_UTIL.loadTable(table, INPUT_FAMILY);
82
83 TEST_UTIL.startMiniMapReduceCluster();
84 }
85
86 @AfterClass
87 public static void tearDownAfterClass() throws Exception {
88 TEST_UTIL.shutdownMiniMapReduceCluster();
89 TEST_UTIL.shutdownMiniCluster();
90 }
91
92
93
94
95 public static class ScanMapper
96 extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
97
98
99
100
101
102
103
104
105
106 @Override
107 public void map(ImmutableBytesWritable key, Result value,
108 Context context)
109 throws IOException, InterruptedException {
110 if (value.size() != 1) {
111 throw new IOException("There should only be one input column");
112 }
113 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
114 cf = value.getMap();
115 if(!cf.containsKey(INPUT_FAMILY)) {
116 throw new IOException("Wrong input columns. Missing: '" +
117 Bytes.toString(INPUT_FAMILY) + "'.");
118 }
119 String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
120 LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
121 ", value -> " + val);
122 context.write(key, key);
123 }
124
125 }
126
127
128
129
130 public static class ScanReducer
131 extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
132 NullWritable, NullWritable> {
133
134 private String first = null;
135 private String last = null;
136
137 protected void reduce(ImmutableBytesWritable key,
138 Iterable<ImmutableBytesWritable> values, Context context)
139 throws IOException ,InterruptedException {
140 int count = 0;
141 for (ImmutableBytesWritable value : values) {
142 String val = Bytes.toStringBinary(value.get());
143 LOG.info("reduce: key[" + count + "] -> " +
144 Bytes.toStringBinary(key.get()) + ", value -> " + val);
145 if (first == null) first = val;
146 last = val;
147 count++;
148 }
149 }
150
151 protected void cleanup(Context context)
152 throws IOException, InterruptedException {
153 Configuration c = context.getConfiguration();
154 String startRow = c.get(KEY_STARTROW);
155 String lastRow = c.get(KEY_LASTROW);
156 LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
157 LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
158 if (startRow != null && startRow.length() > 0) {
159 assertEquals(startRow, first);
160 }
161 if (lastRow != null && lastRow.length() > 0) {
162 assertEquals(lastRow, last);
163 }
164 }
165
166 }
167
168
169
170
171
172
173
174
175 @Test
176 public void testScanEmptyToEmpty()
177 throws IOException, InterruptedException, ClassNotFoundException {
178 testScan(null, null, null);
179 }
180
181
182
183
184
185
186
187
188 @Test
189 public void testScanEmptyToAPP()
190 throws IOException, InterruptedException, ClassNotFoundException {
191 testScan(null, "app", "apo");
192 }
193
194
195
196
197
198
199
200
201 @Test
202 public void testScanEmptyToBBA()
203 throws IOException, InterruptedException, ClassNotFoundException {
204 testScan(null, "bba", "baz");
205 }
206
207
208
209
210
211
212
213
214 @Test
215 public void testScanEmptyToBBB()
216 throws IOException, InterruptedException, ClassNotFoundException {
217 testScan(null, "bbb", "bba");
218 }
219
220
221
222
223
224
225
226
227 @Test
228 public void testScanEmptyToOPP()
229 throws IOException, InterruptedException, ClassNotFoundException {
230 testScan(null, "opp", "opo");
231 }
232
233
234
235
236
237
238
239
240 @Test
241 public void testScanOBBToOPP()
242 throws IOException, InterruptedException, ClassNotFoundException {
243 testScan("obb", "opp", "opo");
244 }
245
246
247
248
249
250
251
252
253 @Test
254 public void testScanOBBToQPP()
255 throws IOException, InterruptedException, ClassNotFoundException {
256 testScan("obb", "qpp", "qpo");
257 }
258
259
260
261
262
263
264
265
266 @Test
267 public void testScanOPPToEmpty()
268 throws IOException, InterruptedException, ClassNotFoundException {
269 testScan("opp", null, "zzz");
270 }
271
272
273
274
275
276
277
278
279 @Test
280 public void testScanYYXToEmpty()
281 throws IOException, InterruptedException, ClassNotFoundException {
282 testScan("yyx", null, "zzz");
283 }
284
285
286
287
288
289
290
291
292 @Test
293 public void testScanYYYToEmpty()
294 throws IOException, InterruptedException, ClassNotFoundException {
295 testScan("yyy", null, "zzz");
296 }
297
298
299
300
301
302
303
304
305 @Test
306 public void testScanYZYToEmpty()
307 throws IOException, InterruptedException, ClassNotFoundException {
308 testScan("yzy", null, "zzz");
309 }
310
311 @Test
312 public void testScanFromConfiguration()
313 throws IOException, InterruptedException, ClassNotFoundException {
314 testScanFromConfiguration("bba", "bbd", "bbc");
315 }
316
317
318
319
320
321
322
323
324 private void testScanFromConfiguration(String start, String stop, String last)
325 throws IOException, InterruptedException, ClassNotFoundException {
326 String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
327 "To" + (stop != null ? stop.toUpperCase() : "Empty");
328 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
329 c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
330 c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
331 c.set(KEY_STARTROW, start != null ? start : "");
332 c.set(KEY_LASTROW, last != null ? last : "");
333
334 if (start != null) {
335 c.set(TableInputFormat.SCAN_ROW_START, start);
336 }
337
338 if (stop != null) {
339 c.set(TableInputFormat.SCAN_ROW_STOP, stop);
340 }
341
342 Job job = new Job(c, jobName);
343 job.setMapperClass(ScanMapper.class);
344 job.setReducerClass(ScanReducer.class);
345 job.setMapOutputKeyClass(ImmutableBytesWritable.class);
346 job.setMapOutputValueClass(ImmutableBytesWritable.class);
347 job.setInputFormatClass(TableInputFormat.class);
348 job.setNumReduceTasks(1);
349 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
350 job.waitForCompletion(true);
351 assertTrue(job.isComplete());
352 }
353
354
355
356
357
358
359
360
361 private void testScan(String start, String stop, String last)
362 throws IOException, InterruptedException, ClassNotFoundException {
363 String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
364 "To" + (stop != null ? stop.toUpperCase() : "Empty");
365 LOG.info("Before map/reduce startup - job " + jobName);
366 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
367 Scan scan = new Scan();
368 scan.addFamily(INPUT_FAMILY);
369 if (start != null) {
370 scan.setStartRow(Bytes.toBytes(start));
371 }
372 c.set(KEY_STARTROW, start != null ? start : "");
373 if (stop != null) {
374 scan.setStopRow(Bytes.toBytes(stop));
375 }
376 c.set(KEY_LASTROW, last != null ? last : "");
377 LOG.info("scan before: " + scan);
378 Job job = new Job(c, jobName);
379 TableMapReduceUtil.initTableMapperJob(
380 Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
381 ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
382 job.setReducerClass(ScanReducer.class);
383 job.setNumReduceTasks(1);
384 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
385 LOG.info("Started " + job.getJobName());
386 job.waitForCompletion(true);
387 assertTrue(job.isComplete());
388 LOG.info("After map/reduce completion - job " + jobName);
389 }
390
391 @org.junit.Rule
392 public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
393 new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
394 }
395