1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;
45
46
47
48
49
50
51
52
53
54
55
56
57 public abstract class TestTableInputFormatScanBase {
58
59 static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
60 static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
61
62 static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
63 static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
64 static final String KEY_STARTROW = "startRow";
65 static final String KEY_LASTROW = "stpRow";
66
67 private static HTable table = null;
68
69 @BeforeClass
70 public static void setUpBeforeClass() throws Exception {
71
72
73 System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
74
75
76 TEST_UTIL.enableDebug(TableInputFormat.class);
77 TEST_UTIL.enableDebug(TableInputFormatBase.class);
78
79 TEST_UTIL.startMiniCluster(3);
80
81 table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME), INPUT_FAMILY);
82 TEST_UTIL.createMultiRegions(table, INPUT_FAMILY);
83 TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
84
85 TEST_UTIL.startMiniMapReduceCluster();
86 }
87
88 @AfterClass
89 public static void tearDownAfterClass() throws Exception {
90 TEST_UTIL.shutdownMiniMapReduceCluster();
91 TEST_UTIL.shutdownMiniCluster();
92 }
93
94
95
96
97 public static class ScanMapper
98 extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
99
100
101
102
103
104
105
106
107
108 @Override
109 public void map(ImmutableBytesWritable key, Result value,
110 Context context)
111 throws IOException, InterruptedException {
112 if (value.size() != 1) {
113 throw new IOException("There should only be one input column");
114 }
115 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
116 cf = value.getMap();
117 if(!cf.containsKey(INPUT_FAMILY)) {
118 throw new IOException("Wrong input columns. Missing: '" +
119 Bytes.toString(INPUT_FAMILY) + "'.");
120 }
121 String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
122 LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
123 ", value -> " + val);
124 context.write(key, key);
125 }
126
127 }
128
129
130
131
132 public static class ScanReducer
133 extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
134 NullWritable, NullWritable> {
135
136 private String first = null;
137 private String last = null;
138
139 protected void reduce(ImmutableBytesWritable key,
140 Iterable<ImmutableBytesWritable> values, Context context)
141 throws IOException ,InterruptedException {
142 int count = 0;
143 for (ImmutableBytesWritable value : values) {
144 String val = Bytes.toStringBinary(value.get());
145 LOG.info("reduce: key[" + count + "] -> " +
146 Bytes.toStringBinary(key.get()) + ", value -> " + val);
147 if (first == null) first = val;
148 last = val;
149 count++;
150 }
151 }
152
153 protected void cleanup(Context context)
154 throws IOException, InterruptedException {
155 Configuration c = context.getConfiguration();
156 String startRow = c.get(KEY_STARTROW);
157 String lastRow = c.get(KEY_LASTROW);
158 LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
159 LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
160 if (startRow != null && startRow.length() > 0) {
161 assertEquals(startRow, first);
162 }
163 if (lastRow != null && lastRow.length() > 0) {
164 assertEquals(lastRow, last);
165 }
166 }
167
168 }
169
170
171
172
173
174
175
176
177 protected void testScanFromConfiguration(String start, String stop, String last)
178 throws IOException, InterruptedException, ClassNotFoundException {
179 String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
180 "To" + (stop != null ? stop.toUpperCase() : "Empty");
181 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
182 c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
183 c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
184 c.set(KEY_STARTROW, start != null ? start : "");
185 c.set(KEY_LASTROW, last != null ? last : "");
186
187 if (start != null) {
188 c.set(TableInputFormat.SCAN_ROW_START, start);
189 }
190
191 if (stop != null) {
192 c.set(TableInputFormat.SCAN_ROW_STOP, stop);
193 }
194
195 Job job = new Job(c, jobName);
196 job.setMapperClass(ScanMapper.class);
197 job.setReducerClass(ScanReducer.class);
198 job.setMapOutputKeyClass(ImmutableBytesWritable.class);
199 job.setMapOutputValueClass(ImmutableBytesWritable.class);
200 job.setInputFormatClass(TableInputFormat.class);
201 job.setNumReduceTasks(1);
202 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
203 TableMapReduceUtil.addDependencyJars(job);
204 assertTrue(job.waitForCompletion(true));
205 }
206
207
208
209
210
211
212
213
214 protected void testScan(String start, String stop, String last)
215 throws IOException, InterruptedException, ClassNotFoundException {
216 String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
217 "To" + (stop != null ? stop.toUpperCase() : "Empty");
218 LOG.info("Before map/reduce startup - job " + jobName);
219 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
220 Scan scan = new Scan();
221 scan.addFamily(INPUT_FAMILY);
222 if (start != null) {
223 scan.setStartRow(Bytes.toBytes(start));
224 }
225 c.set(KEY_STARTROW, start != null ? start : "");
226 if (stop != null) {
227 scan.setStopRow(Bytes.toBytes(stop));
228 }
229 c.set(KEY_LASTROW, last != null ? last : "");
230 LOG.info("scan before: " + scan);
231 Job job = new Job(c, jobName);
232 TableMapReduceUtil.initTableMapperJob(
233 Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
234 ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
235 job.setReducerClass(ScanReducer.class);
236 job.setNumReduceTasks(1);
237 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
238 LOG.info("Started " + job.getJobName());
239 assertTrue(job.waitForCompletion(true));
240 LOG.info("After map/reduce completion - job " + jobName);
241 }
242
243 }
244