1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertTrue;
23
24 import java.io.IOException;
25 import java.util.Arrays;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.NavigableMap;
29
30 import org.apache.commons.logging.Log;
31 import org.apache.commons.logging.LogFactory;
32 import org.apache.hadoop.conf.Configuration;
33 import org.apache.hadoop.fs.Path;
34 import org.apache.hadoop.hbase.HBaseTestingUtility;
35 import org.apache.hadoop.hbase.HRegionLocation;
36 import org.apache.hadoop.hbase.TableName;
37 import org.apache.hadoop.hbase.client.HTable;
38 import org.apache.hadoop.hbase.client.Result;
39 import org.apache.hadoop.hbase.client.Scan;
40 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
41 import org.apache.hadoop.hbase.util.Bytes;
42 import org.apache.hadoop.io.NullWritable;
43 import org.apache.hadoop.mapreduce.InputSplit;
44 import org.apache.hadoop.mapreduce.Job;
45 import org.apache.hadoop.mapreduce.Reducer;
46 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
47 import org.junit.AfterClass;
48 import org.junit.Assert;
49 import org.junit.BeforeClass;
50
51
52
53
54
55
56
57
58
59
60
61
62
63 public abstract class TestTableInputFormatScanBase {
64
65 static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
66 static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
67
68 static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
69 static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
70 static final String KEY_STARTROW = "startRow";
71 static final String KEY_LASTROW = "stpRow";
72
73 private static HTable table = null;
74
75 @BeforeClass
76 public static void setUpBeforeClass() throws Exception {
77
78
79 System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
80
81
82 TEST_UTIL.enableDebug(TableInputFormat.class);
83 TEST_UTIL.enableDebug(TableInputFormatBase.class);
84
85 TEST_UTIL.startMiniCluster(3);
86
87 table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME), INPUT_FAMILY);
88 TEST_UTIL.createMultiRegions(table, INPUT_FAMILY);
89 TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
90
91 TEST_UTIL.startMiniMapReduceCluster();
92 }
93
94 @AfterClass
95 public static void tearDownAfterClass() throws Exception {
96 TEST_UTIL.shutdownMiniMapReduceCluster();
97 TEST_UTIL.shutdownMiniCluster();
98 }
99
100
101
102
103 public static class ScanMapper
104 extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
105
106
107
108
109
110
111
112
113
114 @Override
115 public void map(ImmutableBytesWritable key, Result value,
116 Context context)
117 throws IOException, InterruptedException {
118 if (value.size() != 1) {
119 throw new IOException("There should only be one input column");
120 }
121 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
122 cf = value.getMap();
123 if(!cf.containsKey(INPUT_FAMILY)) {
124 throw new IOException("Wrong input columns. Missing: '" +
125 Bytes.toString(INPUT_FAMILY) + "'.");
126 }
127 String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
128 LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
129 ", value -> " + val);
130 context.write(key, key);
131 }
132
133 }
134
135
136
137
138 public static class ScanReducer
139 extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
140 NullWritable, NullWritable> {
141
142 private String first = null;
143 private String last = null;
144
145 protected void reduce(ImmutableBytesWritable key,
146 Iterable<ImmutableBytesWritable> values, Context context)
147 throws IOException ,InterruptedException {
148 int count = 0;
149 for (ImmutableBytesWritable value : values) {
150 String val = Bytes.toStringBinary(value.get());
151 LOG.info("reduce: key[" + count + "] -> " +
152 Bytes.toStringBinary(key.get()) + ", value -> " + val);
153 if (first == null) first = val;
154 last = val;
155 count++;
156 }
157 }
158
159 protected void cleanup(Context context)
160 throws IOException, InterruptedException {
161 Configuration c = context.getConfiguration();
162 String startRow = c.get(KEY_STARTROW);
163 String lastRow = c.get(KEY_LASTROW);
164 LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
165 LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
166 if (startRow != null && startRow.length() > 0) {
167 assertEquals(startRow, first);
168 }
169 if (lastRow != null && lastRow.length() > 0) {
170 assertEquals(lastRow, last);
171 }
172 }
173
174 }
175
176
177
178
179
180
181
182
183 protected void testScanFromConfiguration(String start, String stop, String last)
184 throws IOException, InterruptedException, ClassNotFoundException {
185 String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
186 "To" + (stop != null ? stop.toUpperCase() : "Empty");
187 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
188 c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
189 c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
190 c.set(KEY_STARTROW, start != null ? start : "");
191 c.set(KEY_LASTROW, last != null ? last : "");
192
193 if (start != null) {
194 c.set(TableInputFormat.SCAN_ROW_START, start);
195 }
196
197 if (stop != null) {
198 c.set(TableInputFormat.SCAN_ROW_STOP, stop);
199 }
200
201 Job job = new Job(c, jobName);
202 job.setMapperClass(ScanMapper.class);
203 job.setReducerClass(ScanReducer.class);
204 job.setMapOutputKeyClass(ImmutableBytesWritable.class);
205 job.setMapOutputValueClass(ImmutableBytesWritable.class);
206 job.setInputFormatClass(TableInputFormat.class);
207 job.setNumReduceTasks(1);
208 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
209 TableMapReduceUtil.addDependencyJars(job);
210 assertTrue(job.waitForCompletion(true));
211 }
212
213
214
215
216
217
218
219
220 protected void testScan(String start, String stop, String last)
221 throws IOException, InterruptedException, ClassNotFoundException {
222 String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
223 "To" + (stop != null ? stop.toUpperCase() : "Empty");
224 LOG.info("Before map/reduce startup - job " + jobName);
225 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
226 Scan scan = new Scan();
227 scan.addFamily(INPUT_FAMILY);
228 if (start != null) {
229 scan.setStartRow(Bytes.toBytes(start));
230 }
231 c.set(KEY_STARTROW, start != null ? start : "");
232 if (stop != null) {
233 scan.setStopRow(Bytes.toBytes(stop));
234 }
235 c.set(KEY_LASTROW, last != null ? last : "");
236 LOG.info("scan before: " + scan);
237 Job job = new Job(c, jobName);
238 TableMapReduceUtil.initTableMapperJob(
239 Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
240 ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
241 job.setReducerClass(ScanReducer.class);
242 job.setNumReduceTasks(1);
243 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
244 LOG.info("Started " + job.getJobName());
245 assertTrue(job.waitForCompletion(true));
246 LOG.info("After map/reduce completion - job " + jobName);
247 }
248
249
250
251
252
253
254
255
256
257 public void testNumOfSplits(String ratio, int expectedNumOfSplits) throws IOException,
258 InterruptedException,
259 ClassNotFoundException {
260 String jobName = "TestJobForNumOfSplits";
261 LOG.info("Before map/reduce startup - job " + jobName);
262 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
263 Scan scan = new Scan();
264 scan.addFamily(INPUT_FAMILY);
265 c.set("hbase.mapreduce.input.autobalance", "true");
266 c.set("hbase.mapreduce.input.autobalance.maxskewratio", ratio);
267 c.set(KEY_STARTROW, "");
268 c.set(KEY_LASTROW, "");
269 Job job = new Job(c, jobName);
270 TableMapReduceUtil.initTableMapperJob(Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
271 ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
272 TableInputFormat tif = new TableInputFormat();
273 tif.setConf(job.getConfiguration());
274 Assert.assertEquals(new String(TABLE_NAME), new String(table.getTableName()));
275 List<InputSplit> splits = tif.getSplits(job);
276 Assert.assertEquals(expectedNumOfSplits, splits.size());
277 }
278
279
280
281
282 public void testGetSplitKey(byte[] startKey, byte[] endKey, byte[] splitKey, boolean isText) {
283 byte[] result = TableInputFormatBase.getSplitKey(startKey, endKey, isText);
284 Assert.assertArrayEquals(splitKey, result);
285 }
286 }
287