1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapreduce;
21
22 import java.io.UnsupportedEncodingException;
23 import java.util.List;
24 import java.util.ArrayList;
25
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.hadoop.hbase.*;
29 import org.apache.hadoop.mapreduce.Job;
30 import org.apache.hadoop.fs.FSDataOutputStream;
31 import org.apache.hadoop.fs.Path;
32 import org.apache.hadoop.fs.FileSystem;
33 import org.apache.hadoop.conf.Configuration;
34 import org.apache.hadoop.util.GenericOptionsParser;
35
36 import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
37 import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
38 import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
39 import org.apache.hadoop.hbase.util.Bytes;
40 import org.apache.hadoop.hbase.client.HTable;
41 import org.apache.hadoop.hbase.client.ResultScanner;
42 import org.apache.hadoop.hbase.client.Scan;
43 import org.apache.hadoop.hbase.client.HBaseAdmin;
44 import org.apache.hadoop.hbase.client.Result;
45
46 import org.junit.Test;
47
48 import com.google.common.base.Joiner;
49 import com.google.common.base.Splitter;
50 import com.google.common.collect.Iterables;
51 import org.junit.experimental.categories.Category;
52
53 import static org.junit.Assert.*;
54
55 @Category(MediumTests.class)
56 public class TestImportTsv {
57 private static final Log LOG = LogFactory.getLog(TestImportTsv.class);
58
59 @Test
60 public void testTsvParserSpecParsing() {
61 TsvParser parser;
62
63 parser = new TsvParser("HBASE_ROW_KEY", "\t");
64 assertNull(parser.getFamily(0));
65 assertNull(parser.getQualifier(0));
66 assertEquals(0, parser.getRowKeyColumnIndex());
67 assertFalse(parser.hasTimestamp());
68
69 parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t");
70 assertNull(parser.getFamily(0));
71 assertNull(parser.getQualifier(0));
72 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
73 assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
74 assertEquals(0, parser.getRowKeyColumnIndex());
75 assertFalse(parser.hasTimestamp());
76
77 parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t");
78 assertNull(parser.getFamily(0));
79 assertNull(parser.getQualifier(0));
80 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
81 assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
82 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2));
83 assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2));
84 assertEquals(0, parser.getRowKeyColumnIndex());
85 assertFalse(parser.hasTimestamp());
86
87 parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2",
88 "\t");
89 assertNull(parser.getFamily(0));
90 assertNull(parser.getQualifier(0));
91 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
92 assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
93 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
94 assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
95 assertEquals(0, parser.getRowKeyColumnIndex());
96 assertTrue(parser.hasTimestamp());
97 assertEquals(2, parser.getTimestampKeyColumnIndex());
98 }
99
100 @Test
101 public void testTsvParser() throws BadTsvLineException {
102 TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");
103 assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
104 assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
105 assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1));
106 assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1));
107 assertNull(parser.getFamily(2));
108 assertNull(parser.getQualifier(2));
109 assertEquals(2, parser.getRowKeyColumnIndex());
110
111 assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser
112 .getTimestampKeyColumnIndex());
113
114 byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
115 ParsedLine parsed = parser.parse(line, line.length);
116 checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
117 }
118
119
120 @Test
121 public void testTsvParserWithTimestamp() throws BadTsvLineException {
122 TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
123 assertNull(parser.getFamily(0));
124 assertNull(parser.getQualifier(0));
125 assertNull(parser.getFamily(1));
126 assertNull(parser.getQualifier(1));
127 assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
128 assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));
129 assertEquals(0, parser.getRowKeyColumnIndex());
130 assertEquals(1, parser.getTimestampKeyColumnIndex());
131
132 byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
133 ParsedLine parsed = parser.parse(line, line.length);
134 assertEquals(1234l, parsed.getTimestamp(-1));
135 checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
136 }
137
138 private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
139 ArrayList<String> parsedCols = new ArrayList<String>();
140 for (int i = 0; i < parsed.getColumnCount(); i++) {
141 parsedCols.add(Bytes.toString(
142 parsed.getLineBytes(),
143 parsed.getColumnOffset(i),
144 parsed.getColumnLength(i)));
145 }
146 if (!Iterables.elementsEqual(parsedCols, expected)) {
147 fail("Expected: " + Joiner.on(",").join(expected) + "\n" +
148 "Got:" + Joiner.on(",").join(parsedCols));
149 }
150 }
151
152 private void assertBytesEquals(byte[] a, byte[] b) {
153 assertEquals(Bytes.toStringBinary(a), Bytes.toStringBinary(b));
154 }
155
156
157
158
159 @Test(expected=BadTsvLineException.class)
160 public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
161 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
162 byte[] line = Bytes.toBytes("val_a\tval_b\tval_c");
163 parser.parse(line, line.length);
164 }
165
166 @Test(expected=BadTsvLineException.class)
167 public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
168 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
169 byte[] line = Bytes.toBytes("");
170 parser.parse(line, line.length);
171 }
172
173 @Test(expected=BadTsvLineException.class)
174 public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
175 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
176 byte[] line = Bytes.toBytes("key_only");
177 parser.parse(line, line.length);
178 }
179
180 @Test(expected=BadTsvLineException.class)
181 public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
182 TsvParser parser = new TsvParser("col_a,HBASE_ROW_KEY", "\t");
183 byte[] line = Bytes.toBytes("only_cola_data_and_no_row_key");
184 parser.parse(line, line.length);
185 }
186
187 @Test(expected = BadTsvLineException.class)
188 public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
189 TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
190 assertEquals(1, parser.getTimestampKeyColumnIndex());
191 byte[] line = Bytes.toBytes("rowkey\ttimestamp\tval_a");
192 ParsedLine parsed = parser.parse(line, line.length);
193 assertEquals(-1, parsed.getTimestamp(-1));
194 checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
195 }
196
197 @Test(expected = BadTsvLineException.class)
198 public void testTsvParserNoTimestampValue() throws BadTsvLineException {
199 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
200 assertEquals(2, parser.getTimestampKeyColumnIndex());
201 byte[] line = Bytes.toBytes("rowkey\tval_a");
202 parser.parse(line, line.length);
203 }
204
205
206 @Test
207 public void testMROnTable()
208 throws Exception {
209 String TABLE_NAME = "TestTable";
210 String FAMILY = "FAM";
211 String INPUT_FILE = "InputFile.esv";
212
213
214 String[] args = new String[] {
215 "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
216 "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
217 TABLE_NAME,
218 INPUT_FILE
219 };
220
221 doMROnTableTest(INPUT_FILE, FAMILY, TABLE_NAME, null, args, 1);
222 }
223
224 @Test
225 public void testMROnTableWithTimestamp() throws Exception {
226 String TABLE_NAME = "TestTable";
227 String FAMILY = "FAM";
228 String INPUT_FILE = "InputFile1.csv";
229
230
231 String[] args = new String[] {
232 "-D" + ImportTsv.COLUMNS_CONF_KEY
233 + "=HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B",
234 "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,", TABLE_NAME, INPUT_FILE };
235
236 String data = "KEY,1234,VALUE1,VALUE2\n";
237 doMROnTableTest(INPUT_FILE, FAMILY, TABLE_NAME, data, args, 1);
238 }
239
240
241 @Test
242 public void testMROnTableWithCustomMapper()
243 throws Exception {
244 String TABLE_NAME = "TestTable";
245 String FAMILY = "FAM";
246 String INPUT_FILE = "InputFile2.esv";
247
248
249 String[] args = new String[] {
250 "-D" + ImportTsv.MAPPER_CONF_KEY + "=org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapper",
251 TABLE_NAME,
252 INPUT_FILE
253 };
254
255 doMROnTableTest(INPUT_FILE, FAMILY, TABLE_NAME, null, args, 3);
256 }
257
258 private void doMROnTableTest(String inputFile, String family, String tableName,
259 String data, String[] args, int valueMultiplier) throws Exception {
260
261
262 HBaseTestingUtility htu1 = new HBaseTestingUtility();
263
264 htu1.startMiniCluster();
265 htu1.startMiniMapReduceCluster();
266
267 GenericOptionsParser opts = new GenericOptionsParser(htu1.getConfiguration(), args);
268 Configuration conf = opts.getConfiguration();
269 args = opts.getRemainingArgs();
270
271 try {
272 FileSystem fs = FileSystem.get(conf);
273 FSDataOutputStream op = fs.create(new Path(inputFile), true);
274 if (data == null) {
275 data = "KEY\u001bVALUE1\u001bVALUE2\n";
276 }
277 op.write(Bytes.toBytes(data));
278 op.close();
279
280 final byte[] FAM = Bytes.toBytes(family);
281 final byte[] TAB = Bytes.toBytes(tableName);
282 if (conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY) == null) {
283 HTableDescriptor desc = new HTableDescriptor(TAB);
284 desc.addFamily(new HColumnDescriptor(FAM));
285 HBaseAdmin admin = new HBaseAdmin(conf);
286 admin.createTable(desc);
287 admin.close();
288 } else {
289 LOG.info("set the hbaseAdmin");
290 ImportTsv.createHbaseAdmin(conf);
291 }
292 Job job = ImportTsv.createSubmittableJob(conf, args);
293 job.waitForCompletion(false);
294 assertTrue(job.isSuccessful());
295
296 HTable table = new HTable(new Configuration(conf), TAB);
297 boolean verified = false;
298 long pause = conf.getLong("hbase.client.pause", 5 * 1000);
299 int numRetries = conf.getInt("hbase.client.retries.number", 5);
300 for (int i = 0; i < numRetries; i++) {
301 try {
302 Scan scan = new Scan();
303
304 scan.addFamily(FAM);
305 ResultScanner resScanner = table.getScanner(scan);
306 for (Result res : resScanner) {
307 assertTrue(res.size() == 2);
308 List<KeyValue> kvs = res.list();
309 assertEquals(toU8Str(kvs.get(0).getRow()),
310 toU8Str(Bytes.toBytes("KEY")));
311 assertEquals(toU8Str(kvs.get(1).getRow()),
312 toU8Str(Bytes.toBytes("KEY")));
313 assertEquals(toU8Str(kvs.get(0).getValue()),
314 toU8Str(Bytes.toBytes("VALUE" + valueMultiplier)));
315 assertEquals(toU8Str(kvs.get(1).getValue()),
316 toU8Str(Bytes.toBytes("VALUE" + 2*valueMultiplier)));
317
318 }
319 verified = true;
320 break;
321 } catch (NullPointerException e) {
322
323
324 }
325 try {
326 Thread.sleep(pause);
327 } catch (InterruptedException e) {
328
329 }
330 }
331 table.close();
332 assertTrue(verified);
333 } finally {
334 htu1.shutdownMiniMapReduceCluster();
335 htu1.shutdownMiniCluster();
336 }
337 }
338
339 @Test
340 public void testBulkOutputWithoutAnExistingTable() throws Exception {
341 String TABLE_NAME = "TestTable";
342 String FAMILY = "FAM";
343 String INPUT_FILE = "InputFile2.esv";
344
345
346 String[] args = new String[] {
347 "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
348 "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
349 "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=output", TABLE_NAME,
350 INPUT_FILE };
351 doMROnTableTest(INPUT_FILE, FAMILY, TABLE_NAME, null, args, 3);
352 }
353
354 public static String toU8Str(byte[] bytes) throws UnsupportedEncodingException {
355 return new String(bytes);
356 }
357
358 @org.junit.Rule
359 public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
360 new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
361 }
362