1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNull;
24 import static org.junit.Assert.assertTrue;
25 import static org.junit.Assert.fail;
26
27 import java.util.ArrayList;
28
29 import org.apache.hadoop.hbase.HConstants;
30 import org.apache.hadoop.hbase.SmallTests;
31 import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
32 import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
33 import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
34 import org.apache.hadoop.hbase.util.Bytes;
35 import org.apache.hadoop.hbase.util.Pair;
36 import org.junit.Test;
37 import org.junit.experimental.categories.Category;
38
39 import com.google.common.base.Joiner;
40 import com.google.common.base.Splitter;
41 import com.google.common.collect.Iterables;
42
43
44
45
46 @Category(SmallTests.class)
47 public class TestImportTsvParser {
48
49 private void assertBytesEquals(byte[] a, byte[] b) {
50 assertEquals(Bytes.toStringBinary(a), Bytes.toStringBinary(b));
51 }
52
53 private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
54 ArrayList<String> parsedCols = new ArrayList<String>();
55 for (int i = 0; i < parsed.getColumnCount(); i++) {
56 parsedCols.add(Bytes.toString(parsed.getLineBytes(), parsed.getColumnOffset(i),
57 parsed.getColumnLength(i)));
58 }
59 if (!Iterables.elementsEqual(parsedCols, expected)) {
60 fail("Expected: " + Joiner.on(",").join(expected) + "\n" + "Got:"
61 + Joiner.on(",").join(parsedCols));
62 }
63 }
64
65 @Test
66 public void testTsvParserSpecParsing() {
67 TsvParser parser;
68
69 parser = new TsvParser("HBASE_ROW_KEY", "\t");
70 assertNull(parser.getFamily(0));
71 assertNull(parser.getQualifier(0));
72 assertEquals(0, parser.getRowKeyColumnIndex());
73 assertFalse(parser.hasTimestamp());
74
75 parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t");
76 assertNull(parser.getFamily(0));
77 assertNull(parser.getQualifier(0));
78 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
79 assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
80 assertEquals(0, parser.getRowKeyColumnIndex());
81 assertFalse(parser.hasTimestamp());
82
83 parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t");
84 assertNull(parser.getFamily(0));
85 assertNull(parser.getQualifier(0));
86 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
87 assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
88 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2));
89 assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2));
90 assertEquals(0, parser.getRowKeyColumnIndex());
91 assertFalse(parser.hasTimestamp());
92
93 parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", "\t");
94 assertNull(parser.getFamily(0));
95 assertNull(parser.getQualifier(0));
96 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
97 assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
98 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
99 assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
100 assertEquals(0, parser.getRowKeyColumnIndex());
101 assertTrue(parser.hasTimestamp());
102 assertEquals(2, parser.getTimestampKeyColumnIndex());
103 }
104
105 @Test
106 public void testTsvParser() throws BadTsvLineException {
107 TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");
108 assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
109 assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
110 assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1));
111 assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1));
112 assertNull(parser.getFamily(2));
113 assertNull(parser.getQualifier(2));
114 assertEquals(2, parser.getRowKeyColumnIndex());
115
116 assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX,
117 parser.getTimestampKeyColumnIndex());
118
119 byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
120 ParsedLine parsed = parser.parse(line, line.length);
121 checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
122 }
123
124 @Test
125 public void testTsvParserWithTimestamp() throws BadTsvLineException {
126 TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
127 assertNull(parser.getFamily(0));
128 assertNull(parser.getQualifier(0));
129 assertNull(parser.getFamily(1));
130 assertNull(parser.getQualifier(1));
131 assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
132 assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));
133 assertEquals(0, parser.getRowKeyColumnIndex());
134 assertEquals(1, parser.getTimestampKeyColumnIndex());
135
136 byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
137 ParsedLine parsed = parser.parse(line, line.length);
138 assertEquals(1234l, parsed.getTimestamp(-1));
139 checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
140 }
141
142
143
144
145 @Test(expected = BadTsvLineException.class)
146 public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
147 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
148 byte[] line = Bytes.toBytes("val_a\tval_b\tval_c");
149 parser.parse(line, line.length);
150 }
151
152 @Test(expected = BadTsvLineException.class)
153 public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
154 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
155 byte[] line = Bytes.toBytes("");
156 parser.parse(line, line.length);
157 }
158
159 @Test(expected = BadTsvLineException.class)
160 public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
161 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
162 byte[] line = Bytes.toBytes("key_only");
163 parser.parse(line, line.length);
164 }
165
166 @Test(expected = BadTsvLineException.class)
167 public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
168 TsvParser parser = new TsvParser("col_a,HBASE_ROW_KEY", "\t");
169 byte[] line = Bytes.toBytes("only_cola_data_and_no_row_key");
170 parser.parse(line, line.length);
171 }
172
173 @Test(expected = BadTsvLineException.class)
174 public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
175 TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
176 assertEquals(1, parser.getTimestampKeyColumnIndex());
177 byte[] line = Bytes.toBytes("rowkey\ttimestamp\tval_a");
178 ParsedLine parsed = parser.parse(line, line.length);
179 assertEquals(-1, parsed.getTimestamp(-1));
180 checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
181 }
182
183 @Test(expected = BadTsvLineException.class)
184 public void testTsvParserNoTimestampValue() throws BadTsvLineException {
185 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
186 assertEquals(2, parser.getTimestampKeyColumnIndex());
187 byte[] line = Bytes.toBytes("rowkey\tval_a");
188 parser.parse(line, line.length);
189 }
190
191 @Test
192 public void testTsvParserParseRowKey() throws BadTsvLineException {
193 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
194 assertEquals(0, parser.getRowKeyColumnIndex());
195 byte[] line = Bytes.toBytes("rowkey\tval_a\t1234");
196 Pair<Integer, Integer> rowKeyOffsets = parser
197 .parseRowKey(line, line.length);
198 assertEquals(0, rowKeyOffsets.getFirst().intValue());
199 assertEquals(5, rowKeyOffsets.getSecond().intValue());
200 try {
201 line = Bytes.toBytes("\t\tval_a\t1234");
202 parser.parseRowKey(line, line.length);
203 fail("Should get BadTsvLineException on empty rowkey.");
204 } catch (BadTsvLineException b) {
205
206 }
207 parser = new TsvParser("col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
208 assertEquals(1, parser.getRowKeyColumnIndex());
209 line = Bytes.toBytes("val_a\trowkey\t1234");
210 rowKeyOffsets = parser.parseRowKey(line, line.length);
211 assertEquals(6, rowKeyOffsets.getFirst().intValue());
212 assertEquals(11, rowKeyOffsets.getSecond().intValue());
213 try {
214 line = Bytes.toBytes("val_a");
215 rowKeyOffsets = parser.parseRowKey(line, line.length);
216 fail("Should get BadTsvLineException when number of columns less than rowkey position.");
217 } catch (BadTsvLineException b) {
218
219 }
220 parser = new TsvParser("col_a,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
221 assertEquals(2, parser.getRowKeyColumnIndex());
222 line = Bytes.toBytes("val_a\t1234\trowkey");
223 rowKeyOffsets = parser.parseRowKey(line, line.length);
224 assertEquals(11, rowKeyOffsets.getFirst().intValue());
225 assertEquals(16, rowKeyOffsets.getSecond().intValue());
226 }
227
228 }