1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapreduce;
21  
22  import java.io.UnsupportedEncodingException;
23  import java.util.List;
24  import java.util.ArrayList;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.*;
29  import org.apache.hadoop.mapreduce.Job;
30  import org.apache.hadoop.fs.FSDataOutputStream;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.util.GenericOptionsParser;
35  
36  import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
37  import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
38  import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
39  import org.apache.hadoop.hbase.util.Bytes;
40  import org.apache.hadoop.hbase.client.HTable;
41  import org.apache.hadoop.hbase.client.ResultScanner;
42  import org.apache.hadoop.hbase.client.Scan;
43  import org.apache.hadoop.hbase.client.HBaseAdmin;
44  import org.apache.hadoop.hbase.client.Result;
45  
46  import org.junit.Test;
47  
48  import com.google.common.base.Joiner;
49  import com.google.common.base.Splitter;
50  import com.google.common.collect.Iterables;
51  import org.junit.experimental.categories.Category;
52  
53  import static org.junit.Assert.*;
54  
55  @Category(MediumTests.class)
56  public class TestImportTsv {
57    private static final Log LOG = LogFactory.getLog(TestImportTsv.class);
58  
59    @Test
60    public void testTsvParserSpecParsing() {
61      TsvParser parser;
62  
63      parser = new TsvParser("HBASE_ROW_KEY", "\t");
64      assertNull(parser.getFamily(0));
65      assertNull(parser.getQualifier(0));
66      assertEquals(0, parser.getRowKeyColumnIndex());
67      assertFalse(parser.hasTimestamp());
68  
69      parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t");
70      assertNull(parser.getFamily(0));
71      assertNull(parser.getQualifier(0));
72      assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
73      assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
74      assertEquals(0, parser.getRowKeyColumnIndex());
75      assertFalse(parser.hasTimestamp());
76  
77      parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t");
78      assertNull(parser.getFamily(0));
79      assertNull(parser.getQualifier(0));
80      assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
81      assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
82      assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2));
83      assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2));
84      assertEquals(0, parser.getRowKeyColumnIndex());
85      assertFalse(parser.hasTimestamp());
86      
87      parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2",
88          "\t");
89      assertNull(parser.getFamily(0));
90      assertNull(parser.getQualifier(0));
91      assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
92      assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
93      assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
94      assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
95      assertEquals(0, parser.getRowKeyColumnIndex());
96      assertTrue(parser.hasTimestamp());
97      assertEquals(2, parser.getTimestampKeyColumnIndex());
98    }
99  
100   @Test
101   public void testTsvParser() throws BadTsvLineException {
102     TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");
103     assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
104     assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
105     assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1));
106     assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1));
107     assertNull(parser.getFamily(2));
108     assertNull(parser.getQualifier(2));
109     assertEquals(2, parser.getRowKeyColumnIndex());
110     
111     assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser
112         .getTimestampKeyColumnIndex());
113     
114     byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
115     ParsedLine parsed = parser.parse(line, line.length);
116     checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
117   }
118   
119   
120   @Test
121   public void testTsvParserWithTimestamp() throws BadTsvLineException {
122     TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
123     assertNull(parser.getFamily(0));
124     assertNull(parser.getQualifier(0));
125     assertNull(parser.getFamily(1));
126     assertNull(parser.getQualifier(1));
127     assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
128     assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));
129     assertEquals(0, parser.getRowKeyColumnIndex());
130     assertEquals(1, parser.getTimestampKeyColumnIndex());
131 
132     byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
133     ParsedLine parsed = parser.parse(line, line.length);
134     assertEquals(1234l, parsed.getTimestamp(-1));
135     checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
136   }
137 
138   private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
139     ArrayList<String> parsedCols = new ArrayList<String>();
140     for (int i = 0; i < parsed.getColumnCount(); i++) {
141       parsedCols.add(Bytes.toString(
142           parsed.getLineBytes(),
143           parsed.getColumnOffset(i),
144           parsed.getColumnLength(i)));
145     }
146     if (!Iterables.elementsEqual(parsedCols, expected)) {
147       fail("Expected: " + Joiner.on(",").join(expected) + "\n" +
148           "Got:" + Joiner.on(",").join(parsedCols));
149     }
150   }
151 
152   private void assertBytesEquals(byte[] a, byte[] b) {
153     assertEquals(Bytes.toStringBinary(a), Bytes.toStringBinary(b));
154   }
155 
156   /**
157    * Test cases that throw BadTsvLineException
158    */
159   @Test(expected=BadTsvLineException.class)
160   public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
161     TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
162     byte[] line = Bytes.toBytes("val_a\tval_b\tval_c");
163     parser.parse(line, line.length);
164   }
165 
166   @Test(expected=BadTsvLineException.class)
167   public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
168     TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
169     byte[] line = Bytes.toBytes("");
170     parser.parse(line, line.length);
171   }
172 
173   @Test(expected=BadTsvLineException.class)
174   public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
175     TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
176     byte[] line = Bytes.toBytes("key_only");
177     parser.parse(line, line.length);
178   }
179 
180   @Test(expected=BadTsvLineException.class)
181   public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
182     TsvParser parser = new TsvParser("col_a,HBASE_ROW_KEY", "\t");
183     byte[] line = Bytes.toBytes("only_cola_data_and_no_row_key");
184     parser.parse(line, line.length);
185   }
186   
187   @Test(expected = BadTsvLineException.class)
188   public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
189     TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
190     assertEquals(1, parser.getTimestampKeyColumnIndex());
191     byte[] line = Bytes.toBytes("rowkey\ttimestamp\tval_a");
192     ParsedLine parsed = parser.parse(line, line.length);
193     assertEquals(-1, parsed.getTimestamp(-1));
194     checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
195   }
196   
197   @Test(expected = BadTsvLineException.class)
198   public void testTsvParserNoTimestampValue() throws BadTsvLineException {
199     TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
200     assertEquals(2, parser.getTimestampKeyColumnIndex());
201     byte[] line = Bytes.toBytes("rowkey\tval_a");
202     parser.parse(line, line.length);
203   }
204   
205 
206   @Test
207   public void testMROnTable()
208   throws Exception {
209     String TABLE_NAME = "TestTable";
210     String FAMILY = "FAM";
211     String INPUT_FILE = "InputFile.esv";
212 
213     // Prepare the arguments required for the test.
214     String[] args = new String[] {
215         "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
216         "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
217         TABLE_NAME,
218         INPUT_FILE
219     };
220 
221     doMROnTableTest(INPUT_FILE, FAMILY, TABLE_NAME, null, args, 1);
222   }
223   
224   @Test
225   public void testMROnTableWithTimestamp() throws Exception {
226     String TABLE_NAME = "TestTable";
227     String FAMILY = "FAM";
228     String INPUT_FILE = "InputFile1.csv";
229 
230     // Prepare the arguments required for the test.
231     String[] args = new String[] {
232         "-D" + ImportTsv.COLUMNS_CONF_KEY
233             + "=HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B",
234         "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,", TABLE_NAME, INPUT_FILE };
235 
236     String data = "KEY,1234,VALUE1,VALUE2\n";
237     doMROnTableTest(INPUT_FILE, FAMILY, TABLE_NAME, data, args, 1);
238   }
239   
240 
241   @Test
242   public void testMROnTableWithCustomMapper()
243   throws Exception {
244     String TABLE_NAME = "TestTable";
245     String FAMILY = "FAM";
246     String INPUT_FILE = "InputFile2.esv";
247 
248     // Prepare the arguments required for the test.
249     String[] args = new String[] {
250         "-D" + ImportTsv.MAPPER_CONF_KEY + "=org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapper",
251         TABLE_NAME,
252         INPUT_FILE
253     };
254 
255     doMROnTableTest(INPUT_FILE, FAMILY, TABLE_NAME, null, args, 3);
256   }
257 
258   private void doMROnTableTest(String inputFile, String family, String tableName,
259                                String data, String[] args, int valueMultiplier) throws Exception {
260 
261     // Cluster
262     HBaseTestingUtility htu1 = new HBaseTestingUtility();
263 
264     htu1.startMiniCluster();
265     htu1.startMiniMapReduceCluster();
266 
267     GenericOptionsParser opts = new GenericOptionsParser(htu1.getConfiguration(), args);
268     Configuration conf = opts.getConfiguration();
269     args = opts.getRemainingArgs();
270 
271     try {
272       FileSystem fs = FileSystem.get(conf);
273       FSDataOutputStream op = fs.create(new Path(inputFile), true);
274       if (data == null) {
275         data = "KEY\u001bVALUE1\u001bVALUE2\n";
276       }
277       op.write(Bytes.toBytes(data));
278       op.close();
279 
280       final byte[] FAM = Bytes.toBytes(family);
281       final byte[] TAB = Bytes.toBytes(tableName);
282       if (conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY) == null) {
283         HTableDescriptor desc = new HTableDescriptor(TAB);
284         desc.addFamily(new HColumnDescriptor(FAM));
285         HBaseAdmin admin = new HBaseAdmin(conf);
286         admin.createTable(desc);
287         admin.close();
288       } else { // set the hbaseAdmin as we are not going through main()
289         LOG.info("set the hbaseAdmin");
290         ImportTsv.createHbaseAdmin(conf);
291       }
292       Job job = ImportTsv.createSubmittableJob(conf, args);
293       job.waitForCompletion(false);
294       assertTrue(job.isSuccessful());
295       
296       HTable table = new HTable(new Configuration(conf), TAB);
297       boolean verified = false;
298       long pause = conf.getLong("hbase.client.pause", 5 * 1000);
299       int numRetries = conf.getInt("hbase.client.retries.number", 5);
300       for (int i = 0; i < numRetries; i++) {
301         try {
302           Scan scan = new Scan();
303           // Scan entire family.
304           scan.addFamily(FAM);
305           ResultScanner resScanner = table.getScanner(scan);
306           for (Result res : resScanner) {
307             assertTrue(res.size() == 2);
308             List<KeyValue> kvs = res.list();
309             assertEquals(toU8Str(kvs.get(0).getRow()),
310                 toU8Str(Bytes.toBytes("KEY")));
311             assertEquals(toU8Str(kvs.get(1).getRow()),
312                 toU8Str(Bytes.toBytes("KEY")));
313             assertEquals(toU8Str(kvs.get(0).getValue()),
314                 toU8Str(Bytes.toBytes("VALUE" + valueMultiplier)));
315             assertEquals(toU8Str(kvs.get(1).getValue()),
316                 toU8Str(Bytes.toBytes("VALUE" + 2*valueMultiplier)));
317             // Only one result set is expected, so let it loop.
318           }
319           verified = true;
320           break;
321         } catch (NullPointerException e) {
322           // If here, a cell was empty.  Presume its because updates came in
323           // after the scanner had been opened.  Wait a while and retry.
324         }
325         try {
326           Thread.sleep(pause);
327         } catch (InterruptedException e) {
328           // continue
329         }
330       }
331       table.close();
332       assertTrue(verified);
333     } finally {
334       htu1.shutdownMiniMapReduceCluster();
335       htu1.shutdownMiniCluster();
336     }
337   }
338   
339   @Test
340   public void testBulkOutputWithoutAnExistingTable() throws Exception {
341     String TABLE_NAME = "TestTable";
342     String FAMILY = "FAM";
343     String INPUT_FILE = "InputFile2.esv";
344 
345     // Prepare the arguments required for the test.
346     String[] args = new String[] {
347         "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
348         "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
349         "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=output", TABLE_NAME,
350         INPUT_FILE };
351     doMROnTableTest(INPUT_FILE, FAMILY, TABLE_NAME, null, args, 3);
352   }
353 
354   public static String toU8Str(byte[] bytes) throws UnsupportedEncodingException {
355     return new String(bytes);
356   }
357 
358   @org.junit.Rule
359   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
360     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
361 }
362