package org.apache.fulcrum.parser;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.Reader;
import java.io.StreamTokenizer;
import java.util.List;

/**
 * Parser for character streams holding comma-separated values. Each row
 * is turned into a ParameterParser object from which the individual
 * values can be extracted in the desired type.
 *
 * <p>All of the reading logic lives in the abstract base class
 * DataStreamParser; this class only contributes an
 * {@link #initTokenizer(StreamTokenizer)} implementation that configures
 * the tokenizer for the CSV format.
 *
 * <p>Through DataStreamParser the class also implements the
 * java.util.Iterator interface, which makes it convenient to use
 * from, for example, a Velocity template:
 *
 * <pre>
 * #foreach ($row in $csvfile)
 *   Name: $row.Name
 *   Description: $row.Description
 * #end
 * </pre>
 *
 * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
 * @version $Id: CSVParser.java 535465 2007-05-05 06:58:06Z tv $
 */
public class CSVParser extends DataStreamParser
{
    /**
     * Create a CSVParser that reads comma-separated values from the
     * given Reader. No column names are supplied here; they must be
     * set separately, either explicitly or by reading them from the
     * first line of the CSV input.
     *
     * @param in the input reader.
     */
    public CSVParser(Reader in)
    {
        this(in, null, null);
    }

    /**
     * Create a CSVParser that reads comma-separated values from the
     * given Reader and uses the given column names. No character
     * encoding is supplied.
     *
     * @param in the input reader.
     * @param columnNames a list of column names.
     */
    public CSVParser(Reader in, List columnNames)
    {
        this(in, columnNames, null);
    }

    /**
     * Create a CSVParser that reads comma-separated values from the
     * given Reader, using the given column names and character
     * encoding. All three arguments are handed unchanged to the
     * DataStreamParser constructor.
     *
     * @param in the input reader.
     * @param columnNames a list of column names.
     * @param characterEncoding the character encoding of the input.
     */
    public CSVParser(Reader in, List columnNames, String characterEncoding)
    {
        super(in, columnNames, characterEncoding);
    }

    /**
     * Configure the StreamTokenizer that is used to read lines from
     * the input reader so that it splits on commas, honours
     * double-quoted values and reports line ends.
     *
     * <p>The order of the category-changing calls below is deliberate:
     * later calls re-classify characters covered by earlier ones.
     *
     * @param tokenizer the tokenizer to configure.
     */
    protected void initTokenizer(StreamTokenizer tokenizer)
    {
        // Line ends are significant: the tokenizer must report them.
        tokenizer.eolIsSignificant(true);

        // Switch off number parsing by making the digits, the minus
        // sign and the dot ordinary characters.
        tokenizer.ordinaryChar('.');
        tokenizer.ordinaryChar('-');
        tokenizer.ordinaryChars('0', '9');

        // Everything from the space character upwards is part of a word.
        tokenizer.wordChars(' ', Integer.MAX_VALUE);

        // The comma is the (whitespace) separator between values.
        tokenizer.whitespaceChars(',', ',');

        // The double quote mark delimits quoted values.
        tokenizer.quoteChar('"');
    }
}