001    package org.apache.fulcrum.parser;
002    
003    
004    /*
005     * Licensed to the Apache Software Foundation (ASF) under one
006     * or more contributor license agreements.  See the NOTICE file
007     * distributed with this work for additional information
008     * regarding copyright ownership.  The ASF licenses this file
009     * to you under the Apache License, Version 2.0 (the
010     * "License"); you may not use this file except in compliance
011     * with the License.  You may obtain a copy of the License at
012     *
013     *   http://www.apache.org/licenses/LICENSE-2.0
014     *
015     * Unless required by applicable law or agreed to in writing,
016     * software distributed under the License is distributed on an
017     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
018     * KIND, either express or implied.  See the License for the
019     * specific language governing permissions and limitations
020     * under the License.
021     */
022    
023    
024    import java.io.Reader;
025    import java.io.StreamTokenizer;
026    import java.util.List;
027    
028    /**
029     * CSVParser is used to parse a stream with comma-separated values and
030     * generate ParameterParser objects which can be used to
031     * extract the values in the desired type.
032     *
033     * <p>The class extends the abstract class DataStreamParser and implements
034     * initTokenizer with suitable values for CSV files to provide this
035     * functionality.
036     *
037     * <p>The class (indirectly through DataStreamParser) implements the
038     * java.util.Iterator interface for convenience.
039     * This allows simple use in a Velocity template for example:
040     *
041     * <pre>
042     * #foreach ($row in $csvfile)
043     *   Name: $row.Name
044     *   Description: $row.Description
045     * #end
046     * </pre>
047     *
048     * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
049     * @version $Id: CSVParser.java 535465 2007-05-05 06:58:06Z tv $
050     */
051    public class CSVParser extends DataStreamParser
052    {
053        /**
054         * Create a new CSVParser instance. Requires a Reader to read the
055         * comma-separated values from. The column headers must be set
056         * independently either explicitly, or by reading the first line
057         * of the CSV values.
058         *
059         * @param in the input reader.
060         */
061        public CSVParser(Reader in)
062        {
063            super(in, null, null);
064        }
065    
066        /**
067         * Create a new CSVParser instance. Requires a Reader to read the
068         * comma-separated values from, and a list of column names.
069         *
070         * @param in the input reader.
071         * @param columnNames a list of column names.
072         */
073        public CSVParser(Reader in, List columnNames)
074        {
075            super(in, columnNames, null);
076        }
077    
078        /**
079         * Create a new CSVParser instance. Requires a Reader to read the
080         * comma-separated values from, a list of column names and a
081         * character encoding.
082         *
083         * @param in the input reader.
084         * @param columnNames a list of column names.
085         * @param characterEncoding the character encoding of the input.
086         */
087        public CSVParser(Reader in, List columnNames, String characterEncoding)
088        {
089            super(in, columnNames, characterEncoding);
090        }
091    
092        /**
093         * Initialize the StreamTokenizer instance used to read the lines
094         * from the input reader.
095         */
096        protected void initTokenizer(StreamTokenizer tokenizer)
097        {
098            // set all numeric characters as ordinary characters
099            // (switches off number parsing)
100            tokenizer.ordinaryChars('0', '9');
101            tokenizer.ordinaryChars('-', '-');
102            tokenizer.ordinaryChars('.', '.');
103    
104            // set all printable characters to be treated as word chars
105            tokenizer.wordChars(' ', Integer.MAX_VALUE);
106    
107            // now set comma as the whitespace character
108            tokenizer.whitespaceChars(',', ',');
109    
110            // and  set the quote mark as the quoting character
111            tokenizer.quoteChar('"');
112    
113            // and finally say that end of line is significant
114            tokenizer.eolIsSignificant(true);
115        }
116    }