Coverage report

  %line %branch
org.apache.turbine.util.parser.DataStreamParser
0% 
0% 

 1  
 package org.apache.turbine.util.parser;
 2  
 
 3  
 /*
 4  
  * Copyright 2001-2004 The Apache Software Foundation.
 5  
  *
 6  
  * Licensed under the Apache License, Version 2.0 (the "License")
 7  
  * you may not use this file except in compliance with the License.
 8  
  * You may obtain a copy of the License at
 9  
  *
 10  
  *     http://www.apache.org/licenses/LICENSE-2.0
 11  
  *
 12  
  * Unless required by applicable law or agreed to in writing, software
 13  
  * distributed under the License is distributed on an "AS IS" BASIS,
 14  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15  
  * See the License for the specific language governing permissions and
 16  
  * limitations under the License.
 17  
  */
 18  
 
 19  
 import java.io.BufferedReader;
 20  
 import java.io.IOException;
 21  
 import java.io.InputStreamReader;
 22  
 import java.io.Reader;
 23  
 import java.io.StreamTokenizer;
 24  
 
 25  
 import java.util.ArrayList;
 26  
 import java.util.Iterator;
 27  
 import java.util.List;
 28  
 import java.util.NoSuchElementException;
 29  
 
 30  
 import org.apache.commons.logging.Log;
 31  
 import org.apache.commons.logging.LogFactory;
 32  
 
 33  
 /**
 34  
  * DataStreamParser is used to parse a stream with a fixed format and
 35  
  * generate ValueParser objects which can be used to extract the values
 36  
  * in the desired type.
 37  
  *
 38  
  * <p>The class itself is abstract - a concrete subclass which implements
 39  
  * the initTokenizer method such as CSVParser or TSVParser is required
 40  
  * to use the functionality.
 41  
  *
 42  
  * <p>The class implements the java.util.Iterator interface for convenience.
 43  
  * This allows simple use in a Velocity template for example:
 44  
  *
 45  
  * <pre>
 46  
  * #foreach ($row in $datastream)
 47  
  *   Name: $row.Name
 48  
  *   Description: $row.Description
 49  
  * #end
 50  
  * </pre>
 51  
  *
 52  
  * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
 53  
  * @author <a href="mailto:martin@mvdb.net">Martin van den Bemt</a>
 54  
  * @version $Id: DataStreamParser.java,v 1.1.2.3 2004/08/14 20:11:43 henning Exp $
 55  
  */
 56  
 public abstract class DataStreamParser implements Iterator
 57  
 {
 58  
     /** Logging */
 59  0
     private static Log log = LogFactory.getLog(DataStreamParser.class);
 60  
 
 61  
     /**
 62  
      * Conditional compilation flag.
 63  
      */
 64  
     private static final boolean DEBUG = false;
 65  
 
 66  
     /**
 67  
      * The constant for empty fields
 68  
      */
 69  
     protected static final String EMPTYFIELDNAME = "UNKNOWNFIELD";
 70  
 
 71  
     /**
 72  
      * The list of column names.
 73  
      */
 74  
     private List columnNames;
 75  
 
 76  
     /**
 77  
      * The stream tokenizer for reading values from the input reader.
 78  
      */
 79  
     private StreamTokenizer tokenizer;
 80  
 
 81  
     /**
 82  
      * The parameter parser holding the values of columns for the current line.
 83  
      */
 84  
     private ValueParser lineValues;
 85  
 
 86  
     /**
 87  
      * Indicates whether or not the tokenizer has read anything yet.
 88  
      */
 89  0
     private boolean neverRead = true;
 90  
 
 91  
     /**
 92  
      * The character encoding of the input
 93  
      */
 94  
     private String characterEncoding;
 95  
 
 96  
     /**
 97  
      * The field separator, which can be almost any char
 98  
      */
 99  
     private char fieldSeparator;
 100  
 
 101  
     /**
 102  
      * Create a new DataStreamParser instance. Requires a Reader to read the
 103  
      * comma-separated values from, a list of column names and a
 104  
      * character encoding.
 105  
      *
 106  
      * @param in the input reader.
 107  
      * @param columnNames a list of column names.
 108  
      * @param characterEncoding the character encoding of the input.
 109  
      */
 110  
     public DataStreamParser(Reader in, List columnNames,
 111  
                             String characterEncoding)
 112  0
     {
 113  0
         this.columnNames = columnNames;
 114  0
         this.characterEncoding = characterEncoding;
 115  
 
 116  0
         if (this.characterEncoding == null)
 117  
         {
 118  
             // try and get the characterEncoding from the reader
 119  0
             this.characterEncoding = "US-ASCII";
 120  
             try
 121  
             {
 122  0
                 this.characterEncoding = ((InputStreamReader) in).getEncoding();
 123  
             }
 124  0
             catch (ClassCastException e)
 125  
             {
 126  0
             }
 127  
         }
 128  
 
 129  0
         tokenizer = new StreamTokenizer(new BufferedReader(in));
 130  0
         initTokenizer(tokenizer);
 131  0
     }
 132  
 
 133  
     /**
 134  
      * Initialize the StreamTokenizer instance used to read the lines
 135  
      * from the input reader. This must be implemented in subclasses to
 136  
      * set up other tokenizing properties.
 137  
      *
 138  
      * @param tokenizer the tokenizer to adjust
 139  
      */
 140  
     protected void initTokenizer(StreamTokenizer tokenizer)
 141  
     {
 142  
         // set all numeric characters as ordinary characters
 143  
         // (switches off number parsing)
 144  0
         tokenizer.ordinaryChars('0', '9');
 145  0
         tokenizer.ordinaryChars('-', '-');
 146  0
         tokenizer.ordinaryChars('.', '.');
 147  
 
 148  
         // leave out the comma sign (,), we need it for empty fields
 149  
 
 150  0
         tokenizer.wordChars(' ', Integer.MAX_VALUE);
 151  
 
 152  
         // and  set the quote mark as the quoting character
 153  0
         tokenizer.quoteChar('"');
 154  
 
 155  
         // and finally say that end of line is significant
 156  0
         tokenizer.eolIsSignificant(true);
 157  0
     }
 158  
 
 159  
     /**
 160  
      * This method must be called to set up the field separator
 161  
      * @param fieldSeparator the char which separates the fields
 162  
      */
 163  
     public void setFieldSeparator(char fieldSeparator)
 164  
     {
 165  0
         this.fieldSeparator = fieldSeparator;
 166  
         // make this field also an ordinary char by default.
 167  0
         tokenizer.ordinaryChar(fieldSeparator);
 168  0
     }
 169  
 
 170  
     /**
 171  
      * Set the list of column names explicitly.
 172  
      *
 173  
      * @param columnNames A list of column names.
 174  
      */
 175  
     public void setColumnNames(List columnNames)
 176  
     {
 177  0
         this.columnNames = columnNames;
 178  0
     }
 179  
 
 180  
     /**
 181  
      * Read the list of column names from the input reader using the
 182  
      * tokenizer. If fieldNames are empty, we use the current fieldNumber
 183  
      * + the EMPTYFIELDNAME to make one up.
 184  
      *
 185  
      * @exception IOException an IOException occurred.
 186  
      */
 187  
     public void readColumnNames()
 188  
             throws IOException
 189  
     {
 190  0
         columnNames = new ArrayList();
 191  0
         int lastTtype = 0;
 192  0
         int fieldCounter = 1;
 193  
 
 194  0
         neverRead = false;
 195  0
         tokenizer.nextToken();
 196  
         while (tokenizer.ttype == StreamTokenizer.TT_WORD || tokenizer.ttype == StreamTokenizer.TT_EOL
 197  0
                 || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator)
 198  
         {
 199  0
             if (tokenizer.ttype != fieldSeparator && tokenizer.ttype != StreamTokenizer.TT_EOL)
 200  
             {
 201  0
                 columnNames.add(tokenizer.sval);
 202  0
                 fieldCounter++;
 203  
             }
 204  0
             else if (tokenizer.ttype == fieldSeparator && lastTtype == fieldSeparator)
 205  
             {
 206  
                 // we have an empty field name
 207  0
                 columnNames.add(EMPTYFIELDNAME + fieldCounter);
 208  0
                 fieldCounter++;
 209  
             }
 210  0
             else if (lastTtype == fieldSeparator && tokenizer.ttype == StreamTokenizer.TT_EOL)
 211  
             {
 212  0
                 columnNames.add(EMPTYFIELDNAME + fieldCounter);
 213  0
                 break;
 214  
             }
 215  0
             else if (tokenizer.ttype == StreamTokenizer.TT_EOL)
 216  
             {
 217  0
                 break;
 218  
             }
 219  0
             lastTtype = tokenizer.ttype;
 220  0
             tokenizer.nextToken();
 221  
         }
 222  0
     }
 223  
 
 224  
     /**
 225  
      * Determine whether a further row of values exists in the input.
 226  
      *
 227  
      * @return true if the input has more rows.
 228  
      * @exception IOException an IOException occurred.
 229  
      */
 230  
     public boolean hasNextRow()
 231  
             throws IOException
 232  
     {
 233  
         // check for end of line ensures that an empty last line doesn't
 234  
         // give a false positive for hasNextRow
 235  0
         if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
 236  
         {
 237  0
             tokenizer.nextToken();
 238  0
             tokenizer.pushBack();
 239  0
             neverRead = false;
 240  
         }
 241  0
         return tokenizer.ttype != StreamTokenizer.TT_EOF;
 242  
     }
 243  
 
 244  
     /**
 245  
      * Returns a ValueParser object containing the next row of values.
 246  
      *
 247  
      * @return a ValueParser object.
 248  
      * @exception IOException an IOException occurred.
 249  
      * @exception NoSuchElementException there are no more rows in the input.
 250  
      */
 251  
     public ValueParser nextRow()
 252  
             throws IOException, NoSuchElementException
 253  
     {
 254  0
         if (!hasNextRow())
 255  
         {
 256  0
             throw new NoSuchElementException();
 257  
         }
 258  
 
 259  0
         if (lineValues == null)
 260  
         {
 261  0
             lineValues = new BaseValueParser(characterEncoding);
 262  
         }
 263  
         else
 264  
         {
 265  0
             lineValues.clear();
 266  
         }
 267  
 
 268  0
         Iterator it = columnNames.iterator();
 269  0
         tokenizer.nextToken();
 270  
         while (tokenizer.ttype == StreamTokenizer.TT_WORD
 271  0
                 || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator)
 272  
         {
 273  0
             int lastTtype = 0;
 274  
             // note this means that if there are more values than
 275  
             // column names, the extra values are discarded.
 276  0
             if (it.hasNext())
 277  
             {
 278  0
                 String colname = it.next().toString();
 279  0
                 String colval = tokenizer.sval;
 280  0
                 if (tokenizer.ttype != fieldSeparator && lastTtype != fieldSeparator)
 281  
                 {
 282  
                     if (DEBUG)
 283  
                     {
 284  
                         log.debug("DataStreamParser.nextRow(): " +
 285  
                                 colname + "=" + colval);
 286  
                     }
 287  0
                     lineValues.add(colname, colval);
 288  
                 }
 289  0
                 else if (tokenizer.ttype == fieldSeparator && lastTtype != fieldSeparator)
 290  
                 {
 291  0
                     lastTtype = tokenizer.ttype;
 292  0
                     tokenizer.nextToken();
 293  0
                     if (tokenizer.ttype != fieldSeparator && tokenizer.sval != null)
 294  
                     {
 295  0
                         lineValues.add(colname, tokenizer.sval);
 296  
                     }
 297  0
                     else if (tokenizer.ttype == StreamTokenizer.TT_EOL)
 298  
                     {
 299  0
                         tokenizer.pushBack();
 300  
                     }
 301  
                 }
 302  
             }
 303  0
             tokenizer.nextToken();
 304  
         }
 305  
 
 306  0
         return lineValues;
 307  
     }
 308  
 
 309  
     /**
 310  
      * Determine whether a further row of values exists in the input.
 311  
      *
 312  
      * @return true if the input has more rows.
 313  
      */
 314  
     public boolean hasNext()
 315  
     {
 316  0
         boolean hasNext = false;
 317  
 
 318  
         try
 319  
         {
 320  0
             hasNext = hasNextRow();
 321  
         }
 322  0
         catch (IOException e)
 323  
         {
 324  0
             log.error("IOException in CSVParser.hasNext", e);
 325  0
         }
 326  
 
 327  0
         return hasNext;
 328  
     }
 329  
 
 330  
     /**
 331  
      * Returns a ValueParser object containing the next row of values.
 332  
      *
 333  
      * @return a ValueParser object as an Object.
 334  
      * @exception NoSuchElementException there are no more rows in the input
 335  
      *                                   or an IOException occurred.
 336  
      */
 337  
     public Object next()
 338  
             throws NoSuchElementException
 339  
     {
 340  0
         Object nextRow = null;
 341  
 
 342  
         try
 343  
         {
 344  0
             nextRow = nextRow();
 345  
         }
 346  0
         catch (IOException e)
 347  
         {
 348  0
             log.error("IOException in CSVParser.next", e);
 349  0
             throw new NoSuchElementException();
 350  0
         }
 351  
 
 352  0
         return nextRow;
 353  
     }
 354  
 
 355  
     /**
 356  
      * The optional Iterator.remove method is not supported.
 357  
      *
 358  
      * @exception UnsupportedOperationException the operation is not supported.
 359  
      */
 360  
     public void remove()
 361  
             throws UnsupportedOperationException
 362  
     {
 363  0
         throw new UnsupportedOperationException();
 364  
     }
 365  
 }

This report is generated by jcoverage, Maven and Maven JCoverage Plugin.