View Javadoc
1 package org.apache.turbine.util; 2 3 /* ==================================================================== 4 * The Apache Software License, Version 1.1 5 * 6 * Copyright (c) 2001 The Apache Software Foundation. All rights 7 * reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in 18 * the documentation and/or other materials provided with the 19 * distribution. 20 * 21 * 3. The end-user documentation included with the redistribution, 22 * if any, must include the following acknowledgment: 23 * "This product includes software developed by the 24 * Apache Software Foundation (http://www.apache.org/)." 25 * Alternately, this acknowledgment may appear in the software itself, 26 * if and wherever such third-party acknowledgments normally appear. 27 * 28 * 4. The names "Apache" and "Apache Software Foundation" and 29 * "Apache Turbine" must not be used to endorse or promote products 30 * derived from this software without prior written permission. For 31 * written permission, please contact apache@apache.org. 32 * 33 * 5. Products derived from this software may not be called "Apache", 34 * "Apache Turbine", nor may "Apache" appear in their name, without 35 * prior written permission of the Apache Software Foundation. 36 * 37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 38 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 39 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 40 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 41 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 44 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 45 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 46 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 47 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 48 * SUCH DAMAGE. 49 * ==================================================================== 50 * 51 * This software consists of voluntary contributions made by many 52 * individuals on behalf of the Apache Software Foundation. For more 53 * information on the Apache Software Foundation, please see 54 * <http://www.apache.org/>;. 55 */ 56 57 import java.io.BufferedReader; 58 import java.io.InputStreamReader; 59 import java.io.IOException; 60 import java.io.Reader; 61 import java.io.StreamTokenizer; 62 import java.util.ArrayList; 63 import java.util.Iterator; 64 import java.util.List; 65 import java.util.NoSuchElementException; 66 67 import org.apache.turbine.util.parser.BaseValueParser; 68 69 /*** 70 * DataStreamParser is used to parse a stream with a fixed format and 71 * generate ValueParser objects which can be used to extract the values 72 * in the desired type. 73 * 74 * <p>The class itself is abstract - a concrete subclass which implements 75 * the initTokenizer method such as CSVParser or TSVParser is required 76 * to use the functionality. 77 * 78 * <p>The class implements the java.util.Iterator interface for convenience. 79 * This allows simple use in a Velocity template for example: 80 * 81 * <pre> 82 * #foreach ($row in $datastream) 83 * Name: $row.Name 84 * Description: $row.Description 85 * #end 86 * </pre> 87 * 88 * @author <a href="mailto:sean@informage.net">Sean Legassick</a> 89 * @author <a href="mailto:martin@mvdb.net">Martin van den Bemt</a> 90 * @version $Id: DataStreamParser.java,v 1.2 2002/05/29 13:38:26 brekke Exp $ 91 */ 92 public abstract class DataStreamParser implements Iterator 93 { 94 /*** 95 * Conditional compilation flag. 96 */ 97 private static final boolean DEBUG = false; 98 99 /*** 100 * The constant for empty fields 101 */ 102 protected static final String EMPTYFIELDNAME="UNKNOWNFIELD"; 103 104 /*** 105 * The list of column names. 106 */ 107 private List columnNames; 108 109 /*** 110 * The stream tokenizer for reading values from the input reader. 111 */ 112 private StreamTokenizer tokenizer; 113 114 /*** 115 * The parameter parser holding the values of columns for the current line. 116 */ 117 private ValueParser lineValues; 118 119 /*** 120 * Indicates whether or not the tokenizer has read anything yet. 121 */ 122 private boolean neverRead = true; 123 124 /*** 125 * The character encoding of the input 126 */ 127 private String characterEncoding; 128 129 /*** 130 * The fieldseperator, which can be almost any char 131 */ 132 private char fieldSeparator; 133 134 /*** 135 * Create a new DataStreamParser instance. Requires a Reader to read the 136 * comma-separated values from, a list of column names and a 137 * character encoding. 138 * 139 * @param in the input reader. 140 * @param columnNames a list of column names. 141 * @param characterEncoding the character encoding of the input. 142 */ 143 public DataStreamParser(Reader in, List columnNames, 144 String characterEncoding) 145 { 146 this.columnNames = columnNames; 147 this.characterEncoding = characterEncoding; 148 149 if (this.characterEncoding == null) 150 { 151 // try and get the characterEncoding from the reader 152 this.characterEncoding = "US-ASCII"; 153 try 154 { 155 this.characterEncoding = ((InputStreamReader)in).getEncoding(); 156 } 157 catch (ClassCastException e) 158 { 159 } 160 } 161 162 tokenizer = new StreamTokenizer(new BufferedReader(in)); 163 initTokenizer(tokenizer); 164 } 165 166 /*** 167 * Initialize the StreamTokenizer instance used to read the lines 168 * from the input reader. This must be implemented in subclasses to 169 * set up other tokenizing properties. 170 * 171 * @param tokenizer the tokenizer to adjust 172 */ 173 protected void initTokenizer(StreamTokenizer tokenizer) 174 { 175 // set all numeric characters as ordinary characters 176 // (switches off number parsing) 177 tokenizer.ordinaryChars('0', '9'); 178 tokenizer.ordinaryChars('-', '-'); 179 tokenizer.ordinaryChars('.', '.'); 180 181 // leave out the comma sign (,), we need it for empty fields 182 183 tokenizer.wordChars(' ', Integer.MAX_VALUE); 184 185 // and set the quote mark as the quoting character 186 tokenizer.quoteChar('"'); 187 188 // and finally say that end of line is significant 189 tokenizer.eolIsSignificant(true); 190 } 191 192 /*** 193 * This method must be called to setup the field seperator 194 * @param fieldSeparator the char which separates the fields 195 */ 196 public void setFieldSeparator(char fieldSeparator) 197 { 198 this.fieldSeparator = fieldSeparator; 199 // make this field also an ordinary char by default. 200 tokenizer.ordinaryChar(fieldSeparator); 201 } 202 203 204 /*** 205 * Set the list of column names explicitly. 206 * 207 * @param columnNames A list of column names. 208 */ 209 public void setColumnNames(List columnNames) 210 { 211 this.columnNames = columnNames; 212 } 213 214 /*** 215 * Read the list of column names from the input reader using the 216 * tokenizer. If fieldNames are empty, we use the current fieldNumber 217 * + the EMPTYFIELDNAME to make one up. 218 * 219 * @exception IOException an IOException occurred. 220 */ 221 public void readColumnNames() 222 throws IOException 223 { 224 columnNames = new ArrayList(); 225 int lastTtype = 0; 226 int fieldCounter = 1; 227 228 neverRead = false; 229 tokenizer.nextToken(); 230 while (tokenizer.ttype == tokenizer.TT_WORD || tokenizer.ttype == tokenizer.TT_EOL 231 || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator) 232 { 233 if (tokenizer.ttype != fieldSeparator && tokenizer.ttype != tokenizer.TT_EOL) 234 { 235 columnNames.add(tokenizer.sval); 236 fieldCounter++; 237 } 238 else if (tokenizer.ttype == fieldSeparator && lastTtype == fieldSeparator) 239 { 240 // we have an empty field name 241 columnNames.add(EMPTYFIELDNAME+fieldCounter); 242 fieldCounter++; 243 } 244 else if (lastTtype == fieldSeparator && tokenizer.ttype == tokenizer.TT_EOL) 245 { 246 columnNames.add(EMPTYFIELDNAME+fieldCounter); 247 break; 248 } 249 else if(tokenizer.ttype == tokenizer.TT_EOL) 250 { 251 break; 252 } 253 lastTtype = tokenizer.ttype; 254 tokenizer.nextToken(); 255 } 256 } 257 258 /*** 259 * Determine whether a further row of values exists in the input. 260 * 261 * @return true if the input has more rows. 262 * @exception IOException an IOException occurred. 263 */ 264 public boolean hasNextRow() 265 throws IOException 266 { 267 // check for end of line ensures that an empty last line doesn't 268 // give a false positive for hasNextRow 269 if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL) 270 { 271 tokenizer.nextToken(); 272 tokenizer.pushBack(); 273 neverRead = false; 274 } 275 return tokenizer.ttype != StreamTokenizer.TT_EOF; 276 } 277 278 /*** 279 * Returns a ValueParser object containing the next row of values. 280 * 281 * @return a ValueParser object. 282 * @exception IOException an IOException occurred. 283 * @exception NoSuchElementException there are no more rows in the input. 284 */ 285 public ValueParser nextRow() 286 throws IOException, NoSuchElementException 287 { 288 if (!hasNextRow()) 289 { 290 throw new NoSuchElementException(); 291 } 292 293 if (lineValues == null) 294 { 295 lineValues = new BaseValueParser(characterEncoding); 296 } 297 else 298 { 299 lineValues.clear(); 300 } 301 302 Iterator it = columnNames.iterator(); 303 tokenizer.nextToken(); 304 while (tokenizer.ttype == StreamTokenizer.TT_WORD 305 || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator) 306 { 307 int lastTtype = 0; 308 // note this means that if there are more values than 309 // column names, the extra values are discarded. 310 if (it.hasNext()) 311 { 312 String colname = it.next().toString(); 313 String colval = tokenizer.sval; 314 if (tokenizer.ttype != fieldSeparator && lastTtype != fieldSeparator) 315 { 316 if (DEBUG) 317 { 318 Log.debug("DataStreamParser.nextRow(): " + 319 colname + "=" + colval); 320 } 321 lineValues.add(colname, colval); 322 } 323 else if (tokenizer.ttype == fieldSeparator && lastTtype != fieldSeparator) 324 { 325 lastTtype = tokenizer.ttype; 326 tokenizer.nextToken(); 327 if (tokenizer.ttype != fieldSeparator && tokenizer.sval!=null) 328 { 329 lineValues.add(colname, tokenizer.sval); 330 } 331 else if (tokenizer.ttype == tokenizer.TT_EOL) 332 { 333 tokenizer.pushBack(); 334 } 335 } 336 } 337 tokenizer.nextToken(); 338 } 339 340 return lineValues; 341 } 342 343 /*** 344 * Determine whether a further row of values exists in the input. 345 * 346 * @return true if the input has more rows. 347 */ 348 public boolean hasNext() 349 { 350 boolean hasNext = false; 351 352 try 353 { 354 hasNext = hasNextRow(); 355 } 356 catch (IOException e) 357 { 358 Log.error("IOException in CSVParser.hasNext", e); 359 } 360 361 return hasNext; 362 } 363 364 /*** 365 * Returns a ValueParser object containing the next row of values. 366 * 367 * @return a ValueParser object as an Object. 368 * @exception NoSuchElementException there are no more rows in the input 369 * or an IOException occurred. 370 */ 371 public Object next() 372 throws NoSuchElementException 373 { 374 Object nextRow = null; 375 376 try 377 { 378 nextRow = nextRow(); 379 } 380 catch (IOException e) 381 { 382 Log.error("IOException in CSVParser.next", e); 383 throw new NoSuchElementException(); 384 } 385 386 return nextRow; 387 } 388 389 /*** 390 * The optional Iterator.remove method is not supported. 391 * 392 * @exception UnsupportedOperationException the operation is not supported. 393 */ 394 public void remove() 395 throws UnsupportedOperationException 396 { 397 throw new UnsupportedOperationException(); 398 } 399 }

This page was automatically generated by Maven