1 package org.apache.turbine.util;
2
3 /* ====================================================================
4 * The Apache Software License, Version 1.1
5 *
6 * Copyright (c) 2001 The Apache Software Foundation. All rights
7 * reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. The end-user documentation included with the redistribution,
22 * if any, must include the following acknowledgment:
23 * "This product includes software developed by the
24 * Apache Software Foundation (http://www.apache.org/)."
25 * Alternately, this acknowledgment may appear in the software itself,
26 * if and wherever such third-party acknowledgments normally appear.
27 *
28 * 4. The names "Apache" and "Apache Software Foundation" and
29 * "Apache Turbine" must not be used to endorse or promote products
30 * derived from this software without prior written permission. For
31 * written permission, please contact apache@apache.org.
32 *
33 * 5. Products derived from this software may not be called "Apache",
34 * "Apache Turbine", nor may "Apache" appear in their name, without
35 * prior written permission of the Apache Software Foundation.
36 *
37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * SUCH DAMAGE.
49 * ====================================================================
50 *
51 * This software consists of voluntary contributions made by many
52 * individuals on behalf of the Apache Software Foundation. For more
53 * information on the Apache Software Foundation, please see
54 * <http://www.apache.org/>.
55 */
56
57 import java.io.BufferedReader;
58 import java.io.InputStreamReader;
59 import java.io.IOException;
60 import java.io.Reader;
61 import java.io.StreamTokenizer;
62 import java.util.ArrayList;
63 import java.util.Iterator;
64 import java.util.List;
65 import java.util.NoSuchElementException;
66
67 import org.apache.turbine.util.parser.BaseValueParser;
68
69 /***
70 * DataStreamParser is used to parse a stream with a fixed format and
71 * generate ValueParser objects which can be used to extract the values
72 * in the desired type.
73 *
74 * <p>The class itself is abstract - a concrete subclass which implements
75 * the initTokenizer method such as CSVParser or TSVParser is required
76 * to use the functionality.
77 *
78 * <p>The class implements the java.util.Iterator interface for convenience.
79 * This allows simple use in a Velocity template for example:
80 *
81 * <pre>
82 * #foreach ($row in $datastream)
83 * Name: $row.Name
84 * Description: $row.Description
85 * #end
86 * </pre>
87 *
88 * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
89 * @author <a href="mailto:martin@mvdb.net">Martin van den Bemt</a>
90 * @version $Id: DataStreamParser.java,v 1.2 2002/05/29 13:38:26 brekke Exp $
91 */
92 public abstract class DataStreamParser implements Iterator
93 {
94 /***
95 * Conditional compilation flag.
96 */
97 private static final boolean DEBUG = false;
98
99 /***
100 * The constant for empty fields
101 */
102 protected static final String EMPTYFIELDNAME="UNKNOWNFIELD";
103
104 /***
105 * The list of column names.
106 */
107 private List columnNames;
108
109 /***
110 * The stream tokenizer for reading values from the input reader.
111 */
112 private StreamTokenizer tokenizer;
113
114 /***
115 * The parameter parser holding the values of columns for the current line.
116 */
117 private ValueParser lineValues;
118
119 /***
120 * Indicates whether or not the tokenizer has read anything yet.
121 */
122 private boolean neverRead = true;
123
124 /***
125 * The character encoding of the input
126 */
127 private String characterEncoding;
128
129 /***
130 * The fieldseperator, which can be almost any char
131 */
132 private char fieldSeparator;
133
134 /***
135 * Create a new DataStreamParser instance. Requires a Reader to read the
136 * comma-separated values from, a list of column names and a
137 * character encoding.
138 *
139 * @param in the input reader.
140 * @param columnNames a list of column names.
141 * @param characterEncoding the character encoding of the input.
142 */
143 public DataStreamParser(Reader in, List columnNames,
144 String characterEncoding)
145 {
146 this.columnNames = columnNames;
147 this.characterEncoding = characterEncoding;
148
149 if (this.characterEncoding == null)
150 {
151 // try and get the characterEncoding from the reader
152 this.characterEncoding = "US-ASCII";
153 try
154 {
155 this.characterEncoding = ((InputStreamReader)in).getEncoding();
156 }
157 catch (ClassCastException e)
158 {
159 }
160 }
161
162 tokenizer = new StreamTokenizer(new BufferedReader(in));
163 initTokenizer(tokenizer);
164 }
165
166 /***
167 * Initialize the StreamTokenizer instance used to read the lines
168 * from the input reader. This must be implemented in subclasses to
169 * set up other tokenizing properties.
170 *
171 * @param tokenizer the tokenizer to adjust
172 */
173 protected void initTokenizer(StreamTokenizer tokenizer)
174 {
175 // set all numeric characters as ordinary characters
176 // (switches off number parsing)
177 tokenizer.ordinaryChars('0', '9');
178 tokenizer.ordinaryChars('-', '-');
179 tokenizer.ordinaryChars('.', '.');
180
181 // leave out the comma sign (,), we need it for empty fields
182
183 tokenizer.wordChars(' ', Integer.MAX_VALUE);
184
185 // and set the quote mark as the quoting character
186 tokenizer.quoteChar('"');
187
188 // and finally say that end of line is significant
189 tokenizer.eolIsSignificant(true);
190 }
191
192 /***
193 * This method must be called to setup the field seperator
194 * @param fieldSeparator the char which separates the fields
195 */
196 public void setFieldSeparator(char fieldSeparator)
197 {
198 this.fieldSeparator = fieldSeparator;
199 // make this field also an ordinary char by default.
200 tokenizer.ordinaryChar(fieldSeparator);
201 }
202
203
204 /***
205 * Set the list of column names explicitly.
206 *
207 * @param columnNames A list of column names.
208 */
209 public void setColumnNames(List columnNames)
210 {
211 this.columnNames = columnNames;
212 }
213
214 /***
215 * Read the list of column names from the input reader using the
216 * tokenizer. If fieldNames are empty, we use the current fieldNumber
217 * + the EMPTYFIELDNAME to make one up.
218 *
219 * @exception IOException an IOException occurred.
220 */
221 public void readColumnNames()
222 throws IOException
223 {
224 columnNames = new ArrayList();
225 int lastTtype = 0;
226 int fieldCounter = 1;
227
228 neverRead = false;
229 tokenizer.nextToken();
230 while (tokenizer.ttype == tokenizer.TT_WORD || tokenizer.ttype == tokenizer.TT_EOL
231 || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator)
232 {
233 if (tokenizer.ttype != fieldSeparator && tokenizer.ttype != tokenizer.TT_EOL)
234 {
235 columnNames.add(tokenizer.sval);
236 fieldCounter++;
237 }
238 else if (tokenizer.ttype == fieldSeparator && lastTtype == fieldSeparator)
239 {
240 // we have an empty field name
241 columnNames.add(EMPTYFIELDNAME+fieldCounter);
242 fieldCounter++;
243 }
244 else if (lastTtype == fieldSeparator && tokenizer.ttype == tokenizer.TT_EOL)
245 {
246 columnNames.add(EMPTYFIELDNAME+fieldCounter);
247 break;
248 }
249 else if(tokenizer.ttype == tokenizer.TT_EOL)
250 {
251 break;
252 }
253 lastTtype = tokenizer.ttype;
254 tokenizer.nextToken();
255 }
256 }
257
258 /***
259 * Determine whether a further row of values exists in the input.
260 *
261 * @return true if the input has more rows.
262 * @exception IOException an IOException occurred.
263 */
264 public boolean hasNextRow()
265 throws IOException
266 {
267 // check for end of line ensures that an empty last line doesn't
268 // give a false positive for hasNextRow
269 if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
270 {
271 tokenizer.nextToken();
272 tokenizer.pushBack();
273 neverRead = false;
274 }
275 return tokenizer.ttype != StreamTokenizer.TT_EOF;
276 }
277
278 /***
279 * Returns a ValueParser object containing the next row of values.
280 *
281 * @return a ValueParser object.
282 * @exception IOException an IOException occurred.
283 * @exception NoSuchElementException there are no more rows in the input.
284 */
285 public ValueParser nextRow()
286 throws IOException, NoSuchElementException
287 {
288 if (!hasNextRow())
289 {
290 throw new NoSuchElementException();
291 }
292
293 if (lineValues == null)
294 {
295 lineValues = new BaseValueParser(characterEncoding);
296 }
297 else
298 {
299 lineValues.clear();
300 }
301
302 Iterator it = columnNames.iterator();
303 tokenizer.nextToken();
304 while (tokenizer.ttype == StreamTokenizer.TT_WORD
305 || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator)
306 {
307 int lastTtype = 0;
308 // note this means that if there are more values than
309 // column names, the extra values are discarded.
310 if (it.hasNext())
311 {
312 String colname = it.next().toString();
313 String colval = tokenizer.sval;
314 if (tokenizer.ttype != fieldSeparator && lastTtype != fieldSeparator)
315 {
316 if (DEBUG)
317 {
318 Log.debug("DataStreamParser.nextRow(): " +
319 colname + "=" + colval);
320 }
321 lineValues.add(colname, colval);
322 }
323 else if (tokenizer.ttype == fieldSeparator && lastTtype != fieldSeparator)
324 {
325 lastTtype = tokenizer.ttype;
326 tokenizer.nextToken();
327 if (tokenizer.ttype != fieldSeparator && tokenizer.sval!=null)
328 {
329 lineValues.add(colname, tokenizer.sval);
330 }
331 else if (tokenizer.ttype == tokenizer.TT_EOL)
332 {
333 tokenizer.pushBack();
334 }
335 }
336 }
337 tokenizer.nextToken();
338 }
339
340 return lineValues;
341 }
342
343 /***
344 * Determine whether a further row of values exists in the input.
345 *
346 * @return true if the input has more rows.
347 */
348 public boolean hasNext()
349 {
350 boolean hasNext = false;
351
352 try
353 {
354 hasNext = hasNextRow();
355 }
356 catch (IOException e)
357 {
358 Log.error("IOException in CSVParser.hasNext", e);
359 }
360
361 return hasNext;
362 }
363
364 /***
365 * Returns a ValueParser object containing the next row of values.
366 *
367 * @return a ValueParser object as an Object.
368 * @exception NoSuchElementException there are no more rows in the input
369 * or an IOException occurred.
370 */
371 public Object next()
372 throws NoSuchElementException
373 {
374 Object nextRow = null;
375
376 try
377 {
378 nextRow = nextRow();
379 }
380 catch (IOException e)
381 {
382 Log.error("IOException in CSVParser.next", e);
383 throw new NoSuchElementException();
384 }
385
386 return nextRow;
387 }
388
389 /***
390 * The optional Iterator.remove method is not supported.
391 *
392 * @exception UnsupportedOperationException the operation is not supported.
393 */
394 public void remove()
395 throws UnsupportedOperationException
396 {
397 throw new UnsupportedOperationException();
398 }
399 }
This page was automatically generated by Maven