%line | %branch | |||||||||
---|---|---|---|---|---|---|---|---|---|---|
org.apache.turbine.util.parser.DataStreamParser |
|
|
1 | package org.apache.turbine.util.parser; |
|
2 | ||
3 | /* |
|
4 | * Copyright 2001-2004 The Apache Software Foundation. |
|
5 | * |
|
6 | * Licensed under the Apache License, Version 2.0 (the "License") |
|
7 | * you may not use this file except in compliance with the License. |
|
8 | * You may obtain a copy of the License at |
|
9 | * |
|
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
11 | * |
|
12 | * Unless required by applicable law or agreed to in writing, software |
|
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
15 | * See the License for the specific language governing permissions and |
|
16 | * limitations under the License. |
|
17 | */ |
|
18 | ||
19 | import java.io.BufferedReader; |
|
20 | import java.io.IOException; |
|
21 | import java.io.InputStreamReader; |
|
22 | import java.io.Reader; |
|
23 | import java.io.StreamTokenizer; |
|
24 | ||
25 | import java.util.ArrayList; |
|
26 | import java.util.Iterator; |
|
27 | import java.util.List; |
|
28 | import java.util.NoSuchElementException; |
|
29 | ||
30 | import org.apache.commons.logging.Log; |
|
31 | import org.apache.commons.logging.LogFactory; |
|
32 | ||
33 | /** |
|
34 | * DataStreamParser is used to parse a stream with a fixed format and |
|
35 | * generate ValueParser objects which can be used to extract the values |
|
36 | * in the desired type. |
|
37 | * |
|
38 | * <p>The class itself is abstract - a concrete subclass which implements |
|
39 | * the initTokenizer method such as CSVParser or TSVParser is required |
|
40 | * to use the functionality. |
|
41 | * |
|
42 | * <p>The class implements the java.util.Iterator interface for convenience. |
|
43 | * This allows simple use in a Velocity template for example: |
|
44 | * |
|
45 | * <pre> |
|
46 | * #foreach ($row in $datastream) |
|
47 | * Name: $row.Name |
|
48 | * Description: $row.Description |
|
49 | * #end |
|
50 | * </pre> |
|
51 | * |
|
52 | * @author <a href="mailto:sean@informage.net">Sean Legassick</a> |
|
53 | * @author <a href="mailto:martin@mvdb.net">Martin van den Bemt</a> |
|
54 | * @version $Id: DataStreamParser.java,v 1.1.2.3 2004/08/14 20:11:43 henning Exp $ |
|
55 | */ |
|
56 | public abstract class DataStreamParser implements Iterator |
|
57 | { |
|
58 | /** Logging */ |
|
59 | 0 | private static Log log = LogFactory.getLog(DataStreamParser.class); |
60 | ||
61 | /** |
|
62 | * Conditional compilation flag. |
|
63 | */ |
|
64 | private static final boolean DEBUG = false; |
|
65 | ||
66 | /** |
|
67 | * The constant for empty fields |
|
68 | */ |
|
69 | protected static final String EMPTYFIELDNAME = "UNKNOWNFIELD"; |
|
70 | ||
71 | /** |
|
72 | * The list of column names. |
|
73 | */ |
|
74 | private List columnNames; |
|
75 | ||
76 | /** |
|
77 | * The stream tokenizer for reading values from the input reader. |
|
78 | */ |
|
79 | private StreamTokenizer tokenizer; |
|
80 | ||
81 | /** |
|
82 | * The parameter parser holding the values of columns for the current line. |
|
83 | */ |
|
84 | private ValueParser lineValues; |
|
85 | ||
86 | /** |
|
87 | * Indicates whether or not the tokenizer has read anything yet. |
|
88 | */ |
|
89 | 0 | private boolean neverRead = true; |
90 | ||
91 | /** |
|
92 | * The character encoding of the input |
|
93 | */ |
|
94 | private String characterEncoding; |
|
95 | ||
96 | /** |
|
97 | * The field separator, which can be almost any char |
|
98 | */ |
|
99 | private char fieldSeparator; |
|
100 | ||
101 | /** |
|
102 | * Create a new DataStreamParser instance. Requires a Reader to read the |
|
103 | * comma-separated values from, a list of column names and a |
|
104 | * character encoding. |
|
105 | * |
|
106 | * @param in the input reader. |
|
107 | * @param columnNames a list of column names. |
|
108 | * @param characterEncoding the character encoding of the input. |
|
109 | */ |
|
110 | public DataStreamParser(Reader in, List columnNames, |
|
111 | String characterEncoding) |
|
112 | 0 | { |
113 | 0 | this.columnNames = columnNames; |
114 | 0 | this.characterEncoding = characterEncoding; |
115 | ||
116 | 0 | if (this.characterEncoding == null) |
117 | { |
|
118 | // try and get the characterEncoding from the reader |
|
119 | 0 | this.characterEncoding = "US-ASCII"; |
120 | try |
|
121 | { |
|
122 | 0 | this.characterEncoding = ((InputStreamReader) in).getEncoding(); |
123 | } |
|
124 | 0 | catch (ClassCastException e) |
125 | { |
|
126 | 0 | } |
127 | } |
|
128 | ||
129 | 0 | tokenizer = new StreamTokenizer(new BufferedReader(in)); |
130 | 0 | initTokenizer(tokenizer); |
131 | 0 | } |
132 | ||
133 | /** |
|
134 | * Initialize the StreamTokenizer instance used to read the lines |
|
135 | * from the input reader. This must be implemented in subclasses to |
|
136 | * set up other tokenizing properties. |
|
137 | * |
|
138 | * @param tokenizer the tokenizer to adjust |
|
139 | */ |
|
140 | protected void initTokenizer(StreamTokenizer tokenizer) |
|
141 | { |
|
142 | // set all numeric characters as ordinary characters |
|
143 | // (switches off number parsing) |
|
144 | 0 | tokenizer.ordinaryChars('0', '9'); |
145 | 0 | tokenizer.ordinaryChars('-', '-'); |
146 | 0 | tokenizer.ordinaryChars('.', '.'); |
147 | ||
148 | // leave out the comma sign (,), we need it for empty fields |
|
149 | ||
150 | 0 | tokenizer.wordChars(' ', Integer.MAX_VALUE); |
151 | ||
152 | // and set the quote mark as the quoting character |
|
153 | 0 | tokenizer.quoteChar('"'); |
154 | ||
155 | // and finally say that end of line is significant |
|
156 | 0 | tokenizer.eolIsSignificant(true); |
157 | 0 | } |
158 | ||
159 | /** |
|
160 | * This method must be called to set up the field separator |
|
161 | * @param fieldSeparator the char which separates the fields |
|
162 | */ |
|
163 | public void setFieldSeparator(char fieldSeparator) |
|
164 | { |
|
165 | 0 | this.fieldSeparator = fieldSeparator; |
166 | // make this field also an ordinary char by default. |
|
167 | 0 | tokenizer.ordinaryChar(fieldSeparator); |
168 | 0 | } |
169 | ||
170 | /** |
|
171 | * Set the list of column names explicitly. |
|
172 | * |
|
173 | * @param columnNames A list of column names. |
|
174 | */ |
|
175 | public void setColumnNames(List columnNames) |
|
176 | { |
|
177 | 0 | this.columnNames = columnNames; |
178 | 0 | } |
179 | ||
180 | /** |
|
181 | * Read the list of column names from the input reader using the |
|
182 | * tokenizer. If fieldNames are empty, we use the current fieldNumber |
|
183 | * + the EMPTYFIELDNAME to make one up. |
|
184 | * |
|
185 | * @exception IOException an IOException occurred. |
|
186 | */ |
|
187 | public void readColumnNames() |
|
188 | throws IOException |
|
189 | { |
|
190 | 0 | columnNames = new ArrayList(); |
191 | 0 | int lastTtype = 0; |
192 | 0 | int fieldCounter = 1; |
193 | ||
194 | 0 | neverRead = false; |
195 | 0 | tokenizer.nextToken(); |
196 | while (tokenizer.ttype == StreamTokenizer.TT_WORD || tokenizer.ttype == StreamTokenizer.TT_EOL |
|
197 | 0 | || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator) |
198 | { |
|
199 | 0 | if (tokenizer.ttype != fieldSeparator && tokenizer.ttype != StreamTokenizer.TT_EOL) |
200 | { |
|
201 | 0 | columnNames.add(tokenizer.sval); |
202 | 0 | fieldCounter++; |
203 | } |
|
204 | 0 | else if (tokenizer.ttype == fieldSeparator && lastTtype == fieldSeparator) |
205 | { |
|
206 | // we have an empty field name |
|
207 | 0 | columnNames.add(EMPTYFIELDNAME + fieldCounter); |
208 | 0 | fieldCounter++; |
209 | } |
|
210 | 0 | else if (lastTtype == fieldSeparator && tokenizer.ttype == StreamTokenizer.TT_EOL) |
211 | { |
|
212 | 0 | columnNames.add(EMPTYFIELDNAME + fieldCounter); |
213 | 0 | break; |
214 | } |
|
215 | 0 | else if (tokenizer.ttype == StreamTokenizer.TT_EOL) |
216 | { |
|
217 | 0 | break; |
218 | } |
|
219 | 0 | lastTtype = tokenizer.ttype; |
220 | 0 | tokenizer.nextToken(); |
221 | } |
|
222 | 0 | } |
223 | ||
224 | /** |
|
225 | * Determine whether a further row of values exists in the input. |
|
226 | * |
|
227 | * @return true if the input has more rows. |
|
228 | * @exception IOException an IOException occurred. |
|
229 | */ |
|
230 | public boolean hasNextRow() |
|
231 | throws IOException |
|
232 | { |
|
233 | // check for end of line ensures that an empty last line doesn't |
|
234 | // give a false positive for hasNextRow |
|
235 | 0 | if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL) |
236 | { |
|
237 | 0 | tokenizer.nextToken(); |
238 | 0 | tokenizer.pushBack(); |
239 | 0 | neverRead = false; |
240 | } |
|
241 | 0 | return tokenizer.ttype != StreamTokenizer.TT_EOF; |
242 | } |
|
243 | ||
244 | /** |
|
245 | * Returns a ValueParser object containing the next row of values. |
|
246 | * |
|
247 | * @return a ValueParser object. |
|
248 | * @exception IOException an IOException occurred. |
|
249 | * @exception NoSuchElementException there are no more rows in the input. |
|
250 | */ |
|
251 | public ValueParser nextRow() |
|
252 | throws IOException, NoSuchElementException |
|
253 | { |
|
254 | 0 | if (!hasNextRow()) |
255 | { |
|
256 | 0 | throw new NoSuchElementException(); |
257 | } |
|
258 | ||
259 | 0 | if (lineValues == null) |
260 | { |
|
261 | 0 | lineValues = new BaseValueParser(characterEncoding); |
262 | } |
|
263 | else |
|
264 | { |
|
265 | 0 | lineValues.clear(); |
266 | } |
|
267 | ||
268 | 0 | Iterator it = columnNames.iterator(); |
269 | 0 | tokenizer.nextToken(); |
270 | while (tokenizer.ttype == StreamTokenizer.TT_WORD |
|
271 | 0 | || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator) |
272 | { |
|
273 | 0 | int lastTtype = 0; |
274 | // note this means that if there are more values than |
|
275 | // column names, the extra values are discarded. |
|
276 | 0 | if (it.hasNext()) |
277 | { |
|
278 | 0 | String colname = it.next().toString(); |
279 | 0 | String colval = tokenizer.sval; |
280 | 0 | if (tokenizer.ttype != fieldSeparator && lastTtype != fieldSeparator) |
281 | { |
|
282 | if (DEBUG) |
|
283 | { |
|
284 | log.debug("DataStreamParser.nextRow(): " + |
|
285 | colname + "=" + colval); |
|
286 | } |
|
287 | 0 | lineValues.add(colname, colval); |
288 | } |
|
289 | 0 | else if (tokenizer.ttype == fieldSeparator && lastTtype != fieldSeparator) |
290 | { |
|
291 | 0 | lastTtype = tokenizer.ttype; |
292 | 0 | tokenizer.nextToken(); |
293 | 0 | if (tokenizer.ttype != fieldSeparator && tokenizer.sval != null) |
294 | { |
|
295 | 0 | lineValues.add(colname, tokenizer.sval); |
296 | } |
|
297 | 0 | else if (tokenizer.ttype == StreamTokenizer.TT_EOL) |
298 | { |
|
299 | 0 | tokenizer.pushBack(); |
300 | } |
|
301 | } |
|
302 | } |
|
303 | 0 | tokenizer.nextToken(); |
304 | } |
|
305 | ||
306 | 0 | return lineValues; |
307 | } |
|
308 | ||
309 | /** |
|
310 | * Determine whether a further row of values exists in the input. |
|
311 | * |
|
312 | * @return true if the input has more rows. |
|
313 | */ |
|
314 | public boolean hasNext() |
|
315 | { |
|
316 | 0 | boolean hasNext = false; |
317 | ||
318 | try |
|
319 | { |
|
320 | 0 | hasNext = hasNextRow(); |
321 | } |
|
322 | 0 | catch (IOException e) |
323 | { |
|
324 | 0 | log.error("IOException in CSVParser.hasNext", e); |
325 | 0 | } |
326 | ||
327 | 0 | return hasNext; |
328 | } |
|
329 | ||
330 | /** |
|
331 | * Returns a ValueParser object containing the next row of values. |
|
332 | * |
|
333 | * @return a ValueParser object as an Object. |
|
334 | * @exception NoSuchElementException there are no more rows in the input |
|
335 | * or an IOException occurred. |
|
336 | */ |
|
337 | public Object next() |
|
338 | throws NoSuchElementException |
|
339 | { |
|
340 | 0 | Object nextRow = null; |
341 | ||
342 | try |
|
343 | { |
|
344 | 0 | nextRow = nextRow(); |
345 | } |
|
346 | 0 | catch (IOException e) |
347 | { |
|
348 | 0 | log.error("IOException in CSVParser.next", e); |
349 | 0 | throw new NoSuchElementException(); |
350 | 0 | } |
351 | ||
352 | 0 | return nextRow; |
353 | } |
|
354 | ||
355 | /** |
|
356 | * The optional Iterator.remove method is not supported. |
|
357 | * |
|
358 | * @exception UnsupportedOperationException the operation is not supported. |
|
359 | */ |
|
360 | public void remove() |
|
361 | throws UnsupportedOperationException |
|
362 | { |
|
363 | 0 | throw new UnsupportedOperationException(); |
364 | } |
|
365 | } |
This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |