1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.struts2.jasper.xmlparser;
19
20 import com.opensymphony.xwork2.util.logging.Logger;
21 import com.opensymphony.xwork2.util.logging.LoggerFactory;
22
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.Reader;
26
27 /***
28 * Reader for UCS-2 and UCS-4 encodings.
29 * (i.e., encodings from ISO-10646-UCS-(2|4)).
30 *
31 * @author Neil Graham, IBM
32 * @version $Id: UCSReader.java 466606 2006-10-21 23:07:12Z markt $
33 */
34 public class UCSReader extends Reader {
35
36 private Logger log = LoggerFactory.getLogger(UCSReader.class);
37
38
39
40
41
42 /***
43 * Default byte buffer size (8192, larger than that of ASCIIReader
44 * since it's reasonable to surmise that the average UCS-4-encoded
45 * file should be 4 times as large as the average ASCII-encoded file).
46 */
47 public static final int DEFAULT_BUFFER_SIZE = 8192;
48
49 public static final short UCS2LE = 1;
50 public static final short UCS2BE = 2;
51 public static final short UCS4LE = 4;
52 public static final short UCS4BE = 8;
53
54
55
56
57
58 /***
59 * Input stream.
60 */
61 protected InputStream fInputStream;
62
63 /***
64 * Byte buffer.
65 */
66 protected byte[] fBuffer;
67
68
69 protected short fEncoding;
70
71
72
73
74
75 /***
76 * Constructs an ASCII reader from the specified input stream
77 * using the default buffer size. The Endian-ness and whether this is
78 * UCS-2 or UCS-4 needs also to be known in advance.
79 *
80 * @param inputStream The input stream.
81 * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
82 */
83 public UCSReader(InputStream inputStream, short encoding) {
84 this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
85 }
86
87 /***
88 * Constructs an ASCII reader from the specified input stream
89 * and buffer size. The Endian-ness and whether this is
90 * UCS-2 or UCS-4 needs also to be known in advance.
91 *
92 * @param inputStream The input stream.
93 * @param size The initial buffer size.
94 * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
95 */
96 public UCSReader(InputStream inputStream, int size, short encoding) {
97 fInputStream = inputStream;
98 fBuffer = new byte[size];
99 fEncoding = encoding;
100 }
101
102
103
104
105
106 /***
107 * Read a single character. This method will block until a character is
108 * available, an I/O error occurs, or the end of the stream is reached.
109 * <p/>
110 * <p> Subclasses that intend to support efficient single-character input
111 * should override this method.
112 *
113 * @return The character read, as an integer in the range 0 to 127
114 * (<tt>0x00-0x7f</tt>), or -1 if the end of the stream has
115 * been reached
116 * @throws IOException If an I/O error occurs
117 */
118 public int read() throws IOException {
119 int b0 = fInputStream.read() & 0xff;
120 if (b0 == 0xff)
121 return -1;
122 int b1 = fInputStream.read() & 0xff;
123 if (b1 == 0xff)
124 return -1;
125 if (fEncoding >= 4) {
126 int b2 = fInputStream.read() & 0xff;
127 if (b2 == 0xff)
128 return -1;
129 int b3 = fInputStream.read() & 0xff;
130 if (b3 == 0xff)
131 return -1;
132 if (log.isDebugEnabled())
133 log.debug("b0 is " + (b0 & 0xff) + " b1 " + (b1 & 0xff) + " b2 " + (b2 & 0xff) + " b3 " + (b3 & 0xff));
134 if (fEncoding == UCS4BE)
135 return (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
136 else
137 return (b3 << 24) + (b2 << 16) + (b1 << 8) + b0;
138 } else {
139 if (fEncoding == UCS2BE)
140 return (b0 << 8) + b1;
141 else
142 return (b1 << 8) + b0;
143 }
144 }
145
146 /***
147 * Read characters into a portion of an array. This method will block
148 * until some input is available, an I/O error occurs, or the end of the
149 * stream is reached.
150 *
151 * @param ch Destination buffer
152 * @param offset Offset at which to start storing characters
153 * @param length Maximum number of characters to read
154 * @return The number of characters read, or -1 if the end of the
155 * stream has been reached
156 * @throws IOException If an I/O error occurs
157 */
158 public int read(char ch[], int offset, int length) throws IOException {
159 int byteLength = length << ((fEncoding >= 4) ? 2 : 1);
160 if (byteLength > fBuffer.length) {
161 byteLength = fBuffer.length;
162 }
163 int count = fInputStream.read(fBuffer, 0, byteLength);
164 if (count == -1) return -1;
165
166 if (fEncoding >= 4) {
167
168 int numToRead = (4 - (count & 3) & 3);
169 for (int i = 0; i < numToRead; i++) {
170 int charRead = fInputStream.read();
171 if (charRead == -1) {
172 for (int j = i; j < numToRead; j++)
173 fBuffer[count + j] = 0;
174 break;
175 } else {
176 fBuffer[count + i] = (byte) charRead;
177 }
178 }
179 count += numToRead;
180 } else {
181 int numToRead = count & 1;
182 if (numToRead != 0) {
183 count++;
184 int charRead = fInputStream.read();
185 if (charRead == -1) {
186 fBuffer[count] = 0;
187 } else {
188 fBuffer[count] = (byte) charRead;
189 }
190 }
191 }
192
193
194 int numChars = count >> ((fEncoding >= 4) ? 2 : 1);
195 int curPos = 0;
196 for (int i = 0; i < numChars; i++) {
197 int b0 = fBuffer[curPos++] & 0xff;
198 int b1 = fBuffer[curPos++] & 0xff;
199 if (fEncoding >= 4) {
200 int b2 = fBuffer[curPos++] & 0xff;
201 int b3 = fBuffer[curPos++] & 0xff;
202 if (fEncoding == UCS4BE)
203 ch[offset + i] = (char) ((b0 << 24) + (b1 << 16) + (b2 << 8) + b3);
204 else
205 ch[offset + i] = (char) ((b3 << 24) + (b2 << 16) + (b1 << 8) + b0);
206 } else {
207 if (fEncoding == UCS2BE)
208 ch[offset + i] = (char) ((b0 << 8) + b1);
209 else
210 ch[offset + i] = (char) ((b1 << 8) + b0);
211 }
212 }
213 return numChars;
214 }
215
216 /***
217 * Skip characters. This method will block until some characters are
218 * available, an I/O error occurs, or the end of the stream is reached.
219 *
220 * @param n The number of characters to skip
221 * @return The number of characters actually skipped
222 * @throws IOException If an I/O error occurs
223 */
224 public long skip(long n) throws IOException {
225
226
227
228
229
230
231 int charWidth = (fEncoding >= 4) ? 2 : 1;
232 long bytesSkipped = fInputStream.skip(n << charWidth);
233 if ((bytesSkipped & (charWidth | 1)) == 0) return bytesSkipped >> charWidth;
234 return (bytesSkipped >> charWidth) + 1;
235 }
236
237 /***
238 * Tell whether this stream is ready to be read.
239 *
240 * @return True if the next read() is guaranteed not to block for input,
241 * false otherwise. Note that returning false does not guarantee that the
242 * next read will block.
243 * @throws IOException If an I/O error occurs
244 */
245 public boolean ready() throws IOException {
246 return false;
247 }
248
249 /***
250 * Tell whether this stream supports the mark() operation.
251 */
252 public boolean markSupported() {
253 return fInputStream.markSupported();
254 }
255
256 /***
257 * Mark the present position in the stream. Subsequent calls to reset()
258 * will attempt to reposition the stream to this point. Not all
259 * character-input streams support the mark() operation.
260 *
261 * @param readAheadLimit Limit on the number of characters that may be
262 * read while still preserving the mark. After
263 * reading this many characters, attempting to
264 * reset the stream may fail.
265 * @throws IOException If the stream does not support mark(),
266 * or if some other I/O error occurs
267 */
268 public void mark(int readAheadLimit) throws IOException {
269 fInputStream.mark(readAheadLimit);
270 }
271
272 /***
273 * Reset the stream. If the stream has been marked, then attempt to
274 * reposition it at the mark. If the stream has not been marked, then
275 * attempt to reset it in some way appropriate to the particular stream,
276 * for example by repositioning it to its starting point. Not all
277 * character-input streams support the reset() operation, and some support
278 * reset() without supporting mark().
279 *
280 * @throws IOException If the stream has not been marked,
281 * or if the mark has been invalidated,
282 * or if the stream does not support reset(),
283 * or if some other I/O error occurs
284 */
285 public void reset() throws IOException {
286 fInputStream.reset();
287 }
288
289 /***
290 * Close the stream. Once a stream has been closed, further read(),
291 * ready(), mark(), or reset() invocations will throw an IOException.
292 * Closing a previously-closed stream, however, has no effect.
293 *
294 * @throws IOException If an I/O error occurs
295 */
296 public void close() throws IOException {
297 fInputStream.close();
298 }
299
300 }