View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.struts2.jasper.xmlparser;
19  
20  import com.opensymphony.xwork2.util.logging.Logger;
21  import com.opensymphony.xwork2.util.logging.LoggerFactory;
22  
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.Reader;
26  
27  /***
28   * Reader for UCS-2 and UCS-4 encodings.
29   * (i.e., encodings from ISO-10646-UCS-(2|4)).
30   *
31   * @author Neil Graham, IBM
32   * @version $Id: UCSReader.java 466606 2006-10-21 23:07:12Z markt $
33   */
34  public class UCSReader extends Reader {
35  
36      private Logger log = LoggerFactory.getLogger(UCSReader.class);
37  
38      //
39      // Constants
40      //
41  
42      /***
43       * Default byte buffer size (8192, larger than that of ASCIIReader
44       * since it's reasonable to surmise that the average UCS-4-encoded
45       * file should be 4 times as large as the average ASCII-encoded file).
46       */
47      public static final int DEFAULT_BUFFER_SIZE = 8192;
48  
49      public static final short UCS2LE = 1;
50      public static final short UCS2BE = 2;
51      public static final short UCS4LE = 4;
52      public static final short UCS4BE = 8;
53  
54      //
55      // Data
56      //
57  
58      /***
59       * Input stream.
60       */
61      protected InputStream fInputStream;
62  
63      /***
64       * Byte buffer.
65       */
66      protected byte[] fBuffer;
67  
68      // what kind of data we're dealing with
69      protected short fEncoding;
70  
71      //
72      // Constructors
73      //
74  
75      /***
76       * Constructs an ASCII reader from the specified input stream
77       * using the default buffer size.  The Endian-ness and whether this is
78       * UCS-2 or UCS-4 needs also to be known in advance.
79       *
80       * @param inputStream The input stream.
81       * @param encoding    One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
82       */
83      public UCSReader(InputStream inputStream, short encoding) {
84          this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
85      } // <init>(InputStream, short)
86  
87      /***
88       * Constructs an ASCII reader from the specified input stream
89       * and buffer size.  The Endian-ness and whether this is
90       * UCS-2 or UCS-4 needs also to be known in advance.
91       *
92       * @param inputStream The input stream.
93       * @param size        The initial buffer size.
94       * @param encoding    One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
95       */
96      public UCSReader(InputStream inputStream, int size, short encoding) {
97          fInputStream = inputStream;
98          fBuffer = new byte[size];
99          fEncoding = encoding;
100     } // <init>(InputStream,int,short)
101 
102     //
103     // Reader methods
104     //
105 
106     /***
107      * Read a single character.  This method will block until a character is
108      * available, an I/O error occurs, or the end of the stream is reached.
109      * <p/>
110      * <p> Subclasses that intend to support efficient single-character input
111      * should override this method.
112      *
113      * @return The character read, as an integer in the range 0 to 127
114      *         (<tt>0x00-0x7f</tt>), or -1 if the end of the stream has
115      *         been reached
116      * @throws IOException If an I/O error occurs
117      */
118     public int read() throws IOException {
119         int b0 = fInputStream.read() & 0xff;
120         if (b0 == 0xff)
121             return -1;
122         int b1 = fInputStream.read() & 0xff;
123         if (b1 == 0xff)
124             return -1;
125         if (fEncoding >= 4) {
126             int b2 = fInputStream.read() & 0xff;
127             if (b2 == 0xff)
128                 return -1;
129             int b3 = fInputStream.read() & 0xff;
130             if (b3 == 0xff)
131                 return -1;
132             if (log.isDebugEnabled())
133                 log.debug("b0 is " + (b0 & 0xff) + " b1 " + (b1 & 0xff) + " b2 " + (b2 & 0xff) + " b3 " + (b3 & 0xff));
134             if (fEncoding == UCS4BE)
135                 return (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
136             else
137                 return (b3 << 24) + (b2 << 16) + (b1 << 8) + b0;
138         } else { // UCS-2
139             if (fEncoding == UCS2BE)
140                 return (b0 << 8) + b1;
141             else
142                 return (b1 << 8) + b0;
143         }
144     } // read():int
145 
146     /***
147      * Read characters into a portion of an array.  This method will block
148      * until some input is available, an I/O error occurs, or the end of the
149      * stream is reached.
150      *
151      * @param ch     Destination buffer
152      * @param offset Offset at which to start storing characters
153      * @param length Maximum number of characters to read
154      * @return The number of characters read, or -1 if the end of the
155      *         stream has been reached
156      * @throws IOException If an I/O error occurs
157      */
158     public int read(char ch[], int offset, int length) throws IOException {
159         int byteLength = length << ((fEncoding >= 4) ? 2 : 1);
160         if (byteLength > fBuffer.length) {
161             byteLength = fBuffer.length;
162         }
163         int count = fInputStream.read(fBuffer, 0, byteLength);
164         if (count == -1) return -1;
165         // try and make count be a multiple of the number of bytes we're looking for
166         if (fEncoding >= 4) { // BigEndian
167             // this looks ugly, but it avoids an if at any rate...
168             int numToRead = (4 - (count & 3) & 3);
169             for (int i = 0; i < numToRead; i++) {
170                 int charRead = fInputStream.read();
171                 if (charRead == -1) { // end of input; something likely went wrong!A  Pad buffer with nulls.
172                     for (int j = i; j < numToRead; j++)
173                         fBuffer[count + j] = 0;
174                     break;
175                 } else {
176                     fBuffer[count + i] = (byte) charRead;
177                 }
178             }
179             count += numToRead;
180         } else {
181             int numToRead = count & 1;
182             if (numToRead != 0) {
183                 count++;
184                 int charRead = fInputStream.read();
185                 if (charRead == -1) { // end of input; something likely went wrong!A  Pad buffer with nulls.
186                     fBuffer[count] = 0;
187                 } else {
188                     fBuffer[count] = (byte) charRead;
189                 }
190             }
191         }
192 
193         // now count is a multiple of the right number of bytes
194         int numChars = count >> ((fEncoding >= 4) ? 2 : 1);
195         int curPos = 0;
196         for (int i = 0; i < numChars; i++) {
197             int b0 = fBuffer[curPos++] & 0xff;
198             int b1 = fBuffer[curPos++] & 0xff;
199             if (fEncoding >= 4) {
200                 int b2 = fBuffer[curPos++] & 0xff;
201                 int b3 = fBuffer[curPos++] & 0xff;
202                 if (fEncoding == UCS4BE)
203                     ch[offset + i] = (char) ((b0 << 24) + (b1 << 16) + (b2 << 8) + b3);
204                 else
205                     ch[offset + i] = (char) ((b3 << 24) + (b2 << 16) + (b1 << 8) + b0);
206             } else { // UCS-2
207                 if (fEncoding == UCS2BE)
208                     ch[offset + i] = (char) ((b0 << 8) + b1);
209                 else
210                     ch[offset + i] = (char) ((b1 << 8) + b0);
211             }
212         }
213         return numChars;
214     } // read(char[],int,int)
215 
216     /***
217      * Skip characters.  This method will block until some characters are
218      * available, an I/O error occurs, or the end of the stream is reached.
219      *
220      * @param n The number of characters to skip
221      * @return The number of characters actually skipped
222      * @throws IOException If an I/O error occurs
223      */
224     public long skip(long n) throws IOException {
225         // charWidth will represent the number of bits to move
226         // n leftward to get num of bytes to skip, and then move the result rightward
227         // to get num of chars effectively skipped.
228         // The trick with &'ing, as with elsewhere in this dcode, is
229         // intended to avoid an expensive use of / that might not be optimized
230         // away.
231         int charWidth = (fEncoding >= 4) ? 2 : 1;
232         long bytesSkipped = fInputStream.skip(n << charWidth);
233         if ((bytesSkipped & (charWidth | 1)) == 0) return bytesSkipped >> charWidth;
234         return (bytesSkipped >> charWidth) + 1;
235     } // skip(long):long
236 
237     /***
238      * Tell whether this stream is ready to be read.
239      *
240      * @return True if the next read() is guaranteed not to block for input,
241      *         false otherwise.  Note that returning false does not guarantee that the
242      *         next read will block.
243      * @throws IOException If an I/O error occurs
244      */
245     public boolean ready() throws IOException {
246         return false;
247     } // ready()
248 
249     /***
250      * Tell whether this stream supports the mark() operation.
251      */
252     public boolean markSupported() {
253         return fInputStream.markSupported();
254     } // markSupported()
255 
256     /***
257      * Mark the present position in the stream.  Subsequent calls to reset()
258      * will attempt to reposition the stream to this point.  Not all
259      * character-input streams support the mark() operation.
260      *
261      * @param readAheadLimit Limit on the number of characters that may be
262      *                       read while still preserving the mark.  After
263      *                       reading this many characters, attempting to
264      *                       reset the stream may fail.
265      * @throws IOException If the stream does not support mark(),
266      *                     or if some other I/O error occurs
267      */
268     public void mark(int readAheadLimit) throws IOException {
269         fInputStream.mark(readAheadLimit);
270     } // mark(int)
271 
272     /***
273      * Reset the stream.  If the stream has been marked, then attempt to
274      * reposition it at the mark.  If the stream has not been marked, then
275      * attempt to reset it in some way appropriate to the particular stream,
276      * for example by repositioning it to its starting point.  Not all
277      * character-input streams support the reset() operation, and some support
278      * reset() without supporting mark().
279      *
280      * @throws IOException If the stream has not been marked,
281      *                     or if the mark has been invalidated,
282      *                     or if the stream does not support reset(),
283      *                     or if some other I/O error occurs
284      */
285     public void reset() throws IOException {
286         fInputStream.reset();
287     } // reset()
288 
289     /***
290      * Close the stream.  Once a stream has been closed, further read(),
291      * ready(), mark(), or reset() invocations will throw an IOException.
292      * Closing a previously-closed stream, however, has no effect.
293      *
294      * @throws IOException If an I/O error occurs
295      */
296     public void close() throws IOException {
297         fInputStream.close();
298     } // close()
299 
300 } // class UCSReader