View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   * ====================================================================
17   *
18   * This software consists of voluntary contributions made by many
19   * individuals on behalf of the Apache Software Foundation and was
20   * originally based on software copyright (c) 1999, International
21   * Business Machines, Inc., http://www.apache.org.  For more
22   * information on the Apache Software Foundation, please see
23   * <http://www.apache.org/>.
24   */
25  
26  package org.apache.struts2.jasper.xmlparser;
27  
28  import org.apache.struts2.jasper.JasperException;
29  import org.apache.struts2.jasper.JspCompilationContext;
30  import org.apache.struts2.jasper.compiler.ErrorDispatcher;
31  import org.apache.struts2.jasper.compiler.JspUtil;
32  
33  import java.io.*;
34  import java.util.Locale;
35  import java.util.jar.JarFile;
36  
37  public class XMLEncodingDetector {
38  
39      private InputStream stream; // input being sniffed; set by getEncoding(InputStream, ErrorDispatcher), wrapped in a RewindableInputStream by createInitialReader()
40      private String encoding; // encoding name; filled from getEncodingName() in createInitialReader() and returned as element 0 of the result
41      private boolean isEncodingSetInProlog; // returned as element 1 of the result; NOTE(review): presumably set while scanning the XML declaration - not visible here
42      private Boolean isBigEndian; // byte order reported by getEncodingName(); null when unknown or not relevant
43      private Reader reader; // reader built by createReader() over the (rewound) stream
44  
45      // org.apache.xerces.impl.XMLEntityManager fields
46      public static final int DEFAULT_BUFFER_SIZE = 2048;
47      public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
48      private boolean fAllowJavaEncodings; // consulted by createReader(); no assignment is visible in this part of the file
49      private SymbolTable fSymbolTable; // created in the constructor; scanName() obtains its symbols from it
50      private XMLEncodingDetector fCurrentEntity; // set to this instance in the constructor
51      private int fBufferSize = DEFAULT_BUFFER_SIZE; // passed to the UTF-8/ASCII readers; doubled by scanName() when a name fills the buffer
52  
53      // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
54      private int lineNumber = 1; // advanced by the scan methods on every newline
55      private int columnNumber = 1; // reset to 1 on a newline, advanced as characters are consumed
56      private boolean literal; // read by scanLiteral() when deciding whether a quote ends the value
57      private char[] ch = new char[DEFAULT_BUFFER_SIZE]; // character buffer the scan methods read from
58      private int position; // index in ch of the next character to read
59      private int count; // the scan methods call load() when position == count
60      private boolean mayReadChunks = false;
61  
62      // org.apache.xerces.impl.XMLScanner fields
63      private XMLString fString = new XMLString();
64      private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
65      private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
66      private final static String fVersionSymbol = "version";
67      private final static String fEncodingSymbol = "encoding";
68      private final static String fStandaloneSymbol = "standalone";
69  
70      // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
71      private int fMarkupDepth = 0;
72      private String[] fStrings = new String[3];
73  
74      private ErrorDispatcher err; // error sink; set by getEncoding(InputStream, ErrorDispatcher) and used by createReader()
75  
/**
 * Creates a detector that acts as its own current entity and owns a
 * fresh symbol table.
 */
public XMLEncodingDetector() {
    this.fCurrentEntity = this;
    this.fSymbolTable = new SymbolTable();
}
83  
84      /***
85       * Autodetects the encoding of the XML document supplied by the given
86       * input stream.
87       * <p/>
88       * Encoding autodetection is done according to the XML 1.0 specification,
89       * Appendix F.1: Detection Without External Encoding Information.
90       *
91       * @return Two-element array, where the first element (of type
92       *         java.lang.String) contains the name of the (auto)detected encoding, and
93       *         the second element (of type java.lang.Boolean) specifies whether the
94       *         encoding was specified using the 'encoding' attribute of an XML prolog
95       *         (TRUE) or autodetected (FALSE).
96       */
97      public static Object[] getEncoding(String fname, JarFile jarFile,
98                                         JspCompilationContext ctxt,
99                                         ErrorDispatcher err)
100             throws IOException, JasperException {
101         InputStream inStream = JspUtil.getInputStream(fname, jarFile, ctxt,
102                 err);
103         XMLEncodingDetector detector = new XMLEncodingDetector();
104         Object[] ret = detector.getEncoding(inStream, err);
105         inStream.close();
106 
107         return ret;
108     }
109 
110     private Object[] getEncoding(InputStream in, ErrorDispatcher err)
111             throws IOException, JasperException {
112         this.stream = in;
113         this.err = err;
114         createInitialReader();
115         scanXMLDecl();
116 
117         return new Object[]{this.encoding,
118                 new Boolean(this.isEncodingSetInProlog)};
119     }
120 
121     // stub method
122     void endEntity() {
123     }
124 
125     // Adapted from:
126     // org.apache.xerces.impl.XMLEntityManager.startEntity()
127     private void createInitialReader() throws IOException, JasperException {
128 
129         // wrap this stream in RewindableInputStream
130         stream = new RewindableInputStream(stream);
131 
132         // perform auto-detect of encoding if necessary
133         if (encoding == null) {
134             // read first four bytes and determine encoding
135             final byte[] b4 = new byte[4];
136             int count = 0;
137             for (; count < 4; count++) {
138                 b4[count] = (byte) stream.read();
139             }
140             if (count == 4) {
141                 Object[] encodingDesc = getEncodingName(b4, count);
142                 encoding = (String) (encodingDesc[0]);
143                 isBigEndian = (Boolean) (encodingDesc[1]);
144 
145                 stream.reset();
146                 // Special case UTF-8 files with BOM created by Microsoft
147                 // tools. It's more efficient to consume the BOM than make
148                 // the reader perform extra checks. -Ac
149                 if (count > 2 && encoding.equals("UTF-8")) {
150                     int b0 = b4[0] & 0xFF;
151                     int b1 = b4[1] & 0xFF;
152                     int b2 = b4[2] & 0xFF;
153                     if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
154                         // ignore first three bytes...
155                         stream.skip(3);
156                     }
157                 }
158                 reader = createReader(stream, encoding, isBigEndian);
159             } else {
160                 reader = createReader(stream, encoding, isBigEndian);
161             }
162         }
163     }
164 
165     // Adapted from:
166     // org.apache.xerces.impl.XMLEntityManager.createReader
167     /***
168      * Creates a reader capable of reading the given input stream in
169      * the specified encoding.
170      *
171      * @param inputStream The input stream.
172      * @param encoding    The encoding name that the input stream is
173      *                    encoded using. If the user has specified that
174      *                    Java encoding names are allowed, then the
175      *                    encoding name may be a Java encoding name;
176      *                    otherwise, it is an ianaEncoding name.
177      * @param isBigEndian For encodings (like uCS-4), whose names cannot
178      *                    specify a byte order, this tells whether the order
179      *                    is bigEndian. null means unknown or not relevant.
180      * @return Returns a reader.
181      */
182     private Reader createReader(InputStream inputStream, String encoding,
183                                 Boolean isBigEndian)
184             throws IOException, JasperException {
185 
186         // normalize encoding name
187         if (encoding == null) {
188             encoding = "UTF-8";
189         }
190 
191         // try to use an optimized reader
192         String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
193         if (ENCODING.equals("UTF-8")) {
194             return new UTF8Reader(inputStream, fBufferSize);
195         }
196         if (ENCODING.equals("US-ASCII")) {
197             return new ASCIIReader(inputStream, fBufferSize);
198         }
199         if (ENCODING.equals("ISO-10646-UCS-4")) {
200             if (isBigEndian != null) {
201                 boolean isBE = isBigEndian.booleanValue();
202                 if (isBE) {
203                     return new UCSReader(inputStream, UCSReader.UCS4BE);
204                 } else {
205                     return new UCSReader(inputStream, UCSReader.UCS4LE);
206                 }
207             } else {
208                 err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
209                         encoding);
210             }
211         }
212         if (ENCODING.equals("ISO-10646-UCS-2")) {
213             if (isBigEndian != null) { // sould never happen with this encoding...
214                 boolean isBE = isBigEndian.booleanValue();
215                 if (isBE) {
216                     return new UCSReader(inputStream, UCSReader.UCS2BE);
217                 } else {
218                     return new UCSReader(inputStream, UCSReader.UCS2LE);
219                 }
220             } else {
221                 err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
222                         encoding);
223             }
224         }
225 
226         // check for valid name
227         boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
228         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
229         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
230             err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
231             // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
232             //       because every byte is a valid ISO Latin 1 character.
233             //       It may not translate correctly but if we failed on
234             //       the encoding anyway, then we're expecting the content
235             //       of the document to be bad. This will just prevent an
236             //       invalid UTF-8 sequence to be detected. This is only
237             //       important when continue-after-fatal-error is turned
238             //       on. -Ac
239             encoding = "ISO-8859-1";
240         }
241 
242         // try to use a Java reader
243         String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
244         if (javaEncoding == null) {
245             if (fAllowJavaEncodings) {
246                 javaEncoding = encoding;
247             } else {
248                 err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
249                 // see comment above.
250                 javaEncoding = "ISO8859_1";
251             }
252         }
253         return new InputStreamReader(inputStream, javaEncoding);
254 
255     } // createReader(InputStream,String, Boolean): Reader
256 
257     // Adapted from:
258     // org.apache.xerces.impl.XMLEntityManager.getEncodingName
259     /***
260      * Returns the IANA encoding name that is auto-detected from
261      * the bytes specified, with the endian-ness of that encoding where
262      * appropriate.
263      *
264      * @param b4    The first four bytes of the input.
265      * @param count The number of bytes actually read.
266      * @return a 2-element array:  the first element, an IANA-encoding string,
267      *         the second element a Boolean which is true iff the document is big
268      *         endian, false if it's little-endian, and null if the distinction isn't
269      *         relevant.
270      */
271     private Object[] getEncodingName(byte[] b4, int count) {
272 
273         if (count < 2) {
274             return new Object[]{"UTF-8", null};
275         }
276 
277         // UTF-16, with BOM
278         int b0 = b4[0] & 0xFF;
279         int b1 = b4[1] & 0xFF;
280         if (b0 == 0xFE && b1 == 0xFF) {
281             // UTF-16, big-endian
282             return new Object[]{"UTF-16BE", new Boolean(true)};
283         }
284         if (b0 == 0xFF && b1 == 0xFE) {
285             // UTF-16, little-endian
286             return new Object[]{"UTF-16LE", new Boolean(false)};
287         }
288 
289         // default to UTF-8 if we don't have enough bytes to make a
290         // good determination of the encoding
291         if (count < 3) {
292             return new Object[]{"UTF-8", null};
293         }
294 
295         // UTF-8 with a BOM
296         int b2 = b4[2] & 0xFF;
297         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
298             return new Object[]{"UTF-8", null};
299         }
300 
301         // default to UTF-8 if we don't have enough bytes to make a
302         // good determination of the encoding
303         if (count < 4) {
304             return new Object[]{"UTF-8", null};
305         }
306 
307         // other encodings
308         int b3 = b4[3] & 0xFF;
309         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
310             // UCS-4, big endian (1234)
311             return new Object[]{"ISO-10646-UCS-4", new Boolean(true)};
312         }
313         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
314             // UCS-4, little endian (4321)
315             return new Object[]{"ISO-10646-UCS-4", new Boolean(false)};
316         }
317         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
318             // UCS-4, unusual octet order (2143)
319             // REVISIT: What should this be?
320             return new Object[]{"ISO-10646-UCS-4", null};
321         }
322         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
323             // UCS-4, unusual octect order (3412)
324             // REVISIT: What should this be?
325             return new Object[]{"ISO-10646-UCS-4", null};
326         }
327         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
328             // UTF-16, big-endian, no BOM
329             // (or could turn out to be UCS-2...
330             // REVISIT: What should this be?
331             return new Object[]{"UTF-16BE", new Boolean(true)};
332         }
333         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
334             // UTF-16, little-endian, no BOM
335             // (or could turn out to be UCS-2...
336             return new Object[]{"UTF-16LE", new Boolean(false)};
337         }
338         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
339             // EBCDIC
340             // a la xerces1, return CP037 instead of EBCDIC here
341             return new Object[]{"CP037", null};
342         }
343 
344         // default encoding
345         return new Object[]{"UTF-8", null};
346 
347     }
348 
349     // Adapted from:
350     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
351     /***
352      * Returns true if the current entity being scanned is external.
353      */
354     public boolean isExternal() {
355         return true;
356     }
357 
358     // Adapted from:
359     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
360     /***
361      * Returns the next character on the input.
362      * <p/>
363      * <strong>Note:</strong> The character is <em>not</em> consumed.
364      *
365      * @throws IOException  Thrown if i/o error occurs.
366      * @throws EOFException Thrown on end of file.
367      */
368     public int peekChar() throws IOException {
369 
370         // load more characters, if needed
371         if (fCurrentEntity.position == fCurrentEntity.count) {
372             load(0, true);
373         }
374 
375         // peek at character
376         int c = fCurrentEntity.ch[fCurrentEntity.position];
377 
378         // return peeked character
379         if (fCurrentEntity.isExternal()) {
380             return c != '\r' ? c : '\n';
381         } else {
382             return c;
383         }
384 
385     } // peekChar():int
386 
387     // Adapted from:
388     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
389     /***
390      * Returns the next character on the input.
391      * <p/>
392      * <strong>Note:</strong> The character is consumed.
393      *
394      * @throws IOException  Thrown if i/o error occurs.
395      * @throws EOFException Thrown on end of file.
396      */
397     public int scanChar() throws IOException {
398 
399         // load more characters, if needed
400         if (fCurrentEntity.position == fCurrentEntity.count) {
401             load(0, true);
402         }
403 
404         // scan character
405         int c = fCurrentEntity.ch[fCurrentEntity.position++];
406         boolean external = false;
407         if (c == '\n' ||
408                 (c == '\r' && (external = fCurrentEntity.isExternal()))) {
409             fCurrentEntity.lineNumber++;
410             fCurrentEntity.columnNumber = 1;
411             if (fCurrentEntity.position == fCurrentEntity.count) {
412                 fCurrentEntity.ch[0] = (char) c;
413                 load(1, false);
414             }
415             if (c == '\r' && external) {
416                 if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
417                     fCurrentEntity.position--;
418                 }
419                 c = '\n';
420             }
421         }
422 
423         // return character that was scanned
424         fCurrentEntity.columnNumber++;
425         return c;
426 
427     }
428 
429     // Adapted from:
430     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
431     /***
432      * Returns a string matching the Name production appearing immediately
433      * on the input as a symbol, or null if no Name string is present.
434      * <p/>
435      * <strong>Note:</strong> The Name characters are consumed.
436      * <p/>
437      * <strong>Note:</strong> The string returned is a symbol: it is the
438      * value obtained from the SymbolTable for the scanned characters.
439      * @return the scanned Name as a symbol, or null if zero characters were scanned.
440      * @throws IOException  Thrown if i/o error occurs.
441      * @throws EOFException Thrown on end of file.
442      * @see SymbolTable
443      * @see XMLChar#isName
444      * @see XMLChar#isNameStart
445      */
446     public String scanName() throws IOException {
447 
448         // load more characters, if needed
449         if (fCurrentEntity.position == fCurrentEntity.count) {
450             load(0, true);
451         }
452 
453         // scan name
454         int offset = fCurrentEntity.position; // buffer index where the candidate name starts
455         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
456             if (++fCurrentEntity.position == fCurrentEntity.count) { // buffer used up right after the first name character
457                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; // keep that character at index 0 across the reload
458                 offset = 0;
459                 if (load(1, false)) { // NOTE(review): a true result is treated as "nothing more to scan" - confirm against load()
460                     fCurrentEntity.columnNumber++;
461                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch,
462                             0, 1);
463                     return symbol; // the name is the single character kept at index 0
464                 }
465             }
466             while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
467                 if (++fCurrentEntity.position == fCurrentEntity.count) { // buffer used up in the middle of the name
468                     int length = fCurrentEntity.position - offset; // characters of the name scanned so far
469                     if (length == fBufferSize) {
470                         // the partial name fills the whole buffer: copy it into one twice as large
471                         char[] tmp = new char[fBufferSize * 2];
472                         System.arraycopy(fCurrentEntity.ch, offset,
473                                 tmp, 0, length);
474                         fCurrentEntity.ch = tmp;
475                         fBufferSize *= 2;
476                     } else { // otherwise slide the partial name to the front of the buffer
477                         System.arraycopy(fCurrentEntity.ch, offset,
478                                 fCurrentEntity.ch, 0, length);
479                     }
480                     offset = 0;
481                     if (load(length, false)) { // NOTE(review): presumably refills the buffer after the kept characters - confirm against load()
482                         break;
483                     }
484                 }
485             }
486         }
487         int length = fCurrentEntity.position - offset; // 0 when the first character is not a name start
488         fCurrentEntity.columnNumber += length;
489 
490         // return name
491         String symbol = null;
492         if (length > 0) {
493             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
494         }
495         return symbol;
496 
497     }
498 
499     // Adapted from:
500     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
501     /***
502      * Scans a range of attribute value data, setting the fields of the
503      * XMLString structure, appropriately.
504      * <p/>
505      * <strong>Note:</strong> The characters are consumed.
506      * <p/>
507      * <strong>Note:</strong> This method does not guarantee to return
508      * the longest run of attribute value data. This method may return
509      * before the quote character due to reaching the end of the input
510      * buffer or any other reason.
511      * <p/>
512      * <strong>Note:</strong> The fields contained in the XMLString
513      * structure are not guaranteed to remain valid upon subsequent calls
514      * to the entity scanner. Therefore, the caller is responsible for
515      * immediately using the returned character data or making a copy of
516      * the character data.
517      *
518      * @param quote   The quote character that signifies the end of the
519      *                attribute value data.
520      * @param content The content structure to fill.
521      * @return Returns the next character on the input, if known. This
522      *         value may be -1 but this does <em>not</em> designate
523      *         end of file.
524      * @throws IOException  Thrown if i/o error occurs.
525      * @throws EOFException Thrown on end of file.
526      */
527     public int scanLiteral(int quote, XMLString content)
528             throws IOException {
529 
530         // load more characters, if needed
531         if (fCurrentEntity.position == fCurrentEntity.count) {
532             load(0, true);
533         } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { // only one character left: move it to index 0 and load behind it
534             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
535             load(1, false);
536             fCurrentEntity.position = 0;
537         }
538 
539         // normalize newlines
540         int offset = fCurrentEntity.position; // start of the data handed back through 'content'
541         int c = fCurrentEntity.ch[offset];
542         int newlines = 0; // newline characters counted so far; subtracted from the column advance below
543         boolean external = fCurrentEntity.isExternal();
544         if (c == '\n' || (c == '\r' && external)) { // the data starts with a run of newlines: handle that run first
545             do {
546                 c = fCurrentEntity.ch[fCurrentEntity.position++];
547                 if (c == '\r' && external) {
548                     newlines++;
549                     fCurrentEntity.lineNumber++;
550                     fCurrentEntity.columnNumber = 1;
551                     if (fCurrentEntity.position == fCurrentEntity.count) { // buffer used up inside the run
552                         offset = 0;
553                         fCurrentEntity.position = newlines;
554                         if (load(newlines, false)) {
555                             break;
556                         }
557                     }
558                     if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { // "\r\n": skip the '\n' and drop one character from the returned range
559                         fCurrentEntity.position++;
560                         offset++;
561                     }
562                     /**** NEWLINE NORMALIZATION ***/
563                     else {
564                         newlines++;
565                     }
566                     /****/
567                 } else if (c == '\n') {
568                     newlines++;
569                     fCurrentEntity.lineNumber++;
570                     fCurrentEntity.columnNumber = 1;
571                     if (fCurrentEntity.position == fCurrentEntity.count) { // buffer used up inside the run
572                         offset = 0;
573                         fCurrentEntity.position = newlines;
574                         if (load(newlines, false)) {
575                             break;
576                         }
577                     }
578                     /**** NEWLINE NORMALIZATION ***
579                      if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
580                      && external) {
581                      fCurrentEntity.position++;
582                      offset++;
583                      }
584                      /***/
585                 } else { // first character after the run: un-read it and stop
586                     fCurrentEntity.position--;
587                     break;
588                 }
589             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
590             for (int i = offset; i < fCurrentEntity.position; i++) { // overwrite the scanned run with '\n' characters
591                 fCurrentEntity.ch[i] = '\n';
592             }
593             int length = fCurrentEntity.position - offset;
594             if (fCurrentEntity.position == fCurrentEntity.count - 1) { // run reached the last buffered character: hand back just the newlines
595                 content.setValues(fCurrentEntity.ch, offset, length);
596                 return -1;
597             }
598         }
599 
600         // scan literal value
601         while (fCurrentEntity.position < fCurrentEntity.count) {
602             c = fCurrentEntity.ch[fCurrentEntity.position++];
603             if ((c == quote &&
604                     (!fCurrentEntity.literal || external))
605                     || c == '%' || !XMLChar.isContent(c)) { // stop at the quote, a '%', or a character XMLChar.isContent rejects
606                 fCurrentEntity.position--;
607                 break;
608             }
609         }
610         int length = fCurrentEntity.position - offset;
611         fCurrentEntity.columnNumber += length - newlines; // newlines already reset the column, so they are not counted again
612         content.setValues(fCurrentEntity.ch, offset, length);
613 
614         // return next character
615         if (fCurrentEntity.position != fCurrentEntity.count) {
616             c = fCurrentEntity.ch[fCurrentEntity.position];
617             // NOTE: We don't want to accidentally signal the
618             //       end of the literal if we're expanding an
619             //       entity appearing in the literal. -Ac
620             if (c == quote && fCurrentEntity.literal) {
621                 c = -1;
622             }
623         } else { // nothing buffered after the scanned data: next character unknown
624             c = -1;
625         }
626         return c;
627 
628     }
629 
630     /***
631      * Scans a range of character data up to the specified delimiter,
632      * setting the fields of the XMLString structure, appropriately.
633      * <p/>
634      * <strong>Note:</strong> The characters are consumed.
635      * <p/>
636      * <strong>Note:</strong> This assumes that the internal buffer is
637      * at least the same size, or bigger, than the length of the delimiter
638      * and that the delimiter contains at least one character.
639      * <p/>
640      * <strong>Note:</strong> This method does not guarantee to return
641      * the longest run of character data. This method may return before
642      * the delimiter due to reaching the end of the input buffer or any
643      * other reason.
644      * <p/>
645      * <strong>Note:</strong> The fields contained in the XMLString
646      * structure are not guaranteed to remain valid upon subsequent calls
647      * to the entity scanner. Therefore, the caller is responsible for
648      * immediately using the returned character data or making a copy of
649      * the character data.
650      *
651      * @param delimiter The string that signifies the end of the character
652      *                  data to be scanned.
653      * @param buffer    The data structure to fill.
654      * @return Returns true if there is more data to scan, false otherwise.
655      * @throws IOException  Thrown if i/o error occurs.
656      * @throws EOFException Thrown on end of file.
657      */
658     public boolean scanData(String delimiter, XMLStringBuffer buffer)
659             throws IOException {
660 
661         boolean done = false;
662         int delimLen = delimiter.length();
663         char charAt0 = delimiter.charAt(0);
664         boolean external = fCurrentEntity.isExternal();
665         do {
666 
667             // load more characters, if needed
668 
669             if (fCurrentEntity.position == fCurrentEntity.count) {
670                 load(0, true);
671             } else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
672                 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
673                         fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
674                 load(fCurrentEntity.count - fCurrentEntity.position, false);
675                 fCurrentEntity.position = 0;
676             }
677             if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
678                 // something must be wrong with the input: e.g., file ends an
679                 // unterminated comment
680                 int length = fCurrentEntity.count - fCurrentEntity.position;
681                 buffer.append(fCurrentEntity.ch, fCurrentEntity.position,
682                         length);
683                 fCurrentEntity.columnNumber += fCurrentEntity.count;
684                 fCurrentEntity.position = fCurrentEntity.count;
685                 load(0, true);
686                 return false;
687             }
688 
689             // normalize newlines
690             int offset = fCurrentEntity.position;
691             int c = fCurrentEntity.ch[offset];
692             int newlines = 0;
693             if (c == '\n' || (c == '\r' && external)) {
694                 do {
695                     c = fCurrentEntity.ch[fCurrentEntity.position++];
696                     if (c == '\r' && external) {
697                         newlines++;
698                         fCurrentEntity.lineNumber++;
699                         fCurrentEntity.columnNumber = 1;
700                         if (fCurrentEntity.position == fCurrentEntity.count) {
701                             offset = 0;
702                             fCurrentEntity.position = newlines;
703                             if (load(newlines, false)) {
704                                 break;
705                             }
706                         }
707                         if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
708                             fCurrentEntity.position++;
709                             offset++;
710                         }
711                         /**** NEWLINE NORMALIZATION ***/
712                         else {
713                             newlines++;
714                         }
715                     } else if (c == '\n') {
716                         newlines++;
717                         fCurrentEntity.lineNumber++;
718                         fCurrentEntity.columnNumber = 1;
719                         if (fCurrentEntity.position == fCurrentEntity.count) {
720                             offset = 0;
721                             fCurrentEntity.position = newlines;
722                             fCurrentEntity.count = newlines;
723                             if (load(newlines, false)) {
724                                 break;
725                             }
726                         }
727                     } else {
728                         fCurrentEntity.position--;
729                         break;
730                     }
731                 } while (fCurrentEntity.position < fCurrentEntity.count - 1);
732                 for (int i = offset; i < fCurrentEntity.position; i++) {
733                     fCurrentEntity.ch[i] = '\n';
734                 }
735                 int length = fCurrentEntity.position - offset;
736                 if (fCurrentEntity.position == fCurrentEntity.count - 1) {
737                     buffer.append(fCurrentEntity.ch, offset, length);
738                     return true;
739                 }
740             }
741 
742             // iterate over buffer looking for delimiter
743             OUTER:
744             while (fCurrentEntity.position < fCurrentEntity.count) {
745                 c = fCurrentEntity.ch[fCurrentEntity.position++];
746                 if (c == charAt0) {
747                     // looks like we just hit the delimiter
748                     int delimOffset = fCurrentEntity.position - 1;
749                     for (int i = 1; i < delimLen; i++) {
750                         if (fCurrentEntity.position == fCurrentEntity.count) {
751                             fCurrentEntity.position -= i;
752                             break OUTER;
753                         }
754                         c = fCurrentEntity.ch[fCurrentEntity.position++];
755                         if (delimiter.charAt(i) != c) {
756                             fCurrentEntity.position--;
757                             break;
758                         }
759                     }
760                     if (fCurrentEntity.position == delimOffset + delimLen) {
761                         done = true;
762                         break;
763                     }
764                 } else if (c == '\n' || (external && c == '\r')) {
765                     fCurrentEntity.position--;
766                     break;
767                 } else if (XMLChar.isInvalid(c)) {
768                     fCurrentEntity.position--;
769                     int length = fCurrentEntity.position - offset;
770                     fCurrentEntity.columnNumber += length - newlines;
771                     buffer.append(fCurrentEntity.ch, offset, length);
772                     return true;
773                 }
774             }
775             int length = fCurrentEntity.position - offset;
776             fCurrentEntity.columnNumber += length - newlines;
777             if (done) {
778                 length -= delimLen;
779             }
780             buffer.append(fCurrentEntity.ch, offset, length);
781 
782             // return true if string was skipped
783         } while (!done);
784         return !done;
785 
786     }
787 
788     // Adapted from:
789     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
790     /***
791      * Skips a character appearing immediately on the input.
792      * <p/>
793      * <strong>Note:</strong> The character is consumed only if it matches
794      * the specified character.
795      *
796      * @param c The character to skip.
797      * @return Returns true if the character was skipped.
798      * @throws IOException  Thrown if i/o error occurs.
799      * @throws EOFException Thrown on end of file.
800      */
801     public boolean skipChar(int c) throws IOException {
802 
803         // load more characters, if needed
804         if (fCurrentEntity.position == fCurrentEntity.count) {
805             load(0, true);
806         }
807 
808         // skip character
809         int cc = fCurrentEntity.ch[fCurrentEntity.position];
810         if (cc == c) {
811             fCurrentEntity.position++;
812             if (c == '\n') {
813                 fCurrentEntity.lineNumber++;
814                 fCurrentEntity.columnNumber = 1;
815             } else {
816                 fCurrentEntity.columnNumber++;
817             }
818             return true;
819         } else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
820             // handle newlines
821             if (fCurrentEntity.position == fCurrentEntity.count) {
822                 fCurrentEntity.ch[0] = (char) cc;
823                 load(1, false);
824             }
825             fCurrentEntity.position++;
826             if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
827                 fCurrentEntity.position++;
828             }
829             fCurrentEntity.lineNumber++;
830             fCurrentEntity.columnNumber = 1;
831             return true;
832         }
833 
834         // character was not skipped
835         return false;
836 
837     }
838 
839     // Adapted from:
840     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
841     /***
842      * Skips space characters appearing immediately on the input.
843      * <p/>
844      * <strong>Note:</strong> The characters are consumed only if they are
845      * space characters.
         * <p/>
         * While skipping, the current entity's location is maintained: a line
         * feed (or, in an external entity, a carriage return) increments the
         * line number and resets the column to 1; any other space character
         * increments the column. In an external entity a line feed directly
         * following a carriage return is consumed as part of the same newline.
846      *
847      * @return Returns true if at least one space character was skipped.
848      * @throws IOException  Thrown if i/o error occurs.
849      * @throws EOFException Thrown on end of file.
850      * @see XMLChar#isSpace
851      */
852     public boolean skipSpaces() throws IOException {
853 
854         // load more characters, if needed
855         if (fCurrentEntity.position == fCurrentEntity.count) {
856             load(0, true);
857         }
858 
859         // skip spaces
860         int c = fCurrentEntity.ch[fCurrentEntity.position];
861         if (XMLChar.isSpace(c)) {
862             boolean external = fCurrentEntity.isExternal();
863             do {
                    // set when the refill below reports the end of the entity;
                    // in that case the position must not be advanced afterwards
864                 boolean entityChanged = false;
865                 // handle newlines
866                 if (c == '\n' || (external && c == '\r')) {
867                     fCurrentEntity.lineNumber++;
868                     fCurrentEntity.columnNumber = 1;
                        // the newline is the last buffered character: keep it in
                        // slot 0 and read the following characters in behind it so
                        // the CR/LF look-ahead below has something to inspect
869                     if (fCurrentEntity.position == fCurrentEntity.count - 1) {
870                         fCurrentEntity.ch[0] = (char) c;
871                         entityChanged = load(1, true);
872                         if (!entityChanged)
873                             // load() moved the position to 1; point it back at
874                             // the saved newline when the entity did not change
875                             fCurrentEntity.position = 0;
876                     }
877                     if (c == '\r' && external) {
878                         // REVISIT: Does this need to be updated to fix the
879                         //          #x0D ^#x0A newline normalization problem? -Ac
                            // step onto a following line feed so that CR LF counts
                            // as one newline; otherwise undo the look-ahead
880                         if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
881                             fCurrentEntity.position--;
882                         }
883                     }
                        // the block comment below is disabled code kept from the
                        // upstream source; it is not executed
884                     /**** NEWLINE NORMALIZATION ***
885                      else {
886                      if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
887                      && external) {
888                      fCurrentEntity.position++;
889                      }
890                      }
891                      /***/
892                 } else {
893                     fCurrentEntity.columnNumber++;
894                 }
895                 // move past the character just handled, then refill if the
                    // buffer has been used up
896                 if (!entityChanged)
897                     fCurrentEntity.position++;
898                 if (fCurrentEntity.position == fCurrentEntity.count) {
899                     load(0, true);
900                 }
901             } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
902             return true;
903         }
904 
905         // no spaces were found
906         return false;
907 
908     }
909 
910     /***
911      * Skips the specified string appearing immediately on the input.
912      * <p/>
913      * <strong>Note:</strong> The characters are consumed only if they are
914      * space characters.
915      *
916      * @param s The string to skip.
917      * @return Returns true if the string was skipped.
918      * @throws IOException  Thrown if i/o error occurs.
919      * @throws EOFException Thrown on end of file.
920      */
921     public boolean skipString(String s) throws IOException {
922 
923         // load more characters, if needed
924         if (fCurrentEntity.position == fCurrentEntity.count) {
925             load(0, true);
926         }
927 
928         // skip string
929         final int length = s.length();
930         for (int i = 0; i < length; i++) {
931             char c = fCurrentEntity.ch[fCurrentEntity.position++];
932             if (c != s.charAt(i)) {
933                 fCurrentEntity.position -= i + 1;
934                 return false;
935             }
936             if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
937                 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
938                 // REVISIT: Can a string to be skipped cross an
939                 //          entity boundary? -Ac
940                 if (load(i + 1, false)) {
941                     fCurrentEntity.position -= i + 1;
942                     return false;
943                 }
944             }
945         }
946         fCurrentEntity.columnNumber += length;
947         return true;
948 
949     }
950 
951     // Adapted from:
952     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
953     /***
954      * Loads a chunk of text.
955      *
956      * @param offset       The offset into the character buffer to
957      *                     read the next batch of characters.
958      * @param changeEntity True if the load should change entities
959      *                     at the end of the entity, otherwise leave
960      *                     the current entity in place and the entity
961      *                     boundary will be signaled by the return
962      *                     value.
963      * @returns Returns true if the entity changed as a result of this
964      * load operation.
965      */
966     final boolean load(int offset, boolean changeEntity)
967             throws IOException {
968 
969         // read characters
970         int length = fCurrentEntity.mayReadChunks ?
971                 (fCurrentEntity.ch.length - offset) :
972                 (DEFAULT_XMLDECL_BUFFER_SIZE);
973         int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset,
974                 length);
975 
976         // reset count and position
977         boolean entityChanged = false;
978         if (count != -1) {
979             if (count != 0) {
980                 fCurrentEntity.count = count + offset;
981                 fCurrentEntity.position = offset;
982             }
983         }
984 
985         // end of this entity
986         else {
987             fCurrentEntity.count = offset;
988             fCurrentEntity.position = offset;
989             entityChanged = true;
990             if (changeEntity) {
991                 endEntity();
992                 if (fCurrentEntity == null) {
993                     throw new EOFException();
994                 }
995                 // handle the trailing edges
996                 if (fCurrentEntity.position == fCurrentEntity.count) {
997                     load(0, false);
998                 }
999             }
1000         }
1001 
1002         return entityChanged;
1003 
1004     }
1005 
1006     // Adapted from:
1007     // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
1008     /***
1009      * This class wraps the byte inputstreams we're presented with.
1010      * We need it because java.io.InputStreams don't provide
1011      * functionality to reread processed bytes, and they have a habit
1012      * of reading more than one character when you call their read()
1013      * methods.  This means that, once we discover the true (declared)
1014      * encoding of a document, we can neither backtrack to read the
1015      * whole doc again nor start reading where we are with a new
1016      * reader.
1017      * <p/>
1018      * This class allows rewinding an inputStream by allowing a mark
1019      * to be set, and the stream reset to that position.  <strong>The
1020      * class assumes that it needs to read one character per
1021      * invocation when it's read() method is inovked, but uses the
1022      * underlying InputStream's read(char[], offset length) method--it
1023      * won't buffer data read this way!</strong>
1024      *
1025      * @author Neil Graham, IBM
1026      * @author Glenn Marcy, IBM
1027      */
1028     private final class RewindableInputStream extends InputStream {
1029 
1030         private InputStream fInputStream;
1031         private byte[] fData;
1032         private int fStartOffset;
1033         private int fEndOffset;
1034         private int fOffset;
1035         private int fLength;
1036         private int fMark;
1037 
1038         public RewindableInputStream(InputStream is) {
1039             fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
1040             fInputStream = is;
1041             fStartOffset = 0;
1042             fEndOffset = -1;
1043             fOffset = 0;
1044             fLength = 0;
1045             fMark = 0;
1046         }
1047 
1048         public void setStartOffset(int offset) {
1049             fStartOffset = offset;
1050         }
1051 
1052         public void rewind() {
1053             fOffset = fStartOffset;
1054         }
1055 
1056         public int read() throws IOException {
1057             int b = 0;
1058             if (fOffset < fLength) {
1059                 return fData[fOffset++] & 0xff;
1060             }
1061             if (fOffset == fEndOffset) {
1062                 return -1;
1063             }
1064             if (fOffset == fData.length) {
1065                 byte[] newData = new byte[fOffset << 1];
1066                 System.arraycopy(fData, 0, newData, 0, fOffset);
1067                 fData = newData;
1068             }
1069             b = fInputStream.read();
1070             if (b == -1) {
1071                 fEndOffset = fOffset;
1072                 return -1;
1073             }
1074             fData[fLength++] = (byte) b;
1075             fOffset++;
1076             return b & 0xff;
1077         }
1078 
1079         public int read(byte[] b, int off, int len) throws IOException {
1080             int bytesLeft = fLength - fOffset;
1081             if (bytesLeft == 0) {
1082                 if (fOffset == fEndOffset) {
1083                     return -1;
1084                 }
1085                 // better get some more for the voracious reader...
1086                 if (fCurrentEntity.mayReadChunks) {
1087                     return fInputStream.read(b, off, len);
1088                 }
1089                 int returnedVal = read();
1090                 if (returnedVal == -1) {
1091                     fEndOffset = fOffset;
1092                     return -1;
1093                 }
1094                 b[off] = (byte) returnedVal;
1095                 return 1;
1096             }
1097             if (len < bytesLeft) {
1098                 if (len <= 0) {
1099                     return 0;
1100                 }
1101             } else {
1102                 len = bytesLeft;
1103             }
1104             if (b != null) {
1105                 System.arraycopy(fData, fOffset, b, off, len);
1106             }
1107             fOffset += len;
1108             return len;
1109         }
1110 
1111         public long skip(long n)
1112                 throws IOException {
1113             int bytesLeft;
1114             if (n <= 0) {
1115                 return 0;
1116             }
1117             bytesLeft = fLength - fOffset;
1118             if (bytesLeft == 0) {
1119                 if (fOffset == fEndOffset) {
1120                     return 0;
1121                 }
1122                 return fInputStream.skip(n);
1123             }
1124             if (n <= bytesLeft) {
1125                 fOffset += n;
1126                 return n;
1127             }
1128             fOffset += bytesLeft;
1129             if (fOffset == fEndOffset) {
1130                 return bytesLeft;
1131             }
1132             n -= bytesLeft;
1133             /*
1134             * In a manner of speaking, when this class isn't permitting more
1135             * than one byte at a time to be read, it is "blocking".  The
1136             * available() method should indicate how much can be read without
1137             * blocking, so while we're in this mode, it should only indicate
1138             * that bytes in its buffer are available; otherwise, the result of
1139             * available() on the underlying InputStream is appropriate.
1140             */
1141             return fInputStream.skip(n) + bytesLeft;
1142         }
1143 
1144         public int available() throws IOException {
1145             int bytesLeft = fLength - fOffset;
1146             if (bytesLeft == 0) {
1147                 if (fOffset == fEndOffset) {
1148                     return -1;
1149                 }
1150                 return fCurrentEntity.mayReadChunks ? fInputStream.available()
1151                         : 0;
1152             }
1153             return bytesLeft;
1154         }
1155 
1156         public void mark(int howMuch) {
1157             fMark = fOffset;
1158         }
1159 
1160         public void reset() {
1161             fOffset = fMark;
1162         }
1163 
1164         public boolean markSupported() {
1165             return true;
1166         }
1167 
1168         public void close() throws IOException {
1169             if (fInputStream != null) {
1170                 fInputStream.close();
1171                 fInputStream = null;
1172             }
1173         }
1174     } // end of RewindableInputStream class
1175 
1176     // Adapted from:
1177     // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
1178     private void scanXMLDecl() throws IOException, JasperException {
1179 
1180         if (skipString("<?xml")) {
1181             fMarkupDepth++;
1182             // NOTE: special case where document starts with a PI
1183             //       whose name starts with "xml" (e.g. "xmlfoo")
1184             if (XMLChar.isName(peekChar())) {
1185                 fStringBuffer.clear();
1186                 fStringBuffer.append("xml");
1187                 while (XMLChar.isName(peekChar())) {
1188                     fStringBuffer.append((char) scanChar());
1189                 }
1190                 String target = fSymbolTable.addSymbol(fStringBuffer.ch,
1191                         fStringBuffer.offset,
1192                         fStringBuffer.length);
1193                 scanPIData(target, fString);
1194             }
1195 
1196             // standard XML declaration
1197             else {
1198                 scanXMLDeclOrTextDecl(false);
1199             }
1200         }
1201     }
1202 
1203     // Adapted from:
1204     // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
1205     /***
1206      * Scans an XML or text declaration.
1207      * <p/>
1208      * <pre>
1209      * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1210      * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1211      * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1212      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1213      * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1214      *                 | ('"' ('yes' | 'no') '"'))
1215      * <p/>
1216      * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
1217      * </pre>
1218      *
1219      * @param scanningTextDecl True if a text declaration is to
1220      *                         be scanned instead of an XML
1221      *                         declaration.
1222      */
1223     private void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
1224             throws IOException, JasperException {
1225 
1226         // scan decl
1227         scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
1228         fMarkupDepth--;
1229 
1230         // pseudo-attribute values
1231         String encodingPseudoAttr = fStrings[1];
1232 
1233         // set encoding on reader
1234         if (encodingPseudoAttr != null) {
1235             isEncodingSetInProlog = true;
1236             encoding = encodingPseudoAttr;
1237         }
1238     }
1239 
1240     // Adapted from:
1241     // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
1242     /***
1243      * Scans an XML or text declaration.
1244      * <p/>
          * Scanning starts at the pseudo-attributes and runs up to and
          * including the closing '?&gt;'. The pseudo-attributes are checked
          * against the order version, encoding, standalone by a small state
          * machine; each violation is reported through the error dispatcher.
          * <p/>
1245      * <pre>
1246      * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'
1247      * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1248      * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1249      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1250      * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1251      *                 | ('"' ('yes' | 'no') '"'))
1252      * <p/>
1253      * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?&gt;'
1254      * </pre>
1255      *
          * <strong>Note:</strong> This method uses fString, anything in it
          * at the time of calling is lost.
          *
1256      * @param scanningTextDecl      True if a text declaration is to
1257      *                              be scanned instead of an XML
1258      *                              declaration.
1259      * @param pseudoAttributeValues An array of size 3 to return the version,
1260      *                              encoding and standalone pseudo attribute values
1261      *                              (in that order).
1262      *                              A slot is set to null when the
1263      *                              corresponding pseudo attribute was
1264      *                              not present.
          * @throws IOException     Thrown if i/o error occurs.
          * @throws JasperException Declared for the error reporting calls made
          *                         when the declaration is malformed.
1265      */
1266     private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
1267                                        String[] pseudoAttributeValues)
1268             throws IOException, JasperException {
1269 
1270         // pseudo-attribute values
1271         String version = null;
1272         String encoding = null;
1273         String standalone = null;
1274 
1275         // scan pseudo-attributes
              // the state names the pseudo-attribute expected next
1276         final int STATE_VERSION = 0;
1277         final int STATE_ENCODING = 1;
1278         final int STATE_STANDALONE = 2;
1279         final int STATE_DONE = 3;
1280         int state = STATE_VERSION;
1281 
1282         boolean dataFoundForTarget = false;
              // white space must separate consecutive pseudo-attributes, so
              // remember whether any was skipped before each one
1283         boolean sawSpace = skipSpaces();
1284         while (peekChar() != '?') {
1285             dataFoundForTarget = true;
                  // scanPseudoAttribute leaves the attribute value in fString
1286             String name = scanPseudoAttribute(scanningTextDecl, fString);
                  // NOTE(review): names are compared by identity below; this
                  // presumably relies on scanName() and the f*Symbol fields
                  // sharing interned symbol-table strings -- confirm
1287             switch (state) {
1288                 case STATE_VERSION: {
1289                     if (name == fVersionSymbol) {
1290                         if (!sawSpace) {
1291                             reportFatalError(scanningTextDecl
1292                                     ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
1293                                     : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
1294                                     null);
1295                         }
1296                         version = fString.toString();
1297                         state = STATE_ENCODING;
1298                         if (!version.equals("1.0")) {
1299                             // REVISIT: XML REC says we should throw an error
1300                             // in such cases.
1301                             // some may object the throwing of fatalError.
1302                             err.jspError("jsp.error.xml.versionNotSupported",
1303                                     version);
1304                         }
1305                     } else if (name == fEncodingSymbol) {
                              // encoding without a preceding version: allowed in
                              // a text declaration, reported for an XML declaration
1306                         if (!scanningTextDecl) {
1307                             err.jspError("jsp.error.xml.versionInfoRequired");
1308                         }
1309                         if (!sawSpace) {
1310                             reportFatalError(scanningTextDecl
1311                                     ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1312                                     : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1313                                     null);
1314                         }
1315                         encoding = fString.toString();
                              // a text declaration ends after the encoding; an XML
                              // declaration may still carry standalone
1316                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
1317                     } else {
1318                         if (scanningTextDecl) {
1319                             err.jspError("jsp.error.xml.encodingDeclRequired");
1320                         } else {
1321                             err.jspError("jsp.error.xml.versionInfoRequired");
1322                         }
1323                     }
1324                     break;
1325                 }
1326                 case STATE_ENCODING: {
1327                     if (name == fEncodingSymbol) {
1328                         if (!sawSpace) {
1329                             reportFatalError(scanningTextDecl
1330                                     ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1331                                     : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1332                                     null);
1333                         }
1334                         encoding = fString.toString();
1335                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
1336                         // TODO: check encoding name; set encoding on
1337                         //       entity scanner
1338                     } else if (!scanningTextDecl && name == fStandaloneSymbol) {
                              // XML declaration that skips the optional encoding
1339                         if (!sawSpace) {
1340                             err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1341                         }
1342                         standalone = fString.toString();
1343                         state = STATE_DONE;
1344                         if (!standalone.equals("yes") && !standalone.equals("no")) {
1345                             err.jspError("jsp.error.xml.sdDeclInvalid");
1346                         }
1347                     } else {
1348                         err.jspError("jsp.error.xml.encodingDeclRequired");
1349                     }
1350                     break;
1351                 }
1352                 case STATE_STANDALONE: {
1353                     if (name == fStandaloneSymbol) {
1354                         if (!sawSpace) {
1355                             err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1356                         }
1357                         standalone = fString.toString();
1358                         state = STATE_DONE;
1359                         if (!standalone.equals("yes") && !standalone.equals("no")) {
1360                             err.jspError("jsp.error.xml.sdDeclInvalid");
1361                         }
1362                     } else {
                              // NOTE(review): this state expects 'standalone', yet
                              // the key reported is encodingDeclRequired -- confirm
                              // that this is the intended message
1363                         err.jspError("jsp.error.xml.encodingDeclRequired");
1364                     }
1365                     break;
1366                 }
1367                 default: {
                          // STATE_DONE: nothing may follow the last pseudo-attribute
1368                     err.jspError("jsp.error.xml.noMorePseudoAttributes");
1369                 }
1370             }
1371             sawSpace = skipSpaces();
1372         }
1373         // REVISIT: should we remove this error reporting?
              // a text declaration that stopped before reaching STATE_DONE
1374         if (scanningTextDecl && state != STATE_DONE) {
1375             err.jspError("jsp.error.xml.morePseudoAttributes");
1376         }
1377 
1378         // If there is no data in the xml or text decl then we fail to report
1379         // error for version or encoding info above.
1380         if (scanningTextDecl) {
1381             if (!dataFoundForTarget && encoding == null) {
1382                 err.jspError("jsp.error.xml.encodingDeclRequired");
1383             }
1384         } else {
1385             if (!dataFoundForTarget && version == null) {
1386                 err.jspError("jsp.error.xml.versionInfoRequired");
1387             }
1388         }
1389 
1390         // end
              // the declaration must close with '?' immediately followed by '>'
1391         if (!skipChar('?')) {
1392             err.jspError("jsp.error.xml.xmlDeclUnterminated");
1393         }
1394         if (!skipChar('>')) {
1395             err.jspError("jsp.error.xml.xmlDeclUnterminated");
1396 
1397         }
1398 
1399         // fill in return array
1400         pseudoAttributeValues[0] = version;
1401         pseudoAttributeValues[1] = encoding;
1402         pseudoAttributeValues[2] = standalone;
1403     }
1404 
1405     // Adapted from:
1406     // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
1407     /***
1408      * Scans a pseudo attribute.
1409      *
1410      * @param scanningTextDecl True if scanning this pseudo-attribute for a
1411      *                         TextDecl; false if scanning XMLDecl. This
1412      *                         flag is needed to report the correct type of
1413      *                         error.
1414      * @param value            The string to fill in with the attribute
1415      *                         value.
1416      * @return The name of the attribute
1417      *         <p/>
1418      *         <strong>Note:</strong> This method uses fStringBuffer2, anything in it
1419      *         at the time of calling is lost.
1420      */
1421     public String scanPseudoAttribute(boolean scanningTextDecl,
1422                                       XMLString value)
1423             throws IOException, JasperException {
1424 
1425         String name = scanName();
1426         if (name == null) {
1427             err.jspError("jsp.error.xml.pseudoAttrNameExpected");
1428         }
1429         skipSpaces();
1430         if (!skipChar('=')) {
1431             reportFatalError(scanningTextDecl ?
1432                     "jsp.error.xml.eqRequiredInTextDecl"
1433                     : "jsp.error.xml.eqRequiredInXMLDecl",
1434                     name);
1435         }
1436         skipSpaces();
1437         int quote = peekChar();
1438         if (quote != '\'' && quote != '"') {
1439             reportFatalError(scanningTextDecl ?
1440                     "jsp.error.xml.quoteRequiredInTextDecl"
1441                     : "jsp.error.xml.quoteRequiredInXMLDecl",
1442                     name);
1443         }
1444         scanChar();
1445         int c = scanLiteral(quote, value);
1446         if (c != quote) {
1447             fStringBuffer2.clear();
1448             do {
1449                 fStringBuffer2.append(value);
1450                 if (c != -1) {
1451                     if (c == '&' || c == '%' || c == '<' || c == ']') {
1452                         fStringBuffer2.append((char) scanChar());
1453                     } else if (XMLChar.isHighSurrogate(c)) {
1454                         scanSurrogates(fStringBuffer2);
1455                     } else if (XMLChar.isInvalid(c)) {
1456                         String key = scanningTextDecl
1457                                 ? "jsp.error.xml.invalidCharInTextDecl"
1458                                 : "jsp.error.xml.invalidCharInXMLDecl";
1459                         reportFatalError(key, Integer.toString(c, 16));
1460                         scanChar();
1461                     }
1462                 }
1463                 c = scanLiteral(quote, value);
1464             } while (c != quote);
1465             fStringBuffer2.append(value);
1466             value.setValues(fStringBuffer2);
1467         }
1468         if (!skipChar(quote)) {
1469             reportFatalError(scanningTextDecl ?
1470                     "jsp.error.xml.closeQuoteMissingInTextDecl"
1471                     : "jsp.error.xml.closeQuoteMissingInXMLDecl",
1472                     name);
1473         }
1474 
1475         // return
1476         return name;
1477 
1478     }
1479 
1480     // Adapted from:
1481     // org.apache.xerces.impl.XMLScanner.scanPIData
1482     /**
1483      * Scans the data part of a processing instruction. This is needed to
1484      * handle a document that starts with a processing instruction whose
1485      * target name merely <em>starts with</em> "xml" (e.g. xmlfoo).
1486      * <p>
1487      * <strong>Note:</strong> fStringBuffer is cleared and reused as scratch
1488      * space, so whatever it held when this method was called is lost.
1489      *
1490      * @param target The PI target; "xml" in any letter case is reported as reserved
1491      * @param data   Receives the scanned data; cleared when "?>" follows the target directly
1492      * @throws IOException     if one of the underlying scan calls raises it
1493      * @throws JasperException if err.jspError or one of the scan calls raises it
1494      */
1495     private void scanPIData(String target, XMLString data)
1496             throws IOException, JasperException {
1497 
1498         // the three-letter target "xml", in any letter case, is reserved
1499         boolean reservedTarget = target.length() == 3
1500                 && Character.toLowerCase(target.charAt(0)) == 'x'
1501                 && Character.toLowerCase(target.charAt(1)) == 'm'
1502                 && Character.toLowerCase(target.charAt(2)) == 'l';
1503         if (reservedTarget) {
1504             err.jspError("jsp.error.xml.reservedPITarget");
1505         }
1506 
1507         // white space must separate the target from any data
1508         if (!skipSpaces()) {
1509             if (skipString("?>")) {
1510                 // the instruction ends right here: there is no data
1511                 data.clear();
1512                 return;
1513             }
1514             err.jspError("jsp.error.xml.spaceRequiredInPI");
1515         }
1516 
1517         // collect the data in the scratch buffer; while scanData reports true,
1518         // check the next char for a surrogate pair or an invalid character
1519         fStringBuffer.clear();
1520         while (scanData("?>", fStringBuffer)) {
1521             int next = peekChar();
1522             if (next == -1) {
1523                 continue;
1524             }
1525             if (XMLChar.isHighSurrogate(next)) {
1526                 scanSurrogates(fStringBuffer);
1527             } else if (XMLChar.isInvalid(next)) {
1528                 err.jspError("jsp.error.xml.invalidCharInPI",
1529                         Integer.toHexString(next));
1530                 scanChar();
1531             }
1532         }
1533         // hand the accumulated text to the caller
1534         data.setValues(fStringBuffer);
1535 
1536     }
1537 
1538     // Adapted from:
1539     // org.apache.xerces.impl.XMLScanner.scanSurrogates
1540     /**
1541      * Scans a surrogate pair and appends both halves to the given buffer.
1542      * <p>
1543      * <strong>Note:</strong> The caller must already have identified the
1544      * current char as a high surrogate; that is not re-checked here.
1545      *
1546      * @param buf The buffer to append the scanned surrogates to.
1547      * @return True if both surrogates were appended; false when an invalid
1548      *         character was reported and err.jspError returned normally.
1549      */
1550     private boolean scanSurrogates(XMLStringBuffer buf)
1551             throws IOException, JasperException {
1552 
1553         int lead = scanChar();
1554         int trail = peekChar();
1555 
1556         // code reported when the pair turns out to be unusable
1557         int offending;
1558         if (!XMLChar.isLowSurrogate(trail)) {
1559             // no low surrogate follows: blame the high surrogate itself
1560             offending = lead;
1561         } else {
1562             scanChar();
1563 
1564             // combine the halves; the result must be a valid XML character
1565             int combined = XMLChar.supplemental((char) lead, (char) trail);
1566             if (XMLChar.isValid(combined)) {
1567                 buf.append((char) lead);
1568                 buf.append((char) trail);
1569                 return true;
1570             }
1571             offending = combined;
1572         }
1573 
1574         err.jspError("jsp.error.xml.invalidCharInContent",
1575                 Integer.toString(offending, 16));
1576         return false;
1577     }
1578 
1579     // Adapted from:
1580     // org.apache.xerces.impl.XMLScanner.reportFatalError
1581     /**
1582      * Convenience function used in all XML scanners: passes msgId and arg on to err.jspError.
1583      */
1584     private void reportFatalError(String msgId, String arg)
1585             throws JasperException {
1586         err.jspError(msgId, arg);
1587     }
1588 
1589 }
1590 
1591