View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.struts2.jasper.compiler;
19  
20  import org.apache.struts2.jasper.JasperException;
21  import org.apache.struts2.jasper.JspCompilationContext;
22  import org.apache.struts2.jasper.xmlparser.XMLEncodingDetector;
23  import org.xml.sax.Attributes;
24  
25  import java.io.FileNotFoundException;
26  import java.io.IOException;
27  import java.io.InputStreamReader;
28  import java.net.JarURLConnection;
29  import java.net.URL;
30  import java.util.Stack;
31  import java.util.jar.JarFile;
32  
33  /***
34   * Controller for the parsing of a JSP page.
35   * <p/>
36   * The same ParserController instance is used for a JSP page and any JSP
37   * segments included by it (via an include directive), where each segment may
38   * be provided in standard or XML syntax. This class selects and invokes the
39   * appropriate parser for the JSP page and its included segments.
40   *
41   * @author Pierre Delisle
42   * @author Jan Luehe
43   */
44  class ParserController implements TagConstants {
45  
46      private static final String CHARSET = "charset=";
47  
48      private JspCompilationContext ctxt;
49      private Compiler compiler;
50      private ErrorDispatcher err;
51  
52      /*
53       * Indicates the syntax (XML or standard) of the file being processed
54       */
55      private boolean isXml;
56  
57      /*
58       * A stack to keep track of the 'current base directory'
59       * for include directives that refer to relative paths.
60       */
61      private Stack baseDirStack = new Stack();
62  
63      private boolean isEncodingSpecifiedInProlog;
64  
65      private String sourceEnc;
66  
67      private boolean isDefaultPageEncoding;
68      private boolean isTagFile;
69      private boolean directiveOnly;
70  
71      /*
72       * Constructor
73       */
74      public ParserController(JspCompilationContext ctxt, Compiler compiler) {
75          this.ctxt = ctxt;
76          this.compiler = compiler;
77          this.err = compiler.getErrorDispatcher();
78      }
79  
80      public JspCompilationContext getJspCompilationContext() {
81          return ctxt;
82      }
83  
84      public Compiler getCompiler() {
85          return compiler;
86      }
87  
88      /***
89       * Parses a JSP page or tag file. This is invoked by the compiler.
90       *
91       * @param inFileName The path to the JSP page or tag file to be parsed.
92       */
93      public Node.Nodes parse(String inFileName)
94              throws FileNotFoundException, JasperException, IOException {
95          // If we're parsing a packaged tag file or a resource included by it
96          // (using an include directive), ctxt.getTagFileJar() returns the
97          // JAR file from which to read the tag file or included resource,
98          // respectively.
99          isTagFile = ctxt.isTagFile();
100         directiveOnly = false;
101         return doParse(inFileName, null, ctxt.getTagFileJarUrl());
102     }
103 
104     /***
105      * Processes an include directive with the given path.
106      *
107      * @param inFileName The path to the resource to be included.
108      * @param parent     The parent node of the include directive.
109      * @param jarFile    The JAR file from which to read the included resource,
110      *                   or null of the included resource is to be read from the filesystem
111      */
112     public Node.Nodes parse(String inFileName, Node parent,
113                             URL jarFileUrl)
114             throws FileNotFoundException, JasperException, IOException {
115         // For files that are statically included, isTagfile and directiveOnly
116         // remain unchanged.
117         return doParse(inFileName, parent, jarFileUrl);
118     }
119 
120     /***
121      * Extracts tag file directive information from the tag file with the
122      * given name.
123      * <p/>
124      * This is invoked by the compiler
125      *
126      * @param inFileName The name of the tag file to be parsed.
127      */
128     public Node.Nodes parseTagFileDirectives(String inFileName)
129             throws FileNotFoundException, JasperException, IOException {
130         boolean isTagFileSave = isTagFile;
131         boolean directiveOnlySave = directiveOnly;
132         isTagFile = true;
133         directiveOnly = true;
134         Node.Nodes page = doParse(inFileName, null,
135                 (URL) ctxt.getTagFileJarUrls().get(inFileName));
136         directiveOnly = directiveOnlySave;
137         isTagFile = isTagFileSave;
138         return page;
139     }
140 
141     /***
142      * Parses the JSP page or tag file with the given path name.
143      *
144      * @param inFileName     The name of the JSP page or tag file to be parsed.
145      * @param parent         The parent node (non-null when processing an include
146      *                       directive)
147      * @param isTagFile      true if file to be parsed is tag file, and false if it
148      *                       is a regular JSP page
149      * @param directivesOnly true if the file to be parsed is a tag file and
150      *                       we are only interested in the directives needed for constructing a
151      *                       TagFileInfo.
152      * @param jarFile        The JAR file from which to read the JSP page or tag file,
153      *                       or null if the JSP page or tag file is to be read from the filesystem
154      */
155     private Node.Nodes doParse(String inFileName,
156                                Node parent,
157                                URL jarFileUrl)
158             throws FileNotFoundException, JasperException, IOException {
159 
160         Node.Nodes parsedPage = null;
161         isEncodingSpecifiedInProlog = false;
162         isDefaultPageEncoding = false;
163 
164         JarFile jarFile = getJarFile(jarFileUrl);
165         String absFileName = resolveFileName(inFileName);
166         String jspConfigPageEnc = getJspConfigPageEncoding(absFileName);
167 
168         // Figure out what type of JSP document and encoding type we are
169         // dealing with
170         determineSyntaxAndEncoding(absFileName, jarFile, jspConfigPageEnc);
171 
172         if (parent != null) {
173             // Included resource, add to dependent list
174             compiler.getPageInfo().addDependant(absFileName);
175         }
176 
177         if (isXml && isEncodingSpecifiedInProlog) {
178             /*
179             * Make sure the encoding explicitly specified in the XML
180             * prolog (if any) matches that in the JSP config element
181             * (if any), treating "UTF-16", "UTF-16BE", and "UTF-16LE" as
182             * identical.
183             */
184             if (jspConfigPageEnc != null && !jspConfigPageEnc.equals(sourceEnc)
185                     && (!jspConfigPageEnc.startsWith("UTF-16")
186                     || !sourceEnc.startsWith("UTF-16"))) {
187                 err.jspError("jsp.error.prolog_config_encoding_mismatch",
188                         sourceEnc, jspConfigPageEnc);
189             }
190         }
191 
192         // Dispatch to the appropriate parser
193         if (isXml) {
194             // JSP document (XML syntax)
195             // InputStream for jspx page is created and properly closed in
196             // JspDocumentParser.
197             parsedPage = JspDocumentParser.parse(this, absFileName,
198                     jarFile, parent,
199                     isTagFile, directiveOnly,
200                     sourceEnc,
201                     jspConfigPageEnc,
202                     isEncodingSpecifiedInProlog);
203         } else {
204             // Standard syntax
205             InputStreamReader inStreamReader = null;
206             try {
207                 inStreamReader = JspUtil.getReader(absFileName, sourceEnc,
208                         jarFile, ctxt, err);
209                 JspReader jspReader = new JspReader(ctxt, absFileName,
210                         sourceEnc, inStreamReader,
211                         err);
212                 parsedPage = Parser.parse(this, jspReader, parent, isTagFile,
213                         directiveOnly, jarFileUrl,
214                         sourceEnc, jspConfigPageEnc,
215                         isDefaultPageEncoding);
216             } finally {
217                 if (inStreamReader != null) {
218                     try {
219                         inStreamReader.close();
220                     } catch (Exception any) {
221                     }
222                 }
223             }
224         }
225 
226         if (jarFile != null) {
227             try {
228                 jarFile.close();
229             } catch (Throwable t) {
230             }
231         }
232 
233         baseDirStack.pop();
234 
235         return parsedPage;
236     }
237 
238     /*
239      * Checks to see if the given URI is matched by a URL pattern specified in
240      * a jsp-property-group in web.xml, and if so, returns the value of the
241      * <page-encoding> element.
242      *
243      * @param absFileName The URI to match
244      *
245      * @return The value of the <page-encoding> attribute of the 
246      * jsp-property-group with matching URL pattern
247      */
248     private String getJspConfigPageEncoding(String absFileName)
249             throws JasperException {
250 
251         JspConfig jspConfig = ctxt.getOptions().getJspConfig();
252         JspConfig.JspProperty jspProperty
253                 = jspConfig.findJspProperty(absFileName);
254         return jspProperty.getPageEncoding();
255     }
256 
257     /***
258      * Determines the syntax (standard or XML) and page encoding properties
259      * for the given file, and stores them in the 'isXml' and 'sourceEnc'
260      * instance variables, respectively.
261      */
262     private void determineSyntaxAndEncoding(String absFileName,
263                                             JarFile jarFile,
264                                             String jspConfigPageEnc)
265             throws JasperException, IOException {
266 
267         isXml = false;
268 
269         /*
270        * 'true' if the syntax (XML or standard) of the file is given
271        * from external information: either via a JSP configuration element,
272        * the ".jspx" suffix, or the enclosing file (for included resources)
273        */
274         boolean isExternal = false;
275 
276         /*
277        * Indicates whether we need to revert from temporary usage of
278        * "ISO-8859-1" back to "UTF-8"
279        */
280         boolean revert = false;
281 
282         JspConfig jspConfig = ctxt.getOptions().getJspConfig();
283         JspConfig.JspProperty jspProperty = jspConfig.findJspProperty(
284                 absFileName);
285         if (jspProperty.isXml() != null) {
286             // If <is-xml> is specified in a <jsp-property-group>, it is used.
287             isXml = JspUtil.booleanValue(jspProperty.isXml());
288             isExternal = true;
289         } else if (absFileName.endsWith(".jspx")
290                 || absFileName.endsWith(".tagx")) {
291             isXml = true;
292             isExternal = true;
293         }
294 
295         if (isExternal && !isXml) {
296             // JSP (standard) syntax. Use encoding specified in jsp-config
297             // if provided.
298             sourceEnc = jspConfigPageEnc;
299             if (sourceEnc != null) {
300                 return;
301             }
302             // We don't know the encoding
303             sourceEnc = "ISO-8859-1";
304         } else {
305             // XML syntax or unknown, (auto)detect encoding ...
306             Object[] ret = XMLEncodingDetector.getEncoding(absFileName,
307                     jarFile, ctxt, err);
308             sourceEnc = (String) ret[0];
309             if (((Boolean) ret[1]).booleanValue()) {
310                 isEncodingSpecifiedInProlog = true;
311             }
312 
313             if (!isXml && sourceEnc.equals("UTF-8")) {
314                 /*
315              * We don't know if we're dealing with XML or standard syntax.
316              * Therefore, we need to check to see if the page contains
317              * a <jsp:root> element.
318              *
319              * We need to be careful, because the page may be encoded in
320              * ISO-8859-1 (or something entirely different), and may
321              * contain byte sequences that will cause a UTF-8 converter to
322              * throw exceptions.
323              *
324              * It is safe to use a source encoding of ISO-8859-1 in this
325              * case, as there are no invalid byte sequences in ISO-8859-1,
326              * and the byte/character sequences we're looking for (i.e.,
327              * <jsp:root>) are identical in either encoding (both UTF-8
328              * and ISO-8859-1 are extensions of ASCII).
329              */
330                 sourceEnc = "ISO-8859-1";
331                 revert = true;
332             }
333         }
334 
335         if (isXml) {
336             // (This implies 'isExternal' is TRUE.)
337             // We know we're dealing with a JSP document (via JSP config or
338             // ".jspx" suffix), so we're done.
339             return;
340         }
341 
342         /*
343        * At this point, 'isExternal' or 'isXml' is FALSE.
344        * Search for jsp:root action, in order to determine if we're dealing
345        * with XML or standard syntax (unless we already know what we're
346        * dealing with, i.e., when 'isExternal' is TRUE and 'isXml' is FALSE).
347        * No check for XML prolog, since nothing prevents a page from
348        * outputting XML and still using JSP syntax (in this case, the
349        * XML prolog is treated as template text).
350        */
351         JspReader jspReader = null;
352         try {
353             jspReader = new JspReader(ctxt, absFileName, sourceEnc, jarFile,
354                     err);
355         } catch (FileNotFoundException ex) {
356             throw new JasperException(ex);
357         }
358         jspReader.setSingleFile(true);
359         Mark startMark = jspReader.mark();
360         if (!isExternal) {
361             jspReader.reset(startMark);
362             if (hasJspRoot(jspReader)) {
363                 isXml = true;
364                 if (revert) sourceEnc = "UTF-8";
365                 return;
366             } else {
367                 isXml = false;
368             }
369         }
370 
371         /*
372        * At this point, we know we're dealing with JSP syntax.
373        * If an XML prolog is provided, it's treated as template text.
374        * Determine the page encoding from the page directive, unless it's
375        * specified via JSP config.
376        */
377         sourceEnc = jspConfigPageEnc;
378         if (sourceEnc == null) {
379             sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark);
380             if (sourceEnc == null) {
381                 // Default to "ISO-8859-1" per JSP spec
382                 sourceEnc = "ISO-8859-1";
383                 isDefaultPageEncoding = true;
384             }
385         }
386     }
387 
388     /*
389     * Determines page source encoding for page or tag file in JSP syntax,
390     * by reading (in this order) the value of the 'pageEncoding' page
391     * directive attribute, or the charset value of the 'contentType' page
392     * directive attribute.
393     *
394     * @return The page encoding, or null if not found
395     */
396     private String getPageEncodingForJspSyntax(JspReader jspReader,
397                                                Mark startMark)
398             throws JasperException {
399 
400         String encoding = null;
401         String saveEncoding = null;
402 
403         jspReader.reset(startMark);
404 
405         /*
406        * Determine page encoding from directive of the form <%@ page %>,
407        * <%@ tag %>, <jsp:directive.page > or <jsp:directive.tag >.
408        */
409         while (true) {
410             if (jspReader.skipUntil("<") == null) {
411                 break;
412             }
413             // If this is a comment, skip until its end
414             if (jspReader.matches("%--")) {
415                 if (jspReader.skipUntil("--%>") == null) {
416                     // error will be caught in Parser
417                     break;
418                 }
419                 continue;
420             }
421             boolean isDirective = jspReader.matches("%@");
422             if (isDirective) {
423                 jspReader.skipSpaces();
424             } else {
425                 isDirective = jspReader.matches("jsp:directive.");
426             }
427             if (!isDirective) {
428                 continue;
429             }
430 
431             // compare for "tag ", so we don't match "taglib"
432             if (jspReader.matches("tag ") || jspReader.matches("page")) {
433 
434                 jspReader.skipSpaces();
435                 Attributes attrs = Parser.parseAttributes(this, jspReader);
436                 encoding = getPageEncodingFromDirective(attrs, "pageEncoding");
437                 if (encoding != null) {
438                     break;
439                 }
440                 encoding = getPageEncodingFromDirective(attrs, "contentType");
441                 if (encoding != null) {
442                     saveEncoding = encoding;
443                 }
444             }
445         }
446 
447         if (encoding == null) {
448             encoding = saveEncoding;
449         }
450 
451         return encoding;
452     }
453 
454     /*
455      * Scans the given attributes for the attribute with the given name,
456      * which is either 'pageEncoding' or 'contentType', and returns the
457      * specified page encoding.
458      *
459      * In the case of 'contentType', the page encoding is taken from the
460      * content type's 'charset' component.
461      *
462      * @param attrs The page directive attributes
463      * @param attrName The name of the attribute to search for (either
464      * 'pageEncoding' or 'contentType')
465      *
466      * @return The page encoding, or null
467      */
468     private String getPageEncodingFromDirective(Attributes attrs,
469                                                 String attrName) {
470         String value = attrs.getValue(attrName);
471         if (attrName.equals("pageEncoding")) {
472             return value;
473         }
474 
475         // attrName = contentType
476         String contentType = value;
477         String encoding = null;
478         if (contentType != null) {
479             int loc = contentType.indexOf(CHARSET);
480             if (loc != -1) {
481                 encoding = contentType.substring(loc + CHARSET.length());
482             }
483         }
484 
485         return encoding;
486     }
487 
488     /*
489      * Resolve the name of the file and update baseDirStack() to keep track of
490      * the current base directory for each included file.
491      * The 'root' file is always an 'absolute' path, so no need to put an
492      * initial value in the baseDirStack.
493      */
494     private String resolveFileName(String inFileName) {
495         String fileName = inFileName.replace('//', '/');
496         String baseDir =
497                 fileName.substring(0, fileName.lastIndexOf("/") + 1);
498         baseDirStack.push(baseDir);
499         return fileName;
500     }
501 
502     /*
503      * Checks to see if the given page contains, as its first element, a <root>
504      * element whose prefix is bound to the JSP namespace, as in:
505      *
506      * <wombat:root xmlns:wombat="http://java.sun.com/JSP/Page" version="1.2">
507      *   ...
508      * </wombat:root>
509      *
510      * @param reader The reader for this page
511      *
512      * @return true if this page contains a root element whose prefix is bound
513      * to the JSP namespace, and false otherwise
514      */
515     private boolean hasJspRoot(JspReader reader) throws JasperException {
516 
517         // <prefix>:root must be the first element
518         Mark start = null;
519         while ((start = reader.skipUntil("<")) != null) {
520             int c = reader.nextChar();
521             if (c != '!' && c != '?') break;
522         }
523         if (start == null) {
524             return false;
525         }
526         Mark stop = reader.skipUntil(":root");
527         if (stop == null) {
528             return false;
529         }
530         // call substring to get rid of leading '<'
531         String prefix = reader.getText(start, stop).substring(1);
532 
533         start = stop;
534         stop = reader.skipUntil(">");
535         if (stop == null) {
536             return false;
537         }
538 
539         // Determine namespace associated with <root> element's prefix
540         String root = reader.getText(start, stop);
541         String xmlnsDecl = "xmlns:" + prefix;
542         int index = root.indexOf(xmlnsDecl);
543         if (index == -1) {
544             return false;
545         }
546         index += xmlnsDecl.length();
547         while (index < root.length()
548                 && Character.isWhitespace(root.charAt(index))) {
549             index++;
550         }
551         if (index < root.length() && root.charAt(index) == '=') {
552             index++;
553             while (index < root.length()
554                     && Character.isWhitespace(root.charAt(index))) {
555                 index++;
556             }
557             if (index < root.length() && root.charAt(index++) == '"'
558                     && root.regionMatches(index, JSP_URI, 0,
559                     JSP_URI.length())) {
560                 return true;
561             }
562         }
563 
564         return false;
565     }
566 
567     private JarFile getJarFile(URL jarFileUrl) throws IOException {
568         JarFile jarFile = null;
569 
570         if (jarFileUrl != null) {
571             JarURLConnection conn = (JarURLConnection) jarFileUrl.openConnection();
572             conn.setUseCaches(false);
573             conn.connect();
574             jarFile = conn.getJarFile();
575         }
576 
577         return jarFile;
578     }
579 
580 }