001// Copyright 2009, 2011, 2012 The Apache Software Foundation
002//
003// Licensed under the Apache License, Version 2.0 (the "License");
004// you may not use this file except in compliance with the License.
005// You may obtain a copy of the License at
006//
007//     http://www.apache.org/licenses/LICENSE-2.0
008//
009// Unless required by applicable law or agreed to in writing, software
010// distributed under the License is distributed on an "AS IS" BASIS,
011// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012// See the License for the specific language governing permissions and
013// limitations under the License.
014
015package org.apache.tapestry5.internal.services;
016
017import org.apache.tapestry5.ioc.Location;
018import org.apache.tapestry5.ioc.Resource;
019import org.apache.tapestry5.ioc.internal.util.CollectionFactory;
020import org.apache.tapestry5.ioc.internal.util.InternalUtils;
021import org.apache.tapestry5.ioc.internal.util.LocationImpl;
022import org.xml.sax.*;
023import org.xml.sax.ext.Attributes2;
024import org.xml.sax.ext.LexicalHandler;
025import org.xml.sax.helpers.XMLReaderFactory;
026
027import javax.xml.namespace.QName;
028import java.io.*;
029import java.net.URL;
030import java.util.Collections;
031import java.util.List;
032import java.util.Map;
033
034/**
035 * Parses a document as a stream of XML tokens. It includes a special hack (as of Tapestry 5.3) to support the HTML5 doctype ({@code <!DOCTYPE html>})
036 * as if it were the XHTML transitional doctype
037 * ({@code <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">}).
038 */
039public class XMLTokenStream
040{
041
042    public static final String TRANSITIONAL_DOCTYPE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
043
044    private static final DTDData HTML5_DTD_DATA = new DTDData("html", null, null);
045
046    private final class SaxHandler implements LexicalHandler, EntityResolver, ContentHandler
047    {
048        private Locator locator;
049
050        private int currentLine = -1;
051
052        private Location cachedLocation;
053
054        private Location textLocation;
055
056        private final StringBuilder builder = new StringBuilder();
057
058        private boolean inCDATA, insideDTD;
059
060        private List<NamespaceMapping> namespaceMappings = CollectionFactory.newList();
061
062        private Location getLocation()
063        {
064            int line = locator.getLineNumber();
065
066            if (currentLine != line)
067                cachedLocation = null;
068
069            if (cachedLocation == null)
070            {
071                // lineOffset accounts for the extra line when a doctype is injected. The line number reported
072                // from the XML parser inlcudes the phantom doctype line, the lineOffset is used to subtract one
073                // to get the real line number.
074                cachedLocation = new LocationImpl(resource, line + lineOffset);
075            }
076
077            return cachedLocation;
078        }
079
080        private XMLToken add(XMLTokenType type)
081        {
082            XMLToken token = new XMLToken(type, getLocation());
083
084            tokens.add(token);
085
086            return token;
087        }
088
089        public InputSource resolveEntity(String publicId, String systemId) throws SAXException,
090                IOException
091        {
092            URL url = publicIdToURL.get(publicId);
093
094            try
095            {
096                if (url != null)
097                    return new InputSource(url.openStream());
098            } catch (IOException ex)
099            {
100                throw new SAXException(String.format("Unable to open stream for resource %s: %s",
101                        url, InternalUtils.toMessage(ex)), ex);
102            }
103
104            return null;
105        }
106
107        public void comment(char[] ch, int start, int length) throws SAXException
108        {
109            if (insideDTD)
110                return;
111
112            // TODO: Coalesce?
113            add(XMLTokenType.COMMENT).text = new String(ch, start, length);
114        }
115
116        public void startCDATA() throws SAXException
117        {
118            // TODO: Flush characters?
119
120            inCDATA = true;
121        }
122
123        public void endCDATA() throws SAXException
124        {
125            if (builder.length() != 0)
126            {
127                add(XMLTokenType.CDATA).text = builder.toString();
128            }
129
130            builder.setLength(0);
131            inCDATA = false;
132        }
133
134        public void characters(char[] ch, int start, int length) throws SAXException
135        {
136            if (inCDATA)
137            {
138                builder.append(ch, start, length);
139                return;
140            }
141
142            XMLToken token = new XMLToken(XMLTokenType.CHARACTERS, textLocation);
143            token.text = new String(ch, start, length);
144
145            tokens.add(token);
146        }
147
148        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
149        {
150            characters(ch, start, length);
151        }
152
153        public void startDTD(final String name, final String publicId, final String systemId)
154                throws SAXException
155        {
156            insideDTD = true;
157
158            if (!ignoreDTD)
159            {
160                DTDData data = html5DTD ? HTML5_DTD_DATA : new DTDData(name, publicId, systemId);
161
162                add(XMLTokenType.DTD).dtdData = data;
163            }
164        }
165
166        public void endDocument() throws SAXException
167        {
168            add(XMLTokenType.END_DOCUMENT);
169        }
170
171        public void endElement(String uri, String localName, String qName) throws SAXException
172        {
173            add(XMLTokenType.END_ELEMENT);
174        }
175
176        public void setDocumentLocator(Locator locator)
177        {
178            this.locator = locator;
179        }
180
181        /**
182         * Checks for the extra namespace injected when the transitional doctype is injected (which
183         * occurs when the template contains no doctype).
184         */
185        private boolean ignoreURI(String uri)
186        {
187            return ignoreDTD && uri.equals("http://www.w3.org/1999/xhtml");
188        }
189
190        public void startElement(String uri, String localName, String qName, Attributes attributes)
191                throws SAXException
192        {
193            XMLToken token = add(XMLTokenType.START_ELEMENT);
194
195            token.uri = ignoreURI(uri) ? "" : uri;
196            token.localName = localName;
197            token.qName = qName;
198
199            // The XML parser tends to reuse the same Attributes object, so
200            // capture the data out of it.
201
202            Attributes2 a2 = (attributes instanceof Attributes2) ? (Attributes2) attributes : null;
203
204            if (attributes.getLength() == 0)
205            {
206                token.attributes = Collections.emptyList();
207            } else
208            {
209                token.attributes = CollectionFactory.newList();
210
211                for (int i = 0; i < attributes.getLength(); i++)
212                {
213                    // Filter out attributes that are not present in the XML input stream, but were
214                    // instead provided by DTD defaulting.
215
216                    if (a2 != null && !a2.isSpecified(i))
217                    {
218                        continue;
219                    }
220
221                    String prefixedName = attributes.getQName(i);
222
223                    int lastColon = prefixedName.lastIndexOf(':');
224
225                    String prefix = lastColon > 0 ? prefixedName.substring(0, lastColon) : "";
226
227                    QName qname = new QName(attributes.getURI(i), attributes.getLocalName(i),
228                            prefix);
229
230                    token.attributes.add(new AttributeInfo(qname, attributes.getValue(i)));
231                }
232            }
233
234            token.namespaceMappings = CollectionFactory.newList(namespaceMappings);
235
236            namespaceMappings.clear();
237
238            // Any text collected starts here as well:
239
240            textLocation = getLocation();
241        }
242
243        public void startPrefixMapping(String prefix, String uri) throws SAXException
244        {
245            if (ignoreDTD && prefix.equals("") && uri.equals("http://www.w3.org/1999/xhtml"))
246            {
247                return;
248            }
249
250            namespaceMappings.add(new NamespaceMapping(prefix, uri));
251        }
252
253        public void endDTD() throws SAXException
254        {
255            insideDTD = false;
256        }
257
258        public void endEntity(String name) throws SAXException
259        {
260        }
261
262        public void startEntity(String name) throws SAXException
263        {
264        }
265
266        public void endPrefixMapping(String prefix) throws SAXException
267        {
268        }
269
270        public void processingInstruction(String target, String data) throws SAXException
271        {
272        }
273
274        public void skippedEntity(String name) throws SAXException
275        {
276        }
277
278        public void startDocument() throws SAXException
279        {
280        }
281    }
282
283    private int cursor = -1;
284
285    private final List<XMLToken> tokens = CollectionFactory.newList();
286
287    private final Resource resource;
288
289    private final Map<String, URL> publicIdToURL;
290
291    private Location exceptionLocation;
292
293    private boolean html5DTD, ignoreDTD;
294
295    private int lineOffset;
296
297    public XMLTokenStream(Resource resource, Map<String, URL> publicIdToURL)
298    {
299        this.resource = resource;
300        this.publicIdToURL = publicIdToURL;
301    }
302
303    public void parse() throws SAXException, IOException
304    {
305        SaxHandler handler = new SaxHandler();
306
307        XMLReader reader = XMLReaderFactory.createXMLReader();
308
309        reader.setContentHandler(handler);
310        reader.setEntityResolver(handler);
311        reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
312
313        InputStream stream = openStream();
314
315        try
316        {
317            reader.parse(new InputSource(stream));
318        } catch (IOException ex)
319        {
320            this.exceptionLocation = handler.getLocation();
321
322            throw ex;
323        } catch (SAXException ex)
324        {
325            this.exceptionLocation = handler.getLocation();
326
327            throw ex;
328        } catch (RuntimeException ex)
329        {
330            this.exceptionLocation = handler.getLocation();
331
332            throw ex;
333        } finally
334        {
335            InternalUtils.close(stream);
336        }
337    }
338
339    enum State
340    {
341        MAYBE_XML, MAYBE_DOCTYPE, JUST_COPY
342    }
343
344    private InputStream openStream() throws IOException
345    {
346        InputStream rawStream = resource.openStream();
347
348        InputStreamReader rawReader = new InputStreamReader(rawStream);
349        LineNumberReader reader = new LineNumberReader(rawReader);
350
351        ByteArrayOutputStream bos = new ByteArrayOutputStream(5000);
352        PrintWriter writer = new PrintWriter(bos);
353
354        State state = State.MAYBE_XML;
355
356        try
357        {
358            while (true)
359            {
360                String line = reader.readLine();
361
362                if (line == null)
363                {
364                    break;
365                }
366
367                switch (state)
368                {
369
370                    case MAYBE_XML:
371
372                        if (line.toLowerCase().startsWith("<?xml"))
373                        {
374                            writer.println(line);
375                            state = State.MAYBE_DOCTYPE;
376                            continue;
377                        }
378
379                    case MAYBE_DOCTYPE:
380
381                        if (line.trim().length() == 0)
382                        {
383                            writer.println(line);
384                            continue;
385                        }
386
387                        String lineLower = line.toLowerCase();
388
389                        if (lineLower.equals("<!doctype html>"))
390                        {
391                            html5DTD = true;
392                            writer.println(TRANSITIONAL_DOCTYPE);
393                            state = State.JUST_COPY;
394                            continue;
395                        }
396
397
398                        if (lineLower.startsWith("<!doctype"))
399                        {
400                            writer.println(line);
401                            state = State.JUST_COPY;
402                            continue;
403                        }
404
405                        // No doctype, let's provide one.
406
407                        ignoreDTD = true;
408                        lineOffset = -1;
409                        writer.println(TRANSITIONAL_DOCTYPE);
410
411                        state = State.JUST_COPY;
412
413                        // And drop down to writing out the actual line, and all following lines.
414
415                    case JUST_COPY:
416                        writer.println(line);
417                }
418            }
419        } finally
420        {
421            writer.close();
422            reader.close();
423        }
424
425        return new ByteArrayInputStream(bos.toByteArray());
426    }
427
428    private XMLToken token()
429    {
430        return tokens.get(cursor);
431    }
432
433    /**
434     * Returns the type of the next token.
435     */
436    public XMLTokenType next()
437    {
438        cursor++;
439
440        // TODO: Check for overflow?
441
442        return getEventType();
443    }
444
445    public int getAttributeCount()
446    {
447        return token().attributes.size();
448    }
449
450    public QName getAttributeName(int i)
451    {
452        return token().attributes.get(i).attributeName;
453    }
454
455    public DTDData getDTDInfo()
456    {
457        return token().dtdData;
458    }
459
460    public XMLTokenType getEventType()
461    {
462        return token().type;
463    }
464
465    public String getLocalName()
466    {
467        return token().localName;
468    }
469
470    public Location getLocation()
471    {
472        if (exceptionLocation != null)
473            return exceptionLocation;
474
475        return token().getLocation();
476    }
477
478    public int getNamespaceCount()
479    {
480        return token().namespaceMappings.size();
481    }
482
483    public String getNamespacePrefix(int i)
484    {
485        return token().namespaceMappings.get(i).prefix;
486    }
487
488    public String getNamespaceURI()
489    {
490        return token().uri;
491    }
492
493    public String getNamespaceURI(int i)
494    {
495        return token().namespaceMappings.get(i).uri;
496    }
497
498    public String getText()
499    {
500        return token().text;
501    }
502
503    public boolean hasNext()
504    {
505        return cursor < tokens.size() - 1;
506    }
507
508    public String getAttributeValue(int i)
509    {
510        return token().attributes.get(i).value;
511    }
512
513}