001// Copyright 2009, 2011 The Apache Software Foundation
002//
003// Licensed under the Apache License, Version 2.0 (the "License");
004// you may not use this file except in compliance with the License.
005// You may obtain a copy of the License at
006//
007//     http://www.apache.org/licenses/LICENSE-2.0
008//
009// Unless required by applicable law or agreed to in writing, software
010// distributed under the License is distributed on an "AS IS" BASIS,
011// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012// See the License for the specific language governing permissions and
013// limitations under the License.
014
015package org.apache.tapestry5.internal.services;
016
017import org.apache.tapestry5.internal.parser.*;
018import org.apache.tapestry5.ioc.Location;
019import org.apache.tapestry5.ioc.Resource;
020import org.apache.tapestry5.ioc.internal.util.CollectionFactory;
021import org.apache.tapestry5.ioc.internal.util.InternalUtils;
022import org.apache.tapestry5.ioc.internal.util.TapestryException;
023
024import javax.xml.namespace.QName;
025import java.net.URL;
026import java.util.List;
027import java.util.Map;
028import java.util.Set;
029import java.util.regex.Matcher;
030import java.util.regex.Pattern;
031
032import static org.apache.tapestry5.internal.services.SaxTemplateParser.Version.*;
033
034/**
035 * SAX-based template parser logic, taking a {@link Resource} to a Tapestry
036 * template file and returning
037 * a {@link ComponentTemplate}.
038 * <p/>
039 * Earlier versions of this code used the StAX (streaming XML parser), but that
040 * was really, really bad for Google App Engine. This version uses SAX under the
041 * covers, but kind of replicates the important bits of the StAX API as
042 * {@link XMLTokenStream}.
043 *
044 * @since 5.2.0
045 */
046@SuppressWarnings(
047        {"JavaDoc"})
048public class SaxTemplateParser
049{
050    private static final String MIXINS_ATTRIBUTE_NAME = "mixins";
051
052    private static final String TYPE_ATTRIBUTE_NAME = "type";
053
054    private static final String ID_ATTRIBUTE_NAME = "id";
055
056    public static final String XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace";
057
058    private static final Map<String, Version> NAMESPACE_URI_TO_VERSION = CollectionFactory.newMap();
059
060    {
061        NAMESPACE_URI_TO_VERSION.put("http://tapestry.apache.org/schema/tapestry_5_0_0.xsd", T_5_0);
062        NAMESPACE_URI_TO_VERSION.put("http://tapestry.apache.org/schema/tapestry_5_1_0.xsd", T_5_1);
063        NAMESPACE_URI_TO_VERSION.put("http://tapestry.apache.org/schema/tapestry_5_3.xsd", T_5_3);
064    }
065
066    /**
067     * Special namespace used to denote Block parameters to components, as a
068     * (preferred) alternative to the t:parameter
069     * element. The simple element name is the name of the parameter.
070     */
071    private static final String TAPESTRY_PARAMETERS_URI = "tapestry:parameter";
072
073    /**
074     * URI prefix used to identify a Tapestry library, the remainder of the URI
075     * becomes a prefix on the element name.
076     */
077    private static final String LIB_NAMESPACE_URI_PREFIX = "tapestry-library:";
078
079    /**
080     * Pattern used to parse the path portion of the library namespace URI. A
081     * series of simple identifiers with slashes
     * allowed as separators.
083     */
084
085    private static final Pattern LIBRARY_PATH_PATTERN = Pattern.compile("^[a-z]\\w*(/[a-z]\\w*)*$",
086            Pattern.CASE_INSENSITIVE);
087
088    private static final Pattern ID_PATTERN = Pattern.compile("^[a-z]\\w*$",
089            Pattern.CASE_INSENSITIVE);
090
091    /**
092     * Any amount of mixed simple whitespace (space, tab, form feed) mixed with
093     * at least one carriage return or line
094     * feed, followed by any amount of whitespace. Will be reduced to a single
095     * linefeed.
096     */
097    private static final Pattern REDUCE_LINEBREAKS_PATTERN = Pattern.compile(
098            "[ \\t\\f]*[\\r\\n]\\s*", Pattern.MULTILINE);
099
100    /**
101     * Used when compressing whitespace, matches any sequence of simple
102     * whitespace (space, tab, formfeed). Applied after
103     * REDUCE_LINEBREAKS_PATTERN.
104     */
105    private static final Pattern REDUCE_WHITESPACE_PATTERN = Pattern.compile("[ \\t\\f]+",
106            Pattern.MULTILINE);
107
    // Note the use of the negative lookahead, (?!\$\{): the captured expression may not
    // contain the start of another expansion. This prevents the pattern from
    // merging multiple expansions on the same text line into a single large
    // but invalid expansion.
112
113    private static final Pattern EXPANSION_PATTERN = Pattern.compile("\\$\\{\\s*(((?!\\$\\{).)*)\\s*}");
114    private static final char EXPANSION_STRING_DELIMITTER = '\'';
115    private static final char OPEN_BRACE = '{';
116    private static final char CLOSE_BRACE = '}';
117
118    private static final Set<String> MUST_BE_ROOT = CollectionFactory.newSet("extend", "container");
119
120    private final Resource resource;
121
122    private final XMLTokenStream tokenStream;
123
124    private final StringBuilder textBuffer = new StringBuilder();
125
126    private final List<TemplateToken> tokens = CollectionFactory.newList();
127
128    // This starts pointing at tokens but occasionally shifts to a list inside
129    // the overrides Map.
130    private List<TemplateToken> tokenAccumulator = tokens;
131
132    /**
133     * Primarily used as a set of componentIds (to check for duplicates and
134     * conflicts).
135     */
136    private final Map<String, Location> componentIds = CollectionFactory.newCaseInsensitiveMap();
137
138    /**
139     * Map from override id to a list of tokens; this actually works both for
140     * overrides defined by this template and
141     * overrides provided by this template.
142     */
143    private Map<String, List<TemplateToken>> overrides;
144
145    private boolean extension;
146
147    private Location textStartLocation;
148
149    private boolean active = true;
150
151    private final Map<String, Boolean> extensionPointIdSet = CollectionFactory.newCaseInsensitiveMap();
152
153    public SaxTemplateParser(Resource resource, Map<String, URL> publicIdToURL)
154    {
155        this.resource = resource;
156        this.tokenStream = new XMLTokenStream(resource, publicIdToURL);
157    }
158
159    public ComponentTemplate parse(boolean compressWhitespace)
160    {
161        try
162        {
163            tokenStream.parse();
164
165            TemplateParserState initialParserState = new TemplateParserState()
166                    .compressWhitespace(compressWhitespace);
167
168            root(initialParserState);
169
170            return new ComponentTemplateImpl(resource, tokens, componentIds, extension, overrides);
171        } catch (Exception ex)
172        {
173            throw new TapestryException(String.format("Failure parsing template %s: %s", resource,
174                    InternalUtils.toMessage(ex)), tokenStream.getLocation(), ex);
175        }
176
177    }
178
179    void root(TemplateParserState state)
180    {
181        while (active && tokenStream.hasNext())
182        {
183            switch (tokenStream.next())
184            {
185                case DTD:
186
187                    dtd();
188
189                    break;
190
191                case START_ELEMENT:
192
193                    rootElement(state);
194
195                    break;
196
197                case END_DOCUMENT:
198                    // Ignore it.
199                    break;
200
201                default:
202                    textContent(state);
203            }
204        }
205    }
206
207    private void rootElement(TemplateParserState initialState)
208    {
209        TemplateParserState state = setupForElement(initialState);
210
211        String uri = tokenStream.getNamespaceURI();
212        String name = tokenStream.getLocalName();
213        Version version = NAMESPACE_URI_TO_VERSION.get(uri);
214
215        if (T_5_1.before(version))
216        {
217            if (name.equalsIgnoreCase("extend"))
218            {
219                extend(state);
220                return;
221            }
222        }
223
224        if (version != null)
225        {
226            if (name.equalsIgnoreCase("container"))
227            {
228                container(state);
229                return;
230            }
231        }
232
233        element(state);
234    }
235
236    private void extend(TemplateParserState state)
237    {
238        extension = true;
239
240        while (active)
241        {
242            switch (tokenStream.next())
243            {
244                case START_ELEMENT:
245
246                    if (T_5_1.before(NAMESPACE_URI_TO_VERSION.get(tokenStream.getNamespaceURI()))
247                            && tokenStream.getLocalName().equalsIgnoreCase("replace"))
248                    {
249                        replace(state);
250                        break;
251                    }
252
253                    throw new RuntimeException("Child element of <extend> must be <replace>.");
254
255                case END_ELEMENT:
256
257                    return;
258
259                // Ignore spaces and characters inside <extend>.
260
261                case COMMENT:
262                case SPACE:
263                    break;
264
265                // Other content (characters, etc.) are forbidden.
266
267                case CHARACTERS:
268                    if (InternalUtils.isBlank(tokenStream.getText()))
269                        break;
270
271                default:
272                    unexpectedEventType();
273            }
274        }
275    }
276
277    private void replace(TemplateParserState state)
278    {
279        String id = getRequiredIdAttribute();
280
281        addContentToOverride(setupForElement(state), id);
282    }
283
284    private void unexpectedEventType()
285    {
286        XMLTokenType eventType = tokenStream.getEventType();
287
288        throw new IllegalStateException(String.format("Unexpected XML parse event %s.", eventType
289                .name()));
290    }
291
292    private void dtd()
293    {
294        DTDData dtdInfo = tokenStream.getDTDInfo();
295
296        tokenAccumulator.add(new DTDToken(dtdInfo.rootName, dtdInfo.publicId, dtdInfo
297                .systemId, getLocation()));
298    }
299
300    private Location getLocation()
301    {
302        return tokenStream.getLocation();
303    }
304
305    /**
306     * Processes an element through to its matching end tag.
307     * <p/>
308     * An element can be:
309     * <p/>
310     * a Tapestry component via &lt;t:type&gt;
311     * <p/>
312     * a Tapestry component via t:type="type" and/or t:id="id"
313     * <p/>
314     * a Tapestry component via a library namespace
315     * <p/>
316     * A parameter element via &lt;t:parameter&gt;
317     * <p/>
318     * A parameter element via &lt;p:name&gt;
319     * <p/>
320     * A &lt;t:remove&gt; element (in the 5.1 schema)
321     * <p/>
322     * A &lt;t:content&gt; element (in the 5.1 schema)
323     * <p/>
324     * A &lt;t:block&gt; element
325     * <p/>
326     * The body &lt;t:body&gt;
327     * <p/>
328     * An ordinary element
329     */
330    void element(TemplateParserState initialState)
331    {
332        TemplateParserState state = setupForElement(initialState);
333
334        String uri = tokenStream.getNamespaceURI();
335        String name = tokenStream.getLocalName();
336        Version version = NAMESPACE_URI_TO_VERSION.get(uri);
337
338        if (T_5_1.before(version))
339        {
340
341            if (name.equalsIgnoreCase("remove"))
342            {
343                removeContent();
344
345                return;
346            }
347
348            if (name.equalsIgnoreCase("content"))
349            {
350                limitContent(state);
351
352                return;
353            }
354
355            if (name.equalsIgnoreCase("extension-point"))
356            {
357                extensionPoint(state);
358
359                return;
360            }
361
362            if (name.equalsIgnoreCase("replace"))
363            {
364                throw new RuntimeException(
365                        "The <replace> element may only appear directly within an extend element.");
366            }
367
368            if (MUST_BE_ROOT.contains(name))
369                mustBeRoot(name);
370        }
371
372        if (version != null)
373        {
374
375            if (name.equalsIgnoreCase("body"))
376            {
377                body();
378                return;
379            }
380
381            if (name.equalsIgnoreCase("container"))
382            {
383                mustBeRoot(name);
384            }
385
386            if (name.equalsIgnoreCase("block"))
387            {
388                block(state);
389                return;
390            }
391
392            if (name.equalsIgnoreCase("parameter"))
393            {
394                if (T_5_3.before(version))
395                {
396                    throw new RuntimeException(
397                            String.format("The <parameter> element has been deprecated in Tapestry 5.3 in favour of '%s' namespace.", TAPESTRY_PARAMETERS_URI));
398                }
399
400                classicParameter(state);
401
402                return;
403            }
404
405            possibleTapestryComponent(state, null, tokenStream.getLocalName().replace('.', '/'));
406
407            return;
408        }
409
410        if (uri != null && uri.startsWith(LIB_NAMESPACE_URI_PREFIX))
411        {
412            libraryNamespaceComponent(state);
413
414            return;
415        }
416
417        if (TAPESTRY_PARAMETERS_URI.equals(uri))
418        {
419            parameterElement(state);
420
421            return;
422        }
423
424        // Just an ordinary element ... unless it has t:id or t:type
425
426        possibleTapestryComponent(state, tokenStream.getLocalName(), null);
427    }
428
429    /**
430     * Processes a body of an element including text and (recursively) nested
431     * elements. Adds an
432     * {@link org.apache.tapestry5.internal.parser.TokenType#END_ELEMENT} token
433     * before returning.
434     *
435     * @param state
436     */
437    private void processBody(TemplateParserState state)
438    {
439        while (active)
440        {
441            switch (tokenStream.next())
442            {
443                case START_ELEMENT:
444
445                    // The recursive part: when we see a new element start.
446
447                    element(state);
448                    break;
449
450                case END_ELEMENT:
451
452                    // At the end of an element, we're done and can return.
453                    // This is the matching end element for the start element
454                    // that invoked this method.
455
456                    endElement(state);
457
458                    return;
459
460                default:
461                    textContent(state);
462            }
463        }
464    }
465
466    private TemplateParserState setupForElement(TemplateParserState initialState)
467    {
468        processTextBuffer(initialState);
469
470        return checkForXMLSpaceAttribute(initialState);
471    }
472
473    /**
474     * Handles an extension point, putting a RenderExtension token in position
475     * in the template.
476     *
477     * @param state
478     */
479    private void extensionPoint(TemplateParserState state)
480    {
481        // An extension point adds a token that represents where the override
482        // (either the default
483        // provided in the parent template, or the true override from a child
484        // template) is positioned.
485
486        String id = getRequiredIdAttribute();
487
488        if (extensionPointIdSet.containsKey(id))
489        {
490            throw new TapestryException(String.format("Extension point '%s' is already defined for this template. Extension point ids must be unique.", id), getLocation(), null);
491        } else
492        {
493            extensionPointIdSet.put(id, true);
494        }
495
496        tokenAccumulator.add(new ExtensionPointToken(id, getLocation()));
497
498        addContentToOverride(state.insideComponent(false), id);
499    }
500
501    private String getRequiredIdAttribute()
502    {
503        String id = getSingleParameter("id");
504
505        if (InternalUtils.isBlank(id))
506            throw new RuntimeException(String.format("The <%s> element must have an id attribute.",
507                    tokenStream.getLocalName()));
508
509        return id;
510    }
511
512    private void addContentToOverride(TemplateParserState state, String id)
513
514    {
515        List<TemplateToken> savedTokenAccumulator = tokenAccumulator;
516
517        tokenAccumulator = CollectionFactory.newList();
518
519        // TODO: id should probably be unique; i.e., you either define an
520        // override or you
521        // provide an override, but you don't do both in the same template.
522
523        if (overrides == null)
524            overrides = CollectionFactory.newCaseInsensitiveMap();
525
526        overrides.put(id, tokenAccumulator);
527
528        while (active)
529        {
530            switch (tokenStream.next())
531            {
532                case START_ELEMENT:
533                    element(state);
534                    break;
535
536                case END_ELEMENT:
537
538                    processTextBuffer(state);
539
540                    // Restore everthing to how it was before the
541                    // extention-point was reached.
542
543                    tokenAccumulator = savedTokenAccumulator;
544                    return;
545
546                default:
547                    textContent(state);
548            }
549        }
550    }
551
552    private void mustBeRoot(String name)
553    {
554        throw new RuntimeException(String.format(
555                "Element <%s> is only valid as the root element of a template.", name));
556    }
557
558    /**
559     * Triggered by &lt;t:content&gt; element; limits template content to just
560     * what's inside.
561     */
562
563    private void limitContent(TemplateParserState state)
564    {
565        if (state.isCollectingContent())
566            throw new IllegalStateException(
567                    "The <content> element may not be nested within another <content> element.");
568
569        TemplateParserState newState = state.collectingContent().insideComponent(false);
570
571        // Clear out any tokens that precede the <t:content> element
572
573        tokens.clear();
574
575        // I'm not happy about this; you really shouldn't define overrides just
576        // to clear them out,
577        // but it is consistent. Perhaps this should be an error if overrides is
578        // non-empty.
579
580        overrides = null;
581
582        // Make sure that if the <t:content> appears inside a <t:replace> or
583        // <t:extension-point>, that
584        // it is still handled correctly.
585
586        tokenAccumulator = tokens;
587
588        while (active)
589        {
590            switch (tokenStream.next())
591            {
592                case START_ELEMENT:
593                    element(newState);
594                    break;
595
596                case END_ELEMENT:
597
598                    // The active flag is global, once we hit it, the entire
599                    // parse is aborted, leaving
600                    // tokens with just tokens defined inside <t:content>.
601
602                    active = false;
603
604                    break;
605
606                default:
607                    textContent(state);
608            }
609        }
610
611    }
612
613    private void removeContent()
614    {
615        int depth = 1;
616
617        while (active)
618        {
619            switch (tokenStream.next())
620            {
621                case START_ELEMENT:
622                    depth++;
623                    break;
624
625                // The matching end element.
626
627                case END_ELEMENT:
628                    depth--;
629
630                    if (depth == 0)
631                        return;
632
633                    break;
634
635                default:
636                    // Ignore anything else (text, comments, etc.)
637            }
638        }
639    }
640
641    private String nullForBlank(String input)
642    {
643        return InternalUtils.isBlank(input) ? null : input;
644    }
645
646    /**
647     * Added in release 5.1.
648     */
649    private void libraryNamespaceComponent(TemplateParserState state)
650    {
651        String uri = tokenStream.getNamespaceURI();
652
653        // The library path is encoded into the namespace URI.
654
655        String path = uri.substring(LIB_NAMESPACE_URI_PREFIX.length());
656
657        if (!LIBRARY_PATH_PATTERN.matcher(path).matches())
658            throw new RuntimeException(ServicesMessages.invalidPathForLibraryNamespace(uri));
659
660        possibleTapestryComponent(state, null, path + "/" + tokenStream.getLocalName());
661    }
662
663    /**
664     * @param elementName
665     * @param identifiedType the type of the element, usually null, but may be the
666     *                       component type derived from element
667     */
668    private void possibleTapestryComponent(TemplateParserState state, String elementName,
669                                           String identifiedType)
670    {
671        String id = null;
672        String type = identifiedType;
673        String mixins = null;
674
675        int count = tokenStream.getAttributeCount();
676
677        Location location = getLocation();
678
679        List<TemplateToken> attributeTokens = CollectionFactory.newList();
680
681        for (int i = 0; i < count; i++)
682        {
683            QName qname = tokenStream.getAttributeName(i);
684
685            if (isXMLSpaceAttribute(qname))
686                continue;
687
688            // The name will be blank for an xmlns: attribute
689
690            String localName = qname.getLocalPart();
691
692            if (InternalUtils.isBlank(localName))
693                continue;
694
695            String uri = qname.getNamespaceURI();
696
697            String value = tokenStream.getAttributeValue(i);
698
699            if (NAMESPACE_URI_TO_VERSION.containsKey(uri))
700            {
701                if (localName.equalsIgnoreCase(ID_ATTRIBUTE_NAME))
702                {
703                    id = nullForBlank(value);
704
705                    validateId(id, "invalid-component-id");
706
707                    continue;
708                }
709
710                if (type == null && localName.equalsIgnoreCase(TYPE_ATTRIBUTE_NAME))
711                {
712                    type = nullForBlank(value);
713                    continue;
714                }
715
716                if (localName.equalsIgnoreCase(MIXINS_ATTRIBUTE_NAME))
717                {
718                    mixins = nullForBlank(value);
719                    continue;
720                }
721
722                // Anything else is the name of a Tapestry component parameter
723                // that is simply
724                // not part of the template's doctype for the element being
725                // instrumented.
726            }
727
728            attributeTokens.add(new AttributeToken(uri, localName, value, location));
729        }
730
731        boolean isComponent = (id != null || type != null);
732
733        // If provided t:mixins but not t:id or t:type, then its not quite a
734        // component
735
736        if (mixins != null && !isComponent)
737            throw new TapestryException(ServicesMessages.mixinsInvalidWithoutIdOrType(elementName),
738                    location, null);
739
740        if (isComponent)
741        {
742            tokenAccumulator.add(new StartComponentToken(elementName, id, type, mixins, location));
743        } else
744        {
745            tokenAccumulator.add(new StartElementToken(tokenStream.getNamespaceURI(), elementName,
746                    location));
747        }
748
749        addDefineNamespaceTokens();
750
751        tokenAccumulator.addAll(attributeTokens);
752
753        if (id != null)
754            componentIds.put(id, location);
755
756        processBody(state.insideComponent(isComponent));
757    }
758
759    private void addDefineNamespaceTokens()
760    {
761        for (int i = 0; i < tokenStream.getNamespaceCount(); i++)
762        {
763            String uri = tokenStream.getNamespaceURI(i);
764
765            // These URIs are strictly part of the server-side Tapestry template
766            // and are not ever sent to the client.
767
768            if (NAMESPACE_URI_TO_VERSION.containsKey(uri))
769                continue;
770
771            if (uri.equals(TAPESTRY_PARAMETERS_URI))
772                continue;
773
774            if (uri.startsWith(LIB_NAMESPACE_URI_PREFIX))
775                continue;
776
777            tokenAccumulator.add(new DefineNamespacePrefixToken(uri, tokenStream
778                    .getNamespacePrefix(i), getLocation()));
779        }
780    }
781
782    private TemplateParserState checkForXMLSpaceAttribute(TemplateParserState state)
783    {
784        for (int i = 0; i < tokenStream.getAttributeCount(); i++)
785        {
786            QName qName = tokenStream.getAttributeName(i);
787
788            if (isXMLSpaceAttribute(qName))
789            {
790                boolean compress = !"preserve".equals(tokenStream.getAttributeValue(i));
791
792                return state.compressWhitespace(compress);
793            }
794        }
795
796        return state;
797    }
798
799    /**
800     * Processes the text buffer and then adds an end element token.
801     */
802    private void endElement(TemplateParserState state)
803    {
804        processTextBuffer(state);
805
806        tokenAccumulator.add(new EndElementToken(getLocation()));
807    }
808
809    /**
810     * Handler for Tapestry 5.0's "classic" &lt;t:parameter&gt; element. This
811     * turns into a {@link org.apache.tapestry5.internal.parser.ParameterToken}
812     * and the body and end element are provided normally.
813     */
814    private void classicParameter(TemplateParserState state)
815    {
816        String parameterName = getSingleParameter("name");
817
818        if (InternalUtils.isBlank(parameterName))
819            throw new TapestryException(ServicesMessages.parameterElementNameRequired(),
820                    getLocation(), null);
821
822        ensureParameterWithinComponent(state);
823
824        tokenAccumulator.add(new ParameterToken(parameterName, getLocation()));
825
826        processBody(state.insideComponent(false));
827    }
828
829    private void ensureParameterWithinComponent(TemplateParserState state)
830    {
831        if (!state.isInsideComponent())
832            throw new RuntimeException(
833                    "Block parameters are only allowed directly within component elements.");
834    }
835
836    /**
837     * Tapestry 5.1 uses a special namespace (usually mapped to "p:") and the
838     * name becomes the parameter element.
839     */
840    private void parameterElement(TemplateParserState state)
841    {
842        ensureParameterWithinComponent(state);
843
844        if (tokenStream.getAttributeCount() > 0)
845            throw new TapestryException(ServicesMessages.parameterElementDoesNotAllowAttributes(),
846                    getLocation(), null);
847
848        tokenAccumulator.add(new ParameterToken(tokenStream.getLocalName(), getLocation()));
849
850        processBody(state.insideComponent(false));
851    }
852
853    /**
854     * Checks that a body element is empty. Returns after the body's close
855     * element. Adds a single body token (but not an
856     * end token).
857     */
858    private void body()
859    {
860        tokenAccumulator.add(new BodyToken(getLocation()));
861
862        while (active)
863        {
864            switch (tokenStream.next())
865            {
866                case END_ELEMENT:
867                    return;
868
869                default:
870                    throw new IllegalStateException(ServicesMessages
871                            .contentInsideBodyNotAllowed(getLocation()));
872            }
873        }
874    }
875
876    /**
877     * Driven by the &lt;t:container&gt; element, this state adds elements for
878     * its body but not its start or end tags.
879     *
880     * @param state
881     */
882    private void container(TemplateParserState state)
883    {
884        while (active)
885        {
886            switch (tokenStream.next())
887            {
888                case START_ELEMENT:
889                    element(state);
890                    break;
891
892                // The matching end-element for the container. Don't add a
893                // token.
894
895                case END_ELEMENT:
896
897                    processTextBuffer(state);
898
899                    return;
900
901                default:
902                    textContent(state);
903            }
904        }
905    }
906
907    /**
908     * A block adds a token for its start tag and end tag and allows any content
909     * within.
910     */
911    private void block(TemplateParserState state)
912    {
913        String blockId = getSingleParameter("id");
914
915        validateId(blockId, "invalid-block-id");
916
917        tokenAccumulator.add(new BlockToken(blockId, getLocation()));
918
919        processBody(state.insideComponent(false));
920    }
921
922    private String getSingleParameter(String attributeName)
923    {
924        String result = null;
925
926        for (int i = 0; i < tokenStream.getAttributeCount(); i++)
927        {
928            QName qName = tokenStream.getAttributeName(i);
929
930            if (isXMLSpaceAttribute(qName))
931                continue;
932
933            if (qName.getLocalPart().equalsIgnoreCase(attributeName))
934            {
935                result = tokenStream.getAttributeValue(i);
936                continue;
937            }
938
939            // Only the named attribute is allowed.
940
941            throw new TapestryException(ServicesMessages.undefinedTapestryAttribute(tokenStream
942                    .getLocalName(), qName.toString(), attributeName), getLocation(), null);
943        }
944
945        return result;
946    }
947
948    private void validateId(String id, String messageKey)
949    {
950        if (id == null)
951            return;
952
953        if (ID_PATTERN.matcher(id).matches())
954            return;
955
956        // Not a match.
957
958        throw new TapestryException(ServicesMessages.invalidId(messageKey, id), getLocation(), null);
959    }
960
961    private boolean isXMLSpaceAttribute(QName qName)
962    {
963        return XML_NAMESPACE_URI.equals(qName.getNamespaceURI())
964                && "space".equals(qName.getLocalPart());
965    }
966
967    /**
968     * Processes text content if in the correct state, or throws an exception.
969     * This is used as a default for matching
970     * case statements.
971     *
972     * @param state
973     */
974    private void textContent(TemplateParserState state)
975    {
976        switch (tokenStream.getEventType())
977        {
978            case COMMENT:
979                comment(state);
980                break;
981
982            case CDATA:
983                cdata(state);
984                break;
985
986            case CHARACTERS:
987            case SPACE:
988                characters();
989                break;
990
991            default:
992                unexpectedEventType();
993        }
994    }
995
996    private void characters()
997    {
998        if (textStartLocation == null)
999            textStartLocation = getLocation();
1000
1001        textBuffer.append(tokenStream.getText());
1002    }
1003
1004    private void cdata(TemplateParserState state)
1005    {
1006        processTextBuffer(state);
1007
1008        tokenAccumulator.add(new CDATAToken(tokenStream.getText(), getLocation()));
1009    }
1010
1011    private void comment(TemplateParserState state)
1012    {
1013        processTextBuffer(state);
1014
1015        String comment = tokenStream.getText();
1016
1017        tokenAccumulator.add(new CommentToken(comment, getLocation()));
1018    }
1019
1020    /**
1021     * Processes the accumulated text in the text buffer as a text token.
1022     */
1023    private void processTextBuffer(TemplateParserState state)
1024    {
1025        if (textBuffer.length() != 0)
1026            convertTextBufferToTokens(state);
1027
1028        textStartLocation = null;
1029    }
1030
1031    private void convertTextBufferToTokens(TemplateParserState state)
1032    {
1033        String text = textBuffer.toString();
1034
1035        textBuffer.setLength(0);
1036
1037        if (state.isCompressWhitespace())
1038        {
1039            text = compressWhitespaceInText(text);
1040
1041            if (InternalUtils.isBlank(text))
1042                return;
1043        }
1044
1045        addTokensForText(text);
1046    }
1047
1048    /**
1049     * Reduces vertical whitespace to a single newline, then reduces horizontal
1050     * whitespace to a single space.
1051     *
1052     * @param text
1053     * @return compressed version of text
1054     */
1055    private String compressWhitespaceInText(String text)
1056    {
1057        String linebreaksReduced = REDUCE_LINEBREAKS_PATTERN.matcher(text).replaceAll("\n");
1058
1059        return REDUCE_WHITESPACE_PATTERN.matcher(linebreaksReduced).replaceAll(" ");
1060    }
1061
1062    /**
1063     * Scans the text, using a regular expression pattern, for expansion
1064     * patterns, and adds appropriate tokens for what
1065     * it finds.
1066     *
1067     * @param text to add as
1068     *             {@link org.apache.tapestry5.internal.parser.TextToken}s and
1069     *             {@link org.apache.tapestry5.internal.parser.ExpansionToken}s
1070     */
1071    private void addTokensForText(String text)
1072    {
1073        Matcher matcher = EXPANSION_PATTERN.matcher(text);
1074
1075        int startx = 0;
1076
1077        // The big problem with all this code is that everything gets assigned
1078        // to the
1079        // start of the text block, even if there are line breaks leading up to
1080        // it.
1081        // That's going to take a lot more work and there are bigger fish to
1082        // fry. In addition,
1083        // TAPESTRY-2028 means that the whitespace has likely been stripped out
1084        // of the text
1085        // already anyway.
1086        while (matcher.find())
1087        {
1088            int matchStart = matcher.start();
1089
1090            if (matchStart != startx)
1091            {
1092                String prefix = text.substring(startx, matchStart);
1093                tokenAccumulator.add(new TextToken(prefix, textStartLocation));
1094            }
1095
1096            // Group 1 includes the real text of the expansion, with whitespace
1097            // around the
1098            // expression (but inside the curly braces) excluded.
1099            // But note that we run into a problem.  The original 
1100            // EXPANSION_PATTERN used a reluctant quantifier to match the 
1101            // smallest instance of ${} possible.  But if you have ${'}'} or 
1102            // ${{'key': 'value'}} (maps, cf TAP5-1605) then you run into issues
1103            // b/c the expansion becomes {'key': 'value' which is wrong.
1104            // A fix to use greedy matching with negative lookahead to prevent 
1105            // ${...}...${...} all matching a single expansion is close, but 
1106            // has issues when an expansion is used inside a javascript function
1107            // (see TAP5-1620). The solution is to use the greedy 
1108            // EXPANSION_PATTERN as before to bound the search for a single 
1109            // expansion, then check for {} consistency, ignoring opening and 
1110            // closing braces that occur within '' (the property expression 
1111            // language doesn't support "" for strings). That should include: 
1112            // 'This string has a } in it' and 'This string has a { in it.'
1113            // Note also that the property expression language doesn't support
1114            // escaping the string character ('), so we don't have to worry 
1115            // about that. 
1116            String expression = matcher.group(1);
1117            //count of 'open' braces. Expression ends when it hits 0. In most cases,
1118            // it should end up as 1 b/c "expression" is everything inside ${}, so 
1119            // the following will typically not find the end of the expression.
1120            int openBraceCount = 1;
1121            int expressionEnd = expression.length();
1122            boolean inQuote = false;
1123            for (int i = 0; i < expression.length(); i++)
1124            {
1125                char c = expression.charAt(i);
1126                //basically, if we're inQuote, we ignore everything until we hit the quote end, so we only care if the character matches the quote start (meaning we're at the end of the quote).
1127                //note that I don't believe expression support escaped quotes...
1128                if (c == EXPANSION_STRING_DELIMITTER)
1129                {
1130                    inQuote = !inQuote;
1131                    continue;
1132                } else if (inQuote)
1133                {
1134                    continue;
1135                } else if (c == CLOSE_BRACE)
1136                {
1137                    openBraceCount--;
1138                    if (openBraceCount == 0)
1139                    {
1140                        expressionEnd = i;
1141                        break;
1142                    }
1143                } else if (c == OPEN_BRACE)
1144                {
1145                    openBraceCount++;
1146                }
1147            }
1148            if (expressionEnd < expression.length())
1149            {
1150                //then we gobbled up some } that we shouldn't have... like the closing } of a javascript
1151                //function.
1152                tokenAccumulator.add(new ExpansionToken(expression.substring(0, expressionEnd), textStartLocation));
1153                //can't just assign to 
1154                startx = matcher.start(1) + expressionEnd + 1;
1155            } else
1156            {
1157                tokenAccumulator.add(new ExpansionToken(expression.trim(), textStartLocation));
1158
1159                startx = matcher.end();
1160            }
1161        }
1162
1163        // Catch anything after the final regexp match.
1164
1165        if (startx < text.length())
1166            tokenAccumulator.add(new TextToken(text.substring(startx, text.length()),
1167                    textStartLocation));
1168    }
1169
1170    static enum Version
1171    {
1172        T_5_0(5, 0), T_5_1(5, 1), T_5_3(5, 3);
1173
1174        private int major;
1175        private int minor;
1176
1177
1178        private Version(int major, int minor)
1179        {
1180            this.major = major;
1181            this.minor = minor;
1182        }
1183
1184        public boolean before(Version other)
1185        {
1186            if (other == null)
1187                return false;
1188
1189            if (this == other)
1190                return true;
1191
1192            return major <= other.major && minor <= other.minor;
1193        }
1194    }
1195
1196}