001    // Copyright 2004, 2005 The Apache Software Foundation
002    //
003    // Licensed under the Apache License, Version 2.0 (the "License");
004    // you may not use this file except in compliance with the License.
005    // You may obtain a copy of the License at
006    //
007    //     http://www.apache.org/licenses/LICENSE-2.0
008    //
009    // Unless required by applicable law or agreed to in writing, software
010    // distributed under the License is distributed on an "AS IS" BASIS,
011    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012    // See the License for the specific language governing permissions and
013    // limitations under the License.
014    
015    package org.apache.tapestry.util.xml;
016    
017    import java.io.IOException;
018    import java.io.InputStream;
019    import java.net.URL;
020    import java.util.ArrayList;
021    import java.util.HashMap;
022    import java.util.List;
023    import java.util.Map;
024    
025    import javax.xml.parsers.ParserConfigurationException;
026    import javax.xml.parsers.SAXParser;
027    import javax.xml.parsers.SAXParserFactory;
028    
029    import org.apache.commons.logging.Log;
030    import org.apache.commons.logging.LogFactory;
031    import org.apache.hivemind.ApplicationRuntimeException;
032    import org.apache.hivemind.HiveMind;
033    import org.apache.hivemind.Location;
034    import org.apache.hivemind.Resource;
035    import org.apache.hivemind.impl.LocationImpl;
036    import org.apache.tapestry.Tapestry;
037    import org.apache.tapestry.util.RegexpMatcher;
038    import org.xml.sax.Attributes;
039    import org.xml.sax.InputSource;
040    import org.xml.sax.Locator;
041    import org.xml.sax.SAXException;
042    import org.xml.sax.SAXParseException;
043    import org.xml.sax.helpers.DefaultHandler;
044    
045    /**
046     * A simplified version of org.apache.commons.digester.Digester. This version is without as
047     * many bells and whistles but has some key features needed when parsing a document (rather than a
048     * configuration file): <br>
049     * <ul>
050     * <li>Notifications for each bit of text</li>
051     * <li>Tracking of exact location within the document.</li>
052     * </ul>
053     * <p>
054     * Like Digester, there's an object stack and a rule stack. The rules are much simpler (more
055     * coding), in that there's a one-to-one relationship between an element and a rule.
056     * <p>
057     * Based on SAX2.
058     * 
059     * @author Howard Lewis Ship
060     * @since 3.0
061     */
062    
063    public class RuleDirectedParser extends DefaultHandler
064    {
065        private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class);
066    
067        private static SAXParserFactory _parserFactory;
068        
069        private Resource _documentLocation;
070    
071        private List _ruleStack = new ArrayList();
072    
073        private List _objectStack = new ArrayList();
074    
075        private Object _documentObject;
076    
077        private Locator _locator;
078    
079        private int _line = -1;
080    
081        private int _column = -1;
082    
083        private Location _location;
084    
085        private SAXParser _parser;
086    
087        private RegexpMatcher _matcher;
088    
089        private String _uri;
090    
091        private String _localName;
092    
093        private String _qName;
094    
095        /**
096         * Map of {@link IRule}keyed on the local name of the element.
097         */
098        private Map _ruleMap = new HashMap();
099    
100        /**
101         * Used to accumlate content provided by
102         * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}.
103         */
104    
105        private StringBuffer _contentBuffer = new StringBuffer();
106    
107        /**
108         * Map of paths to external entities (such as the DTD) keyed on public id.
109         */
110    
111        private Map _entities = new HashMap();
112    
113        public Object parse(Resource documentLocation)
114        {
115            if (LOG.isDebugEnabled())
116                LOG.debug("Parsing: " + documentLocation);
117    
118            try
119            {
120                _documentLocation = documentLocation;
121    
122                URL url = documentLocation.getResourceURL();
123    
124                if (url == null)
125                    throw new DocumentParseException(Tapestry.format(
126                            "RuleDrivenParser.resource-missing",
127                            documentLocation), documentLocation);
128    
129                return parse(url);
130            }
131            finally
132            {
133                _documentLocation = null;
134                _ruleStack.clear();
135                _objectStack.clear();
136                _documentObject = null;
137    
138                _uri = null;
139                _localName = null;
140                _qName = null;
141    
142                _line = -1;
143                _column = -1;
144                _location = null;
145                _locator = null;
146    
147                _contentBuffer.setLength(0);
148            }
149        }
150    
151        protected Object parse(URL url)
152        {
153            if (_parser == null)
154                _parser = constructParser();
155    
156            InputStream stream = null;
157    
158            try
159            {
160                stream = url.openStream();
161            }
162            catch (IOException ex)
163            {
164                throw new DocumentParseException(Tapestry.format(
165                        "RuleDrivenParser.unable-to-open-resource",
166                        url), _documentLocation, ex);
167            }
168    
169            InputSource source = new InputSource(stream);
170    
171            try
172            {
173                _parser.parse(source, this);
174    
175                stream.close();
176            }
177            catch (Exception ex)
178            {
179                throw new DocumentParseException(Tapestry.format(
180                        "RuleDrivenParser.parse-error",
181                        url,
182                        ex.getMessage()), getLocation(), ex);
183            }
184    
185            if (LOG.isDebugEnabled())
186                LOG.debug("Document parsed as: " + _documentObject);
187    
188            return _documentObject;
189        }
190    
191        /**
192         * Returns an {@link Location}representing the current position within the document (depending
193         * on the parser, this may be accurate to column number level).
194         */
195    
196        public Location getLocation()
197        {
198            if (_locator == null)
199                return null;
200    
201            int line = _locator.getLineNumber();
202            int column = _locator.getColumnNumber();
203    
204            if (_line != line || _column != column)
205            {
206                _location = null;
207                _line = line;
208                _column = column;
209            }
210    
211            if (_location == null)
212                _location = new LocationImpl(_documentLocation, _line, _column);
213    
214            return _location;
215        }
216    
217        /**
218         * Pushes an object onto the object stack. The first object pushed is the "document object", the
219         * root object returned by the parse.
220         */
221        public void push(Object object)
222        {
223            if (_documentObject == null)
224                _documentObject = object;
225    
226            push(_objectStack, object, "object stack");
227        }
228    
229        /**
230         * Returns the top object on the object stack.
231         */
232        public Object peek()
233        {
234            return peek(_objectStack, 0);
235        }
236    
237        /**
238         * Returns an object within the object stack, at depth. Depth 0 is the top object, depth 1 is
239         * the next-to-top object, etc.
240         */
241    
242        public Object peek(int depth)
243        {
244            return peek(_objectStack, depth);
245        }
246    
247        /**
248         * Removes and returns the top object on the object stack.
249         */
250        public Object pop()
251        {
252            return pop(_objectStack, "object stack");
253        }
254    
255        private Object pop(List list, String name)
256        {
257            Object result = list.remove(list.size() - 1);
258    
259            if (LOG.isDebugEnabled())
260                LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")");
261    
262            return result;
263        }
264    
265        private Object peek(List list, int depth)
266        {
267            return list.get(list.size() - 1 - depth);
268        }
269    
270        private void push(List list, Object object, String name)
271        {
272            if (LOG.isDebugEnabled())
273                LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")");
274    
275            list.add(object);
276        }
277    
278        /**
279         * Pushes a new rule onto the rule stack.
280         */
281    
282        protected void pushRule(IRule rule)
283        {
284            push(_ruleStack, rule, "rule stack");
285        }
286    
287        /**
288         * Returns the top rule on the stack.
289         */
290    
291        protected IRule peekRule()
292        {
293            return (IRule) peek(_ruleStack, 0);
294        }
295    
296        protected IRule popRule()
297        {
298            return (IRule) pop(_ruleStack, "rule stack");
299        }
300    
301        public void addRule(String localElementName, IRule rule)
302        {
303            _ruleMap.put(localElementName, rule);
304        }
305    
306        /**
307         * Registers a public id and corresponding input source. Generally, the source is a wrapper
308         * around an input stream to a package resource.
309         * 
310         * @param publicId
311         *            the public identifier to be registerred, generally the publicId of a DTD related
312         *            to the document being parsed
313         * @param entityPath
314         *            the resource path of the entity, typically a DTD file. Relative files names are
315         *            expected to be stored in the same package as the class file, otherwise a leading
316         *            slash is an absolute pathname within the classpath.
317         */
318    
319        public void registerEntity(String publicId, String entityPath)
320        {
321            if (LOG.isDebugEnabled())
322                LOG.debug("Registering " + publicId + " as " + entityPath);
323    
324            if (_entities == null)
325                _entities = new HashMap();
326    
327            _entities.put(publicId, entityPath);
328        }
329    
330        protected IRule selectRule(String localName, Attributes attributes)
331        {
332            IRule rule = (IRule) _ruleMap.get(localName);
333    
334            if (rule == null)
335                throw new DocumentParseException(Tapestry.format(
336                        "RuleDrivenParser.no-rule-for-element",
337                        localName), getLocation());
338    
339            return rule;
340        }
341    
342        /**
343         * Uses the {@link Locator}to track the position in the document as a {@link Location}. This
344         * is invoked once (before the initial element is parsed) and the Locator is retained and
345         * queried as to the current file location.
346         * 
347         * @see #getLocation()
348         */
349        public void setDocumentLocator(Locator locator)
350        {
351            _locator = locator;
352        }
353    
354        /**
355         * Accumulates the content in a buffer; the concatinated content is provided to the top rule
356         * just before any start or end tag.
357         */
358        public void characters(char[] ch, int start, int length) throws SAXException
359        {
360            _contentBuffer.append(ch, start, length);
361        }
362    
363        /**
364         * Pops the top rule off the stack and invokes {@link IRule#endElement(RuleDirectedParser)}.
365         */
366        public void endElement(String uri, String localName, String qName) throws SAXException
367        {
368            fireContentRule();
369    
370            _uri = uri;
371            _localName = localName;
372            _qName = qName;
373    
374            popRule().endElement(this);
375        }
376    
377        /**
378         * Ignorable content is ignored.
379         */
380        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
381        {
382        }
383    
384        /**
385         * Invokes {@link #selectRule(String, Attributes)}to choose a new rule, which is pushed onto
386         * the rule stack, then invokes {@link IRule#startElement(RuleDirectedParser, Attributes)}.
387         */
388        public void startElement(String uri, String localName, String qName, Attributes attributes)
389                throws SAXException
390        {
391            fireContentRule();
392    
393            _uri = uri;
394            _localName = localName;
395            _qName = qName;
396    
397            String name = extractName(uri, localName, qName);
398    
399            IRule newRule = selectRule(name, attributes);
400    
401            pushRule(newRule);
402    
403            newRule.startElement(this, attributes);
404        }
405    
406        private String extractName(String uri, String localName, String qName)
407        {
408            return HiveMind.isBlank(localName) ? qName : localName;
409        }
410    
411        /**
412         * Uses {@link javax.xml.parsers.SAXParserFactory}to create a instance of a validation SAX2
413         * parser.
414         */
415        protected synchronized SAXParser constructParser()
416        {
417            if (_parserFactory == null)
418            {
419                _parserFactory = SAXParserFactory.newInstance();
420                configureParserFactory(_parserFactory);
421            }
422    
423            try
424            {
425                return _parserFactory.newSAXParser();
426            }
427            catch (SAXException ex)
428            {
429                throw new ApplicationRuntimeException(ex);
430            }
431            catch (ParserConfigurationException ex)
432            {
433                throw new ApplicationRuntimeException(ex);
434            }
435    
436        }
437    
438        /**
439         * Configures a {@link SAXParserFactory}before {@link SAXParserFactory#newSAXParser()}is
440         * invoked. The default implementation sets validating to true and namespaceAware to false,
441         */
442    
443        protected void configureParserFactory(SAXParserFactory factory)
444        {
445            factory.setValidating(true);
446            factory.setNamespaceAware(false);
447        }
448    
449        /**
450         * Throws the exception.
451         */
452        public void error(SAXParseException ex) throws SAXException
453        {
454            fatalError(ex);
455        }
456    
457        /**
458         * Throws the exception.
459         */
460        public void fatalError(SAXParseException ex) throws SAXException
461        {
462            // Sometimes, a bad parse "corrupts" a parser so that it doesn't
463            // work properly for future parses (of valid documents),
464            // so discard it here.
465    
466            _parser = null;
467    
468            throw ex;
469        }
470    
471        /**
472         * Throws the exception.
473         */
474        public void warning(SAXParseException ex) throws SAXException
475        {
476            fatalError(ex);
477        }
478    
479        public InputSource resolveEntity(String publicId, String systemId) throws SAXException
480        {
481            String entityPath = null;
482    
483            if (LOG.isDebugEnabled())
484                LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = "
485                        + systemId);
486    
487            if (_entities != null)
488                entityPath = (String) _entities.get(publicId);
489    
490            if (entityPath == null)
491            {
492                if (LOG.isDebugEnabled())
493                    LOG.debug("Entity not found, using " + systemId);
494    
495                return null;
496            }
497    
498            InputStream stream = getClass().getResourceAsStream(entityPath);
499    
500            InputSource result = new InputSource(stream);
501    
502            if (result != null && LOG.isDebugEnabled())
503                LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")");
504    
505            return result;
506        }
507    
508        /**
509         * Validates that the input value matches against the specified Perl5 pattern. If valid, the
510         * method simply returns. If not a match, then an error message is generated (using the errorKey
511         * and the input value) and a {@link InvalidStringException}is thrown.
512         */
513    
514        public void validate(String value, String pattern, String errorKey)
515        {
516            if (_matcher == null)
517                _matcher = new RegexpMatcher();
518    
519            if (_matcher.matches(pattern, value))
520                return;
521    
522            throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation());
523        }
524    
525        public Resource getDocumentLocation()
526        {
527            return _documentLocation;
528        }
529    
530        /**
531         * Returns the localName for the current element.
532         * 
533         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
534         *      java.lang.String, org.xml.sax.Attributes)
535         */
536        public String getLocalName()
537        {
538            return _localName;
539        }
540    
541        /**
542         * Returns the qualified name for the current element.
543         * 
544         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
545         *      java.lang.String, org.xml.sax.Attributes)
546         */
547        public String getQName()
548        {
549            return _qName;
550        }
551    
552        /**
553         * Returns the URI for the current element.
554         * 
555         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
556         *      java.lang.String, org.xml.sax.Attributes)
557         */
558        public String getUri()
559        {
560            return _uri;
561        }
562    
563        private void fireContentRule()
564        {
565            String content = _contentBuffer.toString();
566            _contentBuffer.setLength(0);
567    
568            if (!_ruleStack.isEmpty())
569                peekRule().content(this, content);
570        }
571    
572    }