001 // Copyright 2004, 2005 The Apache Software Foundation 002 // 003 // Licensed under the Apache License, Version 2.0 (the "License"); 004 // you may not use this file except in compliance with the License. 005 // You may obtain a copy of the License at 006 // 007 // http://www.apache.org/licenses/LICENSE-2.0 008 // 009 // Unless required by applicable law or agreed to in writing, software 010 // distributed under the License is distributed on an "AS IS" BASIS, 011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 // See the License for the specific language governing permissions and 013 // limitations under the License. 014 015 package org.apache.tapestry.util.xml; 016 017 import java.io.IOException; 018 import java.io.InputStream; 019 import java.net.URL; 020 import java.util.ArrayList; 021 import java.util.HashMap; 022 import java.util.List; 023 import java.util.Map; 024 025 import javax.xml.parsers.ParserConfigurationException; 026 import javax.xml.parsers.SAXParser; 027 import javax.xml.parsers.SAXParserFactory; 028 029 import org.apache.commons.logging.Log; 030 import org.apache.commons.logging.LogFactory; 031 import org.apache.hivemind.ApplicationRuntimeException; 032 import org.apache.hivemind.HiveMind; 033 import org.apache.hivemind.Location; 034 import org.apache.hivemind.Resource; 035 import org.apache.hivemind.impl.LocationImpl; 036 import org.apache.tapestry.Tapestry; 037 import org.apache.tapestry.util.RegexpMatcher; 038 import org.xml.sax.Attributes; 039 import org.xml.sax.InputSource; 040 import org.xml.sax.Locator; 041 import org.xml.sax.SAXException; 042 import org.xml.sax.SAXParseException; 043 import org.xml.sax.helpers.DefaultHandler; 044 045 /** 046 * A simplified version of org.apache.commons.digester.Digester. This version is without as 047 * many bells and whistles but has some key features needed when parsing a document (rather than a 048 * configuration file): <br> 049 * <ul> 050 * <li>Notifications for each bit of text</li> 051 * <li>Tracking of exact location within the document.</li> 052 * </ul> 053 * <p> 054 * Like Digester, there's an object stack and a rule stack. The rules are much simpler (more 055 * coding), in that there's a one-to-one relationship between an element and a rule. 056 * <p> 057 * Based on SAX2. 058 * 059 * @author Howard Lewis Ship 060 * @since 3.0 061 */ 062 063 public class RuleDirectedParser extends DefaultHandler 064 { 065 private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class); 066 067 private static SAXParserFactory _parserFactory; 068 069 private Resource _documentLocation; 070 071 private List _ruleStack = new ArrayList(); 072 073 private List _objectStack = new ArrayList(); 074 075 private Object _documentObject; 076 077 private Locator _locator; 078 079 private int _line = -1; 080 081 private int _column = -1; 082 083 private Location _location; 084 085 private SAXParser _parser; 086 087 private RegexpMatcher _matcher; 088 089 private String _uri; 090 091 private String _localName; 092 093 private String _qName; 094 095 /** 096 * Map of {@link IRule}keyed on the local name of the element. 097 */ 098 private Map _ruleMap = new HashMap(); 099 100 /** 101 * Used to accumlate content provided by 102 * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}. 103 */ 104 105 private StringBuffer _contentBuffer = new StringBuffer(); 106 107 /** 108 * Map of paths to external entities (such as the DTD) keyed on public id. 109 */ 110 111 private Map _entities = new HashMap(); 112 113 public Object parse(Resource documentLocation) 114 { 115 if (LOG.isDebugEnabled()) 116 LOG.debug("Parsing: " + documentLocation); 117 118 try 119 { 120 _documentLocation = documentLocation; 121 122 URL url = documentLocation.getResourceURL(); 123 124 if (url == null) 125 throw new DocumentParseException(Tapestry.format( 126 "RuleDrivenParser.resource-missing", 127 documentLocation), documentLocation); 128 129 return parse(url); 130 } 131 finally 132 { 133 _documentLocation = null; 134 _ruleStack.clear(); 135 _objectStack.clear(); 136 _documentObject = null; 137 138 _uri = null; 139 _localName = null; 140 _qName = null; 141 142 _line = -1; 143 _column = -1; 144 _location = null; 145 _locator = null; 146 147 _contentBuffer.setLength(0); 148 } 149 } 150 151 protected Object parse(URL url) 152 { 153 if (_parser == null) 154 _parser = constructParser(); 155 156 InputStream stream = null; 157 158 try 159 { 160 stream = url.openStream(); 161 } 162 catch (IOException ex) 163 { 164 throw new DocumentParseException(Tapestry.format( 165 "RuleDrivenParser.unable-to-open-resource", 166 url), _documentLocation, ex); 167 } 168 169 InputSource source = new InputSource(stream); 170 171 try 172 { 173 _parser.parse(source, this); 174 175 stream.close(); 176 } 177 catch (Exception ex) 178 { 179 throw new DocumentParseException(Tapestry.format( 180 "RuleDrivenParser.parse-error", 181 url, 182 ex.getMessage()), getLocation(), ex); 183 } 184 185 if (LOG.isDebugEnabled()) 186 LOG.debug("Document parsed as: " + _documentObject); 187 188 return _documentObject; 189 } 190 191 /** 192 * Returns an {@link Location}representing the current position within the document (depending 193 * on the parser, this may be accurate to column number level). 194 */ 195 196 public Location getLocation() 197 { 198 if (_locator == null) 199 return null; 200 201 int line = _locator.getLineNumber(); 202 int column = _locator.getColumnNumber(); 203 204 if (_line != line || _column != column) 205 { 206 _location = null; 207 _line = line; 208 _column = column; 209 } 210 211 if (_location == null) 212 _location = new LocationImpl(_documentLocation, _line, _column); 213 214 return _location; 215 } 216 217 /** 218 * Pushes an object onto the object stack. The first object pushed is the "document object", the 219 * root object returned by the parse. 220 */ 221 public void push(Object object) 222 { 223 if (_documentObject == null) 224 _documentObject = object; 225 226 push(_objectStack, object, "object stack"); 227 } 228 229 /** 230 * Returns the top object on the object stack. 231 */ 232 public Object peek() 233 { 234 return peek(_objectStack, 0); 235 } 236 237 /** 238 * Returns an object within the object stack, at depth. Depth 0 is the top object, depth 1 is 239 * the next-to-top object, etc. 240 */ 241 242 public Object peek(int depth) 243 { 244 return peek(_objectStack, depth); 245 } 246 247 /** 248 * Removes and returns the top object on the object stack. 249 */ 250 public Object pop() 251 { 252 return pop(_objectStack, "object stack"); 253 } 254 255 private Object pop(List list, String name) 256 { 257 Object result = list.remove(list.size() - 1); 258 259 if (LOG.isDebugEnabled()) 260 LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")"); 261 262 return result; 263 } 264 265 private Object peek(List list, int depth) 266 { 267 return list.get(list.size() - 1 - depth); 268 } 269 270 private void push(List list, Object object, String name) 271 { 272 if (LOG.isDebugEnabled()) 273 LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")"); 274 275 list.add(object); 276 } 277 278 /** 279 * Pushes a new rule onto the rule stack. 280 */ 281 282 protected void pushRule(IRule rule) 283 { 284 push(_ruleStack, rule, "rule stack"); 285 } 286 287 /** 288 * Returns the top rule on the stack. 289 */ 290 291 protected IRule peekRule() 292 { 293 return (IRule) peek(_ruleStack, 0); 294 } 295 296 protected IRule popRule() 297 { 298 return (IRule) pop(_ruleStack, "rule stack"); 299 } 300 301 public void addRule(String localElementName, IRule rule) 302 { 303 _ruleMap.put(localElementName, rule); 304 } 305 306 /** 307 * Registers a public id and corresponding input source. Generally, the source is a wrapper 308 * around an input stream to a package resource. 309 * 310 * @param publicId 311 * the public identifier to be registerred, generally the publicId of a DTD related 312 * to the document being parsed 313 * @param entityPath 314 * the resource path of the entity, typically a DTD file. Relative files names are 315 * expected to be stored in the same package as the class file, otherwise a leading 316 * slash is an absolute pathname within the classpath. 317 */ 318 319 public void registerEntity(String publicId, String entityPath) 320 { 321 if (LOG.isDebugEnabled()) 322 LOG.debug("Registering " + publicId + " as " + entityPath); 323 324 if (_entities == null) 325 _entities = new HashMap(); 326 327 _entities.put(publicId, entityPath); 328 } 329 330 protected IRule selectRule(String localName, Attributes attributes) 331 { 332 IRule rule = (IRule) _ruleMap.get(localName); 333 334 if (rule == null) 335 throw new DocumentParseException(Tapestry.format( 336 "RuleDrivenParser.no-rule-for-element", 337 localName), getLocation()); 338 339 return rule; 340 } 341 342 /** 343 * Uses the {@link Locator}to track the position in the document as a {@link Location}. This 344 * is invoked once (before the initial element is parsed) and the Locator is retained and 345 * queried as to the current file location. 346 * 347 * @see #getLocation() 348 */ 349 public void setDocumentLocator(Locator locator) 350 { 351 _locator = locator; 352 } 353 354 /** 355 * Accumulates the content in a buffer; the concatinated content is provided to the top rule 356 * just before any start or end tag. 357 */ 358 public void characters(char[] ch, int start, int length) throws SAXException 359 { 360 _contentBuffer.append(ch, start, length); 361 } 362 363 /** 364 * Pops the top rule off the stack and invokes {@link IRule#endElement(RuleDirectedParser)}. 365 */ 366 public void endElement(String uri, String localName, String qName) throws SAXException 367 { 368 fireContentRule(); 369 370 _uri = uri; 371 _localName = localName; 372 _qName = qName; 373 374 popRule().endElement(this); 375 } 376 377 /** 378 * Ignorable content is ignored. 379 */ 380 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException 381 { 382 } 383 384 /** 385 * Invokes {@link #selectRule(String, Attributes)}to choose a new rule, which is pushed onto 386 * the rule stack, then invokes {@link IRule#startElement(RuleDirectedParser, Attributes)}. 387 */ 388 public void startElement(String uri, String localName, String qName, Attributes attributes) 389 throws SAXException 390 { 391 fireContentRule(); 392 393 _uri = uri; 394 _localName = localName; 395 _qName = qName; 396 397 String name = extractName(uri, localName, qName); 398 399 IRule newRule = selectRule(name, attributes); 400 401 pushRule(newRule); 402 403 newRule.startElement(this, attributes); 404 } 405 406 private String extractName(String uri, String localName, String qName) 407 { 408 return HiveMind.isBlank(localName) ? qName : localName; 409 } 410 411 /** 412 * Uses {@link javax.xml.parsers.SAXParserFactory}to create a instance of a validation SAX2 413 * parser. 414 */ 415 protected synchronized SAXParser constructParser() 416 { 417 if (_parserFactory == null) 418 { 419 _parserFactory = SAXParserFactory.newInstance(); 420 configureParserFactory(_parserFactory); 421 } 422 423 try 424 { 425 return _parserFactory.newSAXParser(); 426 } 427 catch (SAXException ex) 428 { 429 throw new ApplicationRuntimeException(ex); 430 } 431 catch (ParserConfigurationException ex) 432 { 433 throw new ApplicationRuntimeException(ex); 434 } 435 436 } 437 438 /** 439 * Configures a {@link SAXParserFactory}before {@link SAXParserFactory#newSAXParser()}is 440 * invoked. The default implementation sets validating to true and namespaceAware to false, 441 */ 442 443 protected void configureParserFactory(SAXParserFactory factory) 444 { 445 factory.setValidating(true); 446 factory.setNamespaceAware(false); 447 } 448 449 /** 450 * Throws the exception. 451 */ 452 public void error(SAXParseException ex) throws SAXException 453 { 454 fatalError(ex); 455 } 456 457 /** 458 * Throws the exception. 459 */ 460 public void fatalError(SAXParseException ex) throws SAXException 461 { 462 // Sometimes, a bad parse "corrupts" a parser so that it doesn't 463 // work properly for future parses (of valid documents), 464 // so discard it here. 465 466 _parser = null; 467 468 throw ex; 469 } 470 471 /** 472 * Throws the exception. 473 */ 474 public void warning(SAXParseException ex) throws SAXException 475 { 476 fatalError(ex); 477 } 478 479 public InputSource resolveEntity(String publicId, String systemId) throws SAXException 480 { 481 String entityPath = null; 482 483 if (LOG.isDebugEnabled()) 484 LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = " 485 + systemId); 486 487 if (_entities != null) 488 entityPath = (String) _entities.get(publicId); 489 490 if (entityPath == null) 491 { 492 if (LOG.isDebugEnabled()) 493 LOG.debug("Entity not found, using " + systemId); 494 495 return null; 496 } 497 498 InputStream stream = getClass().getResourceAsStream(entityPath); 499 500 InputSource result = new InputSource(stream); 501 502 if (result != null && LOG.isDebugEnabled()) 503 LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")"); 504 505 return result; 506 } 507 508 /** 509 * Validates that the input value matches against the specified Perl5 pattern. If valid, the 510 * method simply returns. If not a match, then an error message is generated (using the errorKey 511 * and the input value) and a {@link InvalidStringException}is thrown. 512 */ 513 514 public void validate(String value, String pattern, String errorKey) 515 { 516 if (_matcher == null) 517 _matcher = new RegexpMatcher(); 518 519 if (_matcher.matches(pattern, value)) 520 return; 521 522 throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation()); 523 } 524 525 public Resource getDocumentLocation() 526 { 527 return _documentLocation; 528 } 529 530 /** 531 * Returns the localName for the current element. 532 * 533 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, 534 * java.lang.String, org.xml.sax.Attributes) 535 */ 536 public String getLocalName() 537 { 538 return _localName; 539 } 540 541 /** 542 * Returns the qualified name for the current element. 543 * 544 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, 545 * java.lang.String, org.xml.sax.Attributes) 546 */ 547 public String getQName() 548 { 549 return _qName; 550 } 551 552 /** 553 * Returns the URI for the current element. 554 * 555 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, 556 * java.lang.String, org.xml.sax.Attributes) 557 */ 558 public String getUri() 559 { 560 return _uri; 561 } 562 563 private void fireContentRule() 564 { 565 String content = _contentBuffer.toString(); 566 _contentBuffer.setLength(0); 567 568 if (!_ruleStack.isEmpty()) 569 peekRule().content(this, content); 570 } 571 572 }