1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.apache.commons.jelly.tags.html;
17
18 import org.apache.commons.jelly.JellyTagException;
19 import org.apache.commons.jelly.XMLOutput;
20 import org.apache.commons.jelly.tags.xml.ParseTagSupport;
21
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24
25 import org.cyberneko.html.parsers.SAXParser;
26
27 import org.dom4j.Document;
28 import org.dom4j.io.SAXReader;
29
30 import org.xml.sax.SAXException;
31
32
33 /*** A tag which parses some HTML and defines a variable with the parsed Document.
34 * The HTML can either be specified as its body or can be passed in via the
35 * html property which can be a Reader, InputStream, URL or String URI.
36 *
37 * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
38 * @version $Revision: 1.9 $
39 */
40 public class ParseTag extends ParseTagSupport {
41
42 /*** The Log to which logging calls will be made. */
43 private static final Log log = LogFactory.getLog(ParseTag.class);
44
45 /*** The HTML to parse, either a String URI, a Reader or InputStream */
46 private Object html;
47 private String element = "match";
48 private String attribute = "no-change";
49
50 public ParseTag() {
51 }
52
53
54
55 public void doTag(XMLOutput output) throws JellyTagException {
56 if (getVar() == null) {
57 throw new IllegalArgumentException("The var attribute cannot be null");
58 }
59 Document document = null;
60 if (html == null) {
61 String text = getText();
62 if (text != null) {
63 document = parseText(text);
64 }
65 else {
66 document = parseBody(output);
67 }
68 }
69 else {
70 document = parse(html);
71 }
72 context.setVariable(getVar(), document);
73 }
74
75
76
77 /*** Sets the source of the HTML which is either a String URI, Reader or InputStream */
78 public void setHtml(Object html) {
79 this.html = html;
80 }
81
82 /***
83 * Sets whether attributes should be converted to a different case.
84 * Possible values are "upper", "lower" or "no-change"
85 *
86 * @param attribute The processing mode of attributes
87 */
88 public void setAttribute(String attribute) {
89 this.attribute = attribute;
90 }
91
92 /***
93 * Sets whether elements should be converted to a different case
94 * Possible values are "upper", "lower" or "match"
95 *
96 * @param element The processing mode of elements
97 */
98 public void setElement(String element) {
99 this.element = element;
100 }
101
102
103
104
105
106 /***
107 * Factory method to create a new SAXReader
108 */
109 protected SAXReader createSAXReader() throws SAXException {
110
111 SAXParser parser = new SAXParser();
112 parser.setProperty(
113 "http://cyberneko.org/html/properties/names/elems",
114 element
115 );
116 parser.setProperty(
117 "http://cyberneko.org/html/properties/names/attrs",
118 attribute
119 );
120 return new SAXReader( parser );
121 }
122 }