001 package org.apache.myfaces.tobago.util;
002
003 /*
004 * Licensed to the Apache Software Foundation (ASF) under one or more
005 * contributor license agreements. See the NOTICE file distributed with
006 * this work for additional information regarding copyright ownership.
007 * The ASF licenses this file to You under the Apache License, Version 2.0
008 * (the "License"); you may not use this file except in compliance with
009 * the License. You may obtain a copy of the License at
010 *
011 * http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020 import java.io.IOException;
021 import java.io.Writer;
022
023 /**
024 * User: weber
025 * Date: Jun 28, 2005
026 * Time: 2:07:29 PM
027 */
028 public final class HtmlWriterUtil {
029
030 private static final char[][] CHARS_TO_ESCAPE;
031
032 static {
033 // init lookup table
034 CHARS_TO_ESCAPE = new char[0xA0][];
035 CHARS_TO_ESCAPE['"'] = """.toCharArray();
036 CHARS_TO_ESCAPE['&'] = "&".toCharArray();
037 CHARS_TO_ESCAPE['<'] = "<".toCharArray();
038 CHARS_TO_ESCAPE['>'] = ">".toCharArray();
039 }
040
041 private final Writer out;
042
043 private final ResponseWriterBuffer buffer;
044
045 private final boolean utf8;
046
047 public HtmlWriterUtil(final Writer out, final String characterEncoding) {
048 this.out = out;
049 utf8 = "utf-8".equalsIgnoreCase(characterEncoding);
050 buffer = new ResponseWriterBuffer(out);
051 }
052
053 public void writeAttributeValue(final String text)
054 throws IOException {
055 writeEncodedValue(text.toCharArray(), 0, text.length(), true);
056 }
057
058 public void writeText(final String text) throws IOException {
059 writeEncodedValue(text.toCharArray(), 0, text.length(), false);
060 }
061
062 public void writeText(final char[] text, final int start, final int length)
063 throws IOException {
064 writeEncodedValue(text, start, length, false);
065 }
066
067 private void writeEncodedValue(final char[] text, final int start,
068 final int length, final boolean isAttribute)
069 throws IOException {
070
071 int localIndex = -1;
072
073 final int end = start + length;
074 for (int i = start; i < end; i++) {
075 char ch = text[i];
076 if (ch >= CHARS_TO_ESCAPE.length || CHARS_TO_ESCAPE[ch] != null) {
077 localIndex = i;
078 break;
079 }
080 }
081
082 if (localIndex == -1) {
083 // no need to escape
084 out.write(text, start, length);
085 } else {
086 // write until localIndex and then encode the remainder
087 out.write(text, start, localIndex);
088
089 for (int i = localIndex; i < end; i++) {
090 final char ch = text[i];
091
092 // Tilde or less...
093 if (ch < CHARS_TO_ESCAPE.length) {
094 if (isAttribute && ch == '&' && (i + 1 < end) && text[i + 1] == '{') {
095 // HTML 4.0, section B.7.1: ampersands followed by
096 // an open brace don't get escaped
097 buffer.addToBuffer('&');
098 } else if (CHARS_TO_ESCAPE[ch] != null) {
099 buffer.addToBuffer(CHARS_TO_ESCAPE[ch]);
100 } else {
101 buffer.addToBuffer(ch);
102 }
103 } else if (utf8) {
104 buffer.addToBuffer(ch);
105 } else if (ch <= 0xff) {
106 // ISO-8859-1 entities: encode as needed
107 buffer.flushBuffer();
108
109 out.write('&');
110 char[] chars = ISO8859_1_ENTITIES[ch - 0xA0];
111 out.write(chars, 0, chars.length);
112 out.write(';');
113 } else {
114 buffer.flushBuffer();
115
116 // Double-byte characters to encode.
117 // PENDING: when outputting to an encoding that
118 // supports double-byte characters (UTF-8, for example),
119 // we should not be encoding
120 writeDecRef(ch);
121 }
122 }
123
124 buffer.flushBuffer();
125 }
126 }
127
128
129 /**
130 * Writes a character as a decimal escape. Hex escapes are smaller than
131 * the decimal version, but Netscape didn't support hex escapes until
132 * 4.7.4.
133 */
134 private void writeDecRef(final char ch) throws IOException {
135 if (ch == '\u20ac') {
136 out.write("€");
137 return;
138 }
139 out.write("&#");
140 // Formerly used String.valueOf(). This version tests out
141 // about 40% faster in a microbenchmark (and on systems where GC is
142 // going gonzo, it should be even better)
143 int i = (int) ch;
144 if (i > 10000) {
145 out.write('0' + (i / 10000));
146 i = i % 10000;
147 out.write('0' + (i / 1000));
148 i = i % 1000;
149 out.write('0' + (i / 100));
150 i = i % 100;
151 out.write('0' + (i / 10));
152 i = i % 10;
153 out.write('0' + i);
154 } else if (i > 1000) {
155 out.write('0' + (i / 1000));
156 i = i % 1000;
157 out.write('0' + (i / 100));
158 i = i % 100;
159 out.write('0' + (i / 10));
160 i = i % 10;
161 out.write('0' + i);
162 } else {
163 out.write('0' + (i / 100));
164 i = i % 100;
165 out.write('0' + (i / 10));
166 i = i % 10;
167 out.write('0' + i);
168 }
169
170 out.write(';');
171 }
172
173 public static boolean attributeValueMustEscaped(final String name) {
174 // this is 30% faster then the .equals(name) version
175 // tested with 100 loops over 19871 names
176 // (extracted from logfile over all demo pages)
177
178 try {
179 switch (name.charAt(0)) {
180 case 'i': // 'id'
181 if (name.length() == 2 && name.charAt(1) == 'd') {
182 return false;
183 }
184 break;
185 case 'n': // 'name'
186 if (name.length() == 4 && name.charAt(1) == 'a' && name.charAt(2) == 'm'
187 && name.charAt(3) == 'e') {
188 return false;
189 }
190 break;
191 case 'c': // 'class'
192 if (name.length() == 5 && name.charAt(1) == 'l' && name.charAt(2) == 'a'
193 && name.charAt(3) == 's' && name.charAt(4) == 's') {
194 return false;
195 }
196 break;
197 default:
198 return true;
199 }
200 } catch (NullPointerException e) {
201 // ignore
202 } catch (StringIndexOutOfBoundsException e) {
203 // ignore
204 }
205 return true;
206 }
207
208 //
209 // Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF
210 //
211 private static final char [][] ISO8859_1_ENTITIES = new char [][]{
212 "nbsp".toCharArray(),
213 "iexcl".toCharArray(),
214 "cent".toCharArray(),
215 "pound".toCharArray(),
216 "curren".toCharArray(),
217 "yen".toCharArray(),
218 "brvbar".toCharArray(),
219 "sect".toCharArray(),
220 "uml".toCharArray(),
221 "copy".toCharArray(),
222 "ordf".toCharArray(),
223 "laquo".toCharArray(),
224 "not".toCharArray(),
225 "shy".toCharArray(),
226 "reg".toCharArray(),
227 "macr".toCharArray(),
228 "deg".toCharArray(),
229 "plusmn".toCharArray(),
230 "sup2".toCharArray(),
231 "sup3".toCharArray(),
232 "acute".toCharArray(),
233 "micro".toCharArray(),
234 "para".toCharArray(),
235 "middot".toCharArray(),
236 "cedil".toCharArray(),
237 "sup1".toCharArray(),
238 "ordm".toCharArray(),
239 "raquo".toCharArray(),
240 "frac14".toCharArray(),
241 "frac12".toCharArray(),
242 "frac34".toCharArray(),
243 "iquest".toCharArray(),
244 "Agrave".toCharArray(),
245 "Aacute".toCharArray(),
246 "Acirc".toCharArray(),
247 "Atilde".toCharArray(),
248 "Auml".toCharArray(),
249 "Aring".toCharArray(),
250 "AElig".toCharArray(),
251 "Ccedil".toCharArray(),
252 "Egrave".toCharArray(),
253 "Eacute".toCharArray(),
254 "Ecirc".toCharArray(),
255 "Euml".toCharArray(),
256 "Igrave".toCharArray(),
257 "Iacute".toCharArray(),
258 "Icirc".toCharArray(),
259 "Iuml".toCharArray(),
260 "ETH".toCharArray(),
261 "Ntilde".toCharArray(),
262 "Ograve".toCharArray(),
263 "Oacute".toCharArray(),
264 "Ocirc".toCharArray(),
265 "Otilde".toCharArray(),
266 "Ouml".toCharArray(),
267 "times".toCharArray(),
268 "Oslash".toCharArray(),
269 "Ugrave".toCharArray(),
270 "Uacute".toCharArray(),
271 "Ucirc".toCharArray(),
272 "Uuml".toCharArray(),
273 "Yacute".toCharArray(),
274 "THORN".toCharArray(),
275 "szlig".toCharArray(),
276 "agrave".toCharArray(),
277 "aacute".toCharArray(),
278 "acirc".toCharArray(),
279 "atilde".toCharArray(),
280 "auml".toCharArray(),
281 "aring".toCharArray(),
282 "aelig".toCharArray(),
283 "ccedil".toCharArray(),
284 "egrave".toCharArray(),
285 "eacute".toCharArray(),
286 "ecirc".toCharArray(),
287 "euml".toCharArray(),
288 "igrave".toCharArray(),
289 "iacute".toCharArray(),
290 "icirc".toCharArray(),
291 "iuml".toCharArray(),
292 "eth".toCharArray(),
293 "ntilde".toCharArray(),
294 "ograve".toCharArray(),
295 "oacute".toCharArray(),
296 "ocirc".toCharArray(),
297 "otilde".toCharArray(),
298 "ouml".toCharArray(),
299 "divide".toCharArray(),
300 "oslash".toCharArray(),
301 "ugrave".toCharArray(),
302 "uacute".toCharArray(),
303 "ucirc".toCharArray(),
304 "uuml".toCharArray(),
305 "yacute".toCharArray(),
306 "thorn".toCharArray(),
307 "yuml".toCharArray()
308 };
309 }