package org.apache.myfaces.tobago.util;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Writer;

/**
 * Writes text and attribute values to a {@link Writer}, escaping them for
 * use in HTML.
 * <ul>
 * <li>The characters {@code "}, {@code &}, {@code <} and {@code >} are
 * replaced by their named entities.</li>
 * <li>If the character encoding is UTF-8, every other character is written
 * unchanged.</li>
 * <li>Otherwise characters in the range 0xA0..0xFF are written as the
 * HTML 4.0 named entities and characters above 0xFF as decimal character
 * references.</li>
 * </ul>
 */
public final class HtmlWriterUtil {

  /**
   * Lookup table indexed by character code. A non-null entry holds the
   * replacement for that character. Characters at or beyond the table
   * length (0xA0) are handled depending on the encoding.
   */
  private static final char[][] CHARS_TO_ESCAPE;

  static {
    // init lookup table
    CHARS_TO_ESCAPE = new char[0xA0][];
    CHARS_TO_ESCAPE['"'] = "&quot;".toCharArray();
    CHARS_TO_ESCAPE['&'] = "&amp;".toCharArray();
    CHARS_TO_ESCAPE['<'] = "&lt;".toCharArray();
    CHARS_TO_ESCAPE['>'] = "&gt;".toCharArray();
  }

  //
  // Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF
  //
  private static final String[] ISO8859_1_ENTITIES = new String[]{
      "nbsp",
      "iexcl",
      "cent",
      "pound",
      "curren",
      "yen",
      "brvbar",
      "sect",
      "uml",
      "copy",
      "ordf",
      "laquo",
      "not",
      "shy",
      "reg",
      "macr",
      "deg",
      "plusmn",
      "sup2",
      "sup3",
      "acute",
      "micro",
      "para",
      "middot",
      "cedil",
      "sup1",
      "ordm",
      "raquo",
      "frac14",
      "frac12",
      "frac34",
      "iquest",
      "Agrave",
      "Aacute",
      "Acirc",
      "Atilde",
      "Auml",
      "Aring",
      "AElig",
      "Ccedil",
      "Egrave",
      "Eacute",
      "Ecirc",
      "Euml",
      "Igrave",
      "Iacute",
      "Icirc",
      "Iuml",
      "ETH",
      "Ntilde",
      "Ograve",
      "Oacute",
      "Ocirc",
      "Otilde",
      "Ouml",
      "times",
      "Oslash",
      "Ugrave",
      "Uacute",
      "Ucirc",
      "Uuml",
      "Yacute",
      "THORN",
      "szlig",
      "agrave",
      "aacute",
      "acirc",
      "atilde",
      "auml",
      "aring",
      "aelig",
      "ccedil",
      "egrave",
      "eacute",
      "ecirc",
      "euml",
      "igrave",
      "iacute",
      "icirc",
      "iuml",
      "eth",
      "ntilde",
      "ograve",
      "oacute",
      "ocirc",
      "otilde",
      "ouml",
      "divide",
      "oslash",
      "ugrave",
      "uacute",
      "ucirc",
      "uuml",
      "yacute",
      "thorn",
      "yuml"
  };

  /** Target writer; receives unescaped runs and entity references directly. */
  private final Writer out;

  /**
   * Buffer in front of {@link #out} used while escaping character by
   * character. It is always flushed before anything is written to
   * {@link #out} directly, so that the output order is preserved.
   */
  private final ResponseWriterBuffer buffer;

  /** True if the target encoding is UTF-8 (compared case-insensitively). */
  private final boolean utf8;

  /**
   * Creates a helper writing to the given writer.
   *
   * @param out the writer that receives the escaped output
   * @param characterEncoding the encoding of the response; if it is
   *     "utf-8" (ignoring case), characters from 0xA0 upwards are written
   *     unchanged, otherwise (including {@code null}) they are written as
   *     entities or character references
   */
  public HtmlWriterUtil(Writer out, String characterEncoding) {
    this.out = out;
    utf8 = "utf-8".equalsIgnoreCase(characterEncoding);
    buffer = new ResponseWriterBuffer(out);
  }

  /**
   * Writes the given string escaped as an attribute value.
   *
   * @param text the attribute value, must not be {@code null}
   * @throws IOException if the underlying writer fails
   */
  public void writeAttributeValue(final String text)
      throws IOException {
    writeAttributeValue(text.toCharArray(), 0, text.length());
  }

  /** Writes {@code length} chars of {@code text} from {@code start}, escaped as an attribute value. */
  private void writeAttributeValue(
      final char[] text, final int start, final int length)
      throws IOException {
    writeEncodedValue(text, start, length, true);
  }

  /**
   * Writes the given string escaped as element content.
   *
   * @param text the text, must not be {@code null}
   * @throws IOException if the underlying writer fails
   */
  public void writeText(final String text) throws IOException {
    writeText(text.toCharArray(), 0, text.length());
  }

  /**
   * Writes a part of the given char array escaped as element content.
   *
   * @param text the characters to write
   * @param start index of the first character to write
   * @param length number of characters to write
   * @throws IOException if the underlying writer fails
   */
  public void writeText(final char[] text, final int start, final int length)
      throws IOException {
    writeEncodedValue(text, start, length, false);
  }

  /**
   * Writes {@code length} characters of {@code text}, beginning at
   * {@code start}, and escapes them on the way.
   *
   * @param isAttribute if true, an ampersand directly followed by an open
   *     brace is written unescaped
   */
  private void writeEncodedValue(final char[] text, final int start,
      final int length, final boolean isAttribute)
      throws IOException {

    final int end = start + length;

    // Find the first character that may need special treatment: one with
    // a table entry, or any character outside of the table.
    int localIndex = -1;
    for (int i = start; i < end; i++) {
      final char ch = text[i];
      if (ch >= CHARS_TO_ESCAPE.length || CHARS_TO_ESCAPE[ch] != null) {
        localIndex = i;
        break;
      }
    }

    if (localIndex == -1) {
      // no need to escape
      out.write(text, start, length);
      return;
    }

    // Write the clean prefix in one go, then encode the remainder.
    // localIndex is an absolute index into text, but Writer.write expects
    // a length, so the offset has to be subtracted.
    out.write(text, start, localIndex - start);

    for (int i = localIndex; i < end; i++) {
      final char ch = text[i];

      if (ch < CHARS_TO_ESCAPE.length) {
        if (isAttribute && ch == '&' && (i + 1 < end) && text[i + 1] == '{') {
          // HTML 4.0, section B.7.1: ampersands followed by
          // an open brace don't get escaped
          buffer.addToBuffer('&');
        } else if (CHARS_TO_ESCAPE[ch] != null) {
          for (char replacement : CHARS_TO_ESCAPE[ch]) {
            buffer.addToBuffer(replacement);
          }
        } else {
          buffer.addToBuffer(ch);
        }
      } else if (utf8) {
        // UTF-8 can represent the character, no encoding needed
        buffer.addToBuffer(ch);
      } else if (ch <= 0xff) {
        // ISO-8859-1 entities: encode as needed
        buffer.flushBuffer();

        out.write('&');
        // Written char by char on purpose: the original author noted
        // (as a FIXME) that write(String) "sets the startStillOpen=false".
        // NOTE(review): confirm against the Writer implementation in use.
        for (char c : ISO8859_1_ENTITIES[ch - 0xA0].toCharArray()) {
          out.write(c);
        }
        out.write(';');
      } else {
        // Character is not representable in the target encoding:
        // write it as a character reference.
        buffer.flushBuffer();
        writeDecRef(ch);
      }
    }

    buffer.flushBuffer();
  }

  /**
   * Writes a character as a decimal escape. Hex escapes are smaller than
   * the decimal version, but Netscape didn't support hex escapes until
   * 4.7.4. The euro sign is written as the named entity instead.
   */
  private void writeDecRef(final char ch) throws IOException {
    if (ch == '\u20ac') {
      out.write("&euro;");
      return;
    }
    out.write("&#");

    // Digits are written one by one instead of using String.valueOf(),
    // which avoids creating a temporary String per character.
    int rest = ch;
    // Largest power of ten that is not greater than the value, so that
    // no leading zeros are written (a char is at most 65535, no overflow).
    int divisor = 1;
    while (rest / divisor >= 10) {
      divisor *= 10;
    }
    while (divisor > 0) {
      out.write('0' + rest / divisor);
      rest %= divisor;
      divisor /= 10;
    }

    out.write(';');
  }

  /**
   * Tells whether the value of the attribute with the given name has to be
   * escaped when it is written.
   *
   * @param name the attribute name, may be {@code null}
   * @return {@code false} for the names "id", "name" and "class",
   *     {@code true} for every other name, including {@code null} and the
   *     empty string
   */
  public static boolean attributeValueMustEscaped(final String name) {
    // this is 30% faster then the .equals(name) version
    // tested with 100 loops over 19871 names
    // (extracted from logfile over all demo pages)

    if (name == null || name.length() == 0) {
      return true;
    }

    switch (name.charAt(0)) {
      case 'i': // 'id'
        return !(name.length() == 2 && name.charAt(1) == 'd');
      case 'n': // 'name'
        return !(name.length() == 4 && name.charAt(1) == 'a'
            && name.charAt(2) == 'm' && name.charAt(3) == 'e');
      case 'c': // 'class'
        return !(name.length() == 5 && name.charAt(1) == 'l'
            && name.charAt(2) == 'a' && name.charAt(3) == 's'
            && name.charAt(4) == 's');
      default:
        return true;
    }
  }
}