package org.apache.myfaces.tobago.util;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Writer;

/**
 * Writes text and attribute values to a {@link Writer} with HTML escaping.
 *
 * <p>The characters {@code "}, {@code &}, {@code <} and {@code >} are always
 * replaced by their named entities. Characters from 0xA0 upwards are written
 * unchanged when the character encoding is UTF-8; for any other encoding they
 * are written as named ISO-8859-1 entities (0xA0 to 0xFF) or as decimal
 * character references (everything above 0xFF).</p>
 *
 * <p>An instance keeps mutable scratch state (an output buffer and a digit
 * buffer), so a single instance must not be used by several threads at the
 * same time.</p>
 */
public final class HtmlWriterUtil {

  /**
   * Lookup table indexed by character code for all codes below 0xA0.
   * A non-null entry is the escaped replacement of that character.
   */
  private static final char[][] CHARS_TO_ESCAPE;

  static {
    // init lookup table
    CHARS_TO_ESCAPE = new char[0xA0][];
    CHARS_TO_ESCAPE['"'] = "&quot;".toCharArray();
    CHARS_TO_ESCAPE['&'] = "&amp;".toCharArray();
    CHARS_TO_ESCAPE['<'] = "&lt;".toCharArray();
    CHARS_TO_ESCAPE['>'] = "&gt;".toCharArray();
  }

  /** Highest Unicode code point (1114111) has seven decimal digits. */
  private static final int MAX_DEC_REF_DIGITS = 7;

  private final Writer out;

  // NOTE(review): presumably collects characters and writes them to 'out' on
  // flushBuffer(); the class is declared elsewhere - confirm.
  private final ResponseWriterBuffer buffer;

  private final boolean utf8;

  /** Reusable scratch space for the digits of a decimal character reference. */
  private final char[] decRefDigits = new char[MAX_DEC_REF_DIGITS];

  /**
   * @param out the writer that receives the escaped output
   * @param characterEncoding the encoding of the response; only "utf-8"
   *        (compared case-insensitively) disables the escaping of characters
   *        from 0xA0 upwards
   */
  public HtmlWriterUtil(final Writer out, final String characterEncoding) {
    this.out = out;
    utf8 = "utf-8".equalsIgnoreCase(characterEncoding);
    buffer = new ResponseWriterBuffer(out);
  }

  /**
   * Writes the given text escaped for use inside an attribute value.
   * An ampersand directly followed by an open brace is not escaped.
   *
   * @param text the unescaped attribute value
   * @throws IOException if the underlying writer fails
   */
  public void writeAttributeValue(final String text)
      throws IOException {
    writeEncodedValue(text.toCharArray(), 0, text.length(), true);
  }

  /**
   * Writes the given text escaped for use as element content.
   *
   * @param text the unescaped text
   * @throws IOException if the underlying writer fails
   */
  public void writeText(final String text) throws IOException {
    writeEncodedValue(text.toCharArray(), 0, text.length(), false);
  }

  /**
   * Writes a part of the given character array escaped for use as element
   * content.
   *
   * @param text the array holding the unescaped text
   * @param start index of the first character to write
   * @param length number of characters to write
   * @throws IOException if the underlying writer fails
   */
  public void writeText(final char[] text, final int start, final int length)
      throws IOException {
    writeEncodedValue(text, start, length, false);
  }

  /**
   * Escapes and writes {@code length} characters of {@code text} beginning at
   * {@code start}.
   *
   * @param isAttribute {@code true} to apply the attribute rule that keeps
   *        "&amp;{" unescaped
   */
  private void writeEncodedValue(final char[] text, final int start,
      final int length, final boolean isAttribute)
      throws IOException {

    final int end = start + length;

    // Look for the first character that is not written verbatim for sure.
    int localIndex = -1;
    for (int i = start; i < end; i++) {
      final char ch = text[i];
      if (ch >= CHARS_TO_ESCAPE.length || CHARS_TO_ESCAPE[ch] != null) {
        localIndex = i;
        break;
      }
    }

    if (localIndex == -1) {
      // no need to escape
      out.write(text, start, length);
      return;
    }

    // Write the clean prefix in one go. The third argument of
    // Writer.write(char[], int, int) is a length, not an end index.
    out.write(text, start, localIndex - start);

    // Encode the remainder character by character.
    for (int i = localIndex; i < end; i++) {
      final char ch = text[i];

      if (ch < CHARS_TO_ESCAPE.length) {
        // Below 0xA0: covered by the lookup table.
        if (isAttribute && ch == '&' && (i + 1 < end) && text[i + 1] == '{') {
          // HTML 4.0, section B.7.1: ampersands followed by
          // an open brace don't get escaped
          buffer.addToBuffer('&');
        } else if (CHARS_TO_ESCAPE[ch] != null) {
          buffer.addToBuffer(CHARS_TO_ESCAPE[ch]);
        } else {
          buffer.addToBuffer(ch);
        }
      } else if (utf8) {
        // UTF-8 can represent every character, nothing to encode.
        buffer.addToBuffer(ch);
      } else if (ch <= 0xff) {
        // ISO-8859-1 entities: 0xA0 to 0xFF have a named entity.
        // The buffer is flushed first because the entity goes directly to 'out'.
        buffer.flushBuffer();

        out.write('&');
        final char[] entity = ISO8859_1_ENTITIES[ch - 0xA0];
        out.write(entity, 0, entity.length);
        out.write(';');
      } else {
        buffer.flushBuffer();

        if (Character.isHighSurrogate(ch) && (i + 1 < end)
            && Character.isLowSurrogate(text[i + 1])) {
          // A surrogate pair is one character: reference its code point,
          // references to the single surrogates would be invalid HTML.
          writeDecRef(Character.toCodePoint(ch, text[i + 1]));
          i++;
        } else {
          writeDecRef(ch);
        }
      }
    }

    buffer.flushBuffer();
  }

  /**
   * Writes a character as a decimal character reference directly to the
   * writer; the euro sign is written as its named entity instead.
   * Hex escapes are smaller than the decimal version, but Netscape didn't
   * support hex escapes until 4.7.4.
   *
   * @param codePoint the Unicode code point to write
   */
  private void writeDecRef(final int codePoint) throws IOException {
    if (codePoint == 0x20ac) {
      out.write("&euro;");
      return;
    }
    out.write("&#");
    // Digits are produced from the least significant one into a reused
    // array, this avoids creating a String for every character.
    int pos = decRefDigits.length;
    int rest = codePoint;
    do {
      decRefDigits[--pos] = (char) ('0' + (rest % 10));
      rest /= 10;
    } while (rest > 0);
    out.write(decRefDigits, pos, decRefDigits.length - pos);
    out.write(';');
  }

  /**
   * Tells whether the value of the attribute with the given name has to be
   * escaped.
   *
   * @param name the attribute name, may be {@code null}
   * @return {@code false} for exactly "id", "name" and "class",
   *         {@code true} for every other name including {@code null} and ""
   */
  public static boolean attributeValueMustEscaped(final String name) {
    if (name == null) {
      return true;
    }
    // Dispatching on the length first keeps this cheap for the common case
    // of names that can not match at all.
    switch (name.length()) {
      case 2:
        return !"id".equals(name);
      case 4:
        return !"name".equals(name);
      case 5:
        return !"class".equals(name);
      default:
        return true;
    }
  }

  //
  // Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF
  //
  private static final char[][] ISO8859_1_ENTITIES = new char[][]{
    "nbsp".toCharArray(),
    "iexcl".toCharArray(),
    "cent".toCharArray(),
    "pound".toCharArray(),
    "curren".toCharArray(),
    "yen".toCharArray(),
    "brvbar".toCharArray(),
    "sect".toCharArray(),
    "uml".toCharArray(),
    "copy".toCharArray(),
    "ordf".toCharArray(),
    "laquo".toCharArray(),
    "not".toCharArray(),
    "shy".toCharArray(),
    "reg".toCharArray(),
    "macr".toCharArray(),
    "deg".toCharArray(),
    "plusmn".toCharArray(),
    "sup2".toCharArray(),
    "sup3".toCharArray(),
    "acute".toCharArray(),
    "micro".toCharArray(),
    "para".toCharArray(),
    "middot".toCharArray(),
    "cedil".toCharArray(),
    "sup1".toCharArray(),
    "ordm".toCharArray(),
    "raquo".toCharArray(),
    "frac14".toCharArray(),
    "frac12".toCharArray(),
    "frac34".toCharArray(),
    "iquest".toCharArray(),
    "Agrave".toCharArray(),
    "Aacute".toCharArray(),
    "Acirc".toCharArray(),
    "Atilde".toCharArray(),
    "Auml".toCharArray(),
    "Aring".toCharArray(),
    "AElig".toCharArray(),
    "Ccedil".toCharArray(),
    "Egrave".toCharArray(),
    "Eacute".toCharArray(),
    "Ecirc".toCharArray(),
    "Euml".toCharArray(),
    "Igrave".toCharArray(),
    "Iacute".toCharArray(),
    "Icirc".toCharArray(),
    "Iuml".toCharArray(),
    "ETH".toCharArray(),
    "Ntilde".toCharArray(),
    "Ograve".toCharArray(),
    "Oacute".toCharArray(),
    "Ocirc".toCharArray(),
    "Otilde".toCharArray(),
    "Ouml".toCharArray(),
    "times".toCharArray(),
    "Oslash".toCharArray(),
    "Ugrave".toCharArray(),
    "Uacute".toCharArray(),
    "Ucirc".toCharArray(),
    "Uuml".toCharArray(),
    "Yacute".toCharArray(),
    "THORN".toCharArray(),
    "szlig".toCharArray(),
    "agrave".toCharArray(),
    "aacute".toCharArray(),
    "acirc".toCharArray(),
    "atilde".toCharArray(),
    "auml".toCharArray(),
    "aring".toCharArray(),
    "aelig".toCharArray(),
    "ccedil".toCharArray(),
    "egrave".toCharArray(),
    "eacute".toCharArray(),
    "ecirc".toCharArray(),
    "euml".toCharArray(),
    "igrave".toCharArray(),
    "iacute".toCharArray(),
    "icirc".toCharArray(),
    "iuml".toCharArray(),
    "eth".toCharArray(),
    "ntilde".toCharArray(),
    "ograve".toCharArray(),
    "oacute".toCharArray(),
    "ocirc".toCharArray(),
    "otilde".toCharArray(),
    "ouml".toCharArray(),
    "divide".toCharArray(),
    "oslash".toCharArray(),
    "ugrave".toCharArray(),
    "uacute".toCharArray(),
    "ucirc".toCharArray(),
    "uuml".toCharArray(),
    "yacute".toCharArray(),
    "thorn".toCharArray(),
    "yuml".toCharArray()
  };
}