001    package org.apache.myfaces.tobago.util;
002    
003    /*
004     * Licensed to the Apache Software Foundation (ASF) under one or more
005     * contributor license agreements.  See the NOTICE file distributed with
006     * this work for additional information regarding copyright ownership.
007     * The ASF licenses this file to You under the Apache License, Version 2.0
008     * (the "License"); you may not use this file except in compliance with
009     * the License.  You may obtain a copy of the License at
010     *
011     *      http://www.apache.org/licenses/LICENSE-2.0
012     *
013     * Unless required by applicable law or agreed to in writing, software
014     * distributed under the License is distributed on an "AS IS" BASIS,
015     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016     * See the License for the specific language governing permissions and
017     * limitations under the License.
018     */
019    
020    import java.io.IOException;
021    import java.io.Writer;
022    
023    /**
024     * User: weber
025     * Date: Jun 28, 2005
026     * Time: 2:07:29 PM
027     */
028    public final class HtmlWriterUtil {
029    
030      private static final char[][] CHARS_TO_ESCAPE;
031    
032      static {
033        // init lookup table
034        CHARS_TO_ESCAPE = new char[0xA0][];
035        CHARS_TO_ESCAPE['"'] = """.toCharArray();
036        CHARS_TO_ESCAPE['&'] = "&".toCharArray();
037        CHARS_TO_ESCAPE['<'] = "&lt;".toCharArray();
038        CHARS_TO_ESCAPE['>'] = "&gt;".toCharArray();
039      }
040    
041      private final Writer out;
042    
043      private final ResponseWriterBuffer buffer;
044    
045      private final boolean utf8;
046    
047      public HtmlWriterUtil(Writer out, String characterEncoding) {
048        this.out = out;
049        utf8 = "utf-8".equalsIgnoreCase(characterEncoding);
050        buffer = new ResponseWriterBuffer(out);
051      }
052    
053      public void writeAttributeValue(final String text)
054          throws IOException {
055        writeAttributeValue(text.toCharArray(), 0, text.length());
056      }
057    
058      private void writeAttributeValue(
059          final char[] text, final int start, final int length)
060          throws IOException {
061        writeEncodedValue(text, start, length, true);
062      }
063    
064    
065      public void writeText(final String text) throws IOException {
066        writeText(text.toCharArray(), 0, text.length());
067      }
068    
069      public void writeText(final char[] text, final int start, final int length)
070          throws IOException {
071        writeEncodedValue(text, start, length, false);
072      }
073    
074      private void writeEncodedValue(final char[] text, final int start,
075          final int length, final boolean isAttribute)
076          throws IOException {
077    
078        int localIndex = -1;
079    
080        final int end = start + length;
081        for (int i = start; i < end; i++) {
082          char ch = text[i];
083          if (ch >= CHARS_TO_ESCAPE.length || CHARS_TO_ESCAPE[ch] != null) {
084            localIndex = i;
085            break;
086          }
087        }
088    
089        if (localIndex == -1) {
090          // no need to escape
091          out.write(text, start, length);
092        } else {
093          // write until localIndex and then encode the remainder
094          out.write(text, start, localIndex);
095    
096          for (int i = localIndex; i < end; i++) {
097            final char ch = text[i];
098    
099            // Tilde or less...
100            if (ch < CHARS_TO_ESCAPE.length) {
101              if (isAttribute && ch == '&' && (i + 1 < end) && text[i + 1] == '{') {
102                // HTML 4.0, section B.7.1: ampersands followed by
103                // an open brace don't get escaped
104                buffer.addToBuffer('&');
105              } else if (CHARS_TO_ESCAPE[ch] != null) {
106                for (char cha : CHARS_TO_ESCAPE[ch]) {
107                  buffer.addToBuffer(cha);
108                }
109              } else {
110                buffer.addToBuffer(ch);
111              }
112            } else if (utf8) {
113              buffer.addToBuffer(ch);
114            } else if (ch <= 0xff) {
115              // ISO-8859-1 entities: encode as needed
116              buffer.flushBuffer();
117    
118              out.write('&');
119    //          FIXME? write(String) sets the startStillOpen=false
120    //          out.write(sISO8859_1_Entities[ch - 0xA0]);
121              for (char c : ISO8859_1_ENTITIES[ch - 0xA0].toCharArray()) {
122                out.write(c);
123              }
124              out.write(';');
125            } else {
126              buffer.flushBuffer();
127    
128              // Double-byte characters to encode.
129              // PENDING: when outputting to an encoding that
130              // supports double-byte characters (UTF-8, for example),
131              // we should not be encoding
132              writeDecRef(ch);
133            }
134          }
135    
136          buffer.flushBuffer();
137        }
138      }
139    
140    
141      /**
142       * Writes a character as a decimal escape.  Hex escapes are smaller than
143       * the decimal version, but Netscape didn't support hex escapes until
144       * 4.7.4.
145       */
146      private void writeDecRef(final char ch) throws IOException {
147        if (ch == '\u20ac') {
148          out.write("&euro;");
149          return;
150        }
151        out.write("&#");
152        // Formerly used String.valueOf().  This version tests out
153        // about 40% faster in a microbenchmark (and on systems where GC is
154        // going gonzo, it should be even better)
155        int i = (int) ch;
156        if (i > 10000) {
157          out.write('0' + (i / 10000));
158          i = i % 10000;
159          out.write('0' + (i / 1000));
160          i = i % 1000;
161          out.write('0' + (i / 100));
162          i = i % 100;
163          out.write('0' + (i / 10));
164          i = i % 10;
165          out.write('0' + i);
166        } else if (i > 1000) {
167          out.write('0' + (i / 1000));
168          i = i % 1000;
169          out.write('0' + (i / 100));
170          i = i % 100;
171          out.write('0' + (i / 10));
172          i = i % 10;
173          out.write('0' + i);
174        } else {
175          out.write('0' + (i / 100));
176          i = i % 100;
177          out.write('0' + (i / 10));
178          i = i % 10;
179          out.write('0' + i);
180        }
181    
182        out.write(';');
183      }
184    
185      public static boolean attributeValueMustEscaped(final String name) {
186        // this is 30% faster then the  .equals(name) version
187        // tested with 100 loops over 19871 names
188        //       (extracted from logfile over all demo pages)
189    
190        try {
191          switch (name.charAt(0)) {
192            case 'i' : // 'id'
193              if (name.length() == 2 && name.charAt(1) == 'd') {
194                return false;
195              }
196              break;
197            case 'n' : // 'name'
198              if (name.length() == 4 && name.charAt(1) == 'a' && name.charAt(2) == 'm'
199                  && name.charAt(3) == 'e') {
200                return false;
201              }
202              break;
203            case 'c' : // 'class'
204              if (name.length() == 5 && name.charAt(1) == 'l' && name.charAt(2) == 'a'
205                  && name.charAt(3) == 's' && name.charAt(4) == 's') {
206                return false;
207              }
208              break;
209            default:
210              return true;
211          }
212        } catch (NullPointerException e) {
213          // ignore
214        } catch (StringIndexOutOfBoundsException e) {
215          // ignore
216        }
217        return true;
218      }
219    
220      //
221      // Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF
222      //
223      private static final String[] ISO8859_1_ENTITIES = new String[]{
224          "nbsp",
225          "iexcl",
226          "cent",
227          "pound",
228          "curren",
229          "yen",
230          "brvbar",
231          "sect",
232          "uml",
233          "copy",
234          "ordf",
235          "laquo",
236          "not",
237          "shy",
238          "reg",
239          "macr",
240          "deg",
241          "plusmn",
242          "sup2",
243          "sup3",
244          "acute",
245          "micro",
246          "para",
247          "middot",
248          "cedil",
249          "sup1",
250          "ordm",
251          "raquo",
252          "frac14",
253          "frac12",
254          "frac34",
255          "iquest",
256          "Agrave",
257          "Aacute",
258          "Acirc",
259          "Atilde",
260          "Auml",
261          "Aring",
262          "AElig",
263          "Ccedil",
264          "Egrave",
265          "Eacute",
266          "Ecirc",
267          "Euml",
268          "Igrave",
269          "Iacute",
270          "Icirc",
271          "Iuml",
272          "ETH",
273          "Ntilde",
274          "Ograve",
275          "Oacute",
276          "Ocirc",
277          "Otilde",
278          "Ouml",
279          "times",
280          "Oslash",
281          "Ugrave",
282          "Uacute",
283          "Ucirc",
284          "Uuml",
285          "Yacute",
286          "THORN",
287          "szlig",
288          "agrave",
289          "aacute",
290          "acirc",
291          "atilde",
292          "auml",
293          "aring",
294          "aelig",
295          "ccedil",
296          "egrave",
297          "eacute",
298          "ecirc",
299          "euml",
300          "igrave",
301          "iacute",
302          "icirc",
303          "iuml",
304          "eth",
305          "ntilde",
306          "ograve",
307          "oacute",
308          "ocirc",
309          "otilde",
310          "ouml",
311          "divide",
312          "oslash",
313          "ugrave",
314          "uacute",
315          "ucirc",
316          "uuml",
317          "yacute",
318          "thorn",
319          "yuml"
320      };
321    }