001    package org.apache.myfaces.tobago.util;
002    
003    /*
004     * Licensed to the Apache Software Foundation (ASF) under one or more
005     * contributor license agreements.  See the NOTICE file distributed with
006     * this work for additional information regarding copyright ownership.
007     * The ASF licenses this file to You under the Apache License, Version 2.0
008     * (the "License"); you may not use this file except in compliance with
009     * the License.  You may obtain a copy of the License at
010     *
011     *      http://www.apache.org/licenses/LICENSE-2.0
012     *
013     * Unless required by applicable law or agreed to in writing, software
014     * distributed under the License is distributed on an "AS IS" BASIS,
015     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016     * See the License for the specific language governing permissions and
017     * limitations under the License.
018     */
019    
020    import java.io.IOException;
021    import java.io.Writer;
022    
023    /**
024     * User: weber
025     * Date: Jun 28, 2005
026     * Time: 2:07:29 PM
027     */
028    public final class HtmlWriterUtil {
029    
030      private static final char[][] CHARS_TO_ESCAPE;
031    
032      static {
033        // init lookup table
034        CHARS_TO_ESCAPE = new char[0xA0][];
035        CHARS_TO_ESCAPE['"'] = """.toCharArray();
036        CHARS_TO_ESCAPE['&'] = "&".toCharArray();
037        CHARS_TO_ESCAPE['<'] = "&lt;".toCharArray();
038        CHARS_TO_ESCAPE['>'] = "&gt;".toCharArray();
039      }
040    
041      private final Writer out;
042    
043      private final ResponseWriterBuffer buffer;
044    
045      private final boolean utf8;
046    
047      public HtmlWriterUtil(final Writer out, final String characterEncoding) {
048        this.out = out;
049        utf8 = "utf-8".equalsIgnoreCase(characterEncoding);
050        buffer = new ResponseWriterBuffer(out);
051      }
052    
053      public void writeAttributeValue(final String text)
054          throws IOException {
055        writeEncodedValue(text.toCharArray(), 0, text.length(), true);
056      }
057    
058      public void writeText(final String text) throws IOException {
059        writeEncodedValue(text.toCharArray(), 0, text.length(), false);
060      }
061    
062      public void writeText(final char[] text, final int start, final int length)
063          throws IOException {
064        writeEncodedValue(text, start, length, false);
065      }
066    
067      private void writeEncodedValue(final char[] text, final int start,
068          final int length, final boolean isAttribute)
069          throws IOException {
070    
071        int localIndex = -1;
072    
073        final int end = start + length;
074        for (int i = start; i < end; i++) {
075          char ch = text[i];
076          if (ch >= CHARS_TO_ESCAPE.length || CHARS_TO_ESCAPE[ch] != null) {
077            localIndex = i;
078            break;
079          }
080        }
081    
082        if (localIndex == -1) {
083          // no need to escape
084          out.write(text, start, length);
085        } else {
086          // write until localIndex and then encode the remainder
087          out.write(text, start, localIndex);
088    
089          for (int i = localIndex; i < end; i++) {
090            final char ch = text[i];
091    
092            // Tilde or less...
093            if (ch < CHARS_TO_ESCAPE.length) {
094              if (isAttribute && ch == '&' && (i + 1 < end) && text[i + 1] == '{') {
095                // HTML 4.0, section B.7.1: ampersands followed by
096                // an open brace don't get escaped
097                buffer.addToBuffer('&');
098              } else if (CHARS_TO_ESCAPE[ch] != null) {
099                buffer.addToBuffer(CHARS_TO_ESCAPE[ch]);
100              } else {
101                buffer.addToBuffer(ch);
102              }
103            } else if (utf8) {
104              buffer.addToBuffer(ch);
105            } else if (ch <= 0xff) {
106              // ISO-8859-1 entities: encode as needed
107              buffer.flushBuffer();
108    
109              out.write('&');
110              char[] chars = ISO8859_1_ENTITIES[ch - 0xA0];
111              out.write(chars, 0, chars.length);
112              out.write(';');
113            } else {
114              buffer.flushBuffer();
115    
116              // Double-byte characters to encode.
117              // PENDING: when outputting to an encoding that
118              // supports double-byte characters (UTF-8, for example),
119              // we should not be encoding
120              writeDecRef(ch);
121            }
122          }
123    
124          buffer.flushBuffer();
125        }
126      }
127    
128    
129      /**
130       * Writes a character as a decimal escape.  Hex escapes are smaller than
131       * the decimal version, but Netscape didn't support hex escapes until
132       * 4.7.4.
133       */
134      private void writeDecRef(final char ch) throws IOException {
135        if (ch == '\u20ac') {
136          out.write("&euro;");
137          return;
138        }
139        out.write("&#");
140        // Formerly used String.valueOf().  This version tests out
141        // about 40% faster in a microbenchmark (and on systems where GC is
142        // going gonzo, it should be even better)
143        int i = (int) ch;
144        if (i > 10000) {
145          out.write('0' + (i / 10000));
146          i = i % 10000;
147          out.write('0' + (i / 1000));
148          i = i % 1000;
149          out.write('0' + (i / 100));
150          i = i % 100;
151          out.write('0' + (i / 10));
152          i = i % 10;
153          out.write('0' + i);
154        } else if (i > 1000) {
155          out.write('0' + (i / 1000));
156          i = i % 1000;
157          out.write('0' + (i / 100));
158          i = i % 100;
159          out.write('0' + (i / 10));
160          i = i % 10;
161          out.write('0' + i);
162        } else {
163          out.write('0' + (i / 100));
164          i = i % 100;
165          out.write('0' + (i / 10));
166          i = i % 10;
167          out.write('0' + i);
168        }
169    
170        out.write(';');
171      }
172    
173      public static boolean attributeValueMustEscaped(final String name) {
174        // this is 30% faster then the  .equals(name) version
175        // tested with 100 loops over 19871 names
176        //       (extracted from logfile over all demo pages)
177    
178        try {
179          switch (name.charAt(0)) {
180            case 'i': // 'id'
181              if (name.length() == 2 && name.charAt(1) == 'd') {
182                return false;
183              }
184              break;
185            case 'n': // 'name'
186              if (name.length() == 4 && name.charAt(1) == 'a' && name.charAt(2) == 'm'
187                  && name.charAt(3) == 'e') {
188                return false;
189              }
190              break;
191            case 'c': // 'class'
192              if (name.length() == 5 && name.charAt(1) == 'l' && name.charAt(2) == 'a'
193                  && name.charAt(3) == 's' && name.charAt(4) == 's') {
194                return false;
195              }
196              break;
197            default:
198              return true;
199          }
200        } catch (NullPointerException e) {
201          // ignore
202        } catch (StringIndexOutOfBoundsException e) {
203          // ignore
204        }
205        return true;
206      }
207    
208      //
209      // Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF
210      //
211      private static final char [][] ISO8859_1_ENTITIES = new char [][]{
212          "nbsp".toCharArray(),
213          "iexcl".toCharArray(),
214          "cent".toCharArray(),
215          "pound".toCharArray(),
216          "curren".toCharArray(),
217          "yen".toCharArray(),
218          "brvbar".toCharArray(),
219          "sect".toCharArray(),
220          "uml".toCharArray(),
221          "copy".toCharArray(),
222          "ordf".toCharArray(),
223          "laquo".toCharArray(),
224          "not".toCharArray(),
225          "shy".toCharArray(),
226          "reg".toCharArray(),
227          "macr".toCharArray(),
228          "deg".toCharArray(),
229          "plusmn".toCharArray(),
230          "sup2".toCharArray(),
231          "sup3".toCharArray(),
232          "acute".toCharArray(),
233          "micro".toCharArray(),
234          "para".toCharArray(),
235          "middot".toCharArray(),
236          "cedil".toCharArray(),
237          "sup1".toCharArray(),
238          "ordm".toCharArray(),
239          "raquo".toCharArray(),
240          "frac14".toCharArray(),
241          "frac12".toCharArray(),
242          "frac34".toCharArray(),
243          "iquest".toCharArray(),
244          "Agrave".toCharArray(),
245          "Aacute".toCharArray(),
246          "Acirc".toCharArray(),
247          "Atilde".toCharArray(),
248          "Auml".toCharArray(),
249          "Aring".toCharArray(),
250          "AElig".toCharArray(),
251          "Ccedil".toCharArray(),
252          "Egrave".toCharArray(),
253          "Eacute".toCharArray(),
254          "Ecirc".toCharArray(),
255          "Euml".toCharArray(),
256          "Igrave".toCharArray(),
257          "Iacute".toCharArray(),
258          "Icirc".toCharArray(),
259          "Iuml".toCharArray(),
260          "ETH".toCharArray(),
261          "Ntilde".toCharArray(),
262          "Ograve".toCharArray(),
263          "Oacute".toCharArray(),
264          "Ocirc".toCharArray(),
265          "Otilde".toCharArray(),
266          "Ouml".toCharArray(),
267          "times".toCharArray(),
268          "Oslash".toCharArray(),
269          "Ugrave".toCharArray(),
270          "Uacute".toCharArray(),
271          "Ucirc".toCharArray(),
272          "Uuml".toCharArray(),
273          "Yacute".toCharArray(),
274          "THORN".toCharArray(),
275          "szlig".toCharArray(),
276          "agrave".toCharArray(),
277          "aacute".toCharArray(),
278          "acirc".toCharArray(),
279          "atilde".toCharArray(),
280          "auml".toCharArray(),
281          "aring".toCharArray(),
282          "aelig".toCharArray(),
283          "ccedil".toCharArray(),
284          "egrave".toCharArray(),
285          "eacute".toCharArray(),
286          "ecirc".toCharArray(),
287          "euml".toCharArray(),
288          "igrave".toCharArray(),
289          "iacute".toCharArray(),
290          "icirc".toCharArray(),
291          "iuml".toCharArray(),
292          "eth".toCharArray(),
293          "ntilde".toCharArray(),
294          "ograve".toCharArray(),
295          "oacute".toCharArray(),
296          "ocirc".toCharArray(),
297          "otilde".toCharArray(),
298          "ouml".toCharArray(),
299          "divide".toCharArray(),
300          "oslash".toCharArray(),
301          "ugrave".toCharArray(),
302          "uacute".toCharArray(),
303          "ucirc".toCharArray(),
304          "uuml".toCharArray(),
305          "yacute".toCharArray(),
306          "thorn".toCharArray(),
307          "yuml".toCharArray()
308      };
309    }