View Javadoc

1   /*
2    * $Id: TextUtil.java 515882 2007-03-08 01:33:04Z rgielen $
3    *
4    * Copyright 2006 The Apache Software Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *      http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.struts2.views.util;
19  
20  
/**
 * Escapes text for inclusion in HTML.
 *
 * <p>Escaping is driven by a lookup table indexed by character code, so the
 * common case (nothing to escape) returns the input string itself without
 * allocating anything.</p>
 *
 * <p>NOTE(review): the table has no entry for the apostrophe (0x27), so it is
 * emitted unchanged. Confirm that escaped values are never placed inside
 * single-quoted attribute values.</p>
 */
public class TextUtil {

    /**
     * Size of the lookup table. Characters whose code is below this value are
     * looked up in {@link #_stringChars}; characters at or above it are passed
     * through unchanged, except for the euro sign.
     */
    protected static final int MAX_LENGTH = 300;

    /** The only character outside the lookup table that is escaped. */
    private static final char EURO_SIGN = '\u20AC';

    /** Replacement text for {@link #EURO_SIGN}. */
    private static final char[] EURO_ENTITY = "&euro;".toCharArray();

    /**
     * Lookup table from character code to replacement entity, or
     * <code>null</code> when the character needs no escaping. Entries are
     * stored as <code>char[]</code> so they can be appended without going
     * through a String.
     */
    protected static final char[][] _stringChars = new char[MAX_LENGTH][];

    static {
        // Populate the lookup table once, when the class is loaded.
        initMapping();
    }


    /**
     * Escapes HTML; equivalent to <code>escapeHTML(s, false)</code>.
     *
     * @param s string to be escaped, may be <code>null</code>
     * @return the escaped string, or <code>null</code> if <code>s</code> is <code>null</code>
     */
    public static final String escapeHTML(String s) {
        return escapeHTML(s, false);
    }

    /**
     * Escapes HTML.
     *
     * <p>Characters with an entry in the lookup table, and the euro sign, are
     * replaced by their entity; every other character is copied as is. If no
     * character needs replacing, the very same string instance is returned.</p>
     *
     * @param s           string to be escaped, may be <code>null</code>
     * @param escapeEmpty if <code>false</code>, a string that is empty after
     *                    <code>trim()</code>, or whose trimmed form is exactly
     *                    two double-quote characters (<code>""</code>), is
     *                    returned unchanged; if <code>true</code>, such strings
     *                    go through normal escaping. A zero-length string is
     *                    always returned unchanged.
     * @return the escaped string, or <code>null</code> if <code>s</code> is <code>null</code>
     */
    public static final String escapeHTML(String s, boolean escapeEmpty) {
        // Tolerate missing values instead of failing with a NullPointerException.
        if (s == null) {
            return null;
        }

        int len = s.length();

        if (len == 0) {
            return s;
        }

        if (!escapeEmpty) {
            String trimmed = s.trim();

            if (trimmed.length() == 0 || "\"\"".equals(trimmed)) {
                return s;
            }
        }

        // First pass: find out whether anything needs escaping at all.
        // Nothing is copied or allocated during this scan.
        int first = firstEscapeIndex(s, len);

        if (first == len) {
            return s;
        }

        // Second pass: copy untouched runs in bulk and splice in the entities.
        // 'last' marks the start of the run that has not been copied yet.
        StringBuilder sb = new StringBuilder(len + 40);
        int last = 0;

        for (int i = first; i < len; i++) {
            char[] subst = substitutionFor(s.charAt(i));

            if (subst != null) {
                sb.append(s, last, i);
                sb.append(subst);
                last = i + 1;
            }
        }

        // Trailing run after the last escaped character (may be empty).
        sb.append(s, last, len);

        return sb.toString();
    }

    /**
     * Returns the index of the first character of <code>s</code> that must be
     * escaped, or <code>len</code> if there is none.
     */
    private static int firstEscapeIndex(String s, int len) {
        for (int i = 0; i < len; i++) {
            if (substitutionFor(s.charAt(i)) != null) {
                return i;
            }
        }

        return len;
    }

    /**
     * Returns the replacement for <code>c</code>, or <code>null</code> if the
     * character is to be copied unchanged.
     */
    private static char[] substitutionFor(char c) {
        if (c < MAX_LENGTH) {
            return _stringChars[c];
        }

        // The euro sign is the single escaped character above the table range.
        // A second lookup table could replace this check if more characters
        // in that area ever need converting.
        return (c == EURO_SIGN) ? EURO_ENTITY : null;
    }

    /**
     * Records <code>txt</code> as the replacement for character code <code>c</code>.
     *
     * @param c       character code; used as an index into <code>strings</code>
     * @param txt     replacement text
     * @param strings table being filled
     */
    protected static void addMapping(int c, String txt, String[] strings) {
        strings[c] = txt;
    }

    /**
     * Fills {@link #_stringChars} with the entity for every escaped character
     * code. Called once from the static initializer.
     */
    protected static void initMapping() {
        String[] strings = new String[MAX_LENGTH];

        // Markup-significant characters
        addMapping(0x22, "&quot;", strings); // "
        addMapping(0x26, "&amp;", strings); // &
        addMapping(0x3c, "&lt;", strings); // <
        addMapping(0x3e, "&gt;", strings); // >

        // Selected symbols
        addMapping(0xa1, "&iexcl;", strings);
        addMapping(0xa2, "&cent;", strings);
        addMapping(0xa3, "&pound;", strings);
        addMapping(0xa9, "&copy;", strings);
        addMapping(0xae, "&reg;", strings);
        addMapping(0xbf, "&iquest;", strings);

        // 0xc0 - 0xcf
        addMapping(0xc0, "&Agrave;", strings);
        addMapping(0xc1, "&Aacute;", strings);
        addMapping(0xc2, "&Acirc;", strings);
        addMapping(0xc3, "&Atilde;", strings);
        addMapping(0xc4, "&Auml;", strings);
        addMapping(0xc5, "&Aring;", strings);
        addMapping(0xc6, "&AElig;", strings);
        addMapping(0xc7, "&Ccedil;", strings);
        addMapping(0xc8, "&Egrave;", strings);
        addMapping(0xc9, "&Eacute;", strings);
        addMapping(0xca, "&Ecirc;", strings);
        addMapping(0xcb, "&Euml;", strings);
        addMapping(0xcc, "&Igrave;", strings);
        addMapping(0xcd, "&Iacute;", strings);
        addMapping(0xce, "&Icirc;", strings);
        addMapping(0xcf, "&Iuml;", strings);

        // 0xd0 - 0xdf
        addMapping(0xd0, "&ETH;", strings);
        addMapping(0xd1, "&Ntilde;", strings);
        addMapping(0xd2, "&Ograve;", strings);
        addMapping(0xd3, "&Oacute;", strings);
        addMapping(0xd4, "&Ocirc;", strings);
        addMapping(0xd5, "&Otilde;", strings);
        addMapping(0xd6, "&Ouml;", strings);
        addMapping(0xd7, "&times;", strings);
        addMapping(0xd8, "&Oslash;", strings);
        addMapping(0xd9, "&Ugrave;", strings);
        addMapping(0xda, "&Uacute;", strings);
        addMapping(0xdb, "&Ucirc;", strings);
        addMapping(0xdc, "&Uuml;", strings);
        addMapping(0xdd, "&Yacute;", strings);
        addMapping(0xde, "&THORN;", strings);
        addMapping(0xdf, "&szlig;", strings);

        // 0xe0 - 0xef
        addMapping(0xe0, "&agrave;", strings);
        addMapping(0xe1, "&aacute;", strings);
        addMapping(0xe2, "&acirc;", strings);
        addMapping(0xe3, "&atilde;", strings);
        addMapping(0xe4, "&auml;", strings);
        addMapping(0xe5, "&aring;", strings);
        addMapping(0xe6, "&aelig;", strings);
        addMapping(0xe7, "&ccedil;", strings);
        addMapping(0xe8, "&egrave;", strings);
        addMapping(0xe9, "&eacute;", strings);
        addMapping(0xea, "&ecirc;", strings);
        addMapping(0xeb, "&euml;", strings);
        addMapping(0xec, "&igrave;", strings);
        addMapping(0xed, "&iacute;", strings);
        addMapping(0xee, "&icirc;", strings);
        addMapping(0xef, "&iuml;", strings);

        // 0xf0 - 0xff
        addMapping(0xf0, "&eth;", strings);
        addMapping(0xf1, "&ntilde;", strings);
        addMapping(0xf2, "&ograve;", strings);
        addMapping(0xf3, "&oacute;", strings);
        addMapping(0xf4, "&ocirc;", strings);
        addMapping(0xf5, "&otilde;", strings);
        addMapping(0xf6, "&ouml;", strings);
        addMapping(0xf7, "&divide;", strings);
        addMapping(0xf8, "&oslash;", strings);
        addMapping(0xf9, "&ugrave;", strings);
        addMapping(0xfa, "&uacute;", strings);
        addMapping(0xfb, "&ucirc;", strings);
        addMapping(0xfc, "&uuml;", strings);
        addMapping(0xfd, "&yacute;", strings);
        addMapping(0xfe, "&thorn;", strings);
        addMapping(0xff, "&yuml;", strings);

        // Convert once to char[] so escaping never has to unpack a String.
        for (int i = 0; i < strings.length; i++) {
            String str = strings[i];

            if (str != null) {
                _stringChars[i] = str.toCharArray();
            }
        }
    }
}