1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 package org.apache.struts2.jasper.xmlparser;
27
28 /***
29 * This class is a symbol table implementation that guarantees that
30 * strings used as identifiers are unique references. Multiple calls
31 * to <code>addSymbol</code> will always return the same string
32 * reference.
33 * <p/>
34 * The symbol table performs the same task as <code>String.intern()</code>
35 * with the following differences:
36 * <ul>
37 * <li>
38 * A new string object does not need to be created in order to
39 * retrieve a unique reference. Symbols can be added by using
40 * a series of characters in a character array.
41 * </li>
42 * <li>
43 * Users of the symbol table can provide their own symbol hashing
44 * implementation. For example, a simple string hashing algorithm
45 * may fail to produce a balanced set of hashcodes for symbols
46 * that are <em>mostly</em> unique. Strings with similar leading
47 * characters are especially prone to this poor hashing behavior.
48 * </li>
49 * </ul>
50 *
51 * @author Andy Clark
52 * @version $Id: SymbolTable.java 466606 2006-10-21 23:07:12Z markt $
53 */
54 public class SymbolTable {
55
56
57
58
59
60 /***
61 * Default table size.
62 */
63 protected static final int TABLE_SIZE = 101;
64
65
66
67
68
69 /***
70 * Buckets.
71 */
72 protected Entry[] fBuckets = null;
73
74
75 protected int fTableSize;
76
77
78
79
80
81 /***
82 * Constructs a symbol table with a default number of buckets.
83 */
84 public SymbolTable() {
85 this(TABLE_SIZE);
86 }
87
88 /***
89 * Constructs a symbol table with a specified number of buckets.
90 */
91 public SymbolTable(int tableSize) {
92 fTableSize = tableSize;
93 fBuckets = new Entry[fTableSize];
94 }
95
96
97
98
99
100 /***
101 * Adds the specified symbol to the symbol table and returns a
102 * reference to the unique symbol. If the symbol already exists,
103 * the previous symbol reference is returned instead, in order
104 * guarantee that symbol references remain unique.
105 *
106 * @param symbol The new symbol.
107 */
108 public String addSymbol(String symbol) {
109
110
111 int bucket = hash(symbol) % fTableSize;
112 int length = symbol.length();
113 OUTER:
114 for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
115 if (length == entry.characters.length) {
116 for (int i = 0; i < length; i++) {
117 if (symbol.charAt(i) != entry.characters[i]) {
118 continue OUTER;
119 }
120 }
121 return entry.symbol;
122 }
123 }
124
125
126 Entry entry = new Entry(symbol, fBuckets[bucket]);
127 fBuckets[bucket] = entry;
128 return entry.symbol;
129
130 }
131
132 /***
133 * Adds the specified symbol to the symbol table and returns a
134 * reference to the unique symbol. If the symbol already exists,
135 * the previous symbol reference is returned instead, in order
136 * guarantee that symbol references remain unique.
137 *
138 * @param buffer The buffer containing the new symbol.
139 * @param offset The offset into the buffer of the new symbol.
140 * @param length The length of the new symbol in the buffer.
141 */
142 public String addSymbol(char[] buffer, int offset, int length) {
143
144
145 int bucket = hash(buffer, offset, length) % fTableSize;
146 OUTER:
147 for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
148 if (length == entry.characters.length) {
149 for (int i = 0; i < length; i++) {
150 if (buffer[offset + i] != entry.characters[i]) {
151 continue OUTER;
152 }
153 }
154 return entry.symbol;
155 }
156 }
157
158
159 Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
160 fBuckets[bucket] = entry;
161 return entry.symbol;
162
163 }
164
165 /***
166 * Returns a hashcode value for the specified symbol. The value
167 * returned by this method must be identical to the value returned
168 * by the <code>hash(char[],int,int)</code> method when called
169 * with the character array that comprises the symbol string.
170 *
171 * @param symbol The symbol to hash.
172 */
173 public int hash(String symbol) {
174
175 int code = 0;
176 int length = symbol.length();
177 for (int i = 0; i < length; i++) {
178 code = code * 37 + symbol.charAt(i);
179 }
180 return code & 0x7FFFFFF;
181
182 }
183
184 /***
185 * Returns a hashcode value for the specified symbol information.
186 * The value returned by this method must be identical to the value
187 * returned by the <code>hash(String)</code> method when called
188 * with the string object created from the symbol information.
189 *
190 * @param buffer The character buffer containing the symbol.
191 * @param offset The offset into the character buffer of the start
192 * of the symbol.
193 * @param length The length of the symbol.
194 */
195 public int hash(char[] buffer, int offset, int length) {
196
197 int code = 0;
198 for (int i = 0; i < length; i++) {
199 code = code * 37 + buffer[offset + i];
200 }
201 return code & 0x7FFFFFF;
202
203 }
204
205 /***
206 * Returns true if the symbol table already contains the specified
207 * symbol.
208 *
209 * @param symbol The symbol to look for.
210 */
211 public boolean containsSymbol(String symbol) {
212
213
214 int bucket = hash(symbol) % fTableSize;
215 int length = symbol.length();
216 OUTER:
217 for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
218 if (length == entry.characters.length) {
219 for (int i = 0; i < length; i++) {
220 if (symbol.charAt(i) != entry.characters[i]) {
221 continue OUTER;
222 }
223 }
224 return true;
225 }
226 }
227
228 return false;
229
230 }
231
232 /***
233 * Returns true if the symbol table already contains the specified
234 * symbol.
235 *
236 * @param buffer The buffer containing the symbol to look for.
237 * @param offset The offset into the buffer.
238 * @param length The length of the symbol in the buffer.
239 */
240 public boolean containsSymbol(char[] buffer, int offset, int length) {
241
242
243 int bucket = hash(buffer, offset, length) % fTableSize;
244 OUTER:
245 for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
246 if (length == entry.characters.length) {
247 for (int i = 0; i < length; i++) {
248 if (buffer[offset + i] != entry.characters[i]) {
249 continue OUTER;
250 }
251 }
252 return true;
253 }
254 }
255
256 return false;
257
258 }
259
260
261
262
263
264 /***
265 * This class is a symbol table entry. Each entry acts as a node
266 * in a linked list.
267 */
268 protected static final class Entry {
269
270
271
272
273
274 /***
275 * Symbol.
276 */
277 public String symbol;
278
279 /***
280 * Symbol characters. This information is duplicated here for
281 * comparison performance.
282 */
283 public char[] characters;
284
285 /***
286 * The next entry.
287 */
288 public Entry next;
289
290
291
292
293
294 /***
295 * Constructs a new entry from the specified symbol and next entry
296 * reference.
297 */
298 public Entry(String symbol, Entry next) {
299 this.symbol = symbol.intern();
300 characters = new char[symbol.length()];
301 symbol.getChars(0, characters.length, characters, 0);
302 this.next = next;
303 }
304
305 /***
306 * Constructs a new entry from the specified symbol information and
307 * next entry reference.
308 */
309 public Entry(char[] ch, int offset, int length, Entry next) {
310 characters = new char[length];
311 System.arraycopy(ch, offset, characters, 0, length);
312 symbol = new String(characters).intern();
313 this.next = next;
314 }
315
316 }
317
318 }