View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   * ====================================================================
17   *
18   * This software consists of voluntary contributions made by many
19   * individuals on behalf of the Apache Software Foundation and was
20   * originally based on software copyright (c) 1999, International
21   * Business Machines, Inc., http://www.apache.org.  For more
22   * information on the Apache Software Foundation, please see
23   * <http://www.apache.org/>.
24   */
25  
26  package org.apache.struts2.jasper.xmlparser;
27  
28  /***
29   * This class is a symbol table implementation that guarantees that
30   * strings used as identifiers are unique references. Multiple calls
31   * to <code>addSymbol</code> will always return the same string
32   * reference.
33   * <p/>
34   * The symbol table performs the same task as <code>String.intern()</code>
35   * with the following differences:
36   * <ul>
37   * <li>
38   * A new string object does not need to be created in order to
39   * retrieve a unique reference. Symbols can be added by using
40   * a series of characters in a character array.
41   * </li>
42   * <li>
43   * Users of the symbol table can provide their own symbol hashing
44   * implementation. For example, a simple string hashing algorithm
45   * may fail to produce a balanced set of hashcodes for symbols
46   * that are <em>mostly</em> unique. Strings with similar leading
47   * characters are especially prone to this poor hashing behavior.
48   * </li>
49   * </ul>
50   *
51   * @author Andy Clark
52   * @version $Id: SymbolTable.java 466606 2006-10-21 23:07:12Z markt $
53   */
54  public class SymbolTable {
55  
56      //
57      // Constants
58      //
59  
60      /***
61       * Default table size.
62       */
63      protected static final int TABLE_SIZE = 101;
64  
65      //
66      // Data
67      //
68  
69      /***
70       * Buckets.
71       */
72      protected Entry[] fBuckets = null;
73  
74      // actual table size
75      protected int fTableSize;
76  
77      //
78      // Constructors
79      //
80  
81      /***
82       * Constructs a symbol table with a default number of buckets.
83       */
84      public SymbolTable() {
85          this(TABLE_SIZE);
86      }
87  
88      /***
89       * Constructs a symbol table with a specified number of buckets.
90       */
91      public SymbolTable(int tableSize) {
92          fTableSize = tableSize;
93          fBuckets = new Entry[fTableSize];
94      }
95  
96      //
97      // Public methods
98      //
99  
100     /***
101      * Adds the specified symbol to the symbol table and returns a
102      * reference to the unique symbol. If the symbol already exists,
103      * the previous symbol reference is returned instead, in order
104      * guarantee that symbol references remain unique.
105      *
106      * @param symbol The new symbol.
107      */
108     public String addSymbol(String symbol) {
109 
110         // search for identical symbol
111         int bucket = hash(symbol) % fTableSize;
112         int length = symbol.length();
113         OUTER:
114         for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
115             if (length == entry.characters.length) {
116                 for (int i = 0; i < length; i++) {
117                     if (symbol.charAt(i) != entry.characters[i]) {
118                         continue OUTER;
119                     }
120                 }
121                 return entry.symbol;
122             }
123         }
124 
125         // create new entry
126         Entry entry = new Entry(symbol, fBuckets[bucket]);
127         fBuckets[bucket] = entry;
128         return entry.symbol;
129 
130     } // addSymbol(String):String
131 
132     /***
133      * Adds the specified symbol to the symbol table and returns a
134      * reference to the unique symbol. If the symbol already exists,
135      * the previous symbol reference is returned instead, in order
136      * guarantee that symbol references remain unique.
137      *
138      * @param buffer The buffer containing the new symbol.
139      * @param offset The offset into the buffer of the new symbol.
140      * @param length The length of the new symbol in the buffer.
141      */
142     public String addSymbol(char[] buffer, int offset, int length) {
143 
144         // search for identical symbol
145         int bucket = hash(buffer, offset, length) % fTableSize;
146         OUTER:
147         for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
148             if (length == entry.characters.length) {
149                 for (int i = 0; i < length; i++) {
150                     if (buffer[offset + i] != entry.characters[i]) {
151                         continue OUTER;
152                     }
153                 }
154                 return entry.symbol;
155             }
156         }
157 
158         // add new entry
159         Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
160         fBuckets[bucket] = entry;
161         return entry.symbol;
162 
163     } // addSymbol(char[],int,int):String
164 
165     /***
166      * Returns a hashcode value for the specified symbol. The value
167      * returned by this method must be identical to the value returned
168      * by the <code>hash(char[],int,int)</code> method when called
169      * with the character array that comprises the symbol string.
170      *
171      * @param symbol The symbol to hash.
172      */
173     public int hash(String symbol) {
174 
175         int code = 0;
176         int length = symbol.length();
177         for (int i = 0; i < length; i++) {
178             code = code * 37 + symbol.charAt(i);
179         }
180         return code & 0x7FFFFFF;
181 
182     } // hash(String):int
183 
184     /***
185      * Returns a hashcode value for the specified symbol information.
186      * The value returned by this method must be identical to the value
187      * returned by the <code>hash(String)</code> method when called
188      * with the string object created from the symbol information.
189      *
190      * @param buffer The character buffer containing the symbol.
191      * @param offset The offset into the character buffer of the start
192      *               of the symbol.
193      * @param length The length of the symbol.
194      */
195     public int hash(char[] buffer, int offset, int length) {
196 
197         int code = 0;
198         for (int i = 0; i < length; i++) {
199             code = code * 37 + buffer[offset + i];
200         }
201         return code & 0x7FFFFFF;
202 
203     } // hash(char[],int,int):int
204 
205     /***
206      * Returns true if the symbol table already contains the specified
207      * symbol.
208      *
209      * @param symbol The symbol to look for.
210      */
211     public boolean containsSymbol(String symbol) {
212 
213         // search for identical symbol
214         int bucket = hash(symbol) % fTableSize;
215         int length = symbol.length();
216         OUTER:
217         for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
218             if (length == entry.characters.length) {
219                 for (int i = 0; i < length; i++) {
220                     if (symbol.charAt(i) != entry.characters[i]) {
221                         continue OUTER;
222                     }
223                 }
224                 return true;
225             }
226         }
227 
228         return false;
229 
230     } // containsSymbol(String):boolean
231 
232     /***
233      * Returns true if the symbol table already contains the specified
234      * symbol.
235      *
236      * @param buffer The buffer containing the symbol to look for.
237      * @param offset The offset into the buffer.
238      * @param length The length of the symbol in the buffer.
239      */
240     public boolean containsSymbol(char[] buffer, int offset, int length) {
241 
242         // search for identical symbol
243         int bucket = hash(buffer, offset, length) % fTableSize;
244         OUTER:
245         for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
246             if (length == entry.characters.length) {
247                 for (int i = 0; i < length; i++) {
248                     if (buffer[offset + i] != entry.characters[i]) {
249                         continue OUTER;
250                     }
251                 }
252                 return true;
253             }
254         }
255 
256         return false;
257 
258     } // containsSymbol(char[],int,int):boolean
259 
260     //
261     // Classes
262     //
263 
264     /***
265      * This class is a symbol table entry. Each entry acts as a node
266      * in a linked list.
267      */
268     protected static final class Entry {
269 
270         //
271         // Data
272         //
273 
274         /***
275          * Symbol.
276          */
277         public String symbol;
278 
279         /***
280          * Symbol characters. This information is duplicated here for
281          * comparison performance.
282          */
283         public char[] characters;
284 
285         /***
286          * The next entry.
287          */
288         public Entry next;
289 
290         //
291         // Constructors
292         //
293 
294         /***
295          * Constructs a new entry from the specified symbol and next entry
296          * reference.
297          */
298         public Entry(String symbol, Entry next) {
299             this.symbol = symbol.intern();
300             characters = new char[symbol.length()];
301             symbol.getChars(0, characters.length, characters, 0);
302             this.next = next;
303         }
304 
305         /***
306          * Constructs a new entry from the specified symbol information and
307          * next entry reference.
308          */
309         public Entry(char[] ch, int offset, int length, Entry next) {
310             characters = new char[length];
311             System.arraycopy(ch, offset, characters, 0, length);
312             symbol = new String(characters).intern();
313             this.next = next;
314         }
315 
316     } // class Entry
317 
318 } // class SymbolTable