View Javadoc

1   /*
2    * $HeadURL: https://svn.apache.org/repos/asf/jakarta/commons/proper/httpclient/trunk/src/java/org/apache/commons/httpclient/URI.java $
3    * $Revision: 415961 $
4    * $Date: 2006-06-21 13:05:19 +0200 (Wed, 21 Jun 2006) $
5    *
6    * ====================================================================
7    *
8    *  Copyright 2002-2004 The Apache Software Foundation
9    *
10   *  Licensed under the Apache License, Version 2.0 (the "License");
11   *  you may not use this file except in compliance with the License.
12   *  You may obtain a copy of the License at
13   *
14   *      http://www.apache.org/licenses/LICENSE-2.0
15   *
16   *  Unless required by applicable law or agreed to in writing, software
17   *  distributed under the License is distributed on an "AS IS" BASIS,
18   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19   *  See the License for the specific language governing permissions and
20   *  limitations under the License.
21   * ====================================================================
22   *
23   * This software consists of voluntary contributions made by many
24   * individuals on behalf of the Apache Software Foundation.  For more
25   * information on the Apache Software Foundation, please see
26   * <http://www.apache.org/>.
27   *
28   */
29  
30  package org.apache.commons.httpclient;
31  
32  import java.io.IOException;
33  import java.io.ObjectInputStream;
34  import java.io.ObjectOutputStream;
35  import java.io.Serializable;
36  import java.util.Arrays;
37  import java.util.Locale;
38  import java.util.BitSet;
39  import java.util.Hashtable;
40  
41  import org.apache.commons.codec.DecoderException;
42  import org.apache.commons.codec.net.URLCodec;
43  import org.apache.commons.httpclient.util.EncodingUtil;
44  
45  /***
46   * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
47   * The purpose of this class is to support parsing of a URI reference, its
48   * extension for specific protocols, the character encoding of the protocol
49   * to be transported and the charset of the document.
50   * <p>
51   * A URI is always in an "escaped" form, since escaping or unescaping a
52   * completed URI might change its semantics.  
53   * <p>
54   * Implementers should be careful not to escape or unescape the same string
55   * more than once, since unescaping an already unescaped string might lead to
56   * misinterpreting a percent data character as another escaped character,
57   * or vice versa in the case of escaping an already escaped string.
58   * <p>
59   * In order to avoid these problems, data types are used as follows:
60   * <p><blockquote><pre>
61   *   URI character sequence: char
62   *   octet sequence: byte
63   *   original character sequence: String
64   * </pre></blockquote><p>
65   *
66   * So, a URI is a sequence of characters as an array of a char type, which
67   * is not always represented as a sequence of octets as an array of byte.
68   * <p>
69   * 
70   * URI Syntactic Components
71   * <p><blockquote><pre>
72   * - In general, written as follows:
73   *   Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
74   *   Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
75   *
76   * - Syntax
77   *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
78   *   hier_part     = ( net_path | abs_path ) [ "?" query ]
79   *   net_path      = "//" authority [ abs_path ]
80   *   abs_path      = "/"  path_segments
81   * </pre></blockquote><p>
82   *
83   * The following examples illustrate URI that are in common use.
84   * <pre>
85   * ftp://ftp.is.co.za/rfc/rfc1808.txt
86   *    -- ftp scheme for File Transfer Protocol services
87   * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
88   *    -- gopher scheme for Gopher and Gopher+ Protocol services
89   * http://www.math.uio.no/faq/compression-faq/part1.html
90   *    -- http scheme for Hypertext Transfer Protocol services
91   * mailto:mduerst@ifi.unizh.ch
92   *    -- mailto scheme for electronic mail addresses
93   * news:comp.infosystems.www.servers.unix
94   *    -- news scheme for USENET news groups and articles
95   * telnet://melvyl.ucop.edu/
96   *    -- telnet scheme for interactive services via the TELNET Protocol
97   * </pre>
98   * Please, notice that there are many modifications from URL(RFC 1738) and
99   * relative URL(RFC 1808).
100  * <p>
101  * <b>The expressions for a URI</b>
102  * <p><pre>
103  * For escaped URI forms
104  *  - URI(char[]) // constructor
105  *  - char[] getRawXxx() // method
106  *  - String getEscapedXxx() // method
107  *  - String toString() // method
108  * <p>
109  * For unescaped URI forms
110  *  - URI(String) // constructor
111  *  - String getXXX() // method
112  * </pre><p>
113  *
114  * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
115  * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
116  * @version $Revision: 415961 $ $Date: 2002/03/14 15:14:01 
117  */
118 public class URI implements Cloneable, Comparable, Serializable {
119 
120 
121     // ----------------------------------------------------------- Constructors
122 
123     /*** Create an instance as an internal use */
124     protected URI() {
125     }
126 
127     /***
128      * Construct a URI from a string with the given charset. The input string can 
129      * be either in escaped or unescaped form. 
130      *
131      * @param s URI character sequence
132      * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
133      *                <tt>false</tt> otherwise. 
134      * @param charset the charset string to do escape encoding, if required
135      * 
136      * @throws URIException If the URI cannot be created.
137      * @throws NullPointerException if input string is <code>null</code>
138      * 
139      * @see #getProtocolCharset
140      * 
141      * @since 3.0
142      */
143     public URI(String s, boolean escaped, String charset)
144         throws URIException, NullPointerException {
145         protocolCharset = charset;
146         parseUriReference(s, escaped);
147     }
148 
149     /***
150      * Construct a URI from a string with the given charset. The input string can 
151      * be either in escaped or unescaped form. 
152      *
153      * @param s URI character sequence
154      * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
155      *                <tt>false</tt> otherwise. 
156      * 
157      * @throws URIException If the URI cannot be created.
158      * @throws NullPointerException if input string is <code>null</code>
159      * 
160      * @see #getProtocolCharset
161      * 
162      * @since 3.0
163      */
164     public URI(String s, boolean escaped)
165         throws URIException, NullPointerException {
166         parseUriReference(s, escaped);
167     }
168 
169     /***
170      * Construct a URI as an escaped form of a character array with the given
171      * charset.
172      *
173      * @param escaped the URI character sequence
174      * @param charset the charset string to do escape encoding
175      * @throws URIException If the URI cannot be created.
176      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
177      * @see #getProtocolCharset
178      * 
179      * @deprecated Use #URI(String, boolean, String)
180      */
181     public URI(char[] escaped, String charset) 
182         throws URIException, NullPointerException {
183         protocolCharset = charset;
184         parseUriReference(new String(escaped), true);
185     }
186 
187 
188     /***
189      * Construct a URI as an escaped form of a character array.
190      * An URI can be placed within double-quotes or angle brackets like 
191      * "http://test.com/" and &lt;http://test.com/&gt;
192      * 
193      * @param escaped the URI character sequence
194      * @throws URIException If the URI cannot be created.
195      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
196      * @see #getDefaultProtocolCharset
197      * 
198      * @deprecated Use #URI(String, boolean)
199      */
200     public URI(char[] escaped) 
201         throws URIException, NullPointerException {
202         parseUriReference(new String(escaped), true);
203     }
204 
205 
206     /***
207      * Construct a URI from the given string with the given charset.
208      *
209      * @param original the string to be represented to URI character sequence
210      * It is one of absoluteURI and relativeURI.
211      * @param charset the charset string to do escape encoding
212      * @throws URIException If the URI cannot be created.
213      * @see #getProtocolCharset
214      * 
215      * @deprecated Use #URI(String, boolean, String)
216      */
217     public URI(String original, String charset) throws URIException {
218         protocolCharset = charset;
219         parseUriReference(original, false);
220     }
221 
222 
223     /***
224      * Construct a URI from the given string.
225      * <p><blockquote><pre>
226      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
227      * </pre></blockquote><p>
228      * An URI can be placed within double-quotes or angle brackets like 
229      * "http://test.com/" and &lt;http://test.com/&gt;
230      *
231      * @param original the string to be represented to URI character sequence
232      * It is one of absoluteURI and relativeURI.
233      * @throws URIException If the URI cannot be created.
234      * @see #getDefaultProtocolCharset
235      * 
236      * @deprecated Use #URI(String, boolean)
237      */
238     public URI(String original) throws URIException {
239         parseUriReference(original, false);
240     }
241 
242 
243     /***
244      * Construct a general URI from the given components.
245      * <p><blockquote><pre>
246      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
247      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
248      *   opaque_part   = uric_no_slash *uric
249      * </pre></blockquote><p>
250      * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
251      * &lt;fragment&gt;.
252      *
253      * @param scheme the scheme string
254      * @param schemeSpecificPart scheme_specific_part
255      * @param fragment the fragment string
256      * @throws URIException If the URI cannot be created.
257      * @see #getDefaultProtocolCharset
258      */
259     public URI(String scheme, String schemeSpecificPart, String fragment)
260         throws URIException {
261 
262         // validate and contruct the URI character sequence
263         if (scheme == null) {
264            throw new URIException(URIException.PARSING, "scheme required");
265         }
266         char[] s = scheme.toLowerCase().toCharArray();
267         if (validate(s, URI.scheme)) {
268             _scheme = s; // is_absoluteURI
269         } else {
270             throw new URIException(URIException.PARSING, "incorrect scheme");
271         }
272         _opaque = encode(schemeSpecificPart, allowed_opaque_part,
273                 getProtocolCharset());
274         // Set flag
275         _is_opaque_part = true;
276         _fragment = fragment == null ? null : fragment.toCharArray(); 
277         setURI();
278     }
279 
280 
281     /***
282      * Construct a general URI from the given components.
283      * <p><blockquote><pre>
284      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
285      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
286      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
287      *   hier_part     = ( net_path | abs_path ) [ "?" query ]
288      * </pre></blockquote><p>
289      * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
290      * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
291      * &gt;.
292      *
293      * @param scheme the scheme string
294      * @param authority the authority string
295      * @param path the path string
296      * @param query the query string
297      * @param fragment the fragment string
298      * @throws URIException If the new URI cannot be created.
299      * @see #getDefaultProtocolCharset
300      */
301     public URI(String scheme, String authority, String path, String query,
302                String fragment) throws URIException {
303 
304         // validate and contruct the URI character sequence
305         StringBuffer buff = new StringBuffer();
306         if (scheme != null) {
307             buff.append(scheme);
308             buff.append(':');
309         }
310         if (authority != null) {
311             buff.append("//");
312             buff.append(authority);
313         }
314         if (path != null) {  // accept empty path
315             if ((scheme != null || authority != null)
316                     && !path.startsWith("/")) {
317                 throw new URIException(URIException.PARSING,
318                         "abs_path requested");
319             }
320             buff.append(path);
321         }
322         if (query != null) {
323             buff.append('?');
324             buff.append(query);
325         }
326         if (fragment != null) {
327             buff.append('#');
328             buff.append(fragment);
329         }
330         parseUriReference(buff.toString(), false);
331     }
332 
333 
334     /***
335      * Construct a general URI from the given components.
336      *
337      * @param scheme the scheme string
338      * @param userinfo the userinfo string
339      * @param host the host string
340      * @param port the port number
341      * @throws URIException If the new URI cannot be created.
342      * @see #getDefaultProtocolCharset
343      */
344     public URI(String scheme, String userinfo, String host, int port)
345         throws URIException {
346 
347         this(scheme, userinfo, host, port, null, null, null);
348     }
349 
350 
351     /***
352      * Construct a general URI from the given components.
353      *
354      * @param scheme the scheme string
355      * @param userinfo the userinfo string
356      * @param host the host string
357      * @param port the port number
358      * @param path the path string
359      * @throws URIException If the new URI cannot be created.
360      * @see #getDefaultProtocolCharset
361      */
362     public URI(String scheme, String userinfo, String host, int port,
363             String path) throws URIException {
364 
365         this(scheme, userinfo, host, port, path, null, null);
366     }
367 
368 
369     /***
370      * Construct a general URI from the given components.
371      *
372      * @param scheme the scheme string
373      * @param userinfo the userinfo string
374      * @param host the host string
375      * @param port the port number
376      * @param path the path string
377      * @param query the query string
378      * @throws URIException If the new URI cannot be created.
379      * @see #getDefaultProtocolCharset
380      */
381     public URI(String scheme, String userinfo, String host, int port,
382             String path, String query) throws URIException {
383 
384         this(scheme, userinfo, host, port, path, query, null);
385     }
386 
387 
388     /***
389      * Construct a general URI from the given components.
390      *
391      * @param scheme the scheme string
392      * @param userinfo the userinfo string
393      * @param host the host string
394      * @param port the port number
395      * @param path the path string
396      * @param query the query string
397      * @param fragment the fragment string
398      * @throws URIException If the new URI cannot be created.
399      * @see #getDefaultProtocolCharset
400      */
401     public URI(String scheme, String userinfo, String host, int port,
402             String path, String query, String fragment) throws URIException {
403 
404         this(scheme, (host == null) ? null 
405             : ((userinfo != null) ? userinfo + '@' : "") + host 
406                 + ((port != -1) ? ":" + port : ""), path, query, fragment);
407     }
408 
409 
410     /***
411      * Construct a general URI from the given components.
412      *
413      * @param scheme the scheme string
414      * @param host the host string
415      * @param path the path string
416      * @param fragment the fragment string
417      * @throws URIException If the new URI cannot be created.
418      * @see #getDefaultProtocolCharset
419      */
420     public URI(String scheme, String host, String path, String fragment)
421         throws URIException {
422 
423         this(scheme, host, path, null, fragment);
424     }
425 
426 
427     /***
428      * Construct a general URI with the given relative URI string.
429      *
430      * @param base the base URI
431      * @param relative the relative URI string
432      * @throws URIException If the new URI cannot be created.
433      * 
434      * @deprecated Use #URI(URI, String, boolean)
435      */
436     public URI(URI base, String relative) throws URIException {
437         this(base, new URI(relative));
438     }
439 
440 
441     /***
442      * Construct a general URI with the given relative URI string.
443      *
444      * @param base the base URI
445      * @param relative the relative URI string
446      * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
447      *                <tt>false</tt> otherwise.
448      *  
449      * @throws URIException If the new URI cannot be created.
450      * 
451      * @since 3.0
452      */
453     public URI(URI base, String relative, boolean escaped) throws URIException {
454         this(base, new URI(relative, escaped));
455     }
456 
457 
458     /***
459      * Construct a general URI with the given relative URI.
460      * <p><blockquote><pre>
461      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
462      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
463      * </pre></blockquote><p>
464      * Resolving Relative References to Absolute Form.
465      *
466      * <strong>Examples of Resolving Relative URI References</strong>
467      *
468      * Within an object with a well-defined base URI of
469      * <p><blockquote><pre>
470      *   http://a/b/c/d;p?q
471      * </pre></blockquote><p>
472      * the relative URI would be resolved as follows:
473      *
474      * Normal Examples
475      *
476      * <p><blockquote><pre>
477      *   g:h           =  g:h
478      *   g             =  http://a/b/c/g
479      *   ./g           =  http://a/b/c/g
480      *   g/            =  http://a/b/c/g/
481      *   /g            =  http://a/g
482      *   //g           =  http://g
483      *   ?y            =  http://a/b/c/?y
484      *   g?y           =  http://a/b/c/g?y
485      *   #s            =  (current document)#s
486      *   g#s           =  http://a/b/c/g#s
487      *   g?y#s         =  http://a/b/c/g?y#s
488      *   ;x            =  http://a/b/c/;x
489      *   g;x           =  http://a/b/c/g;x
490      *   g;x?y#s       =  http://a/b/c/g;x?y#s
491      *   .             =  http://a/b/c/
492      *   ./            =  http://a/b/c/
493      *   ..            =  http://a/b/
494      *   ../           =  http://a/b/
495      *   ../g          =  http://a/b/g
496      *   ../..         =  http://a/
497      *   ../../        =  http://a/ 
498      *   ../../g       =  http://a/g
499      * </pre></blockquote><p>
500      *
501      * Some URI schemes do not allow a hierarchical syntax matching the
502      * <hier_part> syntax, and thus cannot use relative references.
503      *
504      * @param base the base URI
505      * @param relative the relative URI
506      * @throws URIException If the new URI cannot be created.
507      */
508     public URI(URI base, URI relative) throws URIException {
509 
510         if (base._scheme == null) {
511             throw new URIException(URIException.PARSING, "base URI required");
512         }
513         if (base._scheme != null) {
514             this._scheme = base._scheme;
515             this._authority = base._authority;
516             this._is_net_path = base._is_net_path; 
517         }
518         if (base._is_opaque_part || relative._is_opaque_part) {
519             this._scheme = base._scheme;
520             this._is_opaque_part = base._is_opaque_part 
521                 || relative._is_opaque_part;
522             this._opaque = relative._opaque;
523             this._fragment = relative._fragment;
524             this.setURI();
525             return;
526         }
527         boolean schemesEqual = Arrays.equals(base._scheme,relative._scheme);
528         if (relative._scheme != null 
529                 && (!schemesEqual  || relative._authority != null)) {
530             this._scheme = relative._scheme;
531             this._is_net_path = relative._is_net_path;
532             this._authority = relative._authority;
533             if (relative._is_server) {
534                 this._is_server = relative._is_server;
535                 this._userinfo = relative._userinfo;
536                 this._host = relative._host;
537                 this._port = relative._port;
538             } else if (relative._is_reg_name) {
539                 this._is_reg_name = relative._is_reg_name;
540             }
541             this._is_abs_path = relative._is_abs_path;
542             this._is_rel_path = relative._is_rel_path;
543             this._path = relative._path;
544         } else if (base._authority != null && relative._scheme == null) {
545             this._is_net_path = base._is_net_path;
546             this._authority = base._authority;
547             if (base._is_server) {
548                 this._is_server = base._is_server;
549                 this._userinfo = base._userinfo;
550                 this._host = base._host;
551                 this._port = base._port;
552             } else if (base._is_reg_name) {
553                 this._is_reg_name = base._is_reg_name;
554             }
555         }
556         if (relative._authority != null) {
557             this._is_net_path = relative._is_net_path;
558             this._authority = relative._authority;
559             if (relative._is_server) {
560                 this._is_server = relative._is_server;
561                 this._userinfo = relative._userinfo;
562                 this._host = relative._host;
563                 this._port = relative._port;
564             } else if (relative._is_reg_name) {
565                 this._is_reg_name = relative._is_reg_name;
566             }
567             this._is_abs_path = relative._is_abs_path;
568             this._is_rel_path = relative._is_rel_path;
569             this._path = relative._path;
570         }
571         // resolve the path and query if necessary
572         if (relative._authority == null 
573             && (relative._scheme == null || schemesEqual)) {
574             if ((relative._path == null || relative._path.length == 0)
575                 && relative._query == null) {
576                 // handle a reference to the current document, see RFC 2396 
577                 // section 5.2 step 2
578                 this._path = base._path;
579                 this._query = base._query;
580             } else {
581                 this._path = resolvePath(base._path, relative._path);
582             }
583         }
584         // base._query removed
585         if (relative._query != null) {
586             this._query = relative._query;
587         }
588         // base._fragment removed
589         if (relative._fragment != null) {
590             this._fragment = relative._fragment;
591         }
592         this.setURI();
593         // reparse the newly built URI, this will ensure that all flags are set correctly.
594         // TODO there must be a better way to do this
595         parseUriReference(new String(_uri), true);
596     }
597 
598     // --------------------------------------------------- Instance Variables
599 
600     /*** Version ID for serialization */
601     static final long serialVersionUID = 604752400577948726L;
602 
603 
604     /***
605      * Cache the hash code for this URI.
606      */
607     protected int hash = 0;
608 
609 
610     /***
611      * This Uniform Resource Identifier (URI).
612      * The URI is always in an "escaped" form, since escaping or unescaping
613      * a completed URI might change its semantics.  
614      */
615     protected char[] _uri = null;
616 
617 
618     /***
619      * The charset of the protocol used by this URI instance.
620      */
621     protected String protocolCharset = null;
622 
623 
624     /***
625      * The default charset of the protocol.  RFC 2277, 2396
626      */
627     protected static String defaultProtocolCharset = "UTF-8";
628 
629 
630     /***
631      * The default charset of the document.  RFC 2277, 2396
632      * The platform's charset is used for the document by default.
633      */
634     protected static String defaultDocumentCharset = null;
635     protected static String defaultDocumentCharsetByLocale = null;
636     protected static String defaultDocumentCharsetByPlatform = null;
637     // Static initializer for defaultDocumentCharset
638     static {
639         Locale locale = Locale.getDefault();
640         // in order to support backward compatiblity
641         if (locale != null) {
642             defaultDocumentCharsetByLocale =
643                 LocaleToCharsetMap.getCharset(locale);
644             // set the default document charset
645             defaultDocumentCharset = defaultDocumentCharsetByLocale;
646         }
647         // in order to support platform encoding
648         try {
649             defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
650         } catch (SecurityException ignore) {
651         }
652         if (defaultDocumentCharset == null) {
653             // set the default document charset
654             defaultDocumentCharset = defaultDocumentCharsetByPlatform;
655         }
656     }
657 
658 
659     /***
660      * The scheme.
661      */
662     protected char[] _scheme = null;
663 
664 
665     /***
666      * The opaque.
667      */
668     protected char[] _opaque = null;
669 
670 
671     /***
672      * The authority.
673      */
674     protected char[] _authority = null;
675 
676 
677     /***
678      * The userinfo.
679      */
680     protected char[] _userinfo = null;
681 
682 
683     /***
684      * The host.
685      */
686     protected char[] _host = null;
687 
688 
689     /***
690      * The port.
691      */
692     protected int _port = -1;
693 
694 
695     /***
696      * The path.
697      */
698     protected char[] _path = null;
699 
700 
701     /***
702      * The query.
703      */
704     protected char[] _query = null;
705 
706 
707     /***
708      * The fragment.
709      */
710     protected char[] _fragment = null;
711 
712 
713     /***
714      * The root path.
715      */
716     protected static char[] rootPath = { '/' };
717 
718     // ---------------------- Generous characters for each component validation
719 
720     /***
721      * The percent "%" character always has the reserved purpose of being the
722      * escape indicator, it must be escaped as "%25" in order to be used as
723      * data within a URI.
724      */
725     protected static final BitSet percent = new BitSet(256);
726     // Static initializer for percent
727     static {
728         percent.set('%');
729     }
730 
731 
732     /***
733      * BitSet for digit.
734      * <p><blockquote><pre>
735      * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
736      *            "8" | "9"
737      * </pre></blockquote><p>
738      */
739     protected static final BitSet digit = new BitSet(256);
740     // Static initializer for digit
741     static {
742         for (int i = '0'; i <= '9'; i++) {
743             digit.set(i);
744         }
745     }
746 
747 
748     /***
749      * BitSet for alpha.
750      * <p><blockquote><pre>
751      * alpha         = lowalpha | upalpha
752      * </pre></blockquote><p>
753      */
754     protected static final BitSet alpha = new BitSet(256);
755     // Static initializer for alpha
756     static {
757         for (int i = 'a'; i <= 'z'; i++) {
758             alpha.set(i);
759         }
760         for (int i = 'A'; i <= 'Z'; i++) {
761             alpha.set(i);
762         }
763     }
764 
765 
766     /***
767      * BitSet for alphanum (join of alpha &amp; digit).
768      * <p><blockquote><pre>
769      *  alphanum      = alpha | digit
770      * </pre></blockquote><p>
771      */
772     protected static final BitSet alphanum = new BitSet(256);
773     // Static initializer for alphanum
774     static {
775         alphanum.or(alpha);
776         alphanum.or(digit);
777     }
778 
779 
780     /***
781      * BitSet for hex.
782      * <p><blockquote><pre>
783      * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
784      *                         "a" | "b" | "c" | "d" | "e" | "f"
785      * </pre></blockquote><p>
786      */
787     protected static final BitSet hex = new BitSet(256);
788     // Static initializer for hex
789     static {
790         hex.or(digit);
791         for (int i = 'a'; i <= 'f'; i++) {
792             hex.set(i);
793         }
794         for (int i = 'A'; i <= 'F'; i++) {
795             hex.set(i);
796         }
797     }
798 
799 
800     /***
801      * BitSet for escaped.
802      * <p><blockquote><pre>
803      * escaped       = "%" hex hex
804      * </pre></blockquote><p>
805      */
806     protected static final BitSet escaped = new BitSet(256);
807     // Static initializer for escaped
808     static {
809         escaped.or(percent);
810         escaped.or(hex);
811     }
812 
813 
814     /***
815      * BitSet for mark.
816      * <p><blockquote><pre>
817      * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
818      *                 "(" | ")"
819      * </pre></blockquote><p>
820      */
821     protected static final BitSet mark = new BitSet(256);
822     // Static initializer for mark
823     static {
824         mark.set('-');
825         mark.set('_');
826         mark.set('.');
827         mark.set('!');
828         mark.set('~');
829         mark.set('*');
830         mark.set('\'');
831         mark.set('(');
832         mark.set(')');
833     }
834 
835 
836     /***
837      * Data characters that are allowed in a URI but do not have a reserved
838      * purpose are called unreserved.
839      * <p><blockquote><pre>
840      * unreserved    = alphanum | mark
841      * </pre></blockquote><p>
842      */
843     protected static final BitSet unreserved = new BitSet(256);
844     // Static initializer for unreserved
845     static {
846         unreserved.or(alphanum);
847         unreserved.or(mark);
848     }
849 
850 
851     /***
852      * BitSet for reserved.
853      * <p><blockquote><pre>
854      * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
855      *                 "$" | ","
856      * </pre></blockquote><p>
857      */
858     protected static final BitSet reserved = new BitSet(256);
859     // Static initializer for reserved
860     static {
861         reserved.set(';');
862         reserved.set('/');
863         reserved.set('?');
864         reserved.set(':');
865         reserved.set('@');
866         reserved.set('&');
867         reserved.set('=');
868         reserved.set('+');
869         reserved.set('$');
870         reserved.set(',');
871     }
872 
873 
874     /***
875      * BitSet for uric.
876      * <p><blockquote><pre>
877      * uric          = reserved | unreserved | escaped
878      * </pre></blockquote><p>
879      */
880     protected static final BitSet uric = new BitSet(256);
881     // Static initializer for uric
882     static {
883         uric.or(reserved);
884         uric.or(unreserved);
885         uric.or(escaped);
886     }
887 
888 
889     /***
890      * BitSet for fragment (alias for uric).
891      * <p><blockquote><pre>
892      * fragment      = *uric
893      * </pre></blockquote><p>
894      */
895     protected static final BitSet fragment = uric;
896 
897 
898     /***
899      * BitSet for query (alias for uric).
900      * <p><blockquote><pre>
901      * query         = *uric
902      * </pre></blockquote><p>
903      */
904     protected static final BitSet query = uric;
905 
906 
907     /***
908      * BitSet for pchar.
909      * <p><blockquote><pre>
910      * pchar         = unreserved | escaped |
911      *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
912      * </pre></blockquote><p>
913      */
914     protected static final BitSet pchar = new BitSet(256);
915     // Static initializer for pchar
916     static {
917         pchar.or(unreserved);
918         pchar.or(escaped);
919         pchar.set(':');
920         pchar.set('@');
921         pchar.set('&');
922         pchar.set('=');
923         pchar.set('+');
924         pchar.set('$');
925         pchar.set(',');
926     }
927 
928 
929     /***
930      * BitSet for param (alias for pchar).
931      * <p><blockquote><pre>
932      * param         = *pchar
933      * </pre></blockquote><p>
934      */
935     protected static final BitSet param = pchar;
936 
937 
938     /***
939      * BitSet for segment.
940      * <p><blockquote><pre>
941      * segment       = *pchar *( ";" param )
942      * </pre></blockquote><p>
943      */
944     protected static final BitSet segment = new BitSet(256);
945     // Static initializer for segment
946     static {
947         segment.or(pchar);
948         segment.set(';');
949         segment.or(param);
950     }
951 
952 
953     /***
954      * BitSet for path segments.
955      * <p><blockquote><pre>
956      * path_segments = segment *( "/" segment )
957      * </pre></blockquote><p>
958      */
959     protected static final BitSet path_segments = new BitSet(256);
960     // Static initializer for path_segments
961     static {
962         path_segments.set('/');
963         path_segments.or(segment);
964     }
965 
966 
967     /***
968      * URI absolute path.
969      * <p><blockquote><pre>
970      * abs_path      = "/"  path_segments
971      * </pre></blockquote><p>
972      */
973     protected static final BitSet abs_path = new BitSet(256);
974     // Static initializer for abs_path
975     static {
976         abs_path.set('/');
977         abs_path.or(path_segments);
978     }
979 
980 
981     /***
982      * URI bitset for encoding typical non-slash characters.
983      * <p><blockquote><pre>
984      * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
985      *                 "&amp;" | "=" | "+" | "$" | ","
986      * </pre></blockquote><p>
987      */
988     protected static final BitSet uric_no_slash = new BitSet(256);
989     // Static initializer for uric_no_slash
990     static {
991         uric_no_slash.or(unreserved);
992         uric_no_slash.or(escaped);
993         uric_no_slash.set(';');
994         uric_no_slash.set('?');
995         uric_no_slash.set(';');
996         uric_no_slash.set('@');
997         uric_no_slash.set('&');
998         uric_no_slash.set('=');
999         uric_no_slash.set('+');
1000         uric_no_slash.set('$');
1001         uric_no_slash.set(',');
1002     }
1003     
1004 
1005     /***
1006      * URI bitset that combines uric_no_slash and uric.
1007      * <p><blockquote><pre>
1008      * opaque_part   = uric_no_slash *uric
1009      * </pre></blockquote><p>
1010      */
1011     protected static final BitSet opaque_part = new BitSet(256);
1012     // Static initializer for opaque_part
1013     static {
1014         // it's generous. because first character must not include a slash
1015         opaque_part.or(uric_no_slash);
1016         opaque_part.or(uric);
1017     }
1018     
1019 
1020     /***
1021      * URI bitset that combines absolute path and opaque part.
1022      * <p><blockquote><pre>
1023      * path          = [ abs_path | opaque_part ]
1024      * </pre></blockquote><p>
1025      */
1026     protected static final BitSet path = new BitSet(256);
1027     // Static initializer for path
1028     static {
1029         path.or(abs_path);
1030         path.or(opaque_part);
1031     }
1032 
1033 
1034     /***
1035      * Port, a logical alias for digit.
1036      */
1037     protected static final BitSet port = digit;
1038 
1039 
1040     /***
1041      * Bitset that combines digit and dot fo IPv$address.
1042      * <p><blockquote><pre>
1043      * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
1044      * </pre></blockquote><p>
1045      */
1046     protected static final BitSet IPv4address = new BitSet(256);
1047     // Static initializer for IPv4address
1048     static {
1049         IPv4address.or(digit);
1050         IPv4address.set('.');
1051     }
1052 
1053 
1054     /***
1055      * RFC 2373.
1056      * <p><blockquote><pre>
1057      * IPv6address = hexpart [ ":" IPv4address ]
1058      * </pre></blockquote><p>
1059      */
1060     protected static final BitSet IPv6address = new BitSet(256);
1061     // Static initializer for IPv6address reference
1062     static {
1063         IPv6address.or(hex); // hexpart
1064         IPv6address.set(':');
1065         IPv6address.or(IPv4address);
1066     }
1067 
1068 
1069     /***
1070      * RFC 2732, 2373.
1071      * <p><blockquote><pre>
1072      * IPv6reference   = "[" IPv6address "]"
1073      * </pre></blockquote><p>
1074      */
1075     protected static final BitSet IPv6reference = new BitSet(256);
1076     // Static initializer for IPv6reference
1077     static {
1078         IPv6reference.set('[');
1079         IPv6reference.or(IPv6address);
1080         IPv6reference.set(']');
1081     }
1082 
1083 
1084     /***
1085      * BitSet for toplabel.
1086      * <p><blockquote><pre>
1087      * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
1088      * </pre></blockquote><p>
1089      */
1090     protected static final BitSet toplabel = new BitSet(256);
1091     // Static initializer for toplabel
1092     static {
1093         toplabel.or(alphanum);
1094         toplabel.set('-');
1095     }
1096 
1097 
1098     /***
1099      * BitSet for domainlabel.
1100      * <p><blockquote><pre>
1101      * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
1102      * </pre></blockquote><p>
1103      */
1104     protected static final BitSet domainlabel = toplabel;
1105 
1106 
1107     /***
1108      * BitSet for hostname.
1109      * <p><blockquote><pre>
1110      * hostname      = *( domainlabel "." ) toplabel [ "." ]
1111      * </pre></blockquote><p>
1112      */
1113     protected static final BitSet hostname = new BitSet(256);
1114     // Static initializer for hostname
1115     static {
1116         hostname.or(toplabel);
1117         // hostname.or(domainlabel);
1118         hostname.set('.');
1119     }
1120 
1121 
1122     /***
1123      * BitSet for host.
1124      * <p><blockquote><pre>
1125      * host          = hostname | IPv4address | IPv6reference
1126      * </pre></blockquote><p>
1127      */
1128     protected static final BitSet host = new BitSet(256);
1129     // Static initializer for host
1130     static {
1131         host.or(hostname);
1132         // host.or(IPv4address);
1133         host.or(IPv6reference); // IPv4address
1134     }
1135 
1136 
1137     /***
1138      * BitSet for hostport.
1139      * <p><blockquote><pre>
1140      * hostport      = host [ ":" port ]
1141      * </pre></blockquote><p>
1142      */
1143     protected static final BitSet hostport = new BitSet(256);
1144     // Static initializer for hostport
1145     static {
1146         hostport.or(host);
1147         hostport.set(':');
1148         hostport.or(port);
1149     }
1150 
1151 
1152     /***
1153      * Bitset for userinfo.
1154      * <p><blockquote><pre>
1155      * userinfo      = *( unreserved | escaped |
1156      *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
1157      * </pre></blockquote><p>
1158      */
1159     protected static final BitSet userinfo = new BitSet(256);
1160     // Static initializer for userinfo
1161     static {
1162         userinfo.or(unreserved);
1163         userinfo.or(escaped);
1164         userinfo.set(';');
1165         userinfo.set(':');
1166         userinfo.set('&');
1167         userinfo.set('=');
1168         userinfo.set('+');
1169         userinfo.set('$');
1170         userinfo.set(',');
1171     }
1172 
1173 
1174     /***
1175      * BitSet for within the userinfo component like user and password.
1176      */
1177     public static final BitSet within_userinfo = new BitSet(256);
1178     // Static initializer for within_userinfo
1179     static {
1180         within_userinfo.or(userinfo);
1181         within_userinfo.clear(';'); // reserved within authority
1182         within_userinfo.clear(':');
1183         within_userinfo.clear('@');
1184         within_userinfo.clear('?');
1185         within_userinfo.clear('/');
1186     }
1187 
1188 
1189     /***
1190      * Bitset for server.
1191      * <p><blockquote><pre>
1192      * server        = [ [ userinfo "@" ] hostport ]
1193      * </pre></blockquote><p>
1194      */
1195     protected static final BitSet server = new BitSet(256);
1196     // Static initializer for server
1197     static {
1198         server.or(userinfo);
1199         server.set('@');
1200         server.or(hostport);
1201     }
1202 
1203 
1204     /***
1205      * BitSet for reg_name.
1206      * <p><blockquote><pre>
1207      * reg_name      = 1*( unreserved | escaped | "$" | "," |
1208      *                     ";" | ":" | "@" | "&amp;" | "=" | "+" )
1209      * </pre></blockquote><p>
1210      */
1211     protected static final BitSet reg_name = new BitSet(256);
1212     // Static initializer for reg_name
1213     static {
1214         reg_name.or(unreserved);
1215         reg_name.or(escaped);
1216         reg_name.set('$');
1217         reg_name.set(',');
1218         reg_name.set(';');
1219         reg_name.set(':');
1220         reg_name.set('@');
1221         reg_name.set('&');
1222         reg_name.set('=');
1223         reg_name.set('+');
1224     }
1225 
1226 
1227     /***
1228      * BitSet for authority.
1229      * <p><blockquote><pre>
1230      * authority     = server | reg_name
1231      * </pre></blockquote><p>
1232      */
1233     protected static final BitSet authority = new BitSet(256);
1234     // Static initializer for authority
1235     static {
1236         authority.or(server);
1237         authority.or(reg_name);
1238     }
1239 
1240 
1241     /***
1242      * BitSet for scheme.
1243      * <p><blockquote><pre>
1244      * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
1245      * </pre></blockquote><p>
1246      */
1247     protected static final BitSet scheme = new BitSet(256);
1248     // Static initializer for scheme
1249     static {
1250         scheme.or(alpha);
1251         scheme.or(digit);
1252         scheme.set('+');
1253         scheme.set('-');
1254         scheme.set('.');
1255     }
1256 
1257 
1258     /***
1259      * BitSet for rel_segment.
1260      * <p><blockquote><pre>
1261      * rel_segment   = 1*( unreserved | escaped |
1262      *                     ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
1263      * </pre></blockquote><p>
1264      */
1265     protected static final BitSet rel_segment = new BitSet(256);
1266     // Static initializer for rel_segment
1267     static {
1268         rel_segment.or(unreserved);
1269         rel_segment.or(escaped);
1270         rel_segment.set(';');
1271         rel_segment.set('@');
1272         rel_segment.set('&');
1273         rel_segment.set('=');
1274         rel_segment.set('+');
1275         rel_segment.set('$');
1276         rel_segment.set(',');
1277     }
1278 
1279 
1280     /***
1281      * BitSet for rel_path.
1282      * <p><blockquote><pre>
1283      * rel_path      = rel_segment [ abs_path ]
1284      * </pre></blockquote><p>
1285      */
1286     protected static final BitSet rel_path = new BitSet(256);
1287     // Static initializer for rel_path
1288     static {
1289         rel_path.or(rel_segment);
1290         rel_path.or(abs_path);
1291     }
1292 
1293 
1294     /***
1295      * BitSet for net_path.
1296      * <p><blockquote><pre>
1297      * net_path      = "//" authority [ abs_path ]
1298      * </pre></blockquote><p>
1299      */
1300     protected static final BitSet net_path = new BitSet(256);
1301     // Static initializer for net_path
1302     static {
1303         net_path.set('/');
1304         net_path.or(authority);
1305         net_path.or(abs_path);
1306     }
1307     
1308 
1309     /***
1310      * BitSet for hier_part.
1311      * <p><blockquote><pre>
1312      * hier_part     = ( net_path | abs_path ) [ "?" query ]
1313      * </pre></blockquote><p>
1314      */
1315     protected static final BitSet hier_part = new BitSet(256);
1316     // Static initializer for hier_part
1317     static {
1318         hier_part.or(net_path);
1319         hier_part.or(abs_path);
1320         // hier_part.set('?'); aleady included
1321         hier_part.or(query);
1322     }
1323 
1324 
1325     /***
1326      * BitSet for relativeURI.
1327      * <p><blockquote><pre>
1328      * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
1329      * </pre></blockquote><p>
1330      */
1331     protected static final BitSet relativeURI = new BitSet(256);
1332     // Static initializer for relativeURI
1333     static {
1334         relativeURI.or(net_path);
1335         relativeURI.or(abs_path);
1336         relativeURI.or(rel_path);
1337         // relativeURI.set('?'); aleady included
1338         relativeURI.or(query);
1339     }
1340 
1341 
1342     /***
1343      * BitSet for absoluteURI.
1344      * <p><blockquote><pre>
1345      * absoluteURI   = scheme ":" ( hier_part | opaque_part )
1346      * </pre></blockquote><p>
1347      */
1348     protected static final BitSet absoluteURI = new BitSet(256);
1349     // Static initializer for absoluteURI
1350     static {
1351         absoluteURI.or(scheme);
1352         absoluteURI.set(':');
1353         absoluteURI.or(hier_part);
1354         absoluteURI.or(opaque_part);
1355     }
1356 
1357 
1358     /***
1359      * BitSet for URI-reference.
1360      * <p><blockquote><pre>
1361      * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1362      * </pre></blockquote><p>
1363      */
1364     protected static final BitSet URI_reference = new BitSet(256);
1365     // Static initializer for URI_reference
1366     static {
1367         URI_reference.or(absoluteURI);
1368         URI_reference.or(relativeURI);
1369         URI_reference.set('#');
1370         URI_reference.or(fragment);
1371     }
1372 
1373     // ---------------------------- Characters disallowed within the URI syntax
1374     // Excluded US-ASCII Characters are like control, space, delims and unwise
1375 
1376     /***
1377      * BitSet for control.
1378      */
1379     public static final BitSet control = new BitSet(256);
1380     // Static initializer for control
1381     static {
1382         for (int i = 0; i <= 0x1F; i++) {
1383             control.set(i);
1384         }
1385         control.set(0x7F);
1386     }
1387 
1388     /***
1389      * BitSet for space.
1390      */
1391     public static final BitSet space = new BitSet(256);
1392     // Static initializer for space
1393     static {
1394         space.set(0x20);
1395     }
1396 
1397 
1398     /***
1399      * BitSet for delims.
1400      */
1401     public static final BitSet delims = new BitSet(256);
1402     // Static initializer for delims
1403     static {
1404         delims.set('<');
1405         delims.set('>');
1406         delims.set('#');
1407         delims.set('%');
1408         delims.set('"');
1409     }
1410 
1411 
1412     /***
1413      * BitSet for unwise.
1414      */
1415     public static final BitSet unwise = new BitSet(256);
1416     // Static initializer for unwise
1417     static {
1418         unwise.set('{');
1419         unwise.set('}');
1420         unwise.set('|');
1421         unwise.set('//');
1422         unwise.set('^');
1423         unwise.set('[');
1424         unwise.set(']');
1425         unwise.set('`');
1426     }
1427 
1428 
1429     /***
1430      * Disallowed rel_path before escaping.
1431      */
1432     public static final BitSet disallowed_rel_path = new BitSet(256);
1433     // Static initializer for disallowed_rel_path
1434     static {
1435         disallowed_rel_path.or(uric);
1436         disallowed_rel_path.andNot(rel_path);
1437     }
1438 
1439 
1440     /***
1441      * Disallowed opaque_part before escaping.
1442      */
1443     public static final BitSet disallowed_opaque_part = new BitSet(256);
1444     // Static initializer for disallowed_opaque_part
1445     static {
1446         disallowed_opaque_part.or(uric);
1447         disallowed_opaque_part.andNot(opaque_part);
1448     }
1449 
1450     // ----------------------- Characters allowed within and for each component
1451 
1452     /***
1453      * Those characters that are allowed for the authority component.
1454      */
1455     public static final BitSet allowed_authority = new BitSet(256);
1456     // Static initializer for allowed_authority
1457     static {
1458         allowed_authority.or(authority);
1459         allowed_authority.clear('%');
1460     }
1461 
1462 
1463     /***
1464      * Those characters that are allowed for the opaque_part.
1465      */
1466     public static final BitSet allowed_opaque_part = new BitSet(256);
1467     // Static initializer for allowed_opaque_part 
1468     static {
1469         allowed_opaque_part.or(opaque_part);
1470         allowed_opaque_part.clear('%');
1471     }
1472 
1473 
1474     /***
1475      * Those characters that are allowed for the reg_name.
1476      */
1477     public static final BitSet allowed_reg_name = new BitSet(256);
1478     // Static initializer for allowed_reg_name 
1479     static {
1480         allowed_reg_name.or(reg_name);
1481         // allowed_reg_name.andNot(percent);
1482         allowed_reg_name.clear('%');
1483     }
1484 
1485 
1486     /***
1487      * Those characters that are allowed for the userinfo component.
1488      */
1489     public static final BitSet allowed_userinfo = new BitSet(256);
1490     // Static initializer for allowed_userinfo
1491     static {
1492         allowed_userinfo.or(userinfo);
1493         // allowed_userinfo.andNot(percent);
1494         allowed_userinfo.clear('%');
1495     }
1496 
1497 
1498     /***
1499      * Those characters that are allowed for within the userinfo component.
1500      */
1501     public static final BitSet allowed_within_userinfo = new BitSet(256);
1502     // Static initializer for allowed_within_userinfo
1503     static {
1504         allowed_within_userinfo.or(within_userinfo);
1505         allowed_within_userinfo.clear('%');
1506     }
1507 
1508 
1509     /***
1510      * Those characters that are allowed for the IPv6reference component.
1511      * The characters '[', ']' in IPv6reference should be excluded.
1512      */
1513     public static final BitSet allowed_IPv6reference = new BitSet(256);
1514     // Static initializer for allowed_IPv6reference
1515     static {
1516         allowed_IPv6reference.or(IPv6reference);
1517         // allowed_IPv6reference.andNot(unwise);
1518         allowed_IPv6reference.clear('[');
1519         allowed_IPv6reference.clear(']');
1520     }
1521 
1522 
1523     /***
1524      * Those characters that are allowed for the host component.
1525      * The characters '[', ']' in IPv6reference should be excluded.
1526      */
1527     public static final BitSet allowed_host = new BitSet(256);
1528     // Static initializer for allowed_host
1529     static {
1530         allowed_host.or(hostname);
1531         allowed_host.or(allowed_IPv6reference);
1532     }
1533 
1534 
1535     /***
1536      * Those characters that are allowed for the authority component.
1537      */
1538     public static final BitSet allowed_within_authority = new BitSet(256);
1539     // Static initializer for allowed_within_authority
1540     static {
1541         allowed_within_authority.or(server);
1542         allowed_within_authority.or(reg_name);
1543         allowed_within_authority.clear(';');
1544         allowed_within_authority.clear(':');
1545         allowed_within_authority.clear('@');
1546         allowed_within_authority.clear('?');
1547         allowed_within_authority.clear('/');
1548     }
1549 
1550 
1551     /***
1552      * Those characters that are allowed for the abs_path.
1553      */
1554     public static final BitSet allowed_abs_path = new BitSet(256);
1555     // Static initializer for allowed_abs_path
1556     static {
1557         allowed_abs_path.or(abs_path);
1558         // allowed_abs_path.set('/');  // aleady included
1559         allowed_abs_path.andNot(percent);
1560         allowed_abs_path.clear('+');
1561     }
1562 
1563 
1564     /***
1565      * Those characters that are allowed for the rel_path.
1566      */
1567     public static final BitSet allowed_rel_path = new BitSet(256);
1568     // Static initializer for allowed_rel_path
1569     static {
1570         allowed_rel_path.or(rel_path);
1571         allowed_rel_path.clear('%');
1572         allowed_rel_path.clear('+');
1573     }
1574 
1575 
1576     /***
1577      * Those characters that are allowed within the path.
1578      */
1579     public static final BitSet allowed_within_path = new BitSet(256);
1580     // Static initializer for allowed_within_path
1581     static {
1582         allowed_within_path.or(abs_path);
1583         allowed_within_path.clear('/');
1584         allowed_within_path.clear(';');
1585         allowed_within_path.clear('=');
1586         allowed_within_path.clear('?');
1587     }
1588 
1589 
1590     /***
1591      * Those characters that are allowed for the query component.
1592      */
1593     public static final BitSet allowed_query = new BitSet(256);
1594     // Static initializer for allowed_query
1595     static {
1596         allowed_query.or(uric);
1597         allowed_query.clear('%');
1598     }
1599 
1600 
1601     /***
1602      * Those characters that are allowed within the query component.
1603      */
1604     public static final BitSet allowed_within_query = new BitSet(256);
1605     // Static initializer for allowed_within_query
1606     static {
1607         allowed_within_query.or(allowed_query);
1608         allowed_within_query.andNot(reserved); // excluded 'reserved'
1609     }
1610 
1611 
1612     /***
1613      * Those characters that are allowed for the fragment component.
1614      */
1615     public static final BitSet allowed_fragment = new BitSet(256);
1616     // Static initializer for allowed_fragment
1617     static {
1618         allowed_fragment.or(uric);
1619         allowed_fragment.clear('%');
1620     }
1621 
1622     // ------------------------------------------- Flags for this URI-reference
1623 
1624     // TODO: Figure out what all these variables are for and provide javadoc
1625 
1626     // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1627     // absoluteURI   = scheme ":" ( hier_part | opaque_part )
1628     protected boolean _is_hier_part;
1629     protected boolean _is_opaque_part;
1630     // relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ] 
1631     // hier_part     = ( net_path | abs_path ) [ "?" query ]
1632     protected boolean _is_net_path;
1633     protected boolean _is_abs_path;
1634     protected boolean _is_rel_path;
1635     // net_path      = "//" authority [ abs_path ] 
1636     // authority     = server | reg_name
1637     protected boolean _is_reg_name;
1638     protected boolean _is_server;  // = _has_server
1639     // server        = [ [ userinfo "@" ] hostport ]
1640     // host          = hostname | IPv4address | IPv6reference
1641     protected boolean _is_hostname;
1642     protected boolean _is_IPv4address;
1643     protected boolean _is_IPv6reference;
1644 
1645     // ------------------------------------------ Character and escape encoding
1646     
1647     /***
1648      * Encodes URI string.
1649      *
1650      * This is a two mapping, one from original characters to octets, and
1651      * subsequently a second from octets to URI characters:
1652      * <p><blockquote><pre>
1653      *   original character sequence->octet sequence->URI character sequence
1654      * </pre></blockquote><p>
1655      *
1656      * An escaped octet is encoded as a character triplet, consisting of the
1657      * percent character "%" followed by the two hexadecimal digits
1658      * representing the octet code. For example, "%20" is the escaped
1659      * encoding for the US-ASCII space character.
1660      * <p>
1661      * Conversion from the local filesystem character set to UTF-8 will
1662      * normally involve a two step process. First convert the local character
1663      * set to the UCS; then convert the UCS to UTF-8.
1664      * The first step in the process can be performed by maintaining a mapping
1665      * table that includes the local character set code and the corresponding
1666      * UCS code.
1667      * The next step is to convert the UCS character code to the UTF-8 encoding.
1668      * <p>
1669      * Mapping between vendor codepages can be done in a very similar manner
1670      * as described above.
1671      * <p>
1672      * The only time escape encodings can allowedly be made is when a URI is
1673      * being created from its component parts.  The escape and validate methods
1674      * are internally performed within this method.
1675      *
1676      * @param original the original character sequence
1677      * @param allowed those characters that are allowed within a component
1678      * @param charset the protocol charset
1679      * @return URI character sequence
1680      * @throws URIException null component or unsupported character encoding
1681      */
1682         
1683     protected static char[] encode(String original, BitSet allowed,
1684             String charset) throws URIException {
1685         if (original == null) {
1686             throw new IllegalArgumentException("Original string may not be null");
1687         }
1688         if (allowed == null) {
1689             throw new IllegalArgumentException("Allowed bitset may not be null");
1690         }
1691         byte[] rawdata = URLCodec.encodeUrl(allowed, EncodingUtil.getBytes(original, charset));
1692         return EncodingUtil.getAsciiString(rawdata).toCharArray();
1693     }
1694 
1695     /***
1696      * Decodes URI encoded string.
1697      *
1698      * This is a two mapping, one from URI characters to octets, and
1699      * subsequently a second from octets to original characters:
1700      * <p><blockquote><pre>
1701      *   URI character sequence->octet sequence->original character sequence
1702      * </pre></blockquote><p>
1703      *
1704      * A URI must be separated into its components before the escaped
1705      * characters within those components can be allowedly decoded.
1706      * <p>
1707      * Notice that there is a chance that URI characters that are non UTF-8
1708      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1709      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1710      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1711      * false reading.
1712      * <p>
1713      * The percent "%" character always has the reserved purpose of being
1714      * the escape indicator, it must be escaped as "%25" in order to be used
1715      * as data within a URI.
1716      * <p>
1717      * The unescape method is internally performed within this method.
1718      *
1719      * @param component the URI character sequence
1720      * @param charset the protocol charset
1721      * @return original character sequence
1722      * @throws URIException incomplete trailing escape pattern or unsupported
1723      * character encoding
1724      */
1725     protected static String decode(char[] component, String charset) 
1726         throws URIException {
1727         if (component == null) {
1728             throw new IllegalArgumentException("Component array of chars may not be null");
1729         }
1730         return decode(new String(component), charset);
1731     }
1732 
1733     /***
1734      * Decodes URI encoded string.
1735      *
1736      * This is a two mapping, one from URI characters to octets, and
1737      * subsequently a second from octets to original characters:
1738      * <p><blockquote><pre>
1739      *   URI character sequence->octet sequence->original character sequence
1740      * </pre></blockquote><p>
1741      *
1742      * A URI must be separated into its components before the escaped
1743      * characters within those components can be allowedly decoded.
1744      * <p>
1745      * Notice that there is a chance that URI characters that are non UTF-8
1746      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1747      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1748      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1749      * false reading.
1750      * <p>
1751      * The percent "%" character always has the reserved purpose of being
1752      * the escape indicator, it must be escaped as "%25" in order to be used
1753      * as data within a URI.
1754      * <p>
1755      * The unescape method is internally performed within this method.
1756      *
1757      * @param component the URI character sequence
1758      * @param charset the protocol charset
1759      * @return original character sequence
1760      * @throws URIException incomplete trailing escape pattern or unsupported
1761      * character encoding
1762      * 
1763      * @since 3.0
1764      */
1765     protected static String decode(String component, String charset) 
1766         throws URIException {
1767         if (component == null) {
1768             throw new IllegalArgumentException("Component array of chars may not be null");
1769         }
1770         byte[] rawdata = null;
1771         try { 
1772             rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(component));
1773         } catch (DecoderException e) {
1774             throw new URIException(e.getMessage());
1775         }
1776         return EncodingUtil.getString(rawdata, charset);
1777     }
1778     /***
1779      * Pre-validate the unescaped URI string within a specific component.
1780      *
1781      * @param component the component string within the component
1782      * @param disallowed those characters disallowed within the component
1783      * @return if true, it doesn't have the disallowed characters
1784      * if false, the component is undefined or an incorrect one
1785      */
1786     protected boolean prevalidate(String component, BitSet disallowed) {
1787         // prevalidate the given component by disallowed characters
1788         if (component == null) {
1789             return false; // undefined
1790         }
1791         char[] target = component.toCharArray();
1792         for (int i = 0; i < target.length; i++) {
1793             if (disallowed.get(target[i])) {
1794                 return false;
1795             }
1796         }
1797         return true;
1798     }
1799 
1800 
1801     /***
1802      * Validate the URI characters within a specific component.
1803      * The component must be performed after escape encoding. Or it doesn't
1804      * include escaped characters.
1805      *
1806      * @param component the characters sequence within the component
1807      * @param generous those characters that are allowed within a component
1808      * @return if true, it's the correct URI character sequence
1809      */
1810     protected boolean validate(char[] component, BitSet generous) {
1811         // validate each component by generous characters
1812         return validate(component, 0, -1, generous);
1813     }
1814 
1815 
1816     /***
1817      * Validate the URI characters within a specific component.
1818      * The component must be performed after escape encoding. Or it doesn't
1819      * include escaped characters.
1820      * <p>
1821      * It's not that much strict, generous.  The strict validation might be 
1822      * performed before being called this method.
1823      *
1824      * @param component the characters sequence within the component
1825      * @param soffset the starting offset of the given component
1826      * @param eoffset the ending offset of the given component
1827      * if -1, it means the length of the component
1828      * @param generous those characters that are allowed within a component
1829      * @return if true, it's the correct URI character sequence
1830      */
1831     protected boolean validate(char[] component, int soffset, int eoffset,
1832             BitSet generous) {
1833         // validate each component by generous characters
1834         if (eoffset == -1) {
1835             eoffset = component.length - 1;
1836         }
1837         for (int i = soffset; i <= eoffset; i++) {
1838             if (!generous.get(component[i])) { 
1839                 return false;
1840             }
1841         }
1842         return true;
1843     }
1844 
1845 
1846     /***
1847      * In order to avoid any possilbity of conflict with non-ASCII characters,
1848      * Parse a URI reference as a <code>String</code> with the character
1849      * encoding of the local system or the document.
1850      * <p>
1851      * The following line is the regular expression for breaking-down a URI
1852      * reference into its components.
1853      * <p><blockquote><pre>
1854      *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1855      *    12            3  4          5       6  7        8 9
1856      * </pre></blockquote><p>
1857      * For example, matching the above expression to
1858      *   http://jakarta.apache.org/ietf/uri/#Related
1859      * results in the following subexpression matches:
1860      * <p><blockquote><pre>
1861      *               $1 = http:
1862      *  scheme    =  $2 = http
1863      *               $3 = //jakarta.apache.org
1864      *  authority =  $4 = jakarta.apache.org
1865      *  path      =  $5 = /ietf/uri/
1866      *               $6 = <undefined>
1867      *  query     =  $7 = <undefined>
1868      *               $8 = #Related
1869      *  fragment  =  $9 = Related
1870      * </pre></blockquote><p>
1871      *
1872      * @param original the original character sequence
1873      * @param escaped <code>true</code> if <code>original</code> is escaped
1874      * @throws URIException If an error occurs.
1875      */
1876     protected void parseUriReference(String original, boolean escaped)
1877         throws URIException {
1878 
1879         // validate and contruct the URI character sequence
1880         if (original == null) {
1881             throw new URIException("URI-Reference required");
1882         }
1883 
1884         /* @
1885          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1886          */
1887         String tmp = original.trim();
1888         
1889         /*
1890          * The length of the string sequence of characters.
1891          * It may not be equal to the length of the byte array.
1892          */
1893         int length = tmp.length();
1894 
1895         /*
1896          * Remove the delimiters like angle brackets around an URI.
1897          */
1898         if (length > 0) {
1899             char[] firstDelimiter = { tmp.charAt(0) };
1900             if (validate(firstDelimiter, delims)) {
1901                 if (length >= 2) {
1902                     char[] lastDelimiter = { tmp.charAt(length - 1) };
1903                     if (validate(lastDelimiter, delims)) {
1904                         tmp = tmp.substring(1, length - 1);
1905                         length = length - 2;
1906                     }
1907                 }
1908             }
1909         }
1910 
1911         /*
1912          * The starting index
1913          */
1914         int from = 0;
1915 
1916         /*
1917          * The test flag whether the URI is started from the path component.
1918          */
1919         boolean isStartedFromPath = false;
1920         int atColon = tmp.indexOf(':');
1921         int atSlash = tmp.indexOf('/');
1922         if ((atColon <= 0 && !tmp.startsWith("//"))
1923             || (atSlash >= 0 && atSlash < atColon)) {
1924             isStartedFromPath = true;
1925         }
1926 
1927         /*
1928          * <p><blockquote><pre>
1929          *     @@@@@@@@
1930          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1931          * </pre></blockquote><p>
1932          */
1933         int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1934         if (at == -1) { 
1935             at = 0;
1936         }
1937 
1938         /*
1939          * Parse the scheme.
1940          * <p><blockquote><pre>
1941          *  scheme    =  $2 = http
1942          *              @
1943          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1944          * </pre></blockquote><p>
1945          */
1946         if (at > 0 && at < length && tmp.charAt(at) == ':') {
1947             char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1948             if (validate(target, scheme)) {
1949                 _scheme = target;
1950             } else {
1951                 throw new URIException("incorrect scheme");
1952             }
1953             from = ++at;
1954         }
1955 
1956         /*
1957          * Parse the authority component.
1958          * <p><blockquote><pre>
1959          *  authority =  $4 = jakarta.apache.org
1960          *                  @@
1961          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1962          * </pre></blockquote><p>
1963          */
1964         // Reset flags
1965         _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1966         if (0 <= at && at < length && tmp.charAt(at) == '/') {
1967             // Set flag
1968             _is_hier_part = true;
1969             if (at + 2 < length && tmp.charAt(at + 1) == '/' 
1970                 && !isStartedFromPath) {
1971                 // the temporary index to start the search from
1972                 int next = indexFirstOf(tmp, "/?#", at + 2);
1973                 if (next == -1) {
1974                     next = (tmp.substring(at + 2).length() == 0) ? at + 2 
1975                         : tmp.length();
1976                 }
1977                 parseAuthority(tmp.substring(at + 2, next), escaped);
1978                 from = at = next;
1979                 // Set flag
1980                 _is_net_path = true;
1981             }
1982             if (from == at) {
1983                 // Set flag
1984                 _is_abs_path = true;
1985             }
1986         }
1987 
1988         /*
1989          * Parse the path component.
1990          * <p><blockquote><pre>
1991          *  path      =  $5 = /ietf/uri/
1992          *                                @@@@@@
1993          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1994          * </pre></blockquote><p>
1995          */
1996         if (from < length) {
1997             // rel_path = rel_segment [ abs_path ]
1998             int next = indexFirstOf(tmp, "?#", from);
1999             if (next == -1) {
2000                 next = tmp.length();
2001             }
2002             if (!_is_abs_path) {
2003                 if (!escaped 
2004                     && prevalidate(tmp.substring(from, next), disallowed_rel_path) 
2005                     || escaped 
2006                     && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
2007                     // Set flag
2008                     _is_rel_path = true;
2009                 } else if (!escaped 
2010                     && prevalidate(tmp.substring(from, next), disallowed_opaque_part) 
2011                     || escaped 
2012                     && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
2013                     // Set flag
2014                     _is_opaque_part = true;
2015                 } else {
2016                     // the path component may be empty
2017                     _path = null;
2018                 }
2019             }
2020             String s = tmp.substring(from, next);
2021             if (escaped) {
2022                 setRawPath(s.toCharArray());
2023             } else {
2024                 setPath(s);
2025             }
2026             at = next;
2027         }
2028 
2029         // set the charset to do escape encoding
2030         String charset = getProtocolCharset();
2031 
2032         /*
2033          * Parse the query component.
2034          * <p><blockquote><pre>
2035          *  query     =  $7 = <undefined>
2036          *                                        @@@@@@@@@
2037          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2038          * </pre></blockquote><p>
2039          */
2040         if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
2041             int next = tmp.indexOf('#', at + 1);
2042             if (next == -1) {
2043                 next = tmp.length();
2044             }
2045             if (escaped) {
2046                 _query = tmp.substring(at + 1, next).toCharArray();
2047                 if (!validate(_query, uric)) {
2048                     throw new URIException("Invalid query");
2049                 }
2050             } else {
2051                 _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
2052             }
2053             at = next;
2054         }
2055 
2056         /*
2057          * Parse the fragment component.
2058          * <p><blockquote><pre>
2059          *  fragment  =  $9 = Related
2060          *                                                   @@@@@@@@
2061          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2062          * </pre></blockquote><p>
2063          */
2064         if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
2065             if (at + 1 == length) { // empty fragment
2066                 _fragment = "".toCharArray();
2067             } else {
2068                 _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() 
2069                     : encode(tmp.substring(at + 1), allowed_fragment, charset);
2070             }
2071         }
2072 
2073         // set this URI.
2074         setURI();
2075     }
2076 
2077 
2078     /***
2079      * Get the earlier index that to be searched for the first occurrance in
2080      * one of any of the given string.
2081      *
2082      * @param s the string to be indexed
2083      * @param delims the delimiters used to index
2084      * @return the earlier index if there are delimiters
2085      */
2086     protected int indexFirstOf(String s, String delims) {
2087         return indexFirstOf(s, delims, -1);
2088     }
2089 
2090 
2091     /***
2092      * Get the earlier index that to be searched for the first occurrance in
2093      * one of any of the given string.
2094      *
2095      * @param s the string to be indexed
2096      * @param delims the delimiters used to index
2097      * @param offset the from index
2098      * @return the earlier index if there are delimiters
2099      */
2100     protected int indexFirstOf(String s, String delims, int offset) {
2101         if (s == null || s.length() == 0) {
2102             return -1;
2103         }
2104         if (delims == null || delims.length() == 0) {
2105             return -1;
2106         }
2107         // check boundaries
2108         if (offset < 0) {
2109             offset = 0;
2110         } else if (offset > s.length()) {
2111             return -1;
2112         }
2113         // s is never null
2114         int min = s.length();
2115         char[] delim = delims.toCharArray();
2116         for (int i = 0; i < delim.length; i++) {
2117             int at = s.indexOf(delim[i], offset);
2118             if (at >= 0 && at < min) {
2119                 min = at;
2120             }
2121         }
2122         return (min == s.length()) ? -1 : min;
2123     }
2124 
2125 
2126     /***
2127      * Get the earlier index that to be searched for the first occurrance in
2128      * one of any of the given array.
2129      *
2130      * @param s the character array to be indexed
2131      * @param delim the delimiter used to index
2132      * @return the ealier index if there are a delimiter
2133      */
2134     protected int indexFirstOf(char[] s, char delim) {
2135         return indexFirstOf(s, delim, 0);
2136     }
2137 
2138 
2139     /***
2140      * Get the earlier index that to be searched for the first occurrance in
2141      * one of any of the given array.
2142      *
2143      * @param s the character array to be indexed
2144      * @param delim the delimiter used to index
2145      * @param offset The offset.
2146      * @return the ealier index if there is a delimiter
2147      */
2148     protected int indexFirstOf(char[] s, char delim, int offset) {
2149         if (s == null || s.length == 0) {
2150             return -1;
2151         }
2152         // check boundaries
2153         if (offset < 0) {
2154             offset = 0;
2155         } else if (offset > s.length) {
2156             return -1;
2157         }
2158         for (int i = offset; i < s.length; i++) {
2159             if (s[i] == delim) {
2160                 return i;
2161             }
2162         }
2163         return -1;
2164     }
2165 
2166 
2167     /***
2168      * Parse the authority component.
2169      *
2170      * @param original the original character sequence of authority component
2171      * @param escaped <code>true</code> if <code>original</code> is escaped
2172      * @throws URIException If an error occurs.
2173      */
2174     protected void parseAuthority(String original, boolean escaped)
2175         throws URIException {
2176 
2177         // Reset flags
2178         _is_reg_name = _is_server =
2179         _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2180 
2181         // set the charset to do escape encoding
2182         String charset = getProtocolCharset();
2183 
2184         boolean hasPort = true;
2185         int from = 0;
2186         int next = original.indexOf('@');
2187         if (next != -1) { // neither -1 and 0
2188             // each protocol extented from URI supports the specific userinfo
2189             _userinfo = (escaped) ? original.substring(0, next).toCharArray() 
2190                 : encode(original.substring(0, next), allowed_userinfo,
2191                         charset);
2192             from = next + 1;
2193         }
2194         next = original.indexOf('[', from);
2195         if (next >= from) {
2196             next = original.indexOf(']', from);
2197             if (next == -1) {
2198                 throw new URIException(URIException.PARSING, "IPv6reference");
2199             } else {
2200                 next++;
2201             }
2202             // In IPv6reference, '[', ']' should be excluded
2203             _host = (escaped) ? original.substring(from, next).toCharArray() 
2204                 : encode(original.substring(from, next), allowed_IPv6reference,
2205                         charset);
2206             // Set flag
2207             _is_IPv6reference = true;
2208         } else { // only for !_is_IPv6reference
2209             next = original.indexOf(':', from);
2210             if (next == -1) {
2211                 next = original.length();
2212                 hasPort = false;
2213             }
2214             // REMINDME: it doesn't need the pre-validation
2215             _host = original.substring(from, next).toCharArray();
2216             if (validate(_host, IPv4address)) {
2217                 // Set flag
2218                 _is_IPv4address = true;
2219             } else if (validate(_host, hostname)) {
2220                 // Set flag
2221                 _is_hostname = true;
2222             } else {
2223                 // Set flag
2224                 _is_reg_name = true;
2225             }
2226         }
2227         if (_is_reg_name) {
2228             // Reset flags for a server-based naming authority
2229             _is_server = _is_hostname = _is_IPv4address =
2230             _is_IPv6reference = false;
2231             // set a registry-based naming authority
2232             if (escaped) {
2233                 _authority = original.toString().toCharArray();
2234                 if (!validate(_authority, reg_name)) {
2235                     throw new URIException("Invalid authority");
2236                 }
2237             } else {
2238                 _authority = encode(original.toString(), allowed_reg_name, charset);
2239             }
2240         } else {
2241             if (original.length() - 1 > next && hasPort 
2242                 && original.charAt(next) == ':') { // not empty
2243                 from = next + 1;
2244                 try {
2245                     _port = Integer.parseInt(original.substring(from));
2246                 } catch (NumberFormatException error) {
2247                     throw new URIException(URIException.PARSING,
2248                             "invalid port number");
2249                 }
2250             }
2251             // set a server-based naming authority
2252             StringBuffer buf = new StringBuffer();
2253             if (_userinfo != null) { // has_userinfo
2254                 buf.append(_userinfo);
2255                 buf.append('@');
2256             }
2257             if (_host != null) {
2258                 buf.append(_host);
2259                 if (_port != -1) {
2260                     buf.append(':');
2261                     buf.append(_port);
2262                 }
2263             }
2264             _authority = buf.toString().toCharArray();
2265             // Set flag
2266             _is_server = true;
2267         }
2268     }
2269 
2270 
2271     /***
2272      * Once it's parsed successfully, set this URI.
2273      *
2274      * @see #getRawURI
2275      */
2276     protected void setURI() {
2277         // set _uri
2278         StringBuffer buf = new StringBuffer();
2279         // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2280         if (_scheme != null) {
2281             buf.append(_scheme);
2282             buf.append(':');
2283         }
2284         if (_is_net_path) {
2285             buf.append("//");
2286             if (_authority != null) { // has_authority
2287                 buf.append(_authority);
2288             }
2289         }
2290         if (_opaque != null && _is_opaque_part) {
2291             buf.append(_opaque);
2292         } else if (_path != null) {
2293             // _is_hier_part or _is_relativeURI
2294             if (_path.length != 0) {
2295                 buf.append(_path);
2296             }
2297         }
2298         if (_query != null) { // has_query
2299             buf.append('?');
2300             buf.append(_query);
2301         }
2302         // ignore the fragment identifier
2303         _uri = buf.toString().toCharArray();
2304         hash = 0;
2305     }
2306 
2307     // ----------------------------------------------------------- Test methods
2308   
2309 
2310     /***
2311      * Tell whether or not this URI is absolute.
2312      *
2313      * @return true iif this URI is absoluteURI
2314      */
2315     public boolean isAbsoluteURI() {
2316         return (_scheme != null);
2317     }
2318   
2319 
2320     /***
2321      * Tell whether or not this URI is relative.
2322      *
2323      * @return true iif this URI is relativeURI
2324      */
2325     public boolean isRelativeURI() {
2326         return (_scheme == null);
2327     }
2328 
2329 
2330     /***
2331      * Tell whether or not the absoluteURI of this URI is hier_part.
2332      *
2333      * @return true iif the absoluteURI is hier_part
2334      */
2335     public boolean isHierPart() {
2336         return _is_hier_part;
2337     }
2338 
2339 
2340     /***
2341      * Tell whether or not the absoluteURI of this URI is opaque_part.
2342      *
2343      * @return true iif the absoluteURI is opaque_part
2344      */
2345     public boolean isOpaquePart() {
2346         return _is_opaque_part;
2347     }
2348 
2349 
2350     /***
2351      * Tell whether or not the relativeURI or heir_part of this URI is net_path.
2352      * It's the same function as the has_authority() method.
2353      *
2354      * @return true iif the relativeURI or heir_part is net_path
2355      * @see #hasAuthority
2356      */
2357     public boolean isNetPath() {
2358         return _is_net_path || (_authority != null);
2359     }
2360 
2361 
2362     /***
2363      * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2364      *
2365      * @return true iif the relativeURI or hier_part is abs_path
2366      */
2367     public boolean isAbsPath() {
2368         return _is_abs_path;
2369     }
2370 
2371 
2372     /***
2373      * Tell whether or not the relativeURI of this URI is rel_path.
2374      *
2375      * @return true iif the relativeURI is rel_path
2376      */
2377     public boolean isRelPath() {
2378         return _is_rel_path;
2379     }
2380 
2381 
2382     /***
2383      * Tell whether or not this URI has authority.
2384      * It's the same function as the is_net_path() method.
2385      *
2386      * @return true iif this URI has authority
2387      * @see #isNetPath
2388      */
2389     public boolean hasAuthority() {
2390         return (_authority != null) || _is_net_path;
2391     }
2392 
2393     /***
2394      * Tell whether or not the authority component of this URI is reg_name.
2395      *
2396      * @return true iif the authority component is reg_name
2397      */
2398     public boolean isRegName() {
2399         return _is_reg_name;
2400     }
2401   
2402 
2403     /***
2404      * Tell whether or not the authority component of this URI is server.
2405      *
2406      * @return true iif the authority component is server
2407      */
2408     public boolean isServer() {
2409         return _is_server;
2410     }
2411   
2412 
2413     /***
2414      * Tell whether or not this URI has userinfo.
2415      *
2416      * @return true iif this URI has userinfo
2417      */
2418     public boolean hasUserinfo() {
2419         return (_userinfo != null);
2420     }
2421   
2422 
2423     /***
2424      * Tell whether or not the host part of this URI is hostname.
2425      *
2426      * @return true iif the host part is hostname
2427      */
2428     public boolean isHostname() {
2429         return _is_hostname;
2430     }
2431 
2432 
2433     /***
2434      * Tell whether or not the host part of this URI is IPv4address.
2435      *
2436      * @return true iif the host part is IPv4address
2437      */
2438     public boolean isIPv4address() {
2439         return _is_IPv4address;
2440     }
2441 
2442 
2443     /***
2444      * Tell whether or not the host part of this URI is IPv6reference.
2445      *
2446      * @return true iif the host part is IPv6reference
2447      */
2448     public boolean isIPv6reference() {
2449         return _is_IPv6reference;
2450     }
2451 
2452 
2453     /***
2454      * Tell whether or not this URI has query.
2455      *
2456      * @return true iif this URI has query
2457      */
2458     public boolean hasQuery() {
2459         return (_query != null);
2460     }
2461    
2462 
2463     /***
2464      * Tell whether or not this URI has fragment.
2465      *
2466      * @return true iif this URI has fragment
2467      */
2468     public boolean hasFragment() {
2469         return (_fragment != null);
2470     }
2471    
2472    
2473     // ---------------------------------------------------------------- Charset
2474 
2475 
2476     /***
2477      * Set the default charset of the protocol.
2478      * <p>
2479      * The character set used to store files SHALL remain a local decision and
2480      * MAY depend on the capability of local operating systems. Prior to the
2481      * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
2482      * and UTF-8 encoded. This approach, while allowing international exchange
2483      * of URIs, will still allow backward compatibility with older systems
2484      * because the code set positions for ASCII characters are identical to the
2485      * one byte sequence in UTF-8.
2486      * <p>
2487      * An individual URI scheme may require a single charset, define a default
2488      * charset, or provide a way to indicate the charset used.
2489      *
2490      * <p>
2491      * Always all the time, the setter method is always succeeded and throws
2492      * <code>DefaultCharsetChanged</code> exception.
2493      *
2494      * So API programmer must follow the following way:
2495      * <code><pre>
2496      *  import org.apache.util.URI$DefaultCharsetChanged;
2497      *      .
2498      *      .
2499      *      .
2500      *  try {
2501      *      URI.setDefaultProtocolCharset("UTF-8");
2502      *  } catch (DefaultCharsetChanged cc) {
2503      *      // CASE 1: the exception could be ignored, when it is set by user
2504      *      if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
2505      *      // CASE 2: let user know the default protocol charset changed
2506      *      } else {
2507      *      // CASE 2: let user know the default document charset changed
2508      *      }
2509      *  }
2510      *  </pre></code>
2511      *
2512      * The API programmer is responsible to set the correct charset.
2513      * And each application should remember its own charset to support.
2514      *
2515      * @param charset the default charset for each protocol
2516      * @throws DefaultCharsetChanged default charset changed
2517      */
2518     public static void setDefaultProtocolCharset(String charset) 
2519         throws DefaultCharsetChanged {
2520             
2521         defaultProtocolCharset = charset;
2522         throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
2523                 "the default protocol charset changed");
2524     }
2525 
2526 
2527     /***
2528      * Get the default charset of the protocol.
2529      * <p>
2530      * An individual URI scheme may require a single charset, define a default
2531      * charset, or provide a way to indicate the charset used.
2532      * <p>
2533      * To work globally either requires support of a number of character sets
2534      * and to be able to convert between them, or the use of a single preferred
2535      * character set.
2536      * For support of global compatibility it is STRONGLY RECOMMENDED that
2537      * clients and servers use UTF-8 encoding when exchanging URIs.
2538      *
2539      * @return the default charset string
2540      */
2541     public static String getDefaultProtocolCharset() {
2542         return defaultProtocolCharset;
2543     }
2544 
2545 
2546     /***
2547      * Get the protocol charset used by this current URI instance.
2548      * It was set by the constructor for this instance. If it was not set by
2549      * contructor, it will return the default protocol charset.
2550      *
2551      * @return the protocol charset string
2552      * @see #getDefaultProtocolCharset
2553      */
2554     public String getProtocolCharset() {
2555         return (protocolCharset != null) 
2556             ? protocolCharset 
2557             : defaultProtocolCharset;
2558     }
2559 
2560 
2561     /***
2562      * Set the default charset of the document.
2563      * <p>
2564      * Notice that it will be possible to contain mixed characters (e.g.
2565      * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2566      * display of these character sets, the protocol charset could be simply
2567      * used again. Because it's not yet implemented that the insertion of BIDI
2568      * control characters at different points during composition is extracted.
2569      * <p>
2570      *
2571      * Always all the time, the setter method is always succeeded and throws
2572      * <code>DefaultCharsetChanged</code> exception.
2573      *
2574      * So API programmer must follow the following way:
2575      * <code><pre>
2576      *  import org.apache.util.URI$DefaultCharsetChanged;
2577      *      .
2578      *      .
2579      *      .
2580      *  try {
2581      *      URI.setDefaultDocumentCharset("EUC-KR");
2582      *  } catch (DefaultCharsetChanged cc) {
2583      *      // CASE 1: the exception could be ignored, when it is set by user
2584      *      if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
2585      *      // CASE 2: let user know the default document charset changed
2586      *      } else {
2587      *      // CASE 2: let user know the default protocol charset changed
2588      *      }
2589      *  }
2590      *  </pre></code>
2591      *
2592      * The API programmer is responsible to set the correct charset.
2593      * And each application should remember its own charset to support.
2594      *
2595      * @param charset the default charset for the document
2596      * @throws DefaultCharsetChanged default charset changed
2597      */
2598     public static void setDefaultDocumentCharset(String charset) 
2599         throws DefaultCharsetChanged {
2600             
2601         defaultDocumentCharset = charset;
2602         throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
2603                 "the default document charset changed");
2604     }
2605 
2606 
2607     /***
2608      * Get the recommended default charset of the document.
2609      *
2610      * @return the default charset string
2611      */
2612     public static String getDefaultDocumentCharset() {
2613         return defaultDocumentCharset;
2614     }
2615 
2616 
2617     /***
2618      * Get the default charset of the document by locale.
2619      *
2620      * @return the default charset string by locale
2621      */
2622     public static String getDefaultDocumentCharsetByLocale() {
2623         return defaultDocumentCharsetByLocale;
2624     }
2625 
2626 
2627     /***
2628      * Get the default charset of the document by platform.
2629      *
2630      * @return the default charset string by platform
2631      */
2632     public static String getDefaultDocumentCharsetByPlatform() {
2633         return defaultDocumentCharsetByPlatform;
2634     }
2635 
2636     // ------------------------------------------------------------- The scheme
2637 
2638     /***
2639      * Get the scheme.
2640      *
2641      * @return the scheme
2642      */
2643     public char[] getRawScheme() {
2644         return _scheme;
2645     }
2646 
2647 
2648     /***
2649      * Get the scheme.
2650      *
2651      * @return the scheme
2652      * null if undefined scheme
2653      */
2654     public String getScheme() {
2655         return (_scheme == null) ? null : new String(_scheme);
2656     }
2657 
2658     // ---------------------------------------------------------- The authority
2659 
2660     /***
2661      * Set the authority.  It can be one type of server, hostport, hostname,
2662      * IPv4address, IPv6reference and reg_name.
2663      * <p><blockquote><pre>
2664      *   authority     = server | reg_name
2665      * </pre></blockquote><p>
2666      *
2667      * @param escapedAuthority the raw escaped authority
2668      * @throws URIException If {@link 
2669      * #parseAuthority(java.lang.String,boolean)} fails
2670      * @throws NullPointerException null authority
2671      */
2672     public void setRawAuthority(char[] escapedAuthority) 
2673         throws URIException, NullPointerException {
2674             
2675         parseAuthority(new String(escapedAuthority), true);
2676         setURI();
2677     }
2678 
2679 
2680     /***
2681      * Set the authority.  It can be one type of server, hostport, hostname,
2682      * IPv4address, IPv6reference and reg_name.
2683      * Note that there is no setAuthority method by the escape encoding reason.
2684      *
2685      * @param escapedAuthority the escaped authority string
2686      * @throws URIException If {@link 
2687      * #parseAuthority(java.lang.String,boolean)} fails
2688      */
2689     public void setEscapedAuthority(String escapedAuthority)
2690         throws URIException {
2691 
2692         parseAuthority(escapedAuthority, true);
2693         setURI();
2694     }
2695 
2696 
2697     /***
2698      * Get the raw-escaped authority.
2699      *
2700      * @return the raw-escaped authority
2701      */
2702     public char[] getRawAuthority() {
2703         return _authority;
2704     }
2705 
2706 
2707     /***
2708      * Get the escaped authority.
2709      *
2710      * @return the escaped authority
2711      */
2712     public String getEscapedAuthority() {
2713         return (_authority == null) ? null : new String(_authority);
2714     }
2715 
2716 
2717     /***
2718      * Get the authority.
2719      *
2720      * @return the authority
2721      * @throws URIException If {@link #decode} fails
2722      */
2723     public String getAuthority() throws URIException {
2724         return (_authority == null) ? null : decode(_authority,
2725                 getProtocolCharset());
2726     }
2727 
2728     // ----------------------------------------------------------- The userinfo
2729 
2730     /***
2731      * Get the raw-escaped userinfo.
2732      *
2733      * @return the raw-escaped userinfo
2734      * @see #getAuthority
2735      */
2736     public char[] getRawUserinfo() {
2737         return _userinfo;
2738     }
2739 
2740 
2741     /***
2742      * Get the escaped userinfo.
2743      *
2744      * @return the escaped userinfo
2745      * @see #getAuthority
2746      */
2747     public String getEscapedUserinfo() {
2748         return (_userinfo == null) ? null : new String(_userinfo);
2749     }
2750 
2751 
2752     /***
2753      * Get the userinfo.
2754      *
2755      * @return the userinfo
2756      * @throws URIException If {@link #decode} fails
2757      * @see #getAuthority
2758      */
2759     public String getUserinfo() throws URIException {
2760         return (_userinfo == null) ? null : decode(_userinfo,
2761                 getProtocolCharset());
2762     }
2763 
2764     // --------------------------------------------------------------- The host
2765 
2766     /***
2767      * Get the host.
2768      * <p><blockquote><pre>
2769      *   host          = hostname | IPv4address | IPv6reference
2770      * </pre></blockquote><p>
2771      *
2772      * @return the host
2773      * @see #getAuthority
2774      */
2775     public char[] getRawHost() {
2776         return _host;
2777     }
2778 
2779 
2780     /***
2781      * Get the host.
2782      * <p><blockquote><pre>
2783      *   host          = hostname | IPv4address | IPv6reference
2784      * </pre></blockquote><p>
2785      *
2786      * @return the host
2787      * @throws URIException If {@link #decode} fails
2788      * @see #getAuthority
2789      */
2790     public String getHost() throws URIException {
2791         if (_host != null) {
2792             return decode(_host, getProtocolCharset());
2793         } else {
2794             return null;
2795         }
2796     }
2797 
2798     // --------------------------------------------------------------- The port
2799 
2800     /***
2801      * Get the port.  In order to get the specfic default port, the specific
2802      * protocol-supported class extended from the URI class should be used.
2803      * It has the server-based naming authority.
2804      *
2805      * @return the port
2806      * if -1, it has the default port for the scheme or the server-based
2807      * naming authority is not supported in the specific URI.
2808      */
2809     public int getPort() {
2810         return _port;
2811     }
2812 
2813     // --------------------------------------------------------------- The path
2814 
2815     /***
2816      * Set the raw-escaped path.
2817      *
2818      * @param escapedPath the path character sequence
2819      * @throws URIException encoding error or not proper for initial instance
2820      * @see #encode
2821      */
2822     public void setRawPath(char[] escapedPath) throws URIException {
2823         if (escapedPath == null || escapedPath.length == 0) {
2824             _path = _opaque = escapedPath;
2825             setURI();
2826             return;
2827         }
2828         // remove the fragment identifier
2829         escapedPath = removeFragmentIdentifier(escapedPath);
2830         if (_is_net_path || _is_abs_path) {
2831             if (escapedPath[0] != '/') {
2832                 throw new URIException(URIException.PARSING,
2833                         "not absolute path");
2834             }
2835             if (!validate(escapedPath, abs_path)) {
2836                 throw new URIException(URIException.ESCAPING,
2837                         "escaped absolute path not valid");
2838             }
2839             _path = escapedPath;
2840         } else if (_is_rel_path) {
2841             int at = indexFirstOf(escapedPath, '/');
2842             if (at == 0) {
2843                 throw new URIException(URIException.PARSING, "incorrect path");
2844             }
2845             if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) 
2846                 && !validate(escapedPath, at, -1, abs_path) 
2847                 || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
2848             
2849                 throw new URIException(URIException.ESCAPING,
2850                         "escaped relative path not valid");
2851             }
2852             _path = escapedPath;
2853         } else if (_is_opaque_part) {
2854             if (!uric_no_slash.get(escapedPath[0]) 
2855                 && !validate(escapedPath, 1, -1, uric)) {
2856                 throw new URIException(URIException.ESCAPING,
2857                     "escaped opaque part not valid");
2858             }
2859             _opaque = escapedPath;
2860         } else {
2861             throw new URIException(URIException.PARSING, "incorrect path");
2862         }
2863         setURI();
2864     }
2865 
2866 
2867     /***
2868      * Set the escaped path.
2869      *
2870      * @param escapedPath the escaped path string
2871      * @throws URIException encoding error or not proper for initial instance
2872      * @see #encode
2873      */
2874     public void setEscapedPath(String escapedPath) throws URIException {
2875         if (escapedPath == null) {
2876             _path = _opaque = null;
2877             setURI();
2878             return;
2879         }
2880         setRawPath(escapedPath.toCharArray());
2881     }
2882 
2883 
2884     /***
2885      * Set the path.
2886      *
2887      * @param path the path string
2888      * @throws URIException set incorrectly or fragment only
2889      * @see #encode
2890      */
2891     public void setPath(String path) throws URIException {
2892 
2893         if (path == null || path.length() == 0) {
2894             _path = _opaque = (path == null) ? null : path.toCharArray();
2895             setURI();
2896             return;
2897         }
2898         // set the charset to do escape encoding
2899         String charset = getProtocolCharset();
2900 
2901         if (_is_net_path || _is_abs_path) {
2902             _path = encode(path, allowed_abs_path, charset);
2903         } else if (_is_rel_path) {
2904             StringBuffer buff = new StringBuffer(path.length());
2905             int at = path.indexOf('/');
2906             if (at == 0) { // never 0
2907                 throw new URIException(URIException.PARSING,
2908                         "incorrect relative path");
2909             }
2910             if (at > 0) {
2911                 buff.append(encode(path.substring(0, at), allowed_rel_path,
2912                             charset));
2913                 buff.append(encode(path.substring(at), allowed_abs_path,
2914                             charset));
2915             } else {
2916                 buff.append(encode(path, allowed_rel_path, charset));
2917             }
2918             _path = buff.toString().toCharArray();
2919         } else if (_is_opaque_part) {
2920             StringBuffer buf = new StringBuffer();
2921             buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
2922             buf.insert(1, encode(path.substring(1), uric, charset));
2923             _opaque = buf.toString().toCharArray();
2924         } else {
2925             throw new URIException(URIException.PARSING, "incorrect path");
2926         }
2927         setURI();
2928     }
2929 
2930 
2931     /***
2932      * Resolve the base and relative path.
2933      *
2934      * @param basePath a character array of the basePath
2935      * @param relPath a character array of the relPath
2936      * @return the resolved path
2937      * @throws URIException no more higher path level to be resolved
2938      */
2939     protected char[] resolvePath(char[] basePath, char[] relPath)
2940         throws URIException {
2941 
2942         // REMINDME: paths are never null
2943         String base = (basePath == null) ? "" : new String(basePath);
2944         int at = base.lastIndexOf('/');
2945         if (at != -1) {
2946             basePath = base.substring(0, at + 1).toCharArray();
2947         }
2948         // _path could be empty
2949         if (relPath == null || relPath.length == 0) {
2950             return normalize(basePath);
2951         } else if (relPath[0] == '/') {
2952             return normalize(relPath);
2953         } else {
2954             StringBuffer buff = new StringBuffer(base.length() 
2955                 + relPath.length);
2956             buff.append((at != -1) ? base.substring(0, at + 1) : "/");
2957             buff.append(relPath);
2958             return normalize(buff.toString().toCharArray());
2959         }
2960     }
2961 
2962 
2963     /***
2964      * Get the raw-escaped current hierarchy level in the given path.
2965      * If the last namespace is a collection, the slash mark ('/') should be
2966      * ended with at the last character of the path string.
2967      *
2968      * @param path the path
2969      * @return the current hierarchy level
2970      * @throws URIException no hierarchy level
2971      */
2972     protected char[] getRawCurrentHierPath(char[] path) throws URIException {
2973 
2974         if (_is_opaque_part) {
2975             throw new URIException(URIException.PARSING, "no hierarchy level");
2976         }
2977         if (path == null) {
2978             throw new URIException(URIException.PARSING, "empty path");
2979         }
2980         String buff = new String(path);
2981         int first = buff.indexOf('/');
2982         int last = buff.lastIndexOf('/');
2983         if (last == 0) {
2984             return rootPath;
2985         } else if (first != last && last != -1) {
2986             return buff.substring(0, last).toCharArray();
2987         }
2988         // FIXME: it could be a document on the server side
2989         return path;
2990     }
2991 
2992 
2993     /***
2994      * Get the raw-escaped current hierarchy level.
2995      *
2996      * @return the raw-escaped current hierarchy level
2997      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2998      */
2999     public char[] getRawCurrentHierPath() throws URIException {
3000         return (_path == null) ? null : getRawCurrentHierPath(_path);
3001     }
3002  
3003 
3004     /***
3005      * Get the escaped current hierarchy level.
3006      *
3007      * @return the escaped current hierarchy level
3008      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3009      */
3010     public String getEscapedCurrentHierPath() throws URIException {
3011         char[] path = getRawCurrentHierPath();
3012         return (path == null) ? null : new String(path);
3013     }
3014  
3015 
3016     /***
3017      * Get the current hierarchy level.
3018      *
3019      * @return the current hierarchy level
3020      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3021      * @see #decode
3022      */
3023     public String getCurrentHierPath() throws URIException {
3024         char[] path = getRawCurrentHierPath();
3025         return (path == null) ? null : decode(path, getProtocolCharset());
3026     }
3027 
3028 
3029     /***
3030      * Get the level above the this hierarchy level.
3031      *
3032      * @return the raw above hierarchy level
3033      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3034      */
3035     public char[] getRawAboveHierPath() throws URIException {
3036         char[] path = getRawCurrentHierPath();
3037         return (path == null) ? null : getRawCurrentHierPath(path);
3038     }
3039 
3040 
3041     /***
3042      * Get the level above the this hierarchy level.
3043      *
3044      * @return the raw above hierarchy level
3045      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3046      */
3047     public String getEscapedAboveHierPath() throws URIException {
3048         char[] path = getRawAboveHierPath();
3049         return (path == null) ? null : new String(path);
3050     }
3051 
3052 
3053     /***
3054      * Get the level above the this hierarchy level.
3055      *
3056      * @return the above hierarchy level
3057      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3058      * @see #decode
3059      */
3060     public String getAboveHierPath() throws URIException {
3061         char[] path = getRawAboveHierPath();
3062         return (path == null) ? null : decode(path, getProtocolCharset());
3063     }
3064 
3065 
3066     /***
3067      * Get the raw-escaped path.
3068      * <p><blockquote><pre>
3069      *   path          = [ abs_path | opaque_part ]
3070      * </pre></blockquote><p>
3071      *
3072      * @return the raw-escaped path
3073      */
3074     public char[] getRawPath() {
3075         return _is_opaque_part ? _opaque : _path;
3076     }
3077 
3078 
3079     /***
3080      * Get the escaped path.
3081      * <p><blockquote><pre>
3082      *   path          = [ abs_path | opaque_part ]
3083      *   abs_path      = "/"  path_segments 
3084      *   opaque_part   = uric_no_slash *uric
3085      * </pre></blockquote><p>
3086      *
3087      * @return the escaped path string
3088      */
3089     public String getEscapedPath() {
3090         char[] path = getRawPath();
3091         return (path == null) ? null : new String(path);
3092     }
3093 
3094 
3095     /***
3096      * Get the path.
3097      * <p><blockquote><pre>
3098      *   path          = [ abs_path | opaque_part ]
3099      * </pre></blockquote><p>
3100      * @return the path string
3101      * @throws URIException If {@link #decode} fails.
3102      * @see #decode
3103      */
3104     public String getPath() throws URIException { 
3105         char[] path =  getRawPath();
3106         return (path == null) ? null : decode(path, getProtocolCharset());
3107     }
3108 
3109 
3110     /***
3111      * Get the raw-escaped basename of the path.
3112      *
3113      * @return the raw-escaped basename
3114      */
3115     public char[] getRawName() {
3116         if (_path == null) { 
3117             return null;
3118         }
3119 
3120         int at = 0;
3121         for (int i = _path.length - 1; i >= 0; i--) {
3122             if (_path[i] == '/') {
3123                 at = i + 1;
3124                 break;
3125             }
3126         }
3127         int len = _path.length - at;
3128         char[] basename =  new char[len];
3129         System.arraycopy(_path, at, basename, 0, len);
3130         return basename;
3131     }
3132 
3133 
3134     /***
3135      * Get the escaped basename of the path.
3136      *
3137      * @return the escaped basename string
3138      */
3139     public String getEscapedName() {
3140         char[] basename = getRawName();
3141         return (basename == null) ? null : new String(basename);
3142     }
3143 
3144 
3145     /***
3146      * Get the basename of the path.
3147      *
3148      * @return the basename string
3149      * @throws URIException incomplete trailing escape pattern or unsupported
3150      * character encoding
3151      * @see #decode
3152      */
3153     public String getName() throws URIException {
3154         char[] basename = getRawName();
3155         return (basename == null) ? null : decode(getRawName(),
3156                 getProtocolCharset());
3157     }
3158 
3159     // ----------------------------------------------------- The path and query 
3160 
3161     /***
3162      * Get the raw-escaped path and query.
3163      *
3164      * @return the raw-escaped path and query
3165      */
3166     public char[] getRawPathQuery() {
3167 
3168         if (_path == null && _query == null) {
3169             return null;
3170         }
3171         StringBuffer buff = new StringBuffer();
3172         if (_path != null) {
3173             buff.append(_path);
3174         }
3175         if (_query != null) {
3176             buff.append('?');
3177             buff.append(_query);
3178         }
3179         return buff.toString().toCharArray();
3180     }
3181 
3182 
3183     /***
3184      * Get the escaped query.
3185      *
3186      * @return the escaped path and query string
3187      */
3188     public String getEscapedPathQuery() {
3189         char[] rawPathQuery = getRawPathQuery();
3190         return (rawPathQuery == null) ? null : new String(rawPathQuery);
3191     }
3192 
3193 
3194     /***
3195      * Get the path and query.
3196      *
3197      * @return the path and query string.
3198      * @throws URIException incomplete trailing escape pattern or unsupported
3199      * character encoding
3200      * @see #decode
3201      */
3202     public String getPathQuery() throws URIException {
3203         char[] rawPathQuery = getRawPathQuery();
3204         return (rawPathQuery == null) ? null : decode(rawPathQuery,
3205                 getProtocolCharset());
3206     }
3207 
3208     // -------------------------------------------------------------- The query 
3209 
3210     /***
3211      * Set the raw-escaped query.
3212      *
3213      * @param escapedQuery the raw-escaped query
3214      * @throws URIException escaped query not valid
3215      */
3216     public void setRawQuery(char[] escapedQuery) throws URIException {
3217         if (escapedQuery == null || escapedQuery.length == 0) {
3218             _query = escapedQuery;
3219             setURI();
3220             return;
3221         }
3222         // remove the fragment identifier
3223         escapedQuery = removeFragmentIdentifier(escapedQuery);
3224         if (!validate(escapedQuery, query)) {
3225             throw new URIException(URIException.ESCAPING,
3226                     "escaped query not valid");
3227         }
3228         _query = escapedQuery;
3229         setURI();
3230     }
3231 
3232 
3233     /***
3234      * Set the escaped query string.
3235      *
3236      * @param escapedQuery the escaped query string
3237      * @throws URIException escaped query not valid
3238      */
3239     public void setEscapedQuery(String escapedQuery) throws URIException {
3240         if (escapedQuery == null) {
3241             _query = null;
3242             setURI();
3243             return;
3244         }
3245         setRawQuery(escapedQuery.toCharArray());
3246     }
3247 
3248 
3249     /***
3250      * Set the query.
3251      * <p>
3252      * When a query string is not misunderstood the reserved special characters
3253      * ("&amp;", "=", "+", ",", and "$") within a query component, it is
3254      * recommended to use in encoding the whole query with this method.
3255      * <p>
3256      * The additional APIs for the special purpose using by the reserved
3257      * special characters used in each protocol are implemented in each protocol
3258      * classes inherited from <code>URI</code>.  So refer to the same-named APIs
3259      * implemented in each specific protocol instance.
3260      *
3261      * @param query the query string.
3262      * @throws URIException incomplete trailing escape pattern or unsupported
3263      * character encoding
3264      * @see #encode
3265      */
3266     public void setQuery(String query) throws URIException {
3267         if (query == null || query.length() == 0) {
3268             _query = (query == null) ? null : query.toCharArray();
3269             setURI();
3270             return;
3271         }
3272         setRawQuery(encode(query, allowed_query, getProtocolCharset()));
3273     }
3274 
3275 
3276     /***
3277      * Get the raw-escaped query.
3278      *
3279      * @return the raw-escaped query
3280      */
3281     public char[] getRawQuery() {
3282         return _query;
3283     }
3284 
3285 
3286     /***
3287      * Get the escaped query.
3288      *
3289      * @return the escaped query string
3290      */
3291     public String getEscapedQuery() {
3292         return (_query == null) ? null : new String(_query);
3293     }
3294 
3295 
3296     /***
3297      * Get the query.
3298      *
3299      * @return the query string.
3300      * @throws URIException incomplete trailing escape pattern or unsupported
3301      * character encoding
3302      * @see #decode
3303      */
3304     public String getQuery() throws URIException {
3305         return (_query == null) ? null : decode(_query, getProtocolCharset());
3306     }
3307 
3308     // ----------------------------------------------------------- The fragment 
3309 
3310     /***
3311      * Set the raw-escaped fragment.
3312      *
3313      * @param escapedFragment the raw-escaped fragment
3314      * @throws URIException escaped fragment not valid
3315      */
3316     public void setRawFragment(char[] escapedFragment) throws URIException {
3317         if (escapedFragment == null || escapedFragment.length == 0) {
3318             _fragment = escapedFragment;
3319             hash = 0;
3320             return;
3321         }
3322         if (!validate(escapedFragment, fragment)) {
3323             throw new URIException(URIException.ESCAPING,
3324                     "escaped fragment not valid");
3325         }
3326         _fragment = escapedFragment;
3327         hash = 0;
3328     }
3329 
3330 
3331     /***
3332      * Set the escaped fragment string.
3333      *
3334      * @param escapedFragment the escaped fragment string
3335      * @throws URIException escaped fragment not valid
3336      */
3337     public void setEscapedFragment(String escapedFragment) throws URIException {
3338         if (escapedFragment == null) {
3339             _fragment = null;
3340             hash = 0;
3341             return;
3342         }
3343         setRawFragment(escapedFragment.toCharArray());
3344     }
3345 
3346 
3347     /***
3348      * Set the fragment.
3349      *
3350      * @param fragment the fragment string.
3351      * @throws URIException If an error occurs.
3352      */
3353     public void setFragment(String fragment) throws URIException {
3354         if (fragment == null || fragment.length() == 0) {
3355             _fragment = (fragment == null) ? null : fragment.toCharArray();
3356             hash = 0;
3357             return;
3358         }
3359         _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
3360         hash = 0;
3361     }
3362 
3363 
3364     /***
3365      * Get the raw-escaped fragment.
3366      * <p>
3367      * The optional fragment identifier is not part of a URI, but is often used
3368      * in conjunction with a URI.
3369      * <p>
3370      * The format and interpretation of fragment identifiers is dependent on
3371      * the media type [RFC2046] of the retrieval result.
3372      * <p>
3373      * A fragment identifier is only meaningful when a URI reference is
3374      * intended for retrieval and the result of that retrieval is a document
3375      * for which the identified fragment is consistently defined.
3376      *
3377      * @return the raw-escaped fragment
3378      */
3379     public char[] getRawFragment() {
3380         return _fragment;
3381     }
3382 
3383 
3384     /***
3385      * Get the escaped fragment.
3386      *
3387      * @return the escaped fragment string
3388      */
3389     public String getEscapedFragment() {
3390         return (_fragment == null) ? null : new String(_fragment);
3391     }
3392 
3393 
3394     /***
3395      * Get the fragment.
3396      *
3397      * @return the fragment string
3398      * @throws URIException incomplete trailing escape pattern or unsupported
3399      * character encoding
3400      * @see #decode
3401      */
3402     public String getFragment() throws URIException {
3403         return (_fragment == null) ? null : decode(_fragment,
3404                 getProtocolCharset());
3405     }
3406 
3407     // ------------------------------------------------------------- Utilities 
3408 
3409     /***
3410      * Remove the fragment identifier of the given component.
3411      *
3412      * @param component the component that a fragment may be included
3413      * @return the component that the fragment identifier is removed
3414      */
3415     protected char[] removeFragmentIdentifier(char[] component) {
3416         if (component == null) { 
3417             return null;
3418         }
3419         int lastIndex = new String(component).indexOf('#');
3420         if (lastIndex != -1) {
3421             component = new String(component).substring(0,
3422                     lastIndex).toCharArray();
3423         }
3424         return component;
3425     }
3426 
3427 
3428     /***
3429      * Normalize the given hier path part.
3430      * 
3431      * <p>Algorithm taken from URI reference parser at 
3432      * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
3433      *
3434      * @param path the path to normalize
3435      * @return the normalized path
3436      * @throws URIException no more higher path level to be normalized
3437      */
3438     protected char[] normalize(char[] path) throws URIException {
3439 
3440         if (path == null) { 
3441             return null;
3442         }
3443 
3444         String normalized = new String(path);
3445 
3446         // If the buffer begins with "./" or "../", the "." or ".." is removed.
3447         if (normalized.startsWith("./")) {
3448             normalized = normalized.substring(1);
3449         } else if (normalized.startsWith("../")) {
3450             normalized = normalized.substring(2);
3451         } else if (normalized.startsWith("..")) {
3452             normalized = normalized.substring(2);
3453         }
3454 
3455         // All occurrences of "/./" in the buffer are replaced with "/"
3456         int index = -1;
3457         while ((index = normalized.indexOf("/./")) != -1) {
3458             normalized = normalized.substring(0, index) + normalized.substring(index + 2);
3459         }
3460 
3461         // If the buffer ends with "/.", the "." is removed.
3462         if (normalized.endsWith("/.")) {
3463             normalized = normalized.substring(0, normalized.length() - 1);
3464         }
3465 
3466         int startIndex = 0;
3467 
3468         // All occurrences of "/<segment>/../" in the buffer, where ".."
3469         // and <segment> are complete path segments, are iteratively replaced
3470         // with "/" in order from left to right until no matching pattern remains.
3471         // If the buffer ends with "/<segment>/..", that is also replaced
3472         // with "/".  Note that <segment> may be empty.
3473         while ((index = normalized.indexOf("/../", startIndex)) != -1) {
3474             int slashIndex = normalized.lastIndexOf('/', index - 1);
3475             if (slashIndex >= 0) {
3476                 normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
3477             } else {
3478                 startIndex = index + 3;   
3479             }
3480         }
3481         if (normalized.endsWith("/..")) {
3482             int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3483             if (slashIndex >= 0) {
3484                 normalized = normalized.substring(0, slashIndex + 1);
3485             }
3486         }
3487 
3488         // All prefixes of "<segment>/../" in the buffer, where ".."
3489         // and <segment> are complete path segments, are iteratively replaced
3490         // with "/" in order from left to right until no matching pattern remains.
3491         // If the buffer ends with "<segment>/..", that is also replaced
3492         // with "/".  Note that <segment> may be empty.
3493         while ((index = normalized.indexOf("/../")) != -1) {
3494             int slashIndex = normalized.lastIndexOf('/', index - 1);
3495             if (slashIndex >= 0) {
3496                 break;
3497             } else {
3498                 normalized = normalized.substring(index + 3);
3499             }
3500         }
3501         if (normalized.endsWith("/..")) {
3502             int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3503             if (slashIndex < 0) {
3504                 normalized = "/";
3505             }
3506         }
3507 
3508         return normalized.toCharArray();
3509     }
3510 
3511 
3512     /***
3513      * Normalizes the path part of this URI.  Normalization is only meant to be performed on 
3514      * URIs with an absolute path.  Calling this method on a relative path URI will have no
3515      * effect.
3516      *
3517      * @throws URIException no more higher path level to be normalized
3518      * 
3519      * @see #isAbsPath()
3520      */
3521     public void normalize() throws URIException {
3522         if (isAbsPath()) {
3523             _path = normalize(_path);
3524             setURI();
3525         }
3526     }
3527 
3528 
3529     /***
3530      * Test if the first array is equal to the second array.
3531      *
3532      * @param first the first character array
3533      * @param second the second character array
3534      * @return true if they're equal
3535      */
3536     protected boolean equals(char[] first, char[] second) {
3537 
3538         if (first == null && second == null) {
3539             return true;
3540         }
3541         if (first == null || second == null) {
3542             return false;
3543         }
3544         if (first.length != second.length) {
3545             return false;
3546         }
3547         for (int i = 0; i < first.length; i++) {
3548             if (first[i] != second[i]) {
3549                 return false;
3550             }
3551         }
3552         return true;
3553     }
3554 
3555 
3556     /***
3557      * Test an object if this URI is equal to another.
3558      *
3559      * @param obj an object to compare
3560      * @return true if two URI objects are equal
3561      */
3562     public boolean equals(Object obj) {
3563 
3564         // normalize and test each components
3565         if (obj == this) {
3566             return true;
3567         }
3568         if (!(obj instanceof URI)) {
3569             return false;
3570         }
3571         URI another = (URI) obj;
3572         // scheme
3573         if (!equals(_scheme, another._scheme)) {
3574             return false;
3575         }
3576         // is_opaque_part or is_hier_part?  and opaque
3577         if (!equals(_opaque, another._opaque)) {
3578             return false;
3579         }
3580         // is_hier_part
3581         // has_authority
3582         if (!equals(_authority, another._authority)) {
3583             return false;
3584         }
3585         // path
3586         if (!equals(_path, another._path)) {
3587             return false;
3588         }
3589         // has_query
3590         if (!equals(_query, another._query)) {
3591             return false;
3592         }
3593         // has_fragment?  should be careful of the only fragment case.
3594         if (!equals(_fragment, another._fragment)) {
3595             return false;
3596         }
3597         return true;
3598     }
3599 
3600     // ---------------------------------------------------------- Serialization
3601 
3602     /***
3603      * Write the content of this URI.
3604      *
3605      * @param oos the object-output stream
3606      * @throws IOException If an IO problem occurs.
3607      */
3608     protected void writeObject(ObjectOutputStream oos)
3609         throws IOException {
3610 
3611         oos.defaultWriteObject();
3612     }
3613 
3614 
3615     /***
3616      * Read a URI.
3617      *
3618      * @param ois the object-input stream
3619      * @throws ClassNotFoundException If one of the classes specified in the
3620      * input stream cannot be found.
3621      * @throws IOException If an IO problem occurs.
3622      */
3623     protected void readObject(ObjectInputStream ois)
3624         throws ClassNotFoundException, IOException {
3625 
3626         ois.defaultReadObject();
3627     }
3628 
3629     // -------------------------------------------------------------- Hash code
3630 
3631     /***
3632      * Return a hash code for this URI.
3633      *
3634      * @return a has code value for this URI
3635      */
3636     public int hashCode() {
3637         if (hash == 0) {
3638             char[] c = _uri;
3639             if (c != null) {
3640                 for (int i = 0, len = c.length; i < len; i++) {
3641                     hash = 31 * hash + c[i];
3642                 }
3643             }
3644             c = _fragment;
3645             if (c != null) {
3646                 for (int i = 0, len = c.length; i < len; i++) {
3647                     hash = 31 * hash + c[i];
3648                 }
3649             }
3650         }
3651         return hash;
3652     }
3653 
3654     // ------------------------------------------------------------- Comparison 
3655 
3656     /***
3657      * Compare this URI to another object. 
3658      *
3659      * @param obj the object to be compared.
3660      * @return 0, if it's same,
3661      * -1, if failed, first being compared with in the authority component
3662      * @throws ClassCastException not URI argument
3663      */
3664     public int compareTo(Object obj) throws ClassCastException {
3665 
3666         URI another = (URI) obj;
3667         if (!equals(_authority, another.getRawAuthority())) { 
3668             return -1;
3669         }
3670         return toString().compareTo(another.toString());
3671     }
3672 
3673     // ------------------------------------------------------------------ Clone
3674 
3675     /***
3676      * Create and return a copy of this object, the URI-reference containing
3677      * the userinfo component.  Notice that the whole URI-reference including
3678      * the userinfo component counld not be gotten as a <code>String</code>.
3679      * <p>
3680      * To copy the identical <code>URI</code> object including the userinfo
3681      * component, it should be used.
3682      *
3683      * @return a clone of this instance
3684      */
3685     public synchronized Object clone() {
3686 
3687         URI instance = new URI();
3688 
3689         instance._uri = _uri;
3690         instance._scheme = _scheme;
3691         instance._opaque = _opaque;
3692         instance._authority = _authority;
3693         instance._userinfo = _userinfo;
3694         instance._host = _host;
3695         instance._port = _port;
3696         instance._path = _path;
3697         instance._query = _query;
3698         instance._fragment = _fragment;
3699         // the charset to do escape encoding for this instance
3700         instance.protocolCharset = protocolCharset;
3701         // flags
3702         instance._is_hier_part = _is_hier_part;
3703         instance._is_opaque_part = _is_opaque_part;
3704         instance._is_net_path = _is_net_path;
3705         instance._is_abs_path = _is_abs_path;
3706         instance._is_rel_path = _is_rel_path;
3707         instance._is_reg_name = _is_reg_name;
3708         instance._is_server = _is_server;
3709         instance._is_hostname = _is_hostname;
3710         instance._is_IPv4address = _is_IPv4address;
3711         instance._is_IPv6reference = _is_IPv6reference;
3712 
3713         return instance;
3714     }
3715 
3716     // ------------------------------------------------------------ Get the URI
3717 
3718     /***
3719      * It can be gotten the URI character sequence. It's raw-escaped.
3720      * For the purpose of the protocol to be transported, it will be useful.
3721      * <p>
3722      * It is clearly unwise to use a URL that contains a password which is
3723      * intended to be secret. In particular, the use of a password within
3724      * the 'userinfo' component of a URL is strongly disrecommended except
3725      * in those rare cases where the 'password' parameter is intended to be
3726      * public.
3727      * <p>
3728      * When you want to get each part of the userinfo, you need to use the
3729      * specific methods in the specific URL. It depends on the specific URL.
3730      *
3731      * @return the URI character sequence
3732      */
3733     public char[] getRawURI() {
3734         return _uri;
3735     }
3736 
3737 
3738     /***
3739      * It can be gotten the URI character sequence. It's escaped.
3740      * For the purpose of the protocol to be transported, it will be useful.
3741      *
3742      * @return the escaped URI string
3743      */
3744     public String getEscapedURI() {
3745         return (_uri == null) ? null : new String(_uri);
3746     }
3747     
3748 
3749     /***
3750      * It can be gotten the URI character sequence.
3751      *
3752      * @return the original URI string
3753      * @throws URIException incomplete trailing escape pattern or unsupported
3754      * character encoding
3755      * @see #decode
3756      */
3757     public String getURI() throws URIException {
3758         return (_uri == null) ? null : decode(_uri, getProtocolCharset());
3759     }
3760 
3761 
3762     /***
3763      * Get the URI reference character sequence.
3764      *
3765      * @return the URI reference character sequence
3766      */
3767     public char[] getRawURIReference() {
3768         if (_fragment == null) { 
3769             return _uri;
3770         }
3771         if (_uri == null) { 
3772             return _fragment;
3773         }
3774         // if _uri != null &&  _fragment != null
3775         String uriReference = new String(_uri) + "#" + new String(_fragment);
3776         return uriReference.toCharArray();
3777     }
3778 
3779 
3780     /***
3781      * Get the escaped URI reference string.
3782      *
3783      * @return the escaped URI reference string
3784      */
3785     public String getEscapedURIReference() {
3786         char[] uriReference = getRawURIReference();
3787         return (uriReference == null) ? null : new String(uriReference);
3788     }
3789 
3790 
3791     /***
3792      * Get the original URI reference string.
3793      *
3794      * @return the original URI reference string
3795      * @throws URIException If {@link #decode} fails.
3796      */
3797     public String getURIReference() throws URIException {
3798         char[] uriReference = getRawURIReference();
3799         return (uriReference == null) ? null : decode(uriReference,
3800                 getProtocolCharset());
3801     }
3802 
3803 
3804     /***
3805      * Get the escaped URI string.
3806      * <p>
3807      * On the document, the URI-reference form is only used without the userinfo
3808      * component like http://jakarta.apache.org/ by the security reason.
3809      * But the URI-reference form with the userinfo component could be parsed.
3810      * <p>
3811      * In other words, this URI and any its subclasses must not expose the
3812      * URI-reference expression with the userinfo component like
3813      * http://user:password@hostport/restricted_zone.<br>
3814      * It means that the API client programmer should extract each user and
3815      * password to access manually.  Probably it will be supported in the each
3816      * subclass, however, not a whole URI-reference expression.
3817      *
3818      * @return the escaped URI string
3819      * @see #clone()
3820      */
3821     public String toString() {
3822         return getEscapedURI();
3823     }
3824 
3825 
3826     // ------------------------------------------------------------ Inner class
3827 
3828     /*** 
3829      * The charset-changed normal operation to represent to be required to
3830      * alert to user the fact the default charset is changed.
3831      */
3832     public static class DefaultCharsetChanged extends RuntimeException {
3833 
3834         // ------------------------------------------------------- constructors
3835 
3836         /***
3837          * The constructor with a reason string and its code arguments.
3838          *
3839          * @param reasonCode the reason code
3840          * @param reason the reason
3841          */
3842         public DefaultCharsetChanged(int reasonCode, String reason) {
3843             super(reason);
3844             this.reason = reason;
3845             this.reasonCode = reasonCode;
3846         }
3847 
3848         // ---------------------------------------------------------- constants
3849 
3850         /*** No specified reason code. */
3851         public static final int UNKNOWN = 0;
3852 
3853         /*** Protocol charset changed. */
3854         public static final int PROTOCOL_CHARSET = 1;
3855 
3856         /*** Document charset changed. */
3857         public static final int DOCUMENT_CHARSET = 2;
3858 
3859         // ------------------------------------------------- instance variables
3860 
3861         /*** The reason code. */
3862         private int reasonCode;
3863 
3864         /*** The reason message. */
3865         private String reason;
3866 
3867         // ------------------------------------------------------------ methods
3868 
3869         /***
3870          * Get the reason code.
3871          *
3872          * @return the reason code
3873          */
3874         public int getReasonCode() {
3875             return reasonCode;
3876         }
3877 
3878         /***
3879          * Get the reason message.
3880          *
3881          * @return the reason message
3882          */
3883         public String getReason() {
3884             return reason;
3885         }
3886 
3887     }
3888 
3889 
3890     /*** 
3891      * A mapping to determine the (somewhat arbitrarily) preferred charset for a
3892      * given locale.  Supports all locales recognized in JDK 1.1.
3893      * <p>
3894      * The distribution of this class is Servlets.com.    It was originally
3895      * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3896      */
3897     public static class LocaleToCharsetMap {
3898 
3899         /*** A mapping of language code to charset */
3900         private static final Hashtable LOCALE_TO_CHARSET_MAP;
3901         static {
3902             LOCALE_TO_CHARSET_MAP = new Hashtable();
3903             LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
3904             LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
3905             LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
3906             LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
3907             LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
3908             LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
3909             LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
3910             LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
3911             LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
3912             LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
3913             LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
3914             LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
3915             LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
3916             LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
3917             LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
3918             LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
3919             LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
3920             LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
3921             LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
3922             LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
3923             LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
3924             LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
3925             LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
3926             LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
3927             LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
3928             LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
3929             LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
3930             LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
3931             LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
3932             LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
3933             LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
3934             LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
3935             LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
3936             LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
3937             LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
3938             LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
3939             LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
3940             LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
3941             LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
3942         }
3943        
3944         /***
3945          * Get the preferred charset for the given locale.
3946          *
3947          * @param locale the locale
3948          * @return the preferred charset or null if the locale is not
3949          * recognized.
3950          */
3951         public static String getCharset(Locale locale) {
3952             // try for an full name match (may include country)
3953             String charset =
3954                 (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
3955             if (charset != null) { 
3956                 return charset;
3957             }
3958            
3959             // if a full name didn't match, try just the language
3960             charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
3961             return charset;  // may be null
3962         }
3963 
3964     }
3965 
3966 }
3967