1 /*
2 * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.36 2003/07/01 01:12:29 mbecke Exp $
3 * $Revision: 1.36 $
4 * $Date: 2003/07/01 01:12:29 $
5 *
6 * ====================================================================
7 *
8 * The Apache Software License, Version 1.1
9 *
10 * Copyright (c) 2002-2003 The Apache Software Foundation. All rights
11 * reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 *
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
24 *
25 * 3. The end-user documentation included with the redistribution, if
26 * any, must include the following acknowlegement:
27 * "This product includes software developed by the
28 * Apache Software Foundation (http://www.apache.org/)."
29 * Alternately, this acknowlegement may appear in the software itself,
30 * if and wherever such third-party acknowlegements normally appear.
31 *
32 * 4. The names "The Jakarta Project", "Commons", and "Apache Software
33 * Foundation" must not be used to endorse or promote products derived
34 * from this software without prior written permission. For written
35 * permission, please contact apache@apache.org.
36 *
37 * 5. Products derived from this software may not be called "Apache"
38 * nor may "Apache" appear in their names without prior written
39 * permission of the Apache Group.
40 *
41 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
42 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
44 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
45 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
48 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
49 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
50 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
51 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 * ====================================================================
54 *
55 * This software consists of voluntary contributions made by many
56 * individuals on behalf of the Apache Software Foundation. For more
57 * information on the Apache Software Foundation, please see
58 * <http://www.apache.org/>.
59 *
60 * [Additional notices, if required by prior licensing conditions]
61 *
62 */
63
64 package org.apache.commons.httpclient;
65
66 import java.io.IOException;
67 import java.io.ObjectInputStream;
68 import java.io.ObjectOutputStream;
69 import java.io.Serializable;
70 import java.io.UnsupportedEncodingException;
71 import java.util.Locale;
72 import java.util.BitSet;
73 import java.util.Hashtable;
74 import java.net.URL;
75 import java.security.AccessController;
76 import sun.security.action.GetPropertyAction;
77
78 /***
79 * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
80 * This class supports parsing a URI reference so that it can be extended
81 * for any specific protocol, taking into account the character encoding of
82 * the protocol to be transported and the charset of the document.
83 * <p>
84 * A URI is always in an "escaped" form, since escaping or unescaping a
85 * completed URI might change its semantics.
86 * <p>
87 * Implementers should be careful not to escape or unescape the same string
88 * more than once, since unescaping an already unescaped string might lead to
89 * misinterpreting a percent data character as another escaped character,
90 * or vice versa in the case of escaping an already escaped string.
91 * <p>
92 * In order to avoid these problems, data types are used as follows:
93 * <p><blockquote><pre>
94 * URI character sequence: char
95 * octet sequence: byte
96 * original character sequence: String
97 * </pre></blockquote><p>
98 *
99 * So, a URI is a sequence of characters as an array of a char type, which
100 * is not always represented as a sequence of octets as an array of byte.
101 * <p>
102 *
103 * URI Syntactic Components
104 * <p><blockquote><pre>
105 * - In general, written as follows:
106 * Absolute URI = <scheme>:<scheme-specific-part>
107 * Generic URI = <scheme>://<authority><path>?<query>
108 *
109 * - Syntax
110 * absoluteURI = scheme ":" ( hier_part | opaque_part )
111 * hier_part = ( net_path | abs_path ) [ "?" query ]
112 * net_path = "//" authority [ abs_path ]
113 * abs_path = "/" path_segments
114 * </pre></blockquote><p>
115 *
116 * The following examples illustrate URI that are in common use.
117 * <pre>
118 * ftp://ftp.is.co.za/rfc/rfc1808.txt
119 * -- ftp scheme for File Transfer Protocol services
120 * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
121 * -- gopher scheme for Gopher and Gopher+ Protocol services
122 * http://www.math.uio.no/faq/compression-faq/part1.html
123 * -- http scheme for Hypertext Transfer Protocol services
124 * mailto:mduerst@ifi.unizh.ch
125 * -- mailto scheme for electronic mail addresses
126 * news:comp.infosystems.www.servers.unix
127 * -- news scheme for USENET news groups and articles
128 * telnet://melvyl.ucop.edu/
129 * -- telnet scheme for interactive services via the TELNET Protocol
130 * </pre>
131 * Please, notice that there are many modifications from URL(RFC 1738) and
132 * relative URL(RFC 1808).
133 * <p>
134 * <b>The expressions for a URI</b>
135 * <p><pre>
136 * For escaped URI forms
137 * - URI(char[]) // constructor
138 * - char[] getRawXxx() // method
139 * - String getEscapedXxx() // method
140 * - String toString() // method
141 * <p>
142 * For unescaped URI forms
143 * - URI(String) // constructor
144 * - String getXXX() // method
145 * </pre><p>
146 *
147 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
148 * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
149 * @version $Revision: 1.36 $ $Date: 2002/03/14 15:14:01
150 */
151 public class URI implements Cloneable, Comparable, Serializable {
152
153
154 // ----------------------------------------------------------- Constructors
155
/**
 * Creates an empty instance without setting any component.
 * Intended for internal use.
 */
protected URI() {
    // intentionally empty
}
159
160
161 /***
162 * Construct a URI as an escaped form of a character array with the given
163 * charset.
164 *
165 * @param escaped the URI character sequence
166 * @param charset the charset string to do escape encoding
167 * @throws URIException If the URI cannot be created.
168 * @throws NullPointerException if <code>escaped</code> is <code>null</code>
169 * @see #getProtocolCharset
170 */
171 public URI(char[] escaped, String charset)
172 throws URIException, NullPointerException {
173 protocolCharset = charset;
174 parseUriReference(new String(escaped), true);
175 }
176
177
178 /***
179 * Construct a URI as an escaped form of a character array.
180 * An URI can be placed within double-quotes or angle brackets like
181 * "http://test.com/" and <http://test.com/>
182 *
183 * @param escaped the URI character sequence
184 * @throws URIException If the URI cannot be created.
185 * @throws NullPointerException if <code>escaped</code> is <code>null</code>
186 * @see #getDefaultProtocolCharset
187 */
188 public URI(char[] escaped)
189 throws URIException, NullPointerException {
190 parseUriReference(new String(escaped), true);
191 }
192
193
194 /***
195 * Construct a URI from the given string with the given charset.
196 *
197 * @param original the string to be represented to URI character sequence
198 * It is one of absoluteURI and relativeURI.
199 * @param charset the charset string to do escape encoding
200 * @throws URIException If the URI cannot be created.
201 * @see #getProtocolCharset
202 */
203 public URI(String original, String charset) throws URIException {
204 protocolCharset = charset;
205 parseUriReference(original, false);
206 }
207
208
209 /***
210 * Construct a URI from the given string.
211 * <p><blockquote><pre>
212 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
213 * </pre></blockquote><p>
214 * An URI can be placed within double-quotes or angle brackets like
215 * "http://test.com/" and <http://test.com/>
216 *
217 * @param original the string to be represented to URI character sequence
218 * It is one of absoluteURI and relativeURI.
219 * @throws URIException If the URI cannot be created.
220 * @see #getDefaultProtocolCharset
221 */
222 public URI(String original) throws URIException {
223 parseUriReference(original, false);
224 }
225
226
/**
 * Builds a URI from the string form of a <code>java.net.URL</code>;
 * the work is delegated to {@link #URI(String)}.
 *
 * @param url a valid URL.
 * @throws URIException If the URI cannot be created.
 * @since 2.0
 * @deprecated currently somewhat wrong and different from java.net.URL
 * usage
 */
public URI(URL url) throws URIException {
    this(url.toString());
}
238
239
/**
 * Construct an opaque (non-hierarchical) absolute URI from the given
 * components.
 * <p><blockquote><pre>
 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
 *   opaque_part   = uric_no_slash *uric
 * </pre></blockquote><p>
 * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
 * &lt;fragment&gt;.
 *
 * @param scheme the scheme string; must not be <code>null</code>
 * @param schemeSpecificPart scheme_specific_part
 * @param fragment the fragment string, or <code>null</code> for a URI
 * without a fragment
 * @throws URIException If the scheme is missing or invalid, or the URI
 * cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String schemeSpecificPart, String fragment)
    throws URIException {

    // validate and construct the URI character sequence
    if (scheme == null) {
        throw new URIException(URIException.PARSING, "scheme required");
    }
    // Lower-case with a fixed locale: with the default locale a scheme such
    // as "FILE" becomes "f\u0131le" under a Turkish locale and is rejected.
    char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
    if (validate(s, URI.scheme)) {
        _scheme = s; // is_absoluteURI
    } else {
        throw new URIException(URIException.PARSING, "incorrect scheme");
    }
    _opaque = encode(schemeSpecificPart, allowed_opaque_part,
        getProtocolCharset());
    // Set flag
    _is_opaque_part = true;
    // the fragment is optional; a null fragment must not raise an NPE
    _fragment = (fragment == null) ? null : fragment.toCharArray();

    setURI();
}
277
278
279 /***
280 * Construct a general URI from the given components.
281 * <p><blockquote><pre>
282 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
283 * absoluteURI = scheme ":" ( hier_part | opaque_part )
284 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
285 * hier_part = ( net_path | abs_path ) [ "?" query ]
286 * </pre></blockquote><p>
287 * It's for absolute URI = <scheme>:<path>?<query>#<
288 * fragment> and relative URI = <path>?<query>#<fragment
289 * >.
290 *
291 * @param scheme the scheme string
292 * @param authority the authority string
293 * @param path the path string
294 * @param query the query string
295 * @param fragment the fragment string
296 * @throws URIException If the new URI cannot be created.
297 * @see #getDefaultProtocolCharset
298 */
299 public URI(String scheme, String authority, String path, String query,
300 String fragment) throws URIException {
301
302 // validate and contruct the URI character sequence
303 StringBuffer buff = new StringBuffer();
304 if (scheme != null) {
305 buff.append(scheme);
306 buff.append(':');
307 }
308 if (authority != null) {
309 buff.append("//");
310 buff.append(authority);
311 }
312 if (path != null) { // accept empty path
313 if ((scheme != null || authority != null)
314 && !path.startsWith("/")) {
315 throw new URIException(URIException.PARSING,
316 "abs_path requested");
317 }
318 buff.append(path);
319 }
320 if (query != null) {
321 buff.append('?');
322 buff.append(query);
323 }
324 if (fragment != null) {
325 buff.append('#');
326 buff.append(fragment);
327 }
328 parseUriReference(buff.toString(), false);
329 }
330
331
/**
 * Builds a URI from scheme, userinfo, host and port only; path, query and
 * fragment are passed on as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @throws URIException If the new URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port)
    throws URIException {

    this(scheme, userinfo, host, port,
        (String) null, (String) null, (String) null);
}
347
348
/**
 * Builds a URI from scheme, userinfo, host, port and path; query and
 * fragment are passed on as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @param path the path string
 * @throws URIException If the new URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port,
    String path) throws URIException {

    this(scheme, userinfo, host, port, path, (String) null, (String) null);
}
365
366
/**
 * Builds a URI from scheme, userinfo, host, port, path and query; the
 * fragment is passed on as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @param path the path string
 * @param query the query string
 * @throws URIException If the new URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port,
    String path, String query) throws URIException {

    this(scheme, userinfo, host, port, path, query, (String) null);
}
384
385
/**
 * Builds a URI from all server based components. The authority handed to
 * the five-argument constructor is <code>null</code> when
 * <code>host</code> is <code>null</code>; otherwise it is
 * <code>[userinfo@]host[:port]</code>, where the userinfo part is left out
 * when <code>userinfo</code> is <code>null</code> and the port part is
 * left out when <code>port</code> is -1.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @param path the path string
 * @param query the query string
 * @param fragment the fragment string
 * @throws URIException If the new URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port,
    String path, String query, String fragment) throws URIException {

    this(scheme,
        (host == null)
            ? null
            : ((userinfo == null) ? "" : userinfo + '@')
                + host
                + ((port == -1) ? "" : ":" + port),
        path, query, fragment);
}
406
407
/**
 * Builds a URI from scheme, host, path and fragment. The host is passed on
 * as the authority and the query as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param host the host string
 * @param path the path string
 * @param fragment the fragment string
 * @throws URIException If the new URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String host, String path, String fragment)
    throws URIException {

    this(scheme, host, path, (String) null, fragment);
}
423
424
/**
 * Resolves a relative URI string against a base URI. The string is first
 * turned into a URI with {@link #URI(String)} and then resolved with
 * {@link #URI(URI, URI)}.
 *
 * @param base the base URI
 * @param relative the relative URI string
 * @throws URIException If the new URI cannot be created.
 */
public URI(URI base, String relative) throws URIException {
    this(base, new URI(relative));
}
435
436
437 /***
438 * Construct a general URI with the given relative URI.
439 * <p><blockquote><pre>
440 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
441 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
442 * </pre></blockquote><p>
443 * Resolving Relative References to Absolute Form.
444 *
445 * <strong>Examples of Resolving Relative URI References</strong>
446 *
447 * Within an object with a well-defined base URI of
448 * <p><blockquote><pre>
449 * http://a/b/c/d;p?q
450 * </pre></blockquote><p>
451 * the relative URI would be resolved as follows:
452 *
453 * Normal Examples
454 *
455 * <p><blockquote><pre>
456 * g:h = g:h
457 * g = http://a/b/c/g
458 * ./g = http://a/b/c/g
459 * g/ = http://a/b/c/g/
460 * /g = http://a/g
461 * //g = http://g
462 * ?y = http://a/b/c/?y
463 * g?y = http://a/b/c/g?y
464 * #s = (current document)#s
465 * g#s = http://a/b/c/g#s
466 * g?y#s = http://a/b/c/g?y#s
467 * ;x = http://a/b/c/;x
468 * g;x = http://a/b/c/g;x
469 * g;x?y#s = http://a/b/c/g;x?y#s
470 * . = http://a/b/c/
471 * ./ = http://a/b/c/
472 * .. = http://a/b/
473 * ../ = http://a/b/
474 * ../g = http://a/b/g
475 * ../.. = http://a/
476 * ../../ = http://a/
477 * ../../g = http://a/g
478 * </pre></blockquote><p>
479 *
480 * Some URI schemes do not allow a hierarchical syntax matching the
481 * <hier_part> syntax, and thus cannot use relative references.
482 *
483 * @param base the base URI
484 * @param relative the relative URI
485 * @throws URIException If the new URI cannot be created.
486 */
487 public URI(URI base, URI relative) throws URIException {
488
489 if (base._scheme == null) {
490 throw new URIException(URIException.PARSING, "base URI required");
491 }
492 if (base._scheme != null) {
493 this._scheme = base._scheme;
494 this._authority = base._authority;
495 }
496 if (base._is_opaque_part || relative._is_opaque_part) {
497 this._scheme = base._scheme;
498 this._is_opaque_part = base._is_opaque_part
499 || relative._is_opaque_part;
500 this._opaque = relative._opaque;
501 this._fragment = relative._fragment;
502 this.setURI();
503 return;
504 }
505 if (relative._scheme != null) {
506 this._scheme = relative._scheme;
507 this._is_net_path = relative._is_net_path;
508 this._authority = relative._authority;
509 if (relative._is_server) {
510 this._is_server = relative._is_server;
511 this._userinfo = relative._userinfo;
512 this._host = relative._host;
513 this._port = relative._port;
514 } else if (relative._is_reg_name) {
515 this._is_reg_name = relative._is_reg_name;
516 }
517 this._is_abs_path = relative._is_abs_path;
518 this._is_rel_path = relative._is_rel_path;
519 this._path = relative._path;
520 } else if (base._authority != null && relative._scheme == null) {
521 this._is_net_path = base._is_net_path;
522 this._authority = base._authority;
523 if (base._is_server) {
524 this._is_server = base._is_server;
525 this._userinfo = base._userinfo;
526 this._host = base._host;
527 this._port = base._port;
528 } else if (base._is_reg_name) {
529 this._is_reg_name = base._is_reg_name;
530 }
531 }
532 if (relative._authority != null) {
533 this._is_net_path = relative._is_net_path;
534 this._authority = relative._authority;
535 if (relative._is_server) {
536 this._is_server = relative._is_server;
537 this._userinfo = relative._userinfo;
538 this._host = relative._host;
539 this._port = relative._port;
540 } else if (relative._is_reg_name) {
541 this._is_reg_name = relative._is_reg_name;
542 }
543 this._is_abs_path = relative._is_abs_path;
544 this._is_rel_path = relative._is_rel_path;
545 this._path = relative._path;
546 }
547 // resolve the path and query if necessary
548 if (relative._scheme == null && relative._authority == null) {
549 if ((relative._path == null || relative._path.length == 0)
550 && relative._query == null) {
551 // handle a reference to the current document, see RFC 2396
552 // section 5.2 step 2
553 this._path = base._path;
554 this._query = base._query;
555 } else {
556 this._path = resolvePath(base._path, relative._path);
557 }
558 }
559 // base._query removed
560 if (relative._query != null) {
561 this._query = relative._query;
562 }
563 // base._fragment removed
564 if (relative._fragment != null) {
565 this._fragment = relative._fragment;
566 }
567 this.setURI();
568 }
569
570 // --------------------------------------------------- Instance Variables
571
572 /*** Version ID for serialization */
573 static final long serialVersionUID = 604752400577948726L;
574
575
576 /***
577 * Cache the hash code for this URI.
578 */
579 protected int hash = 0;
580
581
582 /***
583 * This Uniform Resource Identifier (URI).
584 * The URI is always in an "escaped" form, since escaping or unescaping
585 * a completed URI might change its semantics.
586 */
587 protected char[] _uri = null;
588
589
590 /***
591 * The charset of the protocol used by this URI instance.
592 */
593 protected String protocolCharset = null;
594
595
596 /***
597 * The default charset of the protocol. RFC 2277, 2396
598 */
599 protected static String defaultProtocolCharset = "UTF-8";
600
601
602 /***
603 * The default charset of the document. RFC 2277, 2396
604 * The platform's charset is used for the document by default.
605 */
606 protected static String defaultDocumentCharset = null;
607 protected static String defaultDocumentCharsetByLocale = null;
608 protected static String defaultDocumentCharsetByPlatform = null;
609 // Static initializer for defaultDocumentCharset
610 static {
611 Locale locale = Locale.getDefault();
612 // in order to support backward compatiblity
613 if (locale != null) {
614 defaultDocumentCharsetByLocale =
615 LocaleToCharsetMap.getCharset(locale);
616 // set the default document charset
617 defaultDocumentCharset = defaultDocumentCharsetByLocale;
618 }
619 // in order to support platform encoding
620 defaultDocumentCharsetByPlatform =
621 (String) AccessController.doPrivileged(
622 new GetPropertyAction("file.encoding"));
623 if (defaultDocumentCharset == null) {
624 // set the default document charset
625 defaultDocumentCharset = defaultDocumentCharsetByPlatform;
626 }
627 }
628
629
630 /***
631 * The scheme.
632 */
633 protected char[] _scheme = null;
634
635
636 /***
637 * The opaque.
638 */
639 protected char[] _opaque = null;
640
641
642 /***
643 * The authority.
644 */
645 protected char[] _authority = null;
646
647
648 /***
649 * The userinfo.
650 */
651 protected char[] _userinfo = null;
652
653
654 /***
655 * The host.
656 */
657 protected char[] _host = null;
658
659
660 /***
661 * The port.
662 */
663 protected int _port = -1;
664
665
666 /***
667 * The path.
668 */
669 protected char[] _path = null;
670
671
672 /***
673 * The query.
674 */
675 protected char[] _query = null;
676
677
678 /***
679 * The fragment.
680 */
681 protected char[] _fragment = null;
682
683
684 /***
685 * The root path.
686 */
687 protected static char[] rootPath = { '/' };
688
689 // ---------------------- Generous characters for each component validation
690
/**
 * BitSet holding only the percent "%" character. It always has the
 * reserved purpose of being the escape indicator, so it must be escaped as
 * "%25" in order to be used as data within a URI.
 */
protected static final BitSet percent = new BitSet(256);
// Static initializer for percent
static {
    final char escapeIndicator = '%';
    percent.set(escapeIndicator);
}
701
702
/**
 * BitSet for the decimal digits.
 * <p><blockquote><pre>
 * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 *            "8" | "9"
 * </pre></blockquote><p>
 */
protected static final BitSet digit = new BitSet(256);
// Static initializer for digit
static {
    for (char c = '0'; c <= '9'; c++) {
        digit.set(c);
    }
}
717
718
/**
 * BitSet for the ASCII letters, lower and upper case.
 * <p><blockquote><pre>
 *  alpha         = lowalpha | upalpha
 * </pre></blockquote><p>
 */
protected static final BitSet alpha = new BitSet(256);
// Static initializer for alpha
static {
    // one pass over the 26 letters, setting both cases of each
    for (int offset = 0; offset < 26; offset++) {
        alpha.set('a' + offset);
        alpha.set('A' + offset);
    }
}
735
736
/**
 * BitSet for alphanum, the union of digit and alpha.
 * <p><blockquote><pre>
 *  alphanum      = alpha | digit
 * </pre></blockquote><p>
 */
protected static final BitSet alphanum = new BitSet(256);
// Static initializer for alphanum
static {
    alphanum.or(digit);
    alphanum.or(alpha);
}
749
750
/**
 * BitSet for the hexadecimal digits, in both letter cases.
 * <p><blockquote><pre>
 * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *                         "a" | "b" | "c" | "d" | "e" | "f"
 * </pre></blockquote><p>
 */
protected static final BitSet hex = new BitSet(256);
// Static initializer for hex
static {
    // the six hex letters, both cases at once
    for (int offset = 0; offset < 6; offset++) {
        hex.set('a' + offset);
        hex.set('A' + offset);
    }
    hex.or(digit);
}
769
770
/**
 * BitSet for the characters that can occur in an escaped octet, i.e. the
 * percent sign and the hex digits.
 * <p><blockquote><pre>
 * escaped       = "%" hex hex
 * </pre></blockquote><p>
 */
protected static final BitSet escaped = new BitSet(256);
// Static initializer for escaped
static {
    escaped.or(hex);
    escaped.or(percent);
}
783
784
/**
 * BitSet for the mark characters.
 * <p><blockquote><pre>
 * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
 *                 "(" | ")"
 * </pre></blockquote><p>
 */
protected static final BitSet mark = new BitSet(256);
// Static initializer for mark
static {
    final char[] marks = {'-', '_', '.', '!', '~', '*', '\'', '(', ')'};
    for (int i = 0; i < marks.length; i++) {
        mark.set(marks[i]);
    }
}
805
806
/**
 * BitSet for unreserved: data characters that are allowed in a URI but do
 * not have a reserved purpose.
 * <p><blockquote><pre>
 * unreserved    = alphanum | mark
 * </pre></blockquote><p>
 */
protected static final BitSet unreserved = new BitSet(256);
// Static initializer for unreserved
static {
    unreserved.or(mark);
    unreserved.or(alphanum);
}
820
821
/**
 * BitSet for the reserved characters.
 * <p><blockquote><pre>
 * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
 *                 "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet reserved = new BitSet(256);
// Static initializer for reserved
static {
    final char[] delimiters =
        {';', '/', '?', ':', '@', '&', '=', '+', '$', ','};
    for (int i = 0; i < delimiters.length; i++) {
        reserved.set(delimiters[i]);
    }
}
843
844
/**
 * BitSet for uric, the union of the reserved, unreserved and escaped
 * character sets.
 * <p><blockquote><pre>
 * uric          = reserved | unreserved | escaped
 * </pre></blockquote><p>
 */
protected static final BitSet uric = new BitSet(256);
// Static initializer for uric
static {
    uric.or(escaped);
    uric.or(unreserved);
    uric.or(reserved);
}
858
859
860 /***
861 * BitSet for fragment (alias for uric).
862 * <p><blockquote><pre>
863 * fragment = *uric
864 * </pre></blockquote><p>
865 */
866 protected static final BitSet fragment = uric;
867
868
869 /***
870 * BitSet for query (alias for uric).
871 * <p><blockquote><pre>
872 * query = *uric
873 * </pre></blockquote><p>
874 */
875 protected static final BitSet query = uric;
876
877
/**
 * BitSet for pchar, the characters allowed in a path segment.
 * <p><blockquote><pre>
 * pchar         = unreserved | escaped |
 *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet pchar = new BitSet(256);
// Static initializer for pchar
static {
    final char[] extras = {':', '@', '&', '=', '+', '$', ','};
    for (int i = 0; i < extras.length; i++) {
        pchar.set(extras[i]);
    }
    pchar.or(escaped);
    pchar.or(unreserved);
}
898
899
900 /***
901 * BitSet for param (alias for pchar).
902 * <p><blockquote><pre>
903 * param = *pchar
904 * </pre></blockquote><p>
905 */
906 protected static final BitSet param = pchar;
907
908
/**
 * BitSet for segment: the pchar / param characters plus the ";" that
 * separates parameters.
 * <p><blockquote><pre>
 * segment       = *pchar *( ";" param )
 * </pre></blockquote><p>
 */
protected static final BitSet segment = new BitSet(256);
// Static initializer for segment
static {
    segment.set(';');
    segment.or(param);
    segment.or(pchar);
}
922
923
/**
 * BitSet for path segments: the segment characters plus the "/" separator.
 * <p><blockquote><pre>
 * path_segments = segment *( "/" segment )
 * </pre></blockquote><p>
 */
protected static final BitSet path_segments = new BitSet(256);
// Static initializer for path_segments
static {
    path_segments.or(segment);
    path_segments.set('/');
}
936
937
/**
 * BitSet for the URI absolute path.
 * <p><blockquote><pre>
 * abs_path      = "/"  path_segments
 * </pre></blockquote><p>
 */
protected static final BitSet abs_path = new BitSet(256);
// Static initializer for abs_path
static {
    abs_path.or(path_segments);
    abs_path.set('/');
}
950
951
/**
 * URI bitset for encoding typical non-slash characters.
 * <p><blockquote><pre>
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&amp;" | "=" | "+" | "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet uric_no_slash = new BitSet(256);
// Static initializer for uric_no_slash
static {
    uric_no_slash.or(unreserved);
    uric_no_slash.or(escaped);
    uric_no_slash.set(';');
    uric_no_slash.set('?');
    // ":" is part of uric_no_slash in the grammar above; the code used to
    // set ";" a second time here and so left ":" out of the set
    uric_no_slash.set(':');
    uric_no_slash.set('@');
    uric_no_slash.set('&');
    uric_no_slash.set('=');
    uric_no_slash.set('+');
    uric_no_slash.set('$');
    uric_no_slash.set(',');
}
974
975
/**
 * URI bitset that combines uric_no_slash and uric.
 * <p><blockquote><pre>
 * opaque_part   = uric_no_slash *uric
 * </pre></blockquote><p>
 */
protected static final BitSet opaque_part = new BitSet(256);
// Static initializer for opaque_part
static {
    // This set is generous: the grammar forbids a slash as the first
    // character only, which a per-character set cannot express.
    opaque_part.or(uric);
    opaque_part.or(uric_no_slash);
}
989
990
/**
 * URI bitset that combines absolute path and opaque part.
 * <p><blockquote><pre>
 * path          = [ abs_path | opaque_part ]
 * </pre></blockquote><p>
 */
protected static final BitSet path = new BitSet(256);
// Static initializer for path
static {
    path.or(opaque_part);
    path.or(abs_path);
}
1003
1004
1005 /***
1006 * Port, a logical alias for digit.
1007 */
1008 protected static final BitSet port = digit;
1009
1010
/**
 * Bitset that combines digit and dot for IPv4address.
 * <p><blockquote><pre>
 * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
 * </pre></blockquote><p>
 */
protected static final BitSet IPv4address = new BitSet(256);
// Static initializer for IPv4address
static {
    IPv4address.set('.');
    IPv4address.or(digit);
}
1023
1024
/**
 * BitSet for IPv6address: hex digits, the colon and the IPv4address
 * characters. RFC 2373.
 * <p><blockquote><pre>
 * IPv6address = hexpart [ ":" IPv4address ]
 * </pre></blockquote><p>
 */
protected static final BitSet IPv6address = new BitSet(256);
// Static initializer for IPv6address reference
static {
    IPv6address.or(IPv4address);
    IPv6address.set(':');
    IPv6address.or(hex); // hexpart
}
1038
1039
/**
 * BitSet for IPv6reference: the IPv6address characters plus the enclosing
 * square brackets. RFC 2732, 2373.
 * <p><blockquote><pre>
 * IPv6reference = "[" IPv6address "]"
 * </pre></blockquote><p>
 */
protected static final BitSet IPv6reference = new BitSet(256);
// Static initializer for IPv6reference
static {
    IPv6reference.or(IPv6address);
    IPv6reference.set('[');
    IPv6reference.set(']');
}
1053
1054
/**
 * BitSet for toplabel: alphanum plus the hyphen.
 * <p><blockquote><pre>
 * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
 * </pre></blockquote><p>
 */
protected static final BitSet toplabel = new BitSet(256);
// Static initializer for toplabel
static {
    toplabel.set('-');
    toplabel.or(alphanum);
}
1067
1068
1069 /***
1070 * BitSet for domainlabel.
1071 * <p><blockquote><pre>
1072 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1073 * </pre></blockquote><p>
1074 */
1075 protected static final BitSet domainlabel = toplabel;
1076
1077
/**
 * BitSet for hostname: the toplabel characters plus the dot.
 * <p><blockquote><pre>
 * hostname      = *( domainlabel "." ) toplabel [ "." ]
 * </pre></blockquote><p>
 */
protected static final BitSet hostname = new BitSet(256);
// Static initializer for hostname
static {
    hostname.set('.');
    // domainlabel is the same BitSet object as toplabel, so a single or()
    // covers both
    hostname.or(toplabel);
}
1091
1092
/**
 * Characters that may occur in the host part of an authority.
 * <p><blockquote><pre>
 * host          = hostname | IPv4address | IPv6reference
 * </pre></blockquote><p>
 */
protected static final BitSet host = new BitSet(256);
static {
    // IPv4address is not merged separately: IPv6reference already
    // contains every IPv4address character
    host.or(IPv6reference);
    host.or(hostname);
}
1106
1107
/**
 * Characters that may occur in a host with an optional port: the host
 * characters, the colon separator and the port characters.
 * <p><blockquote><pre>
 * hostport      = host [ ":" port ]
 * </pre></blockquote><p>
 */
protected static final BitSet hostport = new BitSet(256);
static {
    hostport.set(':');
    hostport.or(port);
    hostport.or(host);
}
1121
1122
/**
 * Characters that may occur in the userinfo part of an authority.
 * <p><blockquote><pre>
 * userinfo      = *( unreserved | escaped |
 *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
 * </pre></blockquote><p>
 */
protected static final BitSet userinfo = new BitSet(256);
static {
    // the explicitly listed punctuation
    userinfo.set(',');
    userinfo.set('$');
    userinfo.set('+');
    userinfo.set('=');
    userinfo.set('&');
    userinfo.set(':');
    userinfo.set(';');
    // the shared character classes
    userinfo.or(escaped);
    userinfo.or(unreserved);
}
1143
1144
/**
 * Characters allowed inside a single piece of the userinfo component,
 * such as the user name or the password: the {@link #userinfo} set with
 * the characters ';', ':', '@', '?' and '/' removed.
 */
public static final BitSet within_userinfo = new BitSet(256);
static {
    within_userinfo.or(userinfo);
    // take out the characters reserved within the authority
    within_userinfo.clear('/');
    within_userinfo.clear('?');
    within_userinfo.clear('@');
    within_userinfo.clear(':');
    within_userinfo.clear(';');
}
1158
1159
/**
 * Characters that may occur in a server-based naming authority: the
 * userinfo characters, the '@' separator and the hostport characters.
 * <p><blockquote><pre>
 * server        = [ [ userinfo "@" ] hostport ]
 * </pre></blockquote><p>
 */
protected static final BitSet server = new BitSet(256);
static {
    server.set('@');
    server.or(hostport);
    server.or(userinfo);
}
1173
1174
/**
 * Characters that may occur in a registry-based naming authority.
 * <p><blockquote><pre>
 * reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                     ";" | ":" | "@" | "&amp;" | "=" | "+" )
 * </pre></blockquote><p>
 */
protected static final BitSet reg_name = new BitSet(256);
static {
    // the explicitly listed punctuation
    reg_name.set('+');
    reg_name.set('=');
    reg_name.set('&');
    reg_name.set('@');
    reg_name.set(':');
    reg_name.set(';');
    reg_name.set(',');
    reg_name.set('$');
    // the shared character classes
    reg_name.or(escaped);
    reg_name.or(unreserved);
}
1196
1197
/**
 * Characters that may occur in an authority component: the union of the
 * server and reg_name sets.
 * <p><blockquote><pre>
 * authority     = server | reg_name
 * </pre></blockquote><p>
 */
protected static final BitSet authority = new BitSet(256);
static {
    authority.or(reg_name);
    authority.or(server);
}
1210
1211
/**
 * Characters that may occur in a scheme name: letters, digits, '+', '-'
 * and '.'. The rule that the first character must be a letter is not
 * expressed by this set.
 * <p><blockquote><pre>
 * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
 * </pre></blockquote><p>
 */
protected static final BitSet scheme = new BitSet(256);
static {
    scheme.set('.');
    scheme.set('-');
    scheme.set('+');
    scheme.or(digit);
    scheme.or(alpha);
}
1227
1228
/**
 * Characters that may occur in the first segment of a relative path.
 * <p><blockquote><pre>
 * rel_segment   = 1*( unreserved | escaped |
 *                     ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
 * </pre></blockquote><p>
 */
protected static final BitSet rel_segment = new BitSet(256);
static {
    // the explicitly listed punctuation
    rel_segment.set(',');
    rel_segment.set('$');
    rel_segment.set('+');
    rel_segment.set('=');
    rel_segment.set('&');
    rel_segment.set('@');
    rel_segment.set(';');
    // the shared character classes
    rel_segment.or(escaped);
    rel_segment.or(unreserved);
}
1249
1250
/**
 * Characters that may occur in a relative path: the union of the
 * rel_segment and abs_path sets.
 * <p><blockquote><pre>
 * rel_path      = rel_segment [ abs_path ]
 * </pre></blockquote><p>
 */
protected static final BitSet rel_path = new BitSet(256);
static {
    rel_path.or(abs_path);
    rel_path.or(rel_segment);
}
1263
1264
/**
 * Characters that may occur in a network path: the slash, the authority
 * characters and the abs_path characters.
 * <p><blockquote><pre>
 * net_path      = "//" authority [ abs_path ]
 * </pre></blockquote><p>
 */
protected static final BitSet net_path = new BitSet(256);
static {
    net_path.or(abs_path);
    net_path.or(authority);
    net_path.set('/'); // the leading "//"
}
1278
1279
/**
 * Characters that may occur in the hierarchical part of an absolute URI:
 * the union of the net_path, abs_path and query sets.
 * <p><blockquote><pre>
 * hier_part     = ( net_path | abs_path ) [ "?" query ]
 * </pre></blockquote><p>
 */
protected static final BitSet hier_part = new BitSet(256);
static {
    // '?' is not set explicitly; the original author noted that it is
    // already included by the sets merged here
    hier_part.or(query);
    hier_part.or(abs_path);
    hier_part.or(net_path);
}
1294
1295
/**
 * Characters that may occur in a relative URI: the union of the
 * net_path, abs_path, rel_path and query sets.
 * <p><blockquote><pre>
 * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 * </pre></blockquote><p>
 */
protected static final BitSet relativeURI = new BitSet(256);
static {
    // '?' is not set explicitly; the original author noted that it is
    // already included by the sets merged here
    relativeURI.or(query);
    relativeURI.or(rel_path);
    relativeURI.or(abs_path);
    relativeURI.or(net_path);
}
1311
1312
/**
 * Characters that may occur in an absolute URI: the scheme characters,
 * the colon, and the hier_part and opaque_part sets.
 * <p><blockquote><pre>
 * absoluteURI   = scheme ":" ( hier_part | opaque_part )
 * </pre></blockquote><p>
 */
protected static final BitSet absoluteURI = new BitSet(256);
static {
    absoluteURI.set(':');
    absoluteURI.or(opaque_part);
    absoluteURI.or(hier_part);
    absoluteURI.or(scheme);
}
1327
1328
/**
 * Characters that may occur anywhere in a URI reference: the absoluteURI
 * and relativeURI sets, the '#' separator and the fragment set.
 * <p><blockquote><pre>
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 * </pre></blockquote><p>
 */
protected static final BitSet URI_reference = new BitSet(256);
static {
    URI_reference.set('#');
    URI_reference.or(fragment);
    URI_reference.or(relativeURI);
    URI_reference.or(absoluteURI);
}
1343
1344 // ---------------------------- Characters disallowed within the URI syntax
1345 // Excluded US-ASCII Characters are like control, space, delims and unwise
1346
/**
 * The US-ASCII control characters: codes 0x00 through 0x1F and 0x7F.
 */
public static final BitSet control = new BitSet(256);
static {
    control.set(0x7F);
    // walk the 0x00..0x1F range from the top down
    int code = 0x1F;
    while (code >= 0) {
        control.set(code);
        code--;
    }
}
1358
/**
 * The US-ASCII space character (0x20) and nothing else.
 */
public static final BitSet space = new BitSet(256);
static {
    space.set(' ');
}
1367
1368
/**
 * The delimiter characters '&lt;', '&gt;', '#', '%' and the double quote.
 */
public static final BitSet delims = new BitSet(256);
static {
    delims.set('"');
    delims.set('%');
    delims.set('#');
    delims.set('>');
    delims.set('<');
}
1381
1382
/**
 * The "unwise" characters of RFC 2396, section 2.4.3.
 * <p><blockquote><pre>
 * unwise        = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 * </pre></blockquote><p>
 */
public static final BitSet unwise = new BitSet(256);
static {
    unwise.set('{');
    unwise.set('}');
    unwise.set('|');
    // The backslash must be written as an escaped char literal; the
    // previous '//' was not a valid character literal at all.
    unwise.set('\\');
    unwise.set('^');
    unwise.set('[');
    unwise.set(']');
    unwise.set('`');
}
1398
1399
/**
 * Characters that must not appear in a relative path before escaping:
 * every uric character that is not a member of {@link #rel_path}.
 */
public static final BitSet disallowed_rel_path = new BitSet(256);
static {
    // start from uric, then subtract what rel_path permits
    disallowed_rel_path.or(uric);
    disallowed_rel_path.andNot(rel_path);
}
1409
1410
/**
 * Characters that must not appear in an opaque part before escaping:
 * every uric character that is not a member of opaque_part.
 */
public static final BitSet disallowed_opaque_part = new BitSet(256);
static {
    // start from uric, then subtract what opaque_part permits
    disallowed_opaque_part.or(uric);
    disallowed_opaque_part.andNot(opaque_part);
}
1420
1421 // ----------------------- Characters allowed within and for each component
1422
/**
 * Characters allowed for the authority component: the {@link #authority}
 * set with the percent sign removed.
 */
public static final BitSet allowed_authority = new BitSet(256);
static {
    allowed_authority.or(authority);
    allowed_authority.clear('%'); // order matters: remove after the union
}
1432
1433
/**
 * Characters allowed for the opaque part: the opaque_part set with the
 * percent sign removed.
 */
public static final BitSet allowed_opaque_part = new BitSet(256);
static {
    allowed_opaque_part.or(opaque_part);
    allowed_opaque_part.clear('%'); // order matters: remove after the union
}
1443
1444
/**
 * Characters allowed for a registry-based naming authority: the
 * {@link #reg_name} set with the percent sign removed.
 */
public static final BitSet allowed_reg_name = new BitSet(256);
static {
    allowed_reg_name.or(reg_name);
    allowed_reg_name.clear('%'); // order matters: remove after the union
}
1455
1456
/**
 * Characters allowed for the userinfo component: the {@link #userinfo}
 * set with the percent sign removed.
 */
public static final BitSet allowed_userinfo = new BitSet(256);
static {
    allowed_userinfo.or(userinfo);
    allowed_userinfo.clear('%'); // order matters: remove after the union
}
1467
1468
/**
 * Characters allowed inside a single piece of the userinfo component:
 * the {@link #within_userinfo} set with the percent sign removed.
 */
public static final BitSet allowed_within_userinfo = new BitSet(256);
static {
    allowed_within_userinfo.or(within_userinfo);
    allowed_within_userinfo.clear('%'); // order matters: remove after the union
}
1478
1479
/**
 * Characters allowed for the IPv6reference component: the
 * {@link #IPv6reference} set with the square brackets '[' and ']'
 * removed again.
 */
public static final BitSet allowed_IPv6reference = new BitSet(256);
static {
    allowed_IPv6reference.or(IPv6reference);
    // drop the enclosing brackets
    allowed_IPv6reference.clear(']');
    allowed_IPv6reference.clear('[');
}
1492
1493
/**
 * Characters allowed for the host component: the union of
 * {@link #hostname} and {@link #allowed_IPv6reference}. Because the
 * latter is used, the brackets of an IPv6reference are not included.
 */
public static final BitSet allowed_host = new BitSet(256);
static {
    allowed_host.or(allowed_IPv6reference);
    allowed_host.or(hostname);
}
1504
1505
/**
 * Characters allowed within a piece of the authority component: the
 * union of {@link #server} and {@link #reg_name} with the characters
 * ';', ':', '@', '?' and '/' removed.
 */
public static final BitSet allowed_within_authority = new BitSet(256);
static {
    // build the union first ...
    allowed_within_authority.or(reg_name);
    allowed_within_authority.or(server);
    // ... then take the separators out again
    allowed_within_authority.clear('/');
    allowed_within_authority.clear('?');
    allowed_within_authority.clear('@');
    allowed_within_authority.clear(':');
    allowed_within_authority.clear(';');
}
1520
1521
/**
 * Characters allowed for an absolute path: the abs_path set with every
 * member of the percent set removed.
 */
public static final BitSet allowed_abs_path = new BitSet(256);
static {
    // '/' is not set explicitly; the original author noted that it is
    // already included in abs_path
    allowed_abs_path.or(abs_path);
    allowed_abs_path.andNot(percent); // order matters: subtract after the union
}
1532
1533
/**
 * Characters allowed for a relative path: the {@link #rel_path} set
 * with the percent sign removed.
 */
public static final BitSet allowed_rel_path = new BitSet(256);
static {
    allowed_rel_path.or(rel_path);
    allowed_rel_path.clear('%'); // order matters: remove after the union
}
1543
1544
/**
 * Characters allowed within a piece of the path: the abs_path set with
 * the characters '/', ';', '=' and '?' removed.
 */
public static final BitSet allowed_within_path = new BitSet(256);
static {
    allowed_within_path.or(abs_path);
    // take the path-level separators out again
    allowed_within_path.clear('?');
    allowed_within_path.clear('=');
    allowed_within_path.clear(';');
    allowed_within_path.clear('/');
}
1557
1558
/**
 * Characters allowed for the query component: the uric set with the
 * percent sign removed.
 */
public static final BitSet allowed_query = new BitSet(256);
static {
    allowed_query.or(uric);
    allowed_query.clear('%'); // order matters: remove after the union
}
1568
1569
/**
 * Characters allowed within a piece of the query component: the
 * {@link #allowed_query} set with every member of the reserved set
 * removed.
 */
public static final BitSet allowed_within_query = new BitSet(256);
static {
    allowed_within_query.or(allowed_query);
    allowed_within_query.andNot(reserved); // order matters: subtract after the union
}
1579
1580
/**
 * Characters allowed for the fragment component: the uric set with the
 * percent sign removed.
 */
public static final BitSet allowed_fragment = new BitSet(256);
static {
    allowed_fragment.or(uric);
    allowed_fragment.clear('%'); // order matters: remove after the union
}
1590
1591 // ------------------------------------------- Flags for this URI-reference
1592
1593 // TODO: Figure out what all these variables are for and provide javadoc
1594
1595 // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1596 // absoluteURI = scheme ":" ( hier_part | opaque_part )
1597 protected boolean _is_hier_part;
1598 protected boolean _is_opaque_part;
1599 // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1600 // hier_part = ( net_path | abs_path ) [ "?" query ]
1601 protected boolean _is_net_path;
1602 protected boolean _is_abs_path;
1603 protected boolean _is_rel_path;
1604 // net_path = "//" authority [ abs_path ]
1605 // authority = server | reg_name
1606 protected boolean _is_reg_name;
1607 protected boolean _is_server; // = _has_server
1608 // server = [ [ userinfo "@" ] hostport ]
1609 // host = hostname | IPv4address | IPv6reference
1610 protected boolean _is_hostname;
1611 protected boolean _is_IPv4address;
1612 protected boolean _is_IPv6reference;
1613
1614 // ------------------------------------------ Character and escape encoding
1615
1616 /***
1617 * Encodes URI string.
1618 *
1619 * This is a two mapping, one from original characters to octets, and
1620 * subsequently a second from octets to URI characters:
1621 * <p><blockquote><pre>
1622 * original character sequence->octet sequence->URI character sequence
1623 * </pre></blockquote><p>
1624 *
1625 * An escaped octet is encoded as a character triplet, consisting of the
1626 * percent character "%" followed by the two hexadecimal digits
1627 * representing the octet code. For example, "%20" is the escaped
1628 * encoding for the US-ASCII space character.
1629 * <p>
1630 * Conversion from the local filesystem character set to UTF-8 will
1631 * normally involve a two step process. First convert the local character
1632 * set to the UCS; then convert the UCS to UTF-8.
1633 * The first step in the process can be performed by maintaining a mapping
1634 * table that includes the local character set code and the corresponding
1635 * UCS code.
1636 * The next step is to convert the UCS character code to the UTF-8 encoding.
1637 * <p>
1638 * Mapping between vendor codepages can be done in a very similar manner
1639 * as described above.
1640 * <p>
1641 * The only time escape encodings can allowedly be made is when a URI is
1642 * being created from its component parts. The escape and validate methods
1643 * are internally performed within this method.
1644 *
1645 * @param original the original character sequence
1646 * @param allowed those characters that are allowed within a component
1647 * @param charset the protocol charset
1648 * @return URI character sequence
1649 * @throws URIException null component or unsupported character encoding
1650 */
1651 protected static char[] encode(String original, BitSet allowed,
1652 String charset) throws URIException {
1653
1654 // encode original to uri characters.
1655 if (original == null) {
1656 throw new URIException(URIException.PARSING, "null");
1657 }
1658 // escape octet to uri characters.
1659 if (allowed == null) {
1660 throw new URIException(URIException.PARSING,
1661 "null allowed characters");
1662 }
1663 byte[] octets;
1664 try {
1665 octets = original.getBytes(charset);
1666 } catch (UnsupportedEncodingException error) {
1667 throw new URIException(URIException.UNSUPPORTED_ENCODING, charset);
1668 }
1669 StringBuffer buf = new StringBuffer(octets.length);
1670 for (int i = 0; i < octets.length; i++) {
1671 char c = (char) octets[i];
1672 if (allowed.get(c)) {
1673 buf.append(c);
1674 } else {
1675 buf.append('%');
1676 byte b = octets[i]; // use the original byte value
1677 char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
1678 buf.append(Character.toUpperCase(hexadecimal)); // high
1679 hexadecimal = Character.forDigit(b & 0xF, 16);
1680 buf.append(Character.toUpperCase(hexadecimal)); // low
1681 }
1682 }
1683
1684 return buf.toString().toCharArray();
1685 }
1686
1687
1688 /***
1689 * Decodes URI encoded string.
1690 *
1691 * This is a two mapping, one from URI characters to octets, and
1692 * subsequently a second from octets to original characters:
1693 * <p><blockquote><pre>
1694 * URI character sequence->octet sequence->original character sequence
1695 * </pre></blockquote><p>
1696 *
1697 * A URI must be separated into its components before the escaped
1698 * characters within those components can be allowedly decoded.
1699 * <p>
1700 * Notice that there is a chance that URI characters that are non UTF-8
1701 * may be parsed as valid UTF-8. A recent non-scientific analysis found
1702 * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1703 * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1704 * false reading.
1705 * <p>
1706 * The percent "%" character always has the reserved purpose of being
1707 * the escape indicator, it must be escaped as "%25" in order to be used
1708 * as data within a URI.
1709 * <p>
1710 * The unescape method is internally performed within this method.
1711 *
1712 * @param component the URI character sequence
1713 * @param charset the protocol charset
1714 * @return original character sequence
1715 * @throws URIException incomplete trailing escape pattern or unsupported
1716 * character encoding
1717 */
1718 protected static String decode(char[] component, String charset)
1719 throws URIException {
1720
1721 // unescape uri characters to octets
1722 if (component == null) {
1723 return null;
1724 }
1725
1726 byte[] octets;
1727 try {
1728 octets = new String(component).getBytes(charset);
1729 } catch (UnsupportedEncodingException error) {
1730 throw new URIException(URIException.UNSUPPORTED_ENCODING,
1731 "not supported " + charset + " encoding");
1732 }
1733 int length = octets.length;
1734 int oi = 0; // output index
1735 for (int ii = 0; ii < length; oi++) {
1736 byte aByte = (byte) octets[ii++];
1737 if (aByte == '%' && ii + 2 <= length) {
1738 byte high = (byte) Character.digit((char) octets[ii++], 16);
1739 byte low = (byte) Character.digit((char) octets[ii++], 16);
1740 if (high == -1 || low == -1) {
1741 throw new URIException(URIException.ESCAPING,
1742 "incomplete trailing escape pattern");
1743
1744 }
1745 aByte = (byte) ((high << 4) + low);
1746 }
1747 octets[oi] = (byte) aByte;
1748 }
1749
1750 String result;
1751 try {
1752 result = new String(octets, 0, oi, charset);
1753 } catch (UnsupportedEncodingException error) {
1754 throw new URIException(URIException.UNSUPPORTED_ENCODING,
1755 "not supported " + charset + " encoding");
1756 }
1757
1758 return result;
1759 }
1760
1761
1762 /***
1763 * Pre-validate the unescaped URI string within a specific component.
1764 *
1765 * @param component the component string within the component
1766 * @param disallowed those characters disallowed within the component
1767 * @return if true, it doesn't have the disallowed characters
1768 * if false, the component is undefined or an incorrect one
1769 */
1770 protected boolean prevalidate(String component, BitSet disallowed) {
1771 // prevalidate the given component by disallowed characters
1772 if (component == null) {
1773 return false; // undefined
1774 }
1775 char[] target = component.toCharArray();
1776 for (int i = 0; i < target.length; i++) {
1777 if (disallowed.get(target[i])) {
1778 return false;
1779 }
1780 }
1781 return true;
1782 }
1783
1784
1785 /***
1786 * Validate the URI characters within a specific component.
1787 * The component must be performed after escape encoding. Or it doesn't
1788 * include escaped characters.
1789 *
1790 * @param component the characters sequence within the component
1791 * @param generous those characters that are allowed within a component
1792 * @return if true, it's the correct URI character sequence
1793 */
1794 protected boolean validate(char[] component, BitSet generous) {
1795 // validate each component by generous characters
1796 return validate(component, 0, -1, generous);
1797 }
1798
1799
1800 /***
1801 * Validate the URI characters within a specific component.
1802 * The component must be performed after escape encoding. Or it doesn't
1803 * include escaped characters.
1804 * <p>
1805 * It's not that much strict, generous. The strict validation might be
1806 * performed before being called this method.
1807 *
1808 * @param component the characters sequence within the component
1809 * @param soffset the starting offset of the given component
1810 * @param eoffset the ending offset of the given component
1811 * if -1, it means the length of the component
1812 * @param generous those characters that are allowed within a component
1813 * @return if true, it's the correct URI character sequence
1814 */
1815 protected boolean validate(char[] component, int soffset, int eoffset,
1816 BitSet generous) {
1817 // validate each component by generous characters
1818 if (eoffset == -1) {
1819 eoffset = component.length - 1;
1820 }
1821 for (int i = soffset; i <= eoffset; i++) {
1822 if (!generous.get(component[i])) {
1823 return false;
1824 }
1825 }
1826 return true;
1827 }
1828
1829
1830 /***
1831 * In order to avoid any possilbity of conflict with non-ASCII characters,
1832 * Parse a URI reference as a <code>String</code> with the character
1833 * encoding of the local system or the document.
1834 * <p>
1835 * The following line is the regular expression for breaking-down a URI
1836 * reference into its components.
1837 * <p><blockquote><pre>
1838 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1839 * 12 3 4 5 6 7 8 9
1840 * </pre></blockquote><p>
1841 * For example, matching the above expression to
1842 * http://jakarta.apache.org/ietf/uri/#Related
1843 * results in the following subexpression matches:
1844 * <p><blockquote><pre>
1845 * $1 = http:
1846 * scheme = $2 = http
1847 * $3 = //jakarta.apache.org
1848 * authority = $4 = jakarta.apache.org
1849 * path = $5 = /ietf/uri/
1850 * $6 = <undefined>
1851 * query = $7 = <undefined>
1852 * $8 = #Related
1853 * fragment = $9 = Related
1854 * </pre></blockquote><p>
1855 *
1856 * @param original the original character sequence
1857 * @param escaped <code>true</code> if <code>original</code> is escaped
1858 * @throws URIException If an error occurs.
1859 */
1860 protected void parseUriReference(String original, boolean escaped)
1861 throws URIException {
1862
1863 // validate and contruct the URI character sequence
1864 if (original == null) {
1865 throw new URIException("URI-Reference required");
1866 }
1867
1868 /* @
1869 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1870 */
1871 String tmp = original.trim();
1872
1873 /*
1874 * The length of the string sequence of characters.
1875 * It may not be equal to the length of the byte array.
1876 */
1877 int length = tmp.length();
1878
1879 /*
1880 * Remove the delimiters like angle brackets around an URI.
1881 */
1882 if (length > 0) {
1883 char[] firstDelimiter = { tmp.charAt(0) };
1884 if (validate(firstDelimiter, delims)) {
1885 if (length >= 2) {
1886 char[] lastDelimiter = { tmp.charAt(length - 1) };
1887 if (validate(lastDelimiter, delims)) {
1888 tmp = tmp.substring(1, length - 1);
1889 length = length - 2;
1890 }
1891 }
1892 }
1893 }
1894
1895 /*
1896 * The starting index
1897 */
1898 int from = 0;
1899
1900 /*
1901 * The test flag whether the URI is started from the path component.
1902 */
1903 boolean isStartedFromPath = false;
1904 int atColon = tmp.indexOf(':');
1905 int atSlash = tmp.indexOf('/');
1906 if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) {
1907 isStartedFromPath = true;
1908 }
1909
1910 /*
1911 * <p><blockquote><pre>
1912 * @@@@@@@@
1913 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1914 * </pre></blockquote><p>
1915 */
1916 int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1917 if (at == -1) {
1918 at = 0;
1919 }
1920
1921 /*
1922 * Parse the scheme.
1923 * <p><blockquote><pre>
1924 * scheme = $2 = http
1925 * @
1926 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1927 * </pre></blockquote><p>
1928 */
1929 if (at < length && tmp.charAt(at) == ':') {
1930 char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1931 if (validate(target, scheme)) {
1932 _scheme = target;
1933 } else {
1934 throw new URIException("incorrect scheme");
1935 }
1936 from = ++at;
1937 }
1938
1939 /*
1940 * Parse the authority component.
1941 * <p><blockquote><pre>
1942 * authority = $4 = jakarta.apache.org
1943 * @@
1944 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1945 * </pre></blockquote><p>
1946 */
1947 // Reset flags
1948 _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1949 if (0 <= at && at < length && tmp.charAt(at) == '/') {
1950 // Set flag
1951 _is_hier_part = true;
1952 if (at + 2 < length && tmp.charAt(at + 1) == '/') {
1953 // the temporary index to start the search from
1954 int next = indexFirstOf(tmp, "/?#", at + 2);
1955 if (next == -1) {
1956 next = (tmp.substring(at + 2).length() == 0) ? at + 2
1957 : tmp.length();
1958 }
1959 parseAuthority(tmp.substring(at + 2, next), escaped);
1960 from = at = next;
1961 // Set flag
1962 _is_net_path = true;
1963 }
1964 if (from == at) {
1965 // Set flag
1966 _is_abs_path = true;
1967 }
1968 }
1969
1970 /*
1971 * Parse the path component.
1972 * <p><blockquote><pre>
1973 * path = $5 = /ietf/uri/
1974 * @@@@@@
1975 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1976 * </pre></blockquote><p>
1977 */
1978 if (from < length) {
1979 // rel_path = rel_segment [ abs_path ]
1980 int next = indexFirstOf(tmp, "?#", from);
1981 if (next == -1) {
1982 next = tmp.length();
1983 }
1984 if (!_is_abs_path) {
1985 if (!escaped
1986 && prevalidate(tmp.substring(from, next), disallowed_rel_path)
1987 || escaped
1988 && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
1989 // Set flag
1990 _is_rel_path = true;
1991 } else if (!escaped
1992 && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
1993 || escaped
1994 && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
1995 // Set flag
1996 _is_opaque_part = true;
1997 } else {
1998 // the path component may be empty
1999 _path = null;
2000 }
2001 }
2002 if (escaped) {
2003 setRawPath(tmp.substring(from, next).toCharArray());
2004 } else {
2005 setPath(tmp.substring(from, next));
2006 }
2007 at = next;
2008 }
2009
2010 // set the charset to do escape encoding
2011 String charset = getProtocolCharset();
2012
2013 /*
2014 * Parse the query component.
2015 * <p><blockquote><pre>
2016 * query = $7 = <undefined>
2017 * @@@@@@@@@
2018 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2019 * </pre></blockquote><p>
2020 */
2021 if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
2022 int next = tmp.indexOf('#', at + 1);
2023 if (next == -1) {
2024 next = tmp.length();
2025 }
2026 _query = (escaped) ? tmp.substring(at + 1, next).toCharArray()
2027 : encode(tmp.substring(at + 1, next), allowed_query, charset);
2028 at = next;
2029 }
2030
2031 /*
2032 * Parse the fragment component.
2033 * <p><blockquote><pre>
2034 * fragment = $9 = Related
2035 * @@@@@@@@
2036 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2037 * </pre></blockquote><p>
2038 */
2039 if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
2040 if (at + 1 == length) { // empty fragment
2041 _fragment = "".toCharArray();
2042 } else {
2043 _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
2044 : encode(tmp.substring(at + 1), allowed_fragment, charset);
2045 }
2046 }
2047
2048 // set this URI.
2049 setURI();
2050 }
2051
2052
2053 /***
2054 * Get the earlier index that to be searched for the first occurrance in
2055 * one of any of the given string.
2056 *
2057 * @param s the string to be indexed
2058 * @param delims the delimiters used to index
2059 * @return the earlier index if there are delimiters
2060 */
2061 protected int indexFirstOf(String s, String delims) {
2062 return indexFirstOf(s, delims, -1);
2063 }
2064
2065
2066 /***
2067 * Get the earlier index that to be searched for the first occurrance in
2068 * one of any of the given string.
2069 *
2070 * @param s the string to be indexed
2071 * @param delims the delimiters used to index
2072 * @param offset the from index
2073 * @return the earlier index if there are delimiters
2074 */
2075 protected int indexFirstOf(String s, String delims, int offset) {
2076 if (s == null || s.length() == 0) {
2077 return -1;
2078 }
2079 if (delims == null || delims.length() == 0) {
2080 return -1;
2081 }
2082 // check boundaries
2083 if (offset < 0) {
2084 offset = 0;
2085 } else if (offset > s.length()) {
2086 return -1;
2087 }
2088 // s is never null
2089 int min = s.length();
2090 char[] delim = delims.toCharArray();
2091 for (int i = 0; i < delim.length; i++) {
2092 int at = s.indexOf(delim[i], offset);
2093 if (at >= 0 && at < min) {
2094 min = at;
2095 }
2096 }
2097 return (min == s.length()) ? -1 : min;
2098 }
2099
2100
2101 /***
2102 * Get the earlier index that to be searched for the first occurrance in
2103 * one of any of the given array.
2104 *
2105 * @param s the character array to be indexed
2106 * @param delim the delimiter used to index
2107 * @return the ealier index if there are a delimiter
2108 */
2109 protected int indexFirstOf(char[] s, char delim) {
2110 return indexFirstOf(s, delim, 0);
2111 }
2112
2113
2114 /***
2115 * Get the earlier index that to be searched for the first occurrance in
2116 * one of any of the given array.
2117 *
2118 * @param s the character array to be indexed
2119 * @param delim the delimiter used to index
2120 * @param offset The offset.
2121 * @return the ealier index if there is a delimiter
2122 */
2123 protected int indexFirstOf(char[] s, char delim, int offset) {
2124 if (s == null || s.length == 0) {
2125 return -1;
2126 }
2127 // check boundaries
2128 if (offset < 0) {
2129 offset = 0;
2130 } else if (offset > s.length) {
2131 return -1;
2132 }
2133 for (int i = offset; i < s.length; i++) {
2134 if (s[i] == delim) {
2135 return i;
2136 }
2137 }
2138 return -1;
2139 }
2140
2141
2142 /***
2143 * Parse the authority component.
2144 *
2145 * @param original the original character sequence of authority component
2146 * @param escaped <code>true</code> if <code>original</code> is escaped
2147 * @throws URIException If an error occurs.
2148 */
2149 protected void parseAuthority(String original, boolean escaped)
2150 throws URIException {
2151
2152 // Reset flags
2153 _is_reg_name = _is_server =
2154 _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2155
2156 // set the charset to do escape encoding
2157 String charset = getProtocolCharset();
2158
2159 boolean hasPort = true;
2160 int from = 0;
2161 int next = original.indexOf('@');
2162 if (next != -1) { // neither -1 and 0
2163 // each protocol extented from URI supports the specific userinfo
2164 _userinfo = (escaped) ? original.substring(0, next).toCharArray()
2165 : encode(original.substring(0, next), allowed_userinfo,
2166 charset);
2167 from = next + 1;
2168 }
2169 next = original.indexOf('[', from);
2170 if (next >= from) {
2171 next = original.indexOf(']', from);
2172 if (next == -1) {
2173 throw new URIException(URIException.PARSING, "IPv6reference");
2174 } else {
2175 next++;
2176 }
2177 // In IPv6reference, '[', ']' should be excluded
2178 _host = (escaped) ? original.substring(from, next).toCharArray()
2179 : encode(original.substring(from, next), allowed_IPv6reference,
2180 charset);
2181 // Set flag
2182 _is_IPv6reference = true;
2183 } else { // only for !_is_IPv6reference
2184 next = original.indexOf(':', from);
2185 if (next == -1) {
2186 next = original.length();
2187 hasPort = false;
2188 }
2189 // REMINDME: it doesn't need the pre-validation
2190 _host = original.substring(from, next).toCharArray();
2191 if (validate(_host, IPv4address)) {
2192 // Set flag
2193 _is_IPv4address = true;
2194 } else if (validate(_host, hostname)) {
2195 // Set flag
2196 _is_hostname = true;
2197 } else {
2198 // Set flag
2199 _is_reg_name = true;
2200 }
2201 }
2202 if (_is_reg_name) {
2203 // Reset flags for a server-based naming authority
2204 _is_server = _is_hostname = _is_IPv4address =
2205 _is_IPv6reference = false;
2206 // set a registry-based naming authority
2207 _authority = (escaped) ? original.toString().toCharArray()
2208 : encode(original.toString(), allowed_reg_name, charset);
2209 } else {
2210 if (original.length() - 1 > next && hasPort
2211 && original.charAt(next) == ':') { // not empty
2212 from = next + 1;
2213 try {
2214 _port = Integer.parseInt(original.substring(from));
2215 } catch (NumberFormatException error) {
2216 throw new URIException(URIException.PARSING,
2217 "invalid port number");
2218 }
2219 }
2220 // set a server-based naming authority
2221 StringBuffer buf = new StringBuffer();
2222 if (_userinfo != null) { // has_userinfo
2223 buf.append(_userinfo);
2224 buf.append('@');
2225 }
2226 if (_host != null) {
2227 buf.append(_host);
2228 if (_port != -1) {
2229 buf.append(':');
2230 buf.append(_port);
2231 }
2232 }
2233 _authority = buf.toString().toCharArray();
2234 // Set flag
2235 _is_server = true;
2236 }
2237 }
2238
2239
2240 /***
2241 * Once it's parsed successfully, set this URI.
2242 *
2243 * @see #getRawURI
2244 */
2245 protected void setURI() {
2246 // set _uri
2247 StringBuffer buf = new StringBuffer();
2248 // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2249 if (_scheme != null) {
2250 buf.append(_scheme);
2251 buf.append(':');
2252 }
2253 if (_is_net_path) {
2254 buf.append("//");
2255 if (_authority != null) { // has_authority
2256 if (_userinfo != null) { // by default, remove userinfo part
2257 if (_host != null) {
2258 buf.append(_host);
2259 if (_port != -1) {
2260 buf.append(':');
2261 buf.append(_port);
2262 }
2263 }
2264 } else {
2265 buf.append(_authority);
2266 }
2267 }
2268 }
2269 if (_opaque != null && _is_opaque_part) {
2270 buf.append(_opaque);
2271 } else if (_path != null) {
2272 // _is_hier_part or _is_relativeURI
2273 if (_path.length != 0) {
2274 buf.append(_path);
2275 }
2276 }
2277 if (_query != null) { // has_query
2278 buf.append('?');
2279 buf.append(_query);
2280 }
2281 // ignore the fragment identifier
2282 _uri = buf.toString().toCharArray();
2283 hash = 0;
2284 }
2285
2286 // ----------------------------------------------------------- Test methods
2287
2288
2289 /***
2290 * Tell whether or not this URI is absolute.
2291 *
2292 * @return true iif this URI is absoluteURI
2293 */
2294 public boolean isAbsoluteURI() {
2295 return (_scheme != null);
2296 }
2297
2298
2299 /***
2300 * Tell whether or not this URI is relative.
2301 *
2302 * @return true iif this URI is relativeURI
2303 */
2304 public boolean isRelativeURI() {
2305 return (_scheme == null);
2306 }
2307
2308
2309 /***
2310 * Tell whether or not the absoluteURI of this URI is hier_part.
2311 *
2312 * @return true iif the absoluteURI is hier_part
2313 */
2314 public boolean isHierPart() {
2315 return _is_hier_part;
2316 }
2317
2318
2319 /***
2320 * Tell whether or not the absoluteURI of this URI is opaque_part.
2321 *
2322 * @return true iif the absoluteURI is opaque_part
2323 */
2324 public boolean isOpaquePart() {
2325 return _is_opaque_part;
2326 }
2327
2328
2329 /***
2330 * Tell whether or not the relativeURI or heir_part of this URI is net_path.
2331 * It's the same function as the has_authority() method.
2332 *
2333 * @return true iif the relativeURI or heir_part is net_path
2334 * @see #hasAuthority
2335 */
2336 public boolean isNetPath() {
2337 return _is_net_path || (_authority != null);
2338 }
2339
2340
2341 /***
2342 * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2343 *
2344 * @return true iif the relativeURI or hier_part is abs_path
2345 */
2346 public boolean isAbsPath() {
2347 return _is_abs_path;
2348 }
2349
2350
2351 /***
2352 * Tell whether or not the relativeURI of this URI is rel_path.
2353 *
2354 * @return true iif the relativeURI is rel_path
2355 */
2356 public boolean isRelPath() {
2357 return _is_rel_path;
2358 }
2359
2360
2361 /***
2362 * Tell whether or not this URI has authority.
2363 * It's the same function as the is_net_path() method.
2364 *
2365 * @return true iif this URI has authority
2366 * @see #isNetPath
2367 */
2368 public boolean hasAuthority() {
2369 return (_authority != null) || _is_net_path;
2370 }
2371
2372 /***
2373 * Tell whether or not the authority component of this URI is reg_name.
2374 *
2375 * @return true iif the authority component is reg_name
2376 */
2377 public boolean isRegName() {
2378 return _is_reg_name;
2379 }
2380
2381
2382 /***
2383 * Tell whether or not the authority component of this URI is server.
2384 *
2385 * @return true iif the authority component is server
2386 */
2387 public boolean isServer() {
2388 return _is_server;
2389 }
2390
2391
2392 /***
2393 * Tell whether or not this URI has userinfo.
2394 *
2395 * @return true iif this URI has userinfo
2396 */
2397 public boolean hasUserinfo() {
2398 return (_userinfo != null);
2399 }
2400
2401
2402 /***
2403 * Tell whether or not the host part of this URI is hostname.
2404 *
2405 * @return true iif the host part is hostname
2406 */
2407 public boolean isHostname() {
2408 return _is_hostname;
2409 }
2410
2411
2412 /***
2413 * Tell whether or not the host part of this URI is IPv4address.
2414 *
2415 * @return true iif the host part is IPv4address
2416 */
2417 public boolean isIPv4address() {
2418 return _is_IPv4address;
2419 }
2420
2421
2422 /***
2423 * Tell whether or not the host part of this URI is IPv6reference.
2424 *
2425 * @return true iif the host part is IPv6reference
2426 */
2427 public boolean isIPv6reference() {
2428 return _is_IPv6reference;
2429 }
2430
2431
2432 /***
2433 * Tell whether or not this URI has query.
2434 *
2435 * @return true iif this URI has query
2436 */
2437 public boolean hasQuery() {
2438 return (_query != null);
2439 }
2440
2441
2442 /***
2443 * Tell whether or not this URI has fragment.
2444 *
2445 * @return true iif this URI has fragment
2446 */
2447 public boolean hasFragment() {
2448 return (_fragment != null);
2449 }
2450
2451
2452 // ---------------------------------------------------------------- Charset
2453
2454
2455 /***
2456 * Set the default charset of the protocol.
2457 * <p>
2458 * The character set used to store files SHALL remain a local decision and
2459 * MAY depend on the capability of local operating systems. Prior to the
2460 * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
2461 * and UTF-8 encoded. This approach, while allowing international exchange
2462 * of URIs, will still allow backward compatibility with older systems
2463 * because the code set positions for ASCII characters are identical to the
2464 * one byte sequence in UTF-8.
2465 * <p>
2466 * An individual URI scheme may require a single charset, define a default
2467 * charset, or provide a way to indicate the charset used.
2468 *
2469 * <p>
2470 * Always all the time, the setter method is always succeeded and throws
2471 * <code>DefaultCharsetChanged</code> exception.
2472 *
2473 * So API programmer must follow the following way:
2474 * <code><pre>
2475 * import org.apache.util.URI$DefaultCharsetChanged;
2476 * .
2477 * .
2478 * .
2479 * try {
2480 * URI.setDefaultProtocolCharset("UTF-8");
2481 * } catch (DefaultCharsetChanged cc) {
2482 * // CASE 1: the exception could be ignored, when it is set by user
2483 * if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
2484 * // CASE 2: let user know the default protocol charset changed
2485 * } else {
2486 * // CASE 2: let user know the default document charset changed
2487 * }
2488 * }
2489 * </pre></code>
2490 *
2491 * The API programmer is responsible to set the correct charset.
2492 * And each application should remember its own charset to support.
2493 *
2494 * @param charset the default charset for each protocol
2495 * @throws DefaultCharsetChanged default charset changed
2496 */
2497 public static void setDefaultProtocolCharset(String charset)
2498 throws DefaultCharsetChanged {
2499
2500 defaultProtocolCharset = charset;
2501 throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
2502 "the default protocol charset changed");
2503 }
2504
2505
2506 /***
2507 * Get the default charset of the protocol.
2508 * <p>
2509 * An individual URI scheme may require a single charset, define a default
2510 * charset, or provide a way to indicate the charset used.
2511 * <p>
2512 * To work globally either requires support of a number of character sets
2513 * and to be able to convert between them, or the use of a single preferred
2514 * character set.
2515 * For support of global compatibility it is STRONGLY RECOMMENDED that
2516 * clients and servers use UTF-8 encoding when exchanging URIs.
2517 *
2518 * @return the default charset string
2519 */
2520 public static String getDefaultProtocolCharset() {
2521 return defaultProtocolCharset;
2522 }
2523
2524
2525 /***
2526 * Get the protocol charset used by this current URI instance.
2527 * It was set by the constructor for this instance. If it was not set by
2528 * contructor, it will return the default protocol charset.
2529 *
2530 * @return the protocol charset string
2531 * @see #getDefaultProtocolCharset
2532 */
2533 public String getProtocolCharset() {
2534 return (protocolCharset != null)
2535 ? protocolCharset
2536 : defaultProtocolCharset;
2537 }
2538
2539
2540 /***
2541 * Set the default charset of the document.
2542 * <p>
2543 * Notice that it will be possible to contain mixed characters (e.g.
2544 * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2545 * display of these character sets, the protocol charset could be simply
2546 * used again. Because it's not yet implemented that the insertion of BIDI
2547 * control characters at different points during composition is extracted.
2548 * <p>
2549 *
2550 * Always all the time, the setter method is always succeeded and throws
2551 * <code>DefaultCharsetChanged</code> exception.
2552 *
2553 * So API programmer must follow the following way:
2554 * <code><pre>
2555 * import org.apache.util.URI$DefaultCharsetChanged;
2556 * .
2557 * .
2558 * .
2559 * try {
2560 * URI.setDefaultDocumentCharset("EUC-KR");
2561 * } catch (DefaultCharsetChanged cc) {
2562 * // CASE 1: the exception could be ignored, when it is set by user
2563 * if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
2564 * // CASE 2: let user know the default document charset changed
2565 * } else {
2566 * // CASE 2: let user know the default protocol charset changed
2567 * }
2568 * }
2569 * </pre></code>
2570 *
2571 * The API programmer is responsible to set the correct charset.
2572 * And each application should remember its own charset to support.
2573 *
2574 * @param charset the default charset for the document
2575 * @throws DefaultCharsetChanged default charset changed
2576 */
2577 public static void setDefaultDocumentCharset(String charset)
2578 throws DefaultCharsetChanged {
2579
2580 defaultDocumentCharset = charset;
2581 throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
2582 "the default document charset changed");
2583 }
2584
2585
2586 /***
2587 * Get the recommended default charset of the document.
2588 *
2589 * @return the default charset string
2590 */
2591 public static String getDefaultDocumentCharset() {
2592 return defaultDocumentCharset;
2593 }
2594
2595
2596 /***
2597 * Get the default charset of the document by locale.
2598 *
2599 * @return the default charset string by locale
2600 */
2601 public static String getDefaultDocumentCharsetByLocale() {
2602 return defaultDocumentCharsetByLocale;
2603 }
2604
2605
2606 /***
2607 * Get the default charset of the document by platform.
2608 *
2609 * @return the default charset string by platform
2610 */
2611 public static String getDefaultDocumentCharsetByPlatform() {
2612 return defaultDocumentCharsetByPlatform;
2613 }
2614
2615 // ------------------------------------------------------------- The scheme
2616
2617 /***
2618 * Get the scheme.
2619 *
2620 * @return the scheme
2621 */
2622 public char[] getRawScheme() {
2623 return _scheme;
2624 }
2625
2626
2627 /***
2628 * Get the scheme.
2629 *
2630 * @return the scheme
2631 * null if undefined scheme
2632 */
2633 public String getScheme() {
2634 return (_scheme == null) ? null : new String(_scheme);
2635 }
2636
2637 // ---------------------------------------------------------- The authority
2638
2639 /***
2640 * Set the authority. It can be one type of server, hostport, hostname,
2641 * IPv4address, IPv6reference and reg_name.
2642 * <p><blockquote><pre>
2643 * authority = server | reg_name
2644 * </pre></blockquote><p>
2645 *
2646 * @param escapedAuthority the raw escaped authority
2647 * @throws URIException If {@link
2648 * #parseAuthority(java.lang.String,boolean)} fails
2649 * @throws NullPointerException null authority
2650 */
2651 public void setRawAuthority(char[] escapedAuthority)
2652 throws URIException, NullPointerException {
2653
2654 parseAuthority(new String(escapedAuthority), true);
2655 setURI();
2656 }
2657
2658
2659 /***
2660 * Set the authority. It can be one type of server, hostport, hostname,
2661 * IPv4address, IPv6reference and reg_name.
2662 * Note that there is no setAuthority method by the escape encoding reason.
2663 *
2664 * @param escapedAuthority the escaped authority string
2665 * @throws URIException If {@link
2666 * #parseAuthority(java.lang.String,boolean)} fails
2667 */
2668 public void setEscapedAuthority(String escapedAuthority)
2669 throws URIException {
2670
2671 parseAuthority(escapedAuthority, true);
2672 setURI();
2673 }
2674
2675
2676 /***
2677 * Get the raw-escaped authority.
2678 *
2679 * @return the raw-escaped authority
2680 */
2681 public char[] getRawAuthority() {
2682 return _authority;
2683 }
2684
2685
2686 /***
2687 * Get the escaped authority.
2688 *
2689 * @return the escaped authority
2690 */
2691 public String getEscapedAuthority() {
2692 return (_authority == null) ? null : new String(_authority);
2693 }
2694
2695
2696 /***
2697 * Get the authority.
2698 *
2699 * @return the authority
2700 * @throws URIException If {@link #decode} fails
2701 */
2702 public String getAuthority() throws URIException {
2703 return (_authority == null) ? null : decode(_authority,
2704 getProtocolCharset());
2705 }
2706
2707 // ----------------------------------------------------------- The userinfo
2708
2709 /***
2710 * Get the raw-escaped userinfo.
2711 *
2712 * @return the raw-escaped userinfo
2713 * @see #getAuthority
2714 */
2715 public char[] getRawUserinfo() {
2716 return _userinfo;
2717 }
2718
2719
2720 /***
2721 * Get the escaped userinfo.
2722 *
2723 * @return the escaped userinfo
2724 * @see #getAuthority
2725 */
2726 public String getEscapedUserinfo() {
2727 return (_userinfo == null) ? null : new String(_userinfo);
2728 }
2729
2730
2731 /***
2732 * Get the userinfo.
2733 *
2734 * @return the userinfo
2735 * @throws URIException If {@link #decode} fails
2736 * @see #getAuthority
2737 */
2738 public String getUserinfo() throws URIException {
2739 return (_userinfo == null) ? null : decode(_userinfo,
2740 getProtocolCharset());
2741 }
2742
2743 // --------------------------------------------------------------- The host
2744
2745 /***
2746 * Get the host.
2747 * <p><blockquote><pre>
2748 * host = hostname | IPv4address | IPv6reference
2749 * </pre></blockquote><p>
2750 *
2751 * @return the host
2752 * @see #getAuthority
2753 */
2754 public char[] getRawHost() {
2755 return _host;
2756 }
2757
2758
2759 /***
2760 * Get the host.
2761 * <p><blockquote><pre>
2762 * host = hostname | IPv4address | IPv6reference
2763 * </pre></blockquote><p>
2764 *
2765 * @return the host
2766 * @throws URIException If {@link #decode} fails
2767 * @see #getAuthority
2768 */
2769 public String getHost() throws URIException {
2770 return decode(_host, getProtocolCharset());
2771 }
2772
2773 // --------------------------------------------------------------- The port
2774
2775 /***
2776 * Get the port. In order to get the specfic default port, the specific
2777 * protocol-supported class extended from the URI class should be used.
2778 * It has the server-based naming authority.
2779 *
2780 * @return the port
2781 * if -1, it has the default port for the scheme or the server-based
2782 * naming authority is not supported in the specific URI.
2783 */
2784 public int getPort() {
2785 return _port;
2786 }
2787
2788 // --------------------------------------------------------------- The path
2789
2790 /***
2791 * Set the raw-escaped path.
2792 *
2793 * @param escapedPath the path character sequence
2794 * @throws URIException encoding error or not proper for initial instance
2795 * @see #encode
2796 */
2797 public void setRawPath(char[] escapedPath) throws URIException {
2798 if (escapedPath == null || escapedPath.length == 0) {
2799 _path = _opaque = escapedPath;
2800 setURI();
2801 return;
2802 }
2803 // remove the fragment identifier
2804 escapedPath = removeFragmentIdentifier(escapedPath);
2805 if (_is_net_path || _is_abs_path) {
2806 if (escapedPath[0] != '/') {
2807 throw new URIException(URIException.PARSING,
2808 "not absolute path");
2809 }
2810 if (!validate(escapedPath, abs_path)) {
2811 throw new URIException(URIException.ESCAPING,
2812 "escaped absolute path not valid");
2813 }
2814 _path = escapedPath;
2815 } else if (_is_rel_path) {
2816 int at = indexFirstOf(escapedPath, '/');
2817 if (at == 0) {
2818 throw new URIException(URIException.PARSING, "incorrect path");
2819 }
2820 if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment)
2821 && !validate(escapedPath, at, -1, abs_path)
2822 || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
2823
2824 throw new URIException(URIException.ESCAPING,
2825 "escaped relative path not valid");
2826 }
2827 _path = escapedPath;
2828 } else if (_is_opaque_part) {
2829 if (!uric_no_slash.get(escapedPath[0])
2830 && !validate(escapedPath, 1, -1, uric)) {
2831 throw new URIException(URIException.ESCAPING,
2832 "escaped opaque part not valid");
2833 }
2834 _opaque = escapedPath;
2835 } else {
2836 throw new URIException(URIException.PARSING, "incorrect path");
2837 }
2838 setURI();
2839 }
2840
2841
2842 /***
2843 * Set the escaped path.
2844 *
2845 * @param escapedPath the escaped path string
2846 * @throws URIException encoding error or not proper for initial instance
2847 * @see #encode
2848 */
2849 public void setEscapedPath(String escapedPath) throws URIException {
2850 if (escapedPath == null) {
2851 _path = _opaque = null;
2852 setURI();
2853 return;
2854 }
2855 setRawPath(escapedPath.toCharArray());
2856 }
2857
2858
2859 /***
2860 * Set the path.
2861 *
2862 * @param path the path string
2863 * @throws URIException set incorrectly or fragment only
2864 * @see #encode
2865 */
2866 public void setPath(String path) throws URIException {
2867
2868 if (path == null || path.length() == 0) {
2869 _path = _opaque = (path == null) ? null : path.toCharArray();
2870 setURI();
2871 return;
2872 }
2873 // set the charset to do escape encoding
2874 String charset = getProtocolCharset();
2875
2876 if (_is_net_path || _is_abs_path) {
2877 _path = encode(path, allowed_abs_path, charset);
2878 } else if (_is_rel_path) {
2879 StringBuffer buff = new StringBuffer(path.length());
2880 int at = path.indexOf('/');
2881 if (at == 0) { // never 0
2882 throw new URIException(URIException.PARSING,
2883 "incorrect relative path");
2884 }
2885 if (at > 0) {
2886 buff.append(encode(path.substring(0, at), allowed_rel_path,
2887 charset));
2888 buff.append(encode(path.substring(at), allowed_abs_path,
2889 charset));
2890 } else {
2891 buff.append(encode(path, allowed_rel_path, charset));
2892 }
2893 _path = buff.toString().toCharArray();
2894 } else if (_is_opaque_part) {
2895 StringBuffer buf = new StringBuffer();
2896 buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
2897 buf.insert(1, encode(path.substring(1), uric, charset));
2898 _opaque = buf.toString().toCharArray();
2899 } else {
2900 throw new URIException(URIException.PARSING, "incorrect path");
2901 }
2902 setURI();
2903 }
2904
2905
2906 /***
2907 * Resolve the base and relative path.
2908 *
2909 * @param basePath a character array of the basePath
2910 * @param relPath a character array of the relPath
2911 * @return the resolved path
2912 * @throws URIException no more higher path level to be resolved
2913 */
2914 protected char[] resolvePath(char[] basePath, char[] relPath)
2915 throws URIException {
2916
2917 // REMINDME: paths are never null
2918 String base = (basePath == null) ? "" : new String(basePath);
2919 int at = base.lastIndexOf('/');
2920 if (at != -1) {
2921 basePath = base.substring(0, at + 1).toCharArray();
2922 }
2923 // _path could be empty
2924 if (relPath == null || relPath.length == 0) {
2925 return normalize(basePath);
2926 } else if (relPath[0] == '/') {
2927 return normalize(relPath);
2928 } else {
2929 StringBuffer buff = new StringBuffer(base.length()
2930 + relPath.length);
2931 buff.append((at != -1) ? base.substring(0, at + 1) : "/");
2932 buff.append(relPath);
2933 return normalize(buff.toString().toCharArray());
2934 }
2935 }
2936
2937
2938 /***
2939 * Get the raw-escaped current hierarchy level in the given path.
2940 * If the last namespace is a collection, the slash mark ('/') should be
2941 * ended with at the last character of the path string.
2942 *
2943 * @param path the path
2944 * @return the current hierarchy level
2945 * @throws URIException no hierarchy level
2946 */
2947 protected char[] getRawCurrentHierPath(char[] path) throws URIException {
2948
2949 if (_is_opaque_part) {
2950 throw new URIException(URIException.PARSING, "no hierarchy level");
2951 }
2952 if (path == null) {
2953 throw new URIException(URIException.PARSING, "empty path");
2954 }
2955 String buff = new String(path);
2956 int first = buff.indexOf('/');
2957 int last = buff.lastIndexOf('/');
2958 if (last == 0) {
2959 return rootPath;
2960 } else if (first != last && last != -1) {
2961 return buff.substring(0, last).toCharArray();
2962 }
2963 // FIXME: it could be a document on the server side
2964 return path;
2965 }
2966
2967
2968 /***
2969 * Get the raw-escaped current hierarchy level.
2970 *
2971 * @return the raw-escaped current hierarchy level
2972 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2973 */
2974 public char[] getRawCurrentHierPath() throws URIException {
2975 return (_path == null) ? null : getRawCurrentHierPath(_path);
2976 }
2977
2978
2979 /***
2980 * Get the escaped current hierarchy level.
2981 *
2982 * @return the escaped current hierarchy level
2983 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2984 */
2985 public String getEscapedCurrentHierPath() throws URIException {
2986 char[] path = getRawCurrentHierPath();
2987 return (path == null) ? null : new String(path);
2988 }
2989
2990
2991 /***
2992 * Get the current hierarchy level.
2993 *
2994 * @return the current hierarchy level
2995 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2996 * @see #decode
2997 */
2998 public String getCurrentHierPath() throws URIException {
2999 char[] path = getRawCurrentHierPath();
3000 return (path == null) ? null : decode(path, getProtocolCharset());
3001 }
3002
3003
3004 /***
3005 * Get the level above the this hierarchy level.
3006 *
3007 * @return the raw above hierarchy level
3008 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3009 */
3010 public char[] getRawAboveHierPath() throws URIException {
3011 char[] path = getRawCurrentHierPath();
3012 return (path == null) ? null : getRawCurrentHierPath(path);
3013 }
3014
3015
3016 /***
3017 * Get the level above the this hierarchy level.
3018 *
3019 * @return the raw above hierarchy level
3020 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3021 */
3022 public String getEscapedAboveHierPath() throws URIException {
3023 char[] path = getRawAboveHierPath();
3024 return (path == null) ? null : new String(path);
3025 }
3026
3027
3028 /***
3029 * Get the level above the this hierarchy level.
3030 *
3031 * @return the above hierarchy level
3032 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3033 * @see #decode
3034 */
3035 public String getAboveHierPath() throws URIException {
3036 char[] path = getRawAboveHierPath();
3037 return (path == null) ? null : decode(path, getProtocolCharset());
3038 }
3039
3040
3041 /***
3042 * Get the raw-escaped path.
3043 * <p><blockquote><pre>
3044 * path = [ abs_path | opaque_part ]
3045 * </pre></blockquote><p>
3046 *
3047 * @return the raw-escaped path
3048 */
3049 public char[] getRawPath() {
3050 return _is_opaque_part ? _opaque : _path;
3051 }
3052
3053
3054 /***
3055 * Get the escaped path.
3056 * <p><blockquote><pre>
3057 * path = [ abs_path | opaque_part ]
3058 * abs_path = "/" path_segments
3059 * opaque_part = uric_no_slash *uric
3060 * </pre></blockquote><p>
3061 *
3062 * @return the escaped path string
3063 */
3064 public String getEscapedPath() {
3065 char[] path = getRawPath();
3066 return (path == null) ? null : new String(path);
3067 }
3068
3069
3070 /***
3071 * Get the path.
3072 * <p><blockquote><pre>
3073 * path = [ abs_path | opaque_part ]
3074 * </pre></blockquote><p>
3075 * @return the path string
3076 * @throws URIException If {@link #decode} fails.
3077 * @see #decode
3078 */
3079 public String getPath() throws URIException {
3080 char[] path = getRawPath();
3081 return (path == null) ? null : decode(path, getProtocolCharset());
3082 }
3083
3084
3085 /***
3086 * Get the raw-escaped basename of the path.
3087 *
3088 * @return the raw-escaped basename
3089 */
3090 public char[] getRawName() {
3091 if (_path == null) {
3092 return null;
3093 }
3094
3095 int at = 0;
3096 for (int i = _path.length - 1; i >= 0; i--) {
3097 if (_path[i] == '/') {
3098 at = i + 1;
3099 break;
3100 }
3101 }
3102 int len = _path.length - at;
3103 char[] basename = new char[len];
3104 System.arraycopy(_path, at, basename, 0, len);
3105 return basename;
3106 }
3107
3108
3109 /***
3110 * Get the escaped basename of the path.
3111 *
3112 * @return the escaped basename string
3113 */
3114 public String getEscapedName() {
3115 char[] basename = getRawName();
3116 return (basename == null) ? null : new String(basename);
3117 }
3118
3119
3120 /***
3121 * Get the basename of the path.
3122 *
3123 * @return the basename string
3124 * @throws URIException incomplete trailing escape pattern or unsupported
3125 * character encoding
3126 * @see #decode
3127 */
3128 public String getName() throws URIException {
3129 char[] basename = getRawName();
3130 return (basename == null) ? null : decode(getRawName(),
3131 getProtocolCharset());
3132 }
3133
3134 // ----------------------------------------------------- The path and query
3135
3136 /***
3137 * Get the raw-escaped path and query.
3138 *
3139 * @return the raw-escaped path and query
3140 */
3141 public char[] getRawPathQuery() {
3142
3143 if (_path == null && _query == null) {
3144 return null;
3145 }
3146 StringBuffer buff = new StringBuffer();
3147 if (_path != null) {
3148 buff.append(_path);
3149 }
3150 if (_query != null) {
3151 buff.append('?');
3152 buff.append(_query);
3153 }
3154 return buff.toString().toCharArray();
3155 }
3156
3157
3158 /***
3159 * Get the escaped query.
3160 *
3161 * @return the escaped path and query string
3162 */
3163 public String getEscapedPathQuery() {
3164 char[] rawPathQuery = getRawPathQuery();
3165 return (rawPathQuery == null) ? null : new String(rawPathQuery);
3166 }
3167
3168
3169 /***
3170 * Get the path and query.
3171 *
3172 * @return the path and query string.
3173 * @throws URIException incomplete trailing escape pattern or unsupported
3174 * character encoding
3175 * @see #decode
3176 */
3177 public String getPathQuery() throws URIException {
3178 char[] rawPathQuery = getRawPathQuery();
3179 return (rawPathQuery == null) ? null : decode(rawPathQuery,
3180 getProtocolCharset());
3181 }
3182
3183 // -------------------------------------------------------------- The query
3184
3185 /***
3186 * Set the raw-escaped query.
3187 *
3188 * @param escapedQuery the raw-escaped query
3189 * @throws URIException escaped query not valid
3190 */
3191 public void setRawQuery(char[] escapedQuery) throws URIException {
3192 if (escapedQuery == null || escapedQuery.length == 0) {
3193 _query = escapedQuery;
3194 setURI();
3195 return;
3196 }
3197 // remove the fragment identifier
3198 escapedQuery = removeFragmentIdentifier(escapedQuery);
3199 if (!validate(escapedQuery, query)) {
3200 throw new URIException(URIException.ESCAPING,
3201 "escaped query not valid");
3202 }
3203 _query = escapedQuery;
3204 setURI();
3205 }
3206
3207
3208 /***
3209 * Set the escaped query string.
3210 *
3211 * @param escapedQuery the escaped query string
3212 * @throws URIException escaped query not valid
3213 */
3214 public void setEscapedQuery(String escapedQuery) throws URIException {
3215 if (escapedQuery == null) {
3216 _query = null;
3217 setURI();
3218 return;
3219 }
3220 setRawQuery(escapedQuery.toCharArray());
3221 }
3222
3223
3224 /***
3225 * Set the query.
3226 * <p>
3227 * When a query string is not misunderstood the reserved special characters
3228 * ("&", "=", "+", ",", and "$") within a query component, it is
3229 * recommended to use in encoding the whole query with this method.
3230 * <p>
3231 * The additional APIs for the special purpose using by the reserved
3232 * special characters used in each protocol are implemented in each protocol
3233 * classes inherited from <code>URI</code>. So refer to the same-named APIs
3234 * implemented in each specific protocol instance.
3235 *
3236 * @param query the query string.
3237 * @throws URIException incomplete trailing escape pattern or unsupported
3238 * character encoding
3239 * @see #encode
3240 */
3241 public void setQuery(String query) throws URIException {
3242 if (query == null || query.length() == 0) {
3243 _query = (query == null) ? null : query.toCharArray();
3244 setURI();
3245 return;
3246 }
3247 setRawQuery(encode(query, allowed_query, getProtocolCharset()));
3248 }
3249
3250
3251 /***
3252 * Get the raw-escaped query.
3253 *
3254 * @return the raw-escaped query
3255 */
3256 public char[] getRawQuery() {
3257 return _query;
3258 }
3259
3260
3261 /***
3262 * Get the escaped query.
3263 *
3264 * @return the escaped query string
3265 */
3266 public String getEscapedQuery() {
3267 return (_query == null) ? null : new String(_query);
3268 }
3269
3270
3271 /***
3272 * Get the query.
3273 *
3274 * @return the query string.
3275 * @throws URIException incomplete trailing escape pattern or unsupported
3276 * character encoding
3277 * @see #decode
3278 */
3279 public String getQuery() throws URIException {
3280 return (_query == null) ? null : decode(_query, getProtocolCharset());
3281 }
3282
3283 // ----------------------------------------------------------- The fragment
3284
3285 /***
3286 * Set the raw-escaped fragment.
3287 *
3288 * @param escapedFragment the raw-escaped fragment
3289 * @throws URIException escaped fragment not valid
3290 */
3291 public void setRawFragment(char[] escapedFragment) throws URIException {
3292 if (escapedFragment == null || escapedFragment.length == 0) {
3293 _fragment = escapedFragment;
3294 hash = 0;
3295 return;
3296 }
3297 if (!validate(escapedFragment, fragment)) {
3298 throw new URIException(URIException.ESCAPING,
3299 "escaped fragment not valid");
3300 }
3301 _fragment = escapedFragment;
3302 hash = 0;
3303 }
3304
3305
3306 /***
3307 * Set the escaped fragment string.
3308 *
3309 * @param escapedFragment the escaped fragment string
3310 * @throws URIException escaped fragment not valid
3311 */
3312 public void setEscapedFragment(String escapedFragment) throws URIException {
3313 if (escapedFragment == null) {
3314 _fragment = null;
3315 hash = 0;
3316 return;
3317 }
3318 setRawFragment(escapedFragment.toCharArray());
3319 }
3320
3321
3322 /***
3323 * Set the fragment.
3324 *
3325 * @param fragment the fragment string.
3326 * @throws URIException If an error occurs.
3327 */
3328 public void setFragment(String fragment) throws URIException {
3329 if (fragment == null || fragment.length() == 0) {
3330 _fragment = (fragment == null) ? null : fragment.toCharArray();
3331 hash = 0;
3332 return;
3333 }
3334 _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
3335 hash = 0;
3336 }
3337
3338
3339 /***
3340 * Get the raw-escaped fragment.
3341 * <p>
3342 * The optional fragment identifier is not part of a URI, but is often used
3343 * in conjunction with a URI.
3344 * <p>
3345 * The format and interpretation of fragment identifiers is dependent on
3346 * the media type [RFC2046] of the retrieval result.
3347 * <p>
3348 * A fragment identifier is only meaningful when a URI reference is
3349 * intended for retrieval and the result of that retrieval is a document
3350 * for which the identified fragment is consistently defined.
3351 *
3352 * @return the raw-escaped fragment
3353 */
3354 public char[] getRawFragment() {
3355 return _fragment;
3356 }
3357
3358
3359 /***
3360 * Get the escaped fragment.
3361 *
3362 * @return the escaped fragment string
3363 */
3364 public String getEscapedFragment() {
3365 return (_fragment == null) ? null : new String(_fragment);
3366 }
3367
3368
3369 /***
3370 * Get the fragment.
3371 *
3372 * @return the fragment string
3373 * @throws URIException incomplete trailing escape pattern or unsupported
3374 * character encoding
3375 * @see #decode
3376 */
3377 public String getFragment() throws URIException {
3378 return (_fragment == null) ? null : decode(_fragment,
3379 getProtocolCharset());
3380 }
3381
3382 // ------------------------------------------------------------- Utilities
3383
3384 /***
3385 * Remove the fragment identifier of the given component.
3386 *
3387 * @param component the component that a fragment may be included
3388 * @return the component that the fragment identifier is removed
3389 */
3390 protected char[] removeFragmentIdentifier(char[] component) {
3391 if (component == null) {
3392 return null;
3393 }
3394 int lastIndex = new String(component).indexOf('#');
3395 if (lastIndex != -1) {
3396 component = new String(component).substring(0,
3397 lastIndex).toCharArray();
3398 }
3399 return component;
3400 }
3401
3402
3403 /***
3404 * Normalize the given hier path part.
3405 *
3406 * <p>Algorithm taken from URI reference parser at
3407 * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
3408 *
3409 * @param path the path to normalize
3410 * @return the normalized path
3411 * @throws URIException no more higher path level to be normalized
3412 */
3413 protected char[] normalize(char[] path) throws URIException {
3414
3415 if (path == null) {
3416 return null;
3417 }
3418
3419 String normalized = new String(path);
3420
3421 // If the buffer begins with "./" or "../", the "." or ".." is removed.
3422 if (normalized.startsWith("./")) {
3423 normalized = normalized.substring(1);
3424 } else if (normalized.startsWith("../")) {
3425 normalized = normalized.substring(2);
3426 } else if (normalized.startsWith("..")) {
3427 normalized = normalized.substring(2);
3428 }
3429
3430 // All occurrences of "/./" in the buffer are replaced with "/"
3431 int index = -1;
3432 while ((index = normalized.indexOf("/./")) != -1) {
3433 normalized = normalized.substring(0, index) + normalized.substring(index + 2);
3434 }
3435
3436 // If the buffer ends with "/.", the "." is removed.
3437 if (normalized.endsWith("/.")) {
3438 normalized = normalized.substring(0, normalized.length() - 1);
3439 }
3440
3441 int startIndex = 0;
3442
3443 // All occurrences of "/<segment>/../" in the buffer, where ".."
3444 // and <segment> are complete path segments, are iteratively replaced
3445 // with "/" in order from left to right until no matching pattern remains.
3446 // If the buffer ends with "/<segment>/..", that is also replaced
3447 // with "/". Note that <segment> may be empty.
3448 while ((index = normalized.indexOf("/../", startIndex)) != -1) {
3449 int slashIndex = normalized.lastIndexOf('/', index - 1);
3450 if (slashIndex >= 0) {
3451 normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
3452 } else {
3453 startIndex = index + 3;
3454 }
3455 }
3456 if (normalized.endsWith("/..")) {
3457 int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3458 if (slashIndex >= 0) {
3459 normalized = normalized.substring(0, slashIndex + 1);
3460 }
3461 }
3462
3463 // All prefixes of "<segment>/../" in the buffer, where ".."
3464 // and <segment> are complete path segments, are iteratively replaced
3465 // with "/" in order from left to right until no matching pattern remains.
3466 // If the buffer ends with "<segment>/..", that is also replaced
3467 // with "/". Note that <segment> may be empty.
3468 while ((index = normalized.indexOf("/../")) != -1) {
3469 int slashIndex = normalized.lastIndexOf('/', index - 1);
3470 if (slashIndex >= 0) {
3471 break;
3472 } else {
3473 normalized = normalized.substring(index + 3);
3474 }
3475 }
3476 if (normalized.endsWith("/..")) {
3477 int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3478 if (slashIndex < 0) {
3479 normalized = "/";
3480 }
3481 }
3482
3483 return normalized.toCharArray();
3484 }
3485
3486
3487 /***
3488 * Normalize the path part of this URI.
3489 *
3490 * @throws URIException no more higher path level to be normalized
3491 */
3492 public void normalize() throws URIException {
3493 _path = normalize(_path);
3494 setURI();
3495 }
3496
3497
3498 /***
3499 * Test if the first array is equal to the second array.
3500 *
3501 * @param first the first character array
3502 * @param second the second character array
3503 * @return true if they're equal
3504 */
3505 protected boolean equals(char[] first, char[] second) {
3506
3507 if (first == null && second == null) {
3508 return true;
3509 }
3510 if (first == null || second == null) {
3511 return false;
3512 }
3513 if (first.length != second.length) {
3514 return false;
3515 }
3516 for (int i = 0; i < first.length; i++) {
3517 if (first[i] != second[i]) {
3518 return false;
3519 }
3520 }
3521 return true;
3522 }
3523
3524
3525 /***
3526 * Test an object if this URI is equal to another.
3527 *
3528 * @param obj an object to compare
3529 * @return true if two URI objects are equal
3530 */
3531 public boolean equals(Object obj) {
3532
3533 // normalize and test each components
3534 if (obj == this) {
3535 return true;
3536 }
3537 if (!(obj instanceof URI)) {
3538 return false;
3539 }
3540 URI another = (URI) obj;
3541 // scheme
3542 if (!equals(_scheme, another._scheme)) {
3543 return false;
3544 }
3545 // is_opaque_part or is_hier_part? and opaque
3546 if (!equals(_opaque, another._opaque)) {
3547 return false;
3548 }
3549 // is_hier_part
3550 // has_authority
3551 if (!equals(_authority, another._authority)) {
3552 return false;
3553 }
3554 // path
3555 if (!equals(_path, another._path)) {
3556 return false;
3557 }
3558 // has_query
3559 if (!equals(_query, another._query)) {
3560 return false;
3561 }
3562 // has_fragment? should be careful of the only fragment case.
3563 if (!equals(_fragment, another._fragment)) {
3564 return false;
3565 }
3566 return true;
3567 }
3568
3569 // ---------------------------------------------------------- Serialization
3570
3571 /***
3572 * Write the content of this URI.
3573 *
3574 * @param oos the object-output stream
3575 * @throws IOException If an IO problem occurs.
3576 */
3577 protected void writeObject(ObjectOutputStream oos)
3578 throws IOException {
3579
3580 oos.defaultWriteObject();
3581 }
3582
3583
3584 /***
3585 * Read a URI.
3586 *
3587 * @param ois the object-input stream
3588 * @throws ClassNotFoundException If one of the classes specified in the
3589 * input stream cannot be found.
3590 * @throws IOException If an IO problem occurs.
3591 */
3592 protected void readObject(ObjectInputStream ois)
3593 throws ClassNotFoundException, IOException {
3594
3595 ois.defaultReadObject();
3596 }
3597
3598 // -------------------------------------------------------------- Hash code
3599
3600 /***
3601 * Return a hash code for this URI.
3602 *
3603 * @return a has code value for this URI
3604 */
3605 public int hashCode() {
3606 if (hash == 0) {
3607 char[] c = _uri;
3608 if (c != null) {
3609 for (int i = 0, len = c.length; i < len; i++) {
3610 hash = 31 * hash + c[i];
3611 }
3612 }
3613 c = _fragment;
3614 if (c != null) {
3615 for (int i = 0, len = c.length; i < len; i++) {
3616 hash = 31 * hash + c[i];
3617 }
3618 }
3619 }
3620 return hash;
3621 }
3622
3623 // ------------------------------------------------------------- Comparison
3624
3625 /***
3626 * Compare this URI to another object.
3627 *
3628 * @param obj the object to be compared.
3629 * @return 0, if it's same,
3630 * -1, if failed, first being compared with in the authority component
3631 * @throws ClassCastException not URI argument
3632 */
3633 public int compareTo(Object obj) throws ClassCastException {
3634
3635 URI another = (URI) obj;
3636 if (!equals(_authority, another.getRawAuthority())) {
3637 return -1;
3638 }
3639 return toString().compareTo(another.toString());
3640 }
3641
3642 // ------------------------------------------------------------------ Clone
3643
3644 /***
3645 * Create and return a copy of this object, the URI-reference containing
3646 * the userinfo component. Notice that the whole URI-reference including
3647 * the userinfo component counld not be gotten as a <code>String</code>.
3648 * <p>
3649 * To copy the identical <code>URI</code> object including the userinfo
3650 * component, it should be used.
3651 *
3652 * @return a clone of this instance
3653 */
3654 public synchronized Object clone() {
3655
3656 URI instance = new URI();
3657
3658 instance._uri = _uri;
3659 instance._scheme = _scheme;
3660 instance._opaque = _opaque;
3661 instance._authority = _authority;
3662 instance._userinfo = _userinfo;
3663 instance._host = _host;
3664 instance._port = _port;
3665 instance._path = _path;
3666 instance._query = _query;
3667 instance._fragment = _fragment;
3668 // the charset to do escape encoding for this instance
3669 instance.protocolCharset = protocolCharset;
3670 // flags
3671 instance._is_hier_part = _is_hier_part;
3672 instance._is_opaque_part = _is_opaque_part;
3673 instance._is_net_path = _is_net_path;
3674 instance._is_abs_path = _is_abs_path;
3675 instance._is_rel_path = _is_rel_path;
3676 instance._is_reg_name = _is_reg_name;
3677 instance._is_server = _is_server;
3678 instance._is_hostname = _is_hostname;
3679 instance._is_IPv4address = _is_IPv4address;
3680 instance._is_IPv6reference = _is_IPv6reference;
3681
3682 return instance;
3683 }
3684
3685 // ------------------------------------------------------------ Get the URI
3686
3687 /***
3688 * It can be gotten the URI character sequence. It's raw-escaped.
3689 * For the purpose of the protocol to be transported, it will be useful.
3690 * <p>
3691 * It is clearly unwise to use a URL that contains a password which is
3692 * intended to be secret. In particular, the use of a password within
3693 * the 'userinfo' component of a URL is strongly disrecommended except
3694 * in those rare cases where the 'password' parameter is intended to be
3695 * public.
3696 * <p>
3697 * When you want to get each part of the userinfo, you need to use the
3698 * specific methods in the specific URL. It depends on the specific URL.
3699 *
3700 * @return the URI character sequence
3701 */
3702 public char[] getRawURI() {
3703 return _uri;
3704 }
3705
3706
3707 /***
3708 * It can be gotten the URI character sequence. It's escaped.
3709 * For the purpose of the protocol to be transported, it will be useful.
3710 *
3711 * @return the escaped URI string
3712 */
3713 public String getEscapedURI() {
3714 return (_uri == null) ? null : new String(_uri);
3715 }
3716
3717
3718 /***
3719 * It can be gotten the URI character sequence.
3720 *
3721 * @return the original URI string
3722 * @throws URIException incomplete trailing escape pattern or unsupported
3723 * character encoding
3724 * @see #decode
3725 */
3726 public String getURI() throws URIException {
3727 return (_uri == null) ? null : decode(_uri, getProtocolCharset());
3728 }
3729
3730
3731 /***
3732 * Get the URI reference character sequence.
3733 *
3734 * @return the URI reference character sequence
3735 */
3736 public char[] getRawURIReference() {
3737 if (_fragment == null) {
3738 return _uri;
3739 }
3740 if (_uri == null) {
3741 return _fragment;
3742 }
3743 // if _uri != null && _fragment != null
3744 String uriReference = new String(_uri) + "#" + new String(_fragment);
3745 return uriReference.toCharArray();
3746 }
3747
3748
3749 /***
3750 * Get the escaped URI reference string.
3751 *
3752 * @return the escaped URI reference string
3753 */
3754 public String getEscapedURIReference() {
3755 char[] uriReference = getRawURIReference();
3756 return (uriReference == null) ? null : new String(uriReference);
3757 }
3758
3759
3760 /***
3761 * Get the original URI reference string.
3762 *
3763 * @return the original URI reference string
3764 * @throws URIException If {@link #decode} fails.
3765 */
3766 public String getURIReference() throws URIException {
3767 char[] uriReference = getRawURIReference();
3768 return (uriReference == null) ? null : decode(uriReference,
3769 getProtocolCharset());
3770 }
3771
3772
3773 /***
3774 * Get the escaped URI string.
3775 * <p>
3776 * On the document, the URI-reference form is only used without the userinfo
3777 * component like http://jakarta.apache.org/ by the security reason.
3778 * But the URI-reference form with the userinfo component could be parsed.
3779 * <p>
3780 * In other words, this URI and any its subclasses must not expose the
3781 * URI-reference expression with the userinfo component like
3782 * http://user:password@hostport/restricted_zone.<br>
3783 * It means that the API client programmer should extract each user and
3784 * password to access manually. Probably it will be supported in the each
3785 * subclass, however, not a whole URI-reference expression.
3786 *
3787 * @return the escaped URI string
3788 * @see #clone()
3789 */
3790 public String toString() {
3791 return getEscapedURI();
3792 }
3793
3794
3795 // ------------------------------------------------------------ Inner class
3796
3797 /***
3798 * The charset-changed normal operation to represent to be required to
3799 * alert to user the fact the default charset is changed.
3800 */
3801 public static class DefaultCharsetChanged extends RuntimeException {
3802
3803 // ------------------------------------------------------- constructors
3804
3805 /***
3806 * The constructor with a reason string and its code arguments.
3807 *
3808 * @param reasonCode the reason code
3809 * @param reason the reason
3810 */
3811 public DefaultCharsetChanged(int reasonCode, String reason) {
3812 super(reason);
3813 this.reason = reason;
3814 this.reasonCode = reasonCode;
3815 }
3816
3817 // ---------------------------------------------------------- constants
3818
3819 /*** No specified reason code. */
3820 public static final int UNKNOWN = 0;
3821
3822 /*** Protocol charset changed. */
3823 public static final int PROTOCOL_CHARSET = 1;
3824
3825 /*** Document charset changed. */
3826 public static final int DOCUMENT_CHARSET = 2;
3827
3828 // ------------------------------------------------- instance variables
3829
3830 /*** The reason code. */
3831 private int reasonCode;
3832
3833 /*** The reason message. */
3834 private String reason;
3835
3836 // ------------------------------------------------------------ methods
3837
3838 /***
3839 * Get the reason code.
3840 *
3841 * @return the reason code
3842 */
3843 public int getReasonCode() {
3844 return reasonCode;
3845 }
3846
3847 /***
3848 * Get the reason message.
3849 *
3850 * @return the reason message
3851 */
3852 public String getReason() {
3853 return reason;
3854 }
3855
3856 }
3857
3858
3859 /***
3860 * A mapping to determine the (somewhat arbitrarily) preferred charset for a
3861 * given locale. Supports all locales recognized in JDK 1.1.
3862 * <p>
3863 * The distribution of this class is Servlets.com. It was originally
3864 * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3865 */
3866 public static class LocaleToCharsetMap {
3867
3868 /*** A mapping of language code to charset */
3869 private static final Hashtable LOCALE_TO_CHARSET_MAP;
3870 static {
3871 LOCALE_TO_CHARSET_MAP = new Hashtable();
3872 LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
3873 LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
3874 LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
3875 LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
3876 LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
3877 LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
3878 LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
3879 LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
3880 LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
3881 LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
3882 LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
3883 LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
3884 LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
3885 LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
3886 LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
3887 LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
3888 LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
3889 LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
3890 LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
3891 LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
3892 LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
3893 LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
3894 LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
3895 LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
3896 LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
3897 LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
3898 LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
3899 LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
3900 LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
3901 LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
3902 LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
3903 LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
3904 LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
3905 LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
3906 LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
3907 LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
3908 LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
3909 LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
3910 LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
3911 }
3912
3913 /***
3914 * Get the preferred charset for the given locale.
3915 *
3916 * @param locale the locale
3917 * @return the preferred charset or null if the locale is not
3918 * recognized.
3919 */
3920 public static String getCharset(Locale locale) {
3921 // try for an full name match (may include country)
3922 String charset =
3923 (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
3924 if (charset != null) {
3925 return charset;
3926 }
3927
3928 // if a full name didn't match, try just the language
3929 charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
3930 return charset; // may be null
3931 }
3932
3933 }
3934
3935 }
3936
This page was automatically generated by Maven